Удалить акценты / диакритические знаки в строке в JavaScript
Как удалить акцентированные символы из строки? Особенно в IE6 у меня было что-то вроде этого:
accentsTidy = function(s){
var r=s.toLowerCase();
r = r.replace(new RegExp(/\s/g),"");
r = r.replace(new RegExp(/[àáâãäå]/g),"a");
r = r.replace(new RegExp(/æ/g),"ae");
r = r.replace(new RegExp(/ç/g),"c");
r = r.replace(new RegExp(/[èéêë]/g),"e");
r = r.replace(new RegExp(/[ìíîï]/g),"i");
r = r.replace(new RegExp(/ñ/g),"n");
r = r.replace(new RegExp(/[òóôõö]/g),"o");
r = r.replace(new RegExp(/œ/g),"oe");
r = r.replace(new RegExp(/[ùúûü]/g),"u");
r = r.replace(new RegExp(/[ýÿ]/g),"y");
r = r.replace(new RegExp(/\W/g),"");
return r;
};
но IE6 доставляет мне неприятности, кажется, мне не нравится мое регулярное выражение.
36 ответов
С ES2015 / ES6 String.Prototype.Normalize (),
const str = "Crème Brulée"
str.normalize('NFD').replace(/[\u0300-\u036f]/g, "")
> 'Creme Brulee'
Здесь происходят две вещи:
normalize()
вNFD
Нормальная форма Unicode раскладывает объединенные графемы в комбинацию простых.è
изCrème
в конечном итоге выражается какe
+̀
,- Используя класс символов регулярного выражения для соответствия диапазону U+0300 → U+036F, теперь тривиально
g
в общем избавиться от диакритических знаков, которые стандарт Unicode удобно группировать как блок Unicode " Объединение диакритических знаков".
Смотрите комментарий для тестирования производительности.
В качестве альтернативы, если вы просто хотите сортировку
Intl.Collator имеет достаточную поддержку ~85% прямо сейчас, здесь также доступен полифил, но я его не тестировал.
const c = new Intl.Collator();
['creme brulee', 'crème brulée', 'crame brulai', 'crome brouillé',
'creme brulay', 'creme brulfé', 'creme bruléa'].sort(c.compare)
[ 'crame brulai','creme brulay','creme bruléa','creme brulee',
'crème brulée','creme brulfé','crome brouillé' ]
['creme brulee', 'crème brulée', 'crame brulai', 'crome brouillé'].sort((a,b) => a>b)
["crame brulai", "creme brulee", "crome brouillé", "crème brulée"]
Я немного изменил версию khel по одной причине: каждый анализ / замена регулярного выражения будет стоить O(n) операций, где n - количество символов в целевом тексте. Но регулярное выражение не совсем то, что нам нужно. Так:
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
var defaultDiacriticsRemovalMap = [
{'base':'A', 'letters':'\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F'},
{'base':'AA','letters':'\uA732'},
{'base':'AE','letters':'\u00C6\u01FC\u01E2'},
{'base':'AO','letters':'\uA734'},
{'base':'AU','letters':'\uA736'},
{'base':'AV','letters':'\uA738\uA73A'},
{'base':'AY','letters':'\uA73C'},
{'base':'B', 'letters':'\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181'},
{'base':'C', 'letters':'\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E'},
{'base':'D', 'letters':'\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0'},
{'base':'DZ','letters':'\u01F1\u01C4'},
{'base':'Dz','letters':'\u01F2\u01C5'},
{'base':'E', 'letters':'\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E'},
{'base':'F', 'letters':'\u0046\u24BB\uFF26\u1E1E\u0191\uA77B'},
{'base':'G', 'letters':'\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E'},
{'base':'H', 'letters':'\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D'},
{'base':'I', 'letters':'\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197'},
{'base':'J', 'letters':'\u004A\u24BF\uFF2A\u0134\u0248'},
{'base':'K', 'letters':'\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2'},
{'base':'L', 'letters':'\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780'},
{'base':'LJ','letters':'\u01C7'},
{'base':'Lj','letters':'\u01C8'},
{'base':'M', 'letters':'\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C'},
{'base':'N', 'letters':'\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4'},
{'base':'NJ','letters':'\u01CA'},
{'base':'Nj','letters':'\u01CB'},
{'base':'O', 'letters':'\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C'},
{'base':'OI','letters':'\u01A2'},
{'base':'OO','letters':'\uA74E'},
{'base':'OU','letters':'\u0222'},
{'base':'OE','letters':'\u008C\u0152'},
{'base':'oe','letters':'\u009C\u0153'},
{'base':'P', 'letters':'\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754'},
{'base':'Q', 'letters':'\u0051\u24C6\uFF31\uA756\uA758\u024A'},
{'base':'R', 'letters':'\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782'},
{'base':'S', 'letters':'\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784'},
{'base':'T', 'letters':'\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786'},
{'base':'TZ','letters':'\uA728'},
{'base':'U', 'letters':'\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244'},
{'base':'V', 'letters':'\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245'},
{'base':'VY','letters':'\uA760'},
{'base':'W', 'letters':'\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72'},
{'base':'X', 'letters':'\u0058\u24CD\uFF38\u1E8A\u1E8C'},
{'base':'Y', 'letters':'\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE'},
{'base':'Z', 'letters':'\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762'},
{'base':'a', 'letters':'\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250'},
{'base':'aa','letters':'\uA733'},
{'base':'ae','letters':'\u00E6\u01FD\u01E3'},
{'base':'ao','letters':'\uA735'},
{'base':'au','letters':'\uA737'},
{'base':'av','letters':'\uA739\uA73B'},
{'base':'ay','letters':'\uA73D'},
{'base':'b', 'letters':'\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253'},
{'base':'c', 'letters':'\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184'},
{'base':'d', 'letters':'\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A'},
{'base':'dz','letters':'\u01F3\u01C6'},
{'base':'e', 'letters':'\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD'},
{'base':'f', 'letters':'\u0066\u24D5\uFF46\u1E1F\u0192\uA77C'},
{'base':'g', 'letters':'\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F'},
{'base':'h', 'letters':'\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265'},
{'base':'hv','letters':'\u0195'},
{'base':'i', 'letters':'\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131'},
{'base':'j', 'letters':'\u006A\u24D9\uFF4A\u0135\u01F0\u0249'},
{'base':'k', 'letters':'\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3'},
{'base':'l', 'letters':'\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747'},
{'base':'lj','letters':'\u01C9'},
{'base':'m', 'letters':'\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F'},
{'base':'n', 'letters':'\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5'},
{'base':'nj','letters':'\u01CC'},
{'base':'o', 'letters':'\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275'},
{'base':'oi','letters':'\u01A3'},
{'base':'ou','letters':'\u0223'},
{'base':'oo','letters':'\uA74F'},
{'base':'p','letters':'\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755'},
{'base':'q','letters':'\u0071\u24E0\uFF51\u024B\uA757\uA759'},
{'base':'r','letters':'\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783'},
{'base':'s','letters':'\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B'},
{'base':'t','letters':'\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787'},
{'base':'tz','letters':'\uA729'},
{'base':'u','letters': '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289'},
{'base':'v','letters':'\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C'},
{'base':'vy','letters':'\uA761'},
{'base':'w','letters':'\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73'},
{'base':'x','letters':'\u0078\u24E7\uFF58\u1E8B\u1E8D'},
{'base':'y','letters':'\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF'},
{'base':'z','letters':'\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763'}
];
var diacriticsMap = {};
for (var i=0; i < defaultDiacriticsRemovalMap .length; i++){
var letters = defaultDiacriticsRemovalMap [i].letters;
for (var j=0; j < letters.length ; j++){
diacriticsMap[letters[j]] = defaultDiacriticsRemovalMap [i].base;
}
}
// "what?" version ... http://jsperf.com/diacritics/12
function removeDiacritics (str) {
return str.replace(/[^\u0000-\u007E]/g, function(a){
return diacriticsMap[a] || a;
});
}
var paragraph = "L'avantage d'utiliser le lorem ipsum est bien évidemment de pouvoir créer des maquettes ou de remplir un site internet de contenus qui présentent un rendu s'approchant un maximum du rendu final. \n Par défaut lorem ipsum ne contient pas d'accent ni de caractères spéciaux contrairement à la langue française qui en contient beaucoup. C'est sur ce critère que nous proposons une solution avec cet outil qui générant du faux-texte lorem ipsum mais avec en plus, des caractères spéciaux tel que les accents ou certains symboles utiles pour la langue française. \n L'utilisation du lorem standard est facile d’utilisation mais lorsque le futur client utilisera votre logiciel il se peut que certains caractères spéciaux ou qu'un accent ne soient pas codés correctement. \n Cette page a pour but donc de pouvoir perdre le moins de temps possible et donc de tester directement si tous les encodages de base de donnée ou des sites sont les bons de plus il permet de récuperer un code css avec le texte formaté !";
alert(removeDiacritics(paragraph));
Чтобы проверить свою теорию, я написал тест на http://jsperf.com/diacritics/12. Результаты:
Тестирование в Chrome 28.0.1500.95 32-битный на Windows 8 64-битный:
Использование регулярных выражений
4558 операций в секунду ±4,16%. На 37% медленнее
Стиль String Builder
7 308 операций в секунду ±4,88%. быстрый
Обновить
Тестирование в Chrome 33.0.1750 в Windows 8 64-разрядная версия:
Использование регулярных выражений
5260 ± 1,25% операций в секунду на 76% медленнее
Использование версии @skerit
22,138 ±2,12% операций / сек
Обновление - 19/03/2014
Добавление недостающих "О" диакритических знаков.
Обновление - 27/03/2014
Используя более быстрый способ пересечения строки, используя js - "Что?" Версия
Обновление - 14/05/2014
Сообщество вики
Более полная версия с чувствительной к регистру поддержкой, лигатуры и еще много чего. Оригинальный источник по адресу: http://web.archive.org/web/20120918093154/http://lehelk.com/2011/05/06/script-to-remove-diacritics/
var defaultDiacriticsRemovalMap = [
{'base':'A', 'letters':/[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g},
{'base':'AA','letters':/[\uA732]/g},
{'base':'AE','letters':/[\u00C6\u01FC\u01E2]/g},
{'base':'AO','letters':/[\uA734]/g},
{'base':'AU','letters':/[\uA736]/g},
{'base':'AV','letters':/[\uA738\uA73A]/g},
{'base':'AY','letters':/[\uA73C]/g},
{'base':'B', 'letters':/[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181]/g},
{'base':'C', 'letters':/[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E]/g},
{'base':'D', 'letters':/[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779]/g},
{'base':'DZ','letters':/[\u01F1\u01C4]/g},
{'base':'Dz','letters':/[\u01F2\u01C5]/g},
{'base':'E', 'letters':/[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E]/g},
{'base':'F', 'letters':/[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B]/g},
{'base':'G', 'letters':/[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E]/g},
{'base':'H', 'letters':/[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D]/g},
{'base':'I', 'letters':/[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197]/g},
{'base':'J', 'letters':/[\u004A\u24BF\uFF2A\u0134\u0248]/g},
{'base':'K', 'letters':/[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2]/g},
{'base':'L', 'letters':/[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780]/g},
{'base':'LJ','letters':/[\u01C7]/g},
{'base':'Lj','letters':/[\u01C8]/g},
{'base':'M', 'letters':/[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C]/g},
{'base':'N', 'letters':/[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4]/g},
{'base':'NJ','letters':/[\u01CA]/g},
{'base':'Nj','letters':/[\u01CB]/g},
{'base':'O', 'letters':/[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g},
{'base':'OI','letters':/[\u01A2]/g},
{'base':'OO','letters':/[\uA74E]/g},
{'base':'OU','letters':/[\u0222]/g},
{'base':'P', 'letters':/[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754]/g},
{'base':'Q', 'letters':/[\u0051\u24C6\uFF31\uA756\uA758\u024A]/g},
{'base':'R', 'letters':/[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782]/g},
{'base':'S', 'letters':/[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784]/g},
{'base':'T', 'letters':/[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786]/g},
{'base':'TZ','letters':/[\uA728]/g},
{'base':'U', 'letters':/[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244]/g},
{'base':'V', 'letters':/[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245]/g},
{'base':'VY','letters':/[\uA760]/g},
{'base':'W', 'letters':/[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72]/g},
{'base':'X', 'letters':/[\u0058\u24CD\uFF38\u1E8A\u1E8C]/g},
{'base':'Y', 'letters':/[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE]/g},
{'base':'Z', 'letters':/[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762]/g},
{'base':'a', 'letters':/[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250]/g},
{'base':'aa','letters':/[\uA733]/g},
{'base':'ae','letters':/[\u00E6\u01FD\u01E3]/g},
{'base':'ao','letters':/[\uA735]/g},
{'base':'au','letters':/[\uA737]/g},
{'base':'av','letters':/[\uA739\uA73B]/g},
{'base':'ay','letters':/[\uA73D]/g},
{'base':'b', 'letters':/[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253]/g},
{'base':'c', 'letters':/[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184]/g},
{'base':'d', 'letters':/[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A]/g},
{'base':'dz','letters':/[\u01F3\u01C6]/g},
{'base':'e', 'letters':/[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD]/g},
{'base':'f', 'letters':/[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C]/g},
{'base':'g', 'letters':/[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F]/g},
{'base':'h', 'letters':/[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265]/g},
{'base':'hv','letters':/[\u0195]/g},
{'base':'i', 'letters':/[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131]/g},
{'base':'j', 'letters':/[\u006A\u24D9\uFF4A\u0135\u01F0\u0249]/g},
{'base':'k', 'letters':/[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3]/g},
{'base':'l', 'letters':/[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747]/g},
{'base':'lj','letters':/[\u01C9]/g},
{'base':'m', 'letters':/[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F]/g},
{'base':'n', 'letters':/[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g},
{'base':'nj','letters':/[\u01CC]/g},
{'base':'o', 'letters':/[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g},
{'base':'oi','letters':/[\u01A3]/g},
{'base':'ou','letters':/[\u0223]/g},
{'base':'oo','letters':/[\uA74F]/g},
{'base':'p','letters':/[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755]/g},
{'base':'q','letters':/[\u0071\u24E0\uFF51\u024B\uA757\uA759]/g},
{'base':'r','letters':/[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783]/g},
{'base':'s','letters':/[\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B]/g},
{'base':'t','letters':/[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787]/g},
{'base':'tz','letters':/[\uA729]/g},
{'base':'u','letters':/[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289]/g},
{'base':'v','letters':/[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C]/g},
{'base':'vy','letters':/[\uA761]/g},
{'base':'w','letters':/[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73]/g},
{'base':'x','letters':/[\u0078\u24E7\uFF58\u1E8B\u1E8D]/g},
{'base':'y','letters':/[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF]/g},
{'base':'z','letters':/[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g}
];
var changes;
function removeDiacritics (str) {
if(!changes) {
changes = defaultDiacriticsRemovalMap;
}
for(var i=0; i<changes.length; i++) {
str = str.replace(changes[i].letters, changes[i].base);
}
return str;
}
Вот очень быстрый скрипт, основанный на стандарте Unicode, взятый отсюда: http://semplicewebsites.com/removing-accents-javascript
var Latinise={};Latinise.latin_map={"Á":"A","Ă":"A","Ắ":"A","Ặ":"A","Ằ":"A","Ẳ":"A","Ẵ":"A","Ǎ":"A","Â":"A","Ấ":"A","Ậ":"A","Ầ":"A","Ẩ":"A","Ẫ":"A","Ä":"A","Ǟ":"A","Ȧ":"A","Ǡ":"A","Ạ":"A","Ȁ":"A","À":"A","Ả":"A","Ȃ":"A","Ā":"A","Ą":"A","Å":"A","Ǻ":"A","Ḁ":"A","Ⱥ":"A","Ã":"A","Ꜳ":"AA","Æ":"AE","Ǽ":"AE","Ǣ":"AE","Ꜵ":"AO","Ꜷ":"AU","Ꜹ":"AV","Ꜻ":"AV","Ꜽ":"AY","Ḃ":"B","Ḅ":"B","Ɓ":"B","Ḇ":"B","Ƀ":"B","Ƃ":"B","Ć":"C","Č":"C","Ç":"C","Ḉ":"C","Ĉ":"C","Ċ":"C","Ƈ":"C","Ȼ":"C","Ď":"D","Ḑ":"D","Ḓ":"D","Ḋ":"D","Ḍ":"D","Ɗ":"D","Ḏ":"D","Dz":"D","Dž":"D","Đ":"D","Ƌ":"D","DZ":"DZ","DŽ":"DZ","É":"E","Ĕ":"E","Ě":"E","Ȩ":"E","Ḝ":"E","Ê":"E","Ế":"E","Ệ":"E","Ề":"E","Ể":"E","Ễ":"E","Ḙ":"E","Ë":"E","Ė":"E","Ẹ":"E","Ȅ":"E","È":"E","Ẻ":"E","Ȇ":"E","Ē":"E","Ḗ":"E","Ḕ":"E","Ę":"E","Ɇ":"E","Ẽ":"E","Ḛ":"E","Ꝫ":"ET","Ḟ":"F","Ƒ":"F","Ǵ":"G","Ğ":"G","Ǧ":"G","Ģ":"G","Ĝ":"G","Ġ":"G","Ɠ":"G","Ḡ":"G","Ǥ":"G","Ḫ":"H","Ȟ":"H","Ḩ":"H","Ĥ":"H","Ⱨ":"H","Ḧ":"H","Ḣ":"H","Ḥ":"H","Ħ":"H","Í":"I","Ĭ":"I","Ǐ":"I","Î":"I","Ï":"I","Ḯ":"I","İ":"I","Ị":"I","Ȉ":"I","Ì":"I","Ỉ":"I","Ȋ":"I","Ī":"I","Į":"I","Ɨ":"I","Ĩ":"I","Ḭ":"I","Ꝺ":"D","Ꝼ":"F","Ᵹ":"G","Ꞃ":"R","Ꞅ":"S","Ꞇ":"T","Ꝭ":"IS","Ĵ":"J","Ɉ":"J","Ḱ":"K","Ǩ":"K","Ķ":"K","Ⱪ":"K","Ꝃ":"K","Ḳ":"K","Ƙ":"K","Ḵ":"K","Ꝁ":"K","Ꝅ":"K","Ĺ":"L","Ƚ":"L","Ľ":"L","Ļ":"L","Ḽ":"L","Ḷ":"L","Ḹ":"L","Ⱡ":"L","Ꝉ":"L","Ḻ":"L","Ŀ":"L","Ɫ":"L","Lj":"L","Ł":"L","LJ":"LJ","Ḿ":"M","Ṁ":"M","Ṃ":"M","Ɱ":"M","Ń":"N","Ň":"N","Ņ":"N","Ṋ":"N","Ṅ":"N","Ṇ":"N","Ǹ":"N","Ɲ":"N","Ṉ":"N","Ƞ":"N","Nj":"N","Ñ":"N","NJ":"NJ","Ó":"O","Ŏ":"O","Ǒ":"O","Ô":"O","Ố":"O","Ộ":"O","Ồ":"O","Ổ":"O","Ỗ":"O","Ö":"O","Ȫ":"O","Ȯ":"O","Ȱ":"O","Ọ":"O","Ő":"O","Ȍ":"O","Ò":"O","Ỏ":"O","Ơ":"O","Ớ":"O","Ợ":"O","Ờ":"O","Ở":"O","Ỡ":"O","Ȏ":"O","Ꝋ":"O","Ꝍ":"O","Ō":"O","Ṓ":"O","Ṑ":"O","Ɵ":"O","Ǫ":"O","Ǭ":"O","Ø":"O","Ǿ":"O","Õ":"O","Ṍ":"O","Ṏ":"O","Ȭ":"O","Ƣ":"OI","Ꝏ":"OO","Ɛ":"E","Ɔ":"O","Ȣ":"OU","Ṕ":"P","Ṗ":"P","Ꝓ":"P","Ƥ":"P","Ꝕ":"P","Ᵽ":"P","Ꝑ":"P","Ꝙ":"Q","Ꝗ":"Q","Ŕ":"R","Ř":"R","Ŗ":"R","Ṙ":"R","Ṛ":"R","Ṝ":"R","Ȑ":"R","Ȓ":"R","Ṟ":"R","Ɍ":"R","Ɽ":"R","Ꜿ":"C","Ǝ":"E","Ś":"S","Ṥ":"S","Š":"S","Ṧ":"S","Ş":"S","Ŝ":"S","Ș":"S","Ṡ":"S","Ṣ":"S","Ṩ":"S","Ť":"T","Ţ":"T","Ṱ":"T","Ț":"T","Ⱦ":"T","Ṫ":"T","Ṭ":"T","Ƭ":"T","Ṯ":"T","Ʈ":"T","Ŧ":"T","Ɐ":"A","Ꞁ":"L","Ɯ":"M","Ʌ":"V","Ꜩ":"TZ","Ú":"U","Ŭ":"U","Ǔ":"U","Û":"U","Ṷ":"U","Ü":"U","Ǘ":"U","Ǚ":"U","Ǜ":"U","Ǖ":"U","Ṳ":"U","Ụ":"U","Ű":"U","Ȕ":"U","Ù":"U","Ủ":"U","Ư":"U","Ứ":"U","Ự":"U","Ừ":"U","Ử":"U","Ữ":"U","Ȗ":"U","Ū":"U","Ṻ":"U","Ų":"U","Ů":"U","Ũ":"U","Ṹ":"U","Ṵ":"U","Ꝟ":"V","Ṿ":"V","Ʋ":"V","Ṽ":"V","Ꝡ":"VY","Ẃ":"W","Ŵ":"W","Ẅ":"W","Ẇ":"W","Ẉ":"W","Ẁ":"W","Ⱳ":"W","Ẍ":"X","Ẋ":"X","Ý":"Y","Ŷ":"Y","Ÿ":"Y","Ẏ":"Y","Ỵ":"Y","Ỳ":"Y","Ƴ":"Y","Ỷ":"Y","Ỿ":"Y","Ȳ":"Y","Ɏ":"Y","Ỹ":"Y","Ź":"Z","Ž":"Z","Ẑ":"Z","Ⱬ":"Z","Ż":"Z","Ẓ":"Z","Ȥ":"Z","Ẕ":"Z","Ƶ":"Z","IJ":"IJ","Œ":"OE","ᴀ":"A","ᴁ":"AE","ʙ":"B","ᴃ":"B","ᴄ":"C","ᴅ":"D","ᴇ":"E","ꜰ":"F","ɢ":"G","ʛ":"G","ʜ":"H","ɪ":"I","ʁ":"R","ᴊ":"J","ᴋ":"K","ʟ":"L","ᴌ":"L","ᴍ":"M","ɴ":"N","ᴏ":"O","ɶ":"OE","ᴐ":"O","ᴕ":"OU","ᴘ":"P","ʀ":"R","ᴎ":"N","ᴙ":"R","ꜱ":"S","ᴛ":"T","ⱻ":"E","ᴚ":"R","ᴜ":"U","ᴠ":"V","ᴡ":"W","ʏ":"Y","ᴢ":"Z","á":"a","ă":"a","ắ":"a","ặ":"a","ằ":"a","ẳ":"a","ẵ":"a","ǎ":"a","â":"a","ấ":"a","ậ":"a","ầ":"a","ẩ":"a","ẫ":"a","ä":"a","ǟ":"a","ȧ":"a","ǡ":"a","ạ":"a","ȁ":"a","à":"a","ả":"a","ȃ":"a","ā":"a","ą":"a","ᶏ":"a","ẚ":"a","å":"a","ǻ":"a","ḁ":"a","ⱥ":"a","ã":"a","ꜳ":"aa","æ":"ae","ǽ":"ae","ǣ":"ae","ꜵ":"ao","ꜷ":"au","ꜹ":"av","ꜻ":"av","ꜽ":"ay","ḃ":"b","ḅ":"b","ɓ":"b","ḇ":"b","ᵬ":"b","ᶀ":"b","ƀ":"b","ƃ":"b","ɵ":"o","ć":"c","č":"c","ç":"c","ḉ":"c","ĉ":"c","ɕ":"c","ċ":"c","ƈ":"c","ȼ":"c","ď":"d","ḑ":"d","ḓ":"d","ȡ":"d","ḋ":"d","ḍ":"d","ɗ":"d","ᶑ":"d","ḏ":"d","ᵭ":"d","ᶁ":"d","đ":"d","ɖ":"d","ƌ":"d","ı":"i","ȷ":"j","ɟ":"j","ʄ":"j","dz":"dz","dž":"dz","é":"e","ĕ":"e","ě":"e","ȩ":"e","ḝ":"e","ê":"e","ế":"e","ệ":"e","ề":"e","ể":"e","ễ":"e","ḙ":"e","ë":"e","ė":"e","ẹ":"e","ȅ":"e","è":"e","ẻ":"e","ȇ":"e","ē":"e","ḗ":"e","ḕ":"e","ⱸ":"e","ę":"e","ᶒ":"e","ɇ":"e","ẽ":"e","ḛ":"e","ꝫ":"et","ḟ":"f","ƒ":"f","ᵮ":"f","ᶂ":"f","ǵ":"g","ğ":"g","ǧ":"g","ģ":"g","ĝ":"g","ġ":"g","ɠ":"g","ḡ":"g","ᶃ":"g","ǥ":"g","ḫ":"h","ȟ":"h","ḩ":"h","ĥ":"h","ⱨ":"h","ḧ":"h","ḣ":"h","ḥ":"h","ɦ":"h","ẖ":"h","ħ":"h","ƕ":"hv","í":"i","ĭ":"i","ǐ":"i","î":"i","ï":"i","ḯ":"i","ị":"i","ȉ":"i","ì":"i","ỉ":"i","ȋ":"i","ī":"i","į":"i","ᶖ":"i","ɨ":"i","ĩ":"i","ḭ":"i","ꝺ":"d","ꝼ":"f","ᵹ":"g","ꞃ":"r","ꞅ":"s","ꞇ":"t","ꝭ":"is","ǰ":"j","ĵ":"j","ʝ":"j","ɉ":"j","ḱ":"k","ǩ":"k","ķ":"k","ⱪ":"k","ꝃ":"k","ḳ":"k","ƙ":"k","ḵ":"k","ᶄ":"k","ꝁ":"k","ꝅ":"k","ĺ":"l","ƚ":"l","ɬ":"l","ľ":"l","ļ":"l","ḽ":"l","ȴ":"l","ḷ":"l","ḹ":"l","ⱡ":"l","ꝉ":"l","ḻ":"l","ŀ":"l","ɫ":"l","ᶅ":"l","ɭ":"l","ł":"l","lj":"lj","ſ":"s","ẜ":"s","ẛ":"s","ẝ":"s","ḿ":"m","ṁ":"m","ṃ":"m","ɱ":"m","ᵯ":"m","ᶆ":"m","ń":"n","ň":"n","ņ":"n","ṋ":"n","ȵ":"n","ṅ":"n","ṇ":"n","ǹ":"n","ɲ":"n","ṉ":"n","ƞ":"n","ᵰ":"n","ᶇ":"n","ɳ":"n","ñ":"n","nj":"nj","ó":"o","ŏ":"o","ǒ":"o","ô":"o","ố":"o","ộ":"o","ồ":"o","ổ":"o","ỗ":"o","ö":"o","ȫ":"o","ȯ":"o","ȱ":"o","ọ":"o","ő":"o","ȍ":"o","ò":"o","ỏ":"o","ơ":"o","ớ":"o","ợ":"o","ờ":"o","ở":"o","ỡ":"o","ȏ":"o","ꝋ":"o","ꝍ":"o","ⱺ":"o","ō":"o","ṓ":"o","ṑ":"o","ǫ":"o","ǭ":"o","ø":"o","ǿ":"o","õ":"o","ṍ":"o","ṏ":"o","ȭ":"o","ƣ":"oi","ꝏ":"oo","ɛ":"e","ᶓ":"e","ɔ":"o","ᶗ":"o","ȣ":"ou","ṕ":"p","ṗ":"p","ꝓ":"p","ƥ":"p","ᵱ":"p","ᶈ":"p","ꝕ":"p","ᵽ":"p","ꝑ":"p","ꝙ":"q","ʠ":"q","ɋ":"q","ꝗ":"q","ŕ":"r","ř":"r","ŗ":"r","ṙ":"r","ṛ":"r","ṝ":"r","ȑ":"r","ɾ":"r","ᵳ":"r","ȓ":"r","ṟ":"r","ɼ":"r","ᵲ":"r","ᶉ":"r","ɍ":"r","ɽ":"r","ↄ":"c","ꜿ":"c","ɘ":"e","ɿ":"r","ś":"s","ṥ":"s","š":"s","ṧ":"s","ş":"s","ŝ":"s","ș":"s","ṡ":"s","ṣ":"s","ṩ":"s","ʂ":"s","ᵴ":"s","ᶊ":"s","ȿ":"s","ɡ":"g","ᴑ":"o","ᴓ":"o","ᴝ":"u","ť":"t","ţ":"t","ṱ":"t","ț":"t","ȶ":"t","ẗ":"t","ⱦ":"t","ṫ":"t","ṭ":"t","ƭ":"t","ṯ":"t","ᵵ":"t","ƫ":"t","ʈ":"t","ŧ":"t","ᵺ":"th","ɐ":"a","ᴂ":"ae","ǝ":"e","ᵷ":"g","ɥ":"h","ʮ":"h","ʯ":"h","ᴉ":"i","ʞ":"k","ꞁ":"l","ɯ":"m","ɰ":"m","ᴔ":"oe","ɹ":"r","ɻ":"r","ɺ":"r","ⱹ":"r","ʇ":"t","ʌ":"v","ʍ":"w","ʎ":"y","ꜩ":"tz","ú":"u","ŭ":"u","ǔ":"u","û":"u","ṷ":"u","ü":"u","ǘ":"u","ǚ":"u","ǜ":"u","ǖ":"u","ṳ":"u","ụ":"u","ű":"u","ȕ":"u","ù":"u","ủ":"u","ư":"u","ứ":"u","ự":"u","ừ":"u","ử":"u","ữ":"u","ȗ":"u","ū":"u","ṻ":"u","ų":"u","ᶙ":"u","ů":"u","ũ":"u","ṹ":"u","ṵ":"u","ᵫ":"ue","ꝸ":"um","ⱴ":"v","ꝟ":"v","ṿ":"v","ʋ":"v","ᶌ":"v","ⱱ":"v","ṽ":"v","ꝡ":"vy","ẃ":"w","ŵ":"w","ẅ":"w","ẇ":"w","ẉ":"w","ẁ":"w","ⱳ":"w","ẘ":"w","ẍ":"x","ẋ":"x","ᶍ":"x","ý":"y","ŷ":"y","ÿ":"y","ẏ":"y","ỵ":"y","ỳ":"y","ƴ":"y","ỷ":"y","ỿ":"y","ȳ":"y","ẙ":"y","ɏ":"y","ỹ":"y","ź":"z","ž":"z","ẑ":"z","ʑ":"z","ⱬ":"z","ż":"z","ẓ":"z","ȥ":"z","ẕ":"z","ᵶ":"z","ᶎ":"z","ʐ":"z","ƶ":"z","ɀ":"z","ff":"ff","ffi":"ffi","ffl":"ffl","fi":"fi","fl":"fl","ij":"ij","œ":"oe","st":"st","ₐ":"a","ₑ":"e","ᵢ":"i","ⱼ":"j","ₒ":"o","ᵣ":"r","ᵤ":"u","ᵥ":"v","ₓ":"x"};
String.prototype.latinise=function(){return this.replace(/[^A-Za-z0-9\[\] ]/g,function(a){return Latinise.latin_map[a]||a})};
String.prototype.latinize=String.prototype.latinise;
String.prototype.isLatin=function(){return this==this.latinise()}
Некоторые примеры:
> "Piqué".latinize();
"Pique"
> "Piqué".isLatin();
false
> "Pique".isLatin();
true
> "Piqué".latinise().isLatin();
true
Чтобы вышеуказанное latin_map не было повреждено копированием / вставкой или другими преобразованиями, используйте эту строку в кодировке base64, заменив первую строку выше:
var base64map="eyLDgSI6IkEiLCLEgiI6IkEiLCLhuq4iOiJBIiwi4bq2IjoiQSIsIuG6sCI6IkEiLCLhurIiOiJBIiwi4bq0IjoiQSIsIseNIjoiQSIsIsOCIjoiQSIsIuG6pCI6IkEiLCLhuqwiOiJBIiwi4bqmIjoiQSIsIuG6qCI6IkEiLCLhuqoiOiJBIiwiw4QiOiJBIiwix54iOiJBIiwiyKYiOiJBIiwix6AiOiJBIiwi4bqgIjoiQSIsIsiAIjoiQSIsIsOAIjoiQSIsIuG6oiI6IkEiLCLIgiI6IkEiLCLEgCI6IkEiLCLEhCI6IkEiLCLDhSI6IkEiLCLHuiI6IkEiLCLhuIAiOiJBIiwiyLoiOiJBIiwiw4MiOiJBIiwi6pyyIjoiQUEiLCLDhiI6IkFFIiwix7wiOiJBRSIsIseiIjoiQUUiLCLqnLQiOiJBTyIsIuqctiI6IkFVIiwi6py4IjoiQVYiLCLqnLoiOiJBViIsIuqcvCI6IkFZIiwi4biCIjoiQiIsIuG4hCI6IkIiLCLGgSI6IkIiLCLhuIYiOiJCIiwiyYMiOiJCIiwixoIiOiJCIiwixIYiOiJDIiwixIwiOiJDIiwiw4ciOiJDIiwi4biIIjoiQyIsIsSIIjoiQyIsIsSKIjoiQyIsIsaHIjoiQyIsIsi7IjoiQyIsIsSOIjoiRCIsIuG4kCI6IkQiLCLhuJIiOiJEIiwi4biKIjoiRCIsIuG4jCI6IkQiLCLGiiI6IkQiLCLhuI4iOiJEIiwix7IiOiJEIiwix4UiOiJEIiwixJAiOiJEIiwixosiOiJEIiwix7EiOiJEWiIsIseEIjoiRFoiLCLDiSI6IkUiLCLElCI6IkUiLCLEmiI6IkUiLCLIqCI6IkUiLCLhuJwiOiJFIiwiw4oiOiJFIiwi4bq+IjoiRSIsIuG7hiI6IkUiLCLhu4AiOiJFIiwi4buCIjoiRSIsIuG7hCI6IkUiLCLhuJgiOiJFIiwiw4siOiJFIiwixJYiOiJFIiwi4bq4IjoiRSIsIsiEIjoiRSIsIsOIIjoiRSIsIuG6uiI6IkUiLCLIhiI6IkUiLCLEkiI6IkUiLCLhuJYiOiJFIiwi4biUIjoiRSIsIsSYIjoiRSIsIsmGIjoiRSIsIuG6vCI6IkUiLCLhuJoiOiJFIiwi6p2qIjoiRVQiLCLhuJ4iOiJGIiwixpEiOiJGIiwix7QiOiJHIiwixJ4iOiJHIiwix6YiOiJHIiwixKIiOiJHIiwixJwiOiJHIiwixKAiOiJHIiwixpMiOiJHIiwi4bigIjoiRyIsIsekIjoiRyIsIuG4qiI6IkgiLCLIniI6IkgiLCLhuKgiOiJIIiwixKQiOiJIIiwi4rGnIjoiSCIsIuG4piI6IkgiLCLhuKIiOiJIIiwi4bikIjoiSCIsIsSmIjoiSCIsIsONIjoiSSIsIsSsIjoiSSIsIsePIjoiSSIsIsOOIjoiSSIsIsOPIjoiSSIsIuG4riI6IkkiLCLEsCI6IkkiLCLhu4oiOiJJIiwiyIgiOiJJIiwiw4wiOiJJIiwi4buIIjoiSSIsIsiKIjoiSSIsIsSqIjoiSSIsIsSuIjoiSSIsIsaXIjoiSSIsIsSoIjoiSSIsIuG4rCI6IkkiLCLqnbkiOiJEIiwi6p27IjoiRiIsIuqdvSI6IkciLCLqnoIiOiJSIiwi6p6EIjoiUyIsIuqehiI6IlQiLCLqnawiOiJJUyIsIsS0IjoiSiIsIsmIIjoiSiIsIuG4sCI6IksiLCLHqCI6IksiLCLEtiI6IksiLCLisakiOiJLIiwi6p2CIjoiSyIsIuG4siI6IksiLCLGmCI6IksiLCLhuLQiOiJLIiwi6p2AIjoiSyIsIuqdhCI6IksiLCLEuSI6IkwiLCLIvSI6IkwiLCLEvSI6IkwiLCLEuyI6IkwiLCLhuLwiOiJMIiwi4bi2IjoiTCIsIuG4uCI6IkwiLCLisaAiOiJMIiwi6p2IIjoiTCIsIuG4uiI6IkwiLCLEvyI6IkwiLCLisaIiOiJMIiwix4giOiJMIiwixYEiOiJMIiwix4ciOiJMSiIsIuG4viI6Ik0iLCLhuYAiOiJNIiwi4bmCIjoiTSIsIuKxriI6Ik0iLCLFgyI6Ik4iLCLFhyI6Ik4iLCLFhSI6Ik4iLCLhuYoiOiJOIiwi4bmEIjoiTiIsIuG5hiI6Ik4iLCLHuCI6Ik4iLCLGnSI6Ik4iLCLhuYgiOiJOIiwiyKAiOiJOIiwix4siOiJOIiwiw5EiOiJOIiwix4oiOiJOSiIsIsOTIjoiTyIsIsWOIjoiTyIsIseRIjoiTyIsIsOUIjoiTyIsIuG7kCI6Ik8iLCLhu5giOiJPIiwi4buSIjoiTyIsIuG7lCI6Ik8iLCLhu5YiOiJPIiwiw5YiOiJPIiwiyKoiOiJPIiwiyK4iOiJPIiwiyLAiOiJPIiwi4buMIjoiTyIsIsWQIjoiTyIsIsiMIjoiTyIsIsOSIjoiTyIsIuG7jiI6Ik8iLCLGoCI6Ik8iLCLhu5oiOiJPIiwi4buiIjoiTyIsIuG7nCI6Ik8iLCLhu54iOiJPIiwi4bugIjoiTyIsIsiOIjoiTyIsIuqdiiI6Ik8iLCLqnYwiOiJPIiwixYwiOiJPIiwi4bmSIjoiTyIsIuG5kCI6Ik8iLCLGnyI6Ik8iLCLHqiI6Ik8iLCLHrCI6Ik8iLCLDmCI6Ik8iLCLHviI6Ik8iLCLDlSI6Ik8iLCLhuYwiOiJPIiwi4bmOIjoiTyIsIsisIjoiTyIsIsaiIjoiT0kiLCLqnY4iOiJPTyIsIsaQIjoiRSIsIsaGIjoiTyIsIsiiIjoiT1UiLCLhuZQiOiJQIiwi4bmWIjoiUCIsIuqdkiI6IlAiLCLGpCI6IlAiLCLqnZQiOiJQIiwi4rGjIjoiUCIsIuqdkCI6IlAiLCLqnZgiOiJRIiwi6p2WIjoiUSIsIsWUIjoiUiIsIsWYIjoiUiIsIsWWIjoiUiIsIuG5mCI6IlIiLCLhuZoiOiJSIiwi4bmcIjoiUiIsIsiQIjoiUiIsIsiSIjoiUiIsIuG5niI6IlIiLCLJjCI6IlIiLCLisaQiOiJSIiwi6py+IjoiQyIsIsaOIjoiRSIsIsWaIjoiUyIsIuG5pCI6IlMiLCLFoCI6IlMiLCLhuaYiOiJTIiwixZ4iOiJTIiwixZwiOiJTIiwiyJgiOiJTIiwi4bmgIjoiUyIsIuG5oiI6IlMiLCLhuagiOiJTIiwixaQiOiJUIiwixaIiOiJUIiwi4bmwIjoiVCIsIsiaIjoiVCIsIsi+IjoiVCIsIuG5qiI6IlQiLCLhuawiOiJUIiwixqwiOiJUIiwi4bmuIjoiVCIsIsauIjoiVCIsIsWmIjoiVCIsIuKxryI6IkEiLCLqnoAiOiJMIiwixpwiOiJNIiwiyYUiOiJWIiwi6pyoIjoiVFoiLCLDmiI6IlUiLCLFrCI6IlUiLCLHkyI6IlUiLCLDmyI6IlUiLCLhubYiOiJVIiwiw5wiOiJVIiwix5ciOiJVIiwix5kiOiJVIiwix5siOiJVIiwix5UiOiJVIiwi4bmyIjoiVSIsIuG7pCI6IlUiLCLFsCI6IlUiLCLIlCI6IlUiLCLDmSI6IlUiLCLhu6YiOiJVIiwixq8iOiJVIiwi4buoIjoiVSIsIuG7sCI6IlUiLCLhu6oiOiJVIiwi4busIjoiVSIsIuG7riI6IlUiLCLIliI6IlUiLCLFqiI6IlUiLCLhuboiOiJVIiwixbIiOiJVIiwixa4iOiJVIiwixagiOiJVIiwi4bm4IjoiVSIsIuG5tCI6IlUiLCLqnZ4iOiJWIiwi4bm+IjoiViIsIsayIjoiViIsIuG5vCI6IlYiLCLqnaAiOiJWWSIsIuG6giI6IlciLCLFtCI6IlciLCLhuoQiOiJXIiwi4bqGIjoiVyIsIuG6iCI6IlciLCLhuoAiOiJXIiwi4rGyIjoiVyIsIuG6jCI6IlgiLCLhuooiOiJYIiwiw50iOiJZIiwixbYiOiJZIiwixbgiOiJZIiwi4bqOIjoiWSIsIuG7tCI6IlkiLCLhu7IiOiJZIiwixrMiOiJZIiwi4bu2IjoiWSIsIuG7viI6IlkiLCLIsiI6IlkiLCLJjiI6IlkiLCLhu7giOiJZIiwixbkiOiJaIiwixb0iOiJaIiwi4bqQIjoiWiIsIuKxqyI6IloiLCLFuyI6IloiLCLhupIiOiJaIiwiyKQiOiJaIiwi4bqUIjoiWiIsIsa1IjoiWiIsIsSyIjoiSUoiLCLFkiI6Ik9FIiwi4bSAIjoiQSIsIuG0gSI6IkFFIiwiypkiOiJCIiwi4bSDIjoiQiIsIuG0hCI6IkMiLCLhtIUiOiJEIiwi4bSHIjoiRSIsIuqcsCI6IkYiLCLJoiI6IkciLCLKmyI6IkciLCLKnCI6IkgiLCLJqiI6IkkiLCLKgSI6IlIiLCLhtIoiOiJKIiwi4bSLIjoiSyIsIsqfIjoiTCIsIuG0jCI6IkwiLCLhtI0iOiJNIiwiybQiOiJOIiwi4bSPIjoiTyIsIsm2IjoiT0UiLCLhtJAiOiJPIiwi4bSVIjoiT1UiLCLhtJgiOiJQIiwiyoAiOiJSIiwi4bSOIjoiTiIsIuG0mSI6IlIiLCLqnLEiOiJTIiwi4bSbIjoiVCIsIuKxuyI6IkUiLCLhtJoiOiJSIiwi4bScIjoiVSIsIuG0oCI6IlYiLCLhtKEiOiJXIiwiyo8iOiJZIiwi4bSiIjoiWiIsIsOhIjoiYSIsIsSDIjoiYSIsIuG6ryI6ImEiLCLhurciOiJhIiwi4bqxIjoiYSIsIuG6syI6ImEiLCLhurUiOiJhIiwix44iOiJhIiwiw6IiOiJhIiwi4bqlIjoiYSIsIuG6rSI6ImEiLCLhuqciOiJhIiwi4bqpIjoiYSIsIuG6qyI6ImEiLCLDpCI6ImEiLCLHnyI6ImEiLCLIpyI6ImEiLCLHoSI6ImEiLCLhuqEiOiJhIiwiyIEiOiJhIiwiw6AiOiJhIiwi4bqjIjoiYSIsIsiDIjoiYSIsIsSBIjoiYSIsIsSFIjoiYSIsIuG2jyI6ImEiLCLhupoiOiJhIiwiw6UiOiJhIiwix7siOiJhIiwi4biBIjoiYSIsIuKxpSI6ImEiLCLDoyI6ImEiLCLqnLMiOiJhYSIsIsOmIjoiYWUiLCLHvSI6ImFlIiwix6MiOiJhZSIsIuqctSI6ImFvIiwi6py3IjoiYXUiLCLqnLkiOiJhdiIsIuqcuyI6ImF2Iiwi6py9IjoiYXkiLCLhuIMiOiJiIiwi4biFIjoiYiIsIsmTIjoiYiIsIuG4hyI6ImIiLCLhtawiOiJiIiwi4baAIjoiYiIsIsaAIjoiYiIsIsaDIjoiYiIsIsm1IjoibyIsIsSHIjoiYyIsIsSNIjoiYyIsIsOnIjoiYyIsIuG4iSI6ImMiLCLEiSI6ImMiLCLJlSI6ImMiLCLEiyI6ImMiLCLGiCI6ImMiLCLIvCI6ImMiLCLEjyI6ImQiLCLhuJEiOiJkIiwi4biTIjoiZCIsIsihIjoiZCIsIuG4iyI6ImQiLCLhuI0iOiJkIiwiyZciOiJkIiwi4baRIjoiZCIsIuG4jyI6ImQiLCLhta0iOiJkIiwi4baBIjoiZCIsIsSRIjoiZCIsIsmWIjoiZCIsIsaMIjoiZCIsIsSxIjoiaSIsIsi3IjoiaiIsIsmfIjoiaiIsIsqEIjoiaiIsIsezIjoiZHoiLCLHhiI6ImR6Iiwiw6kiOiJlIiwixJUiOiJlIiwixJsiOiJlIiwiyKkiOiJlIiwi4bidIjoiZSIsIsOqIjoiZSIsIuG6vyI6ImUiLCLhu4ciOiJlIiwi4buBIjoiZSIsIuG7gyI6ImUiLCLhu4UiOiJlIiwi4biZIjoiZSIsIsOrIjoiZSIsIsSXIjoiZSIsIuG6uSI6ImUiLCLIhSI6ImUiLCLDqCI6ImUiLCLhursiOiJlIiwiyIciOiJlIiwixJMiOiJlIiwi4biXIjoiZSIsIuG4lSI6ImUiLCLisbgiOiJlIiwixJkiOiJlIiwi4baSIjoiZSIsIsmHIjoiZSIsIuG6vSI6ImUiLCLhuJsiOiJlIiwi6p2rIjoiZXQiLCLhuJ8iOiJmIiwixpIiOiJmIiwi4bWuIjoiZiIsIuG2giI6ImYiLCLHtSI6ImciLCLEnyI6ImciLCLHpyI6ImciLCLEoyI6ImciLCLEnSI6ImciLCLEoSI6ImciLCLJoCI6ImciLCLhuKEiOiJnIiwi4baDIjoiZyIsIselIjoiZyIsIuG4qyI6ImgiLCLInyI6ImgiLCLhuKkiOiJoIiwixKUiOiJoIiwi4rGoIjoiaCIsIuG4pyI6ImgiLCLhuKMiOiJoIiwi4bilIjoiaCIsIsmmIjoiaCIsIuG6liI6ImgiLCLEpyI6ImgiLCLGlSI6Imh2Iiwiw60iOiJpIiwixK0iOiJpIiwix5AiOiJpIiwiw64iOiJpIiwiw68iOiJpIiwi4bivIjoiaSIsIuG7iyI6ImkiLCLIiSI6ImkiLCLDrCI6ImkiLCLhu4kiOiJpIiwiyIsiOiJpIiwixKsiOiJpIiwixK8iOiJpIiwi4baWIjoiaSIsIsmoIjoiaSIsIsSpIjoiaSIsIuG4rSI6ImkiLCLqnboiOiJkIiwi6p28IjoiZiIsIuG1uSI6ImciLCLqnoMiOiJyIiwi6p6FIjoicyIsIuqehyI6InQiLCLqna0iOiJpcyIsIsewIjoiaiIsIsS1IjoiaiIsIsqdIjoiaiIsIsmJIjoiaiIsIuG4sSI6ImsiLCLHqSI6ImsiLCLEtyI6ImsiLCLisaoiOiJrIiwi6p2DIjoiayIsIuG4syI6ImsiLCLGmSI6ImsiLCLhuLUiOiJrIiwi4baEIjoiayIsIuqdgSI6ImsiLCLqnYUiOiJrIiwixLoiOiJsIiwixpoiOiJsIiwiyawiOiJsIiwixL4iOiJsIiwixLwiOiJsIiwi4bi9IjoibCIsIsi0IjoibCIsIuG4tyI6ImwiLCLhuLkiOiJsIiwi4rGhIjoibCIsIuqdiSI6ImwiLCLhuLsiOiJsIiwixYAiOiJsIiwiyasiOiJsIiwi4baFIjoibCIsIsmtIjoibCIsIsWCIjoibCIsIseJIjoibGoiLCLFvyI6InMiLCLhupwiOiJzIiwi4bqbIjoicyIsIuG6nSI6InMiLCLhuL8iOiJtIiwi4bmBIjoibSIsIuG5gyI6Im0iLCLJsSI6Im0iLCLhta8iOiJtIiwi4baGIjoibSIsIsWEIjoibiIsIsWIIjoibiIsIsWGIjoibiIsIuG5iyI6Im4iLCLItSI6Im4iLCLhuYUiOiJuIiwi4bmHIjoibiIsIse5IjoibiIsIsmyIjoibiIsIuG5iSI6Im4iLCLGniI6Im4iLCLhtbAiOiJuIiwi4baHIjoibiIsIsmzIjoibiIsIsOxIjoibiIsIseMIjoibmoiLCLDsyI6Im8iLCLFjyI6Im8iLCLHkiI6Im8iLCLDtCI6Im8iLCLhu5EiOiJvIiwi4buZIjoibyIsIuG7kyI6Im8iLCLhu5UiOiJvIiwi4buXIjoibyIsIsO2IjoibyIsIsirIjoibyIsIsivIjoibyIsIsixIjoibyIsIuG7jSI6Im8iLCLFkSI6Im8iLCLIjSI6Im8iLCLDsiI6Im8iLCLhu48iOiJvIiwixqEiOiJvIiwi4bubIjoibyIsIuG7oyI6Im8iLCLhu50iOiJvIiwi4bufIjoibyIsIuG7oSI6Im8iLCLIjyI6Im8iLCLqnYsiOiJvIiwi6p2NIjoibyIsIuKxuiI6Im8iLCLFjSI6Im8iLCLhuZMiOiJvIiwi4bmRIjoibyIsIserIjoibyIsIsetIjoibyIsIsO4IjoibyIsIse/IjoibyIsIsO1IjoibyIsIuG5jSI6Im8iLCLhuY8iOiJvIiwiyK0iOiJvIiwixqMiOiJvaSIsIuqdjyI6Im9vIiwiyZsiOiJlIiwi4baTIjoiZSIsIsmUIjoibyIsIuG2lyI6Im8iLCLIoyI6Im91Iiwi4bmVIjoicCIsIuG5lyI6InAiLCLqnZMiOiJwIiwixqUiOiJwIiwi4bWxIjoicCIsIuG2iCI6InAiLCLqnZUiOiJwIiwi4bW9IjoicCIsIuqdkSI6InAiLCLqnZkiOiJxIiwiyqAiOiJxIiwiyYsiOiJxIiwi6p2XIjoicSIsIsWVIjoiciIsIsWZIjoiciIsIsWXIjoiciIsIuG5mSI6InIiLCLhuZsiOiJyIiwi4bmdIjoiciIsIsiRIjoiciIsIsm+IjoiciIsIuG1syI6InIiLCLIkyI6InIiLCLhuZ8iOiJyIiwiybwiOiJyIiwi4bWyIjoiciIsIuG2iSI6InIiLCLJjSI6InIiLCLJvSI6InIiLCLihoQiOiJjIiwi6py/IjoiYyIsIsmYIjoiZSIsIsm/IjoiciIsIsWbIjoicyIsIuG5pSI6InMiLCLFoSI6InMiLCLhuaciOiJzIiwixZ8iOiJzIiwixZ0iOiJzIiwiyJkiOiJzIiwi4bmhIjoicyIsIuG5oyI6InMiLCLhuakiOiJzIiwiyoIiOiJzIiwi4bW0IjoicyIsIuG2iiI6InMiLCLIvyI6InMiLCLJoSI6ImciLCLhtJEiOiJvIiwi4bSTIjoibyIsIuG0nSI6InUiLCLFpSI6InQiLCLFoyI6InQiLCLhubEiOiJ0IiwiyJsiOiJ0IiwiyLYiOiJ0Iiwi4bqXIjoidCIsIuKxpiI6InQiLCLhuasiOiJ0Iiwi4bmtIjoidCIsIsatIjoidCIsIuG5ryI6InQiLCLhtbUiOiJ0IiwixqsiOiJ0IiwiyogiOiJ0IiwixaciOiJ0Iiwi4bW6IjoidGgiLCLJkCI6ImEiLCLhtIIiOiJhZSIsIsedIjoiZSIsIuG1tyI6ImciLCLJpSI6ImgiLCLKriI6ImgiLCLKryI6ImgiLCLhtIkiOiJpIiwiyp4iOiJrIiwi6p6BIjoibCIsIsmvIjoibSIsIsmwIjoibSIsIuG0lCI6Im9lIiwiybkiOiJyIiwiybsiOiJyIiwiyboiOiJyIiwi4rG5IjoiciIsIsqHIjoidCIsIsqMIjoidiIsIsqNIjoidyIsIsqOIjoieSIsIuqcqSI6InR6Iiwiw7oiOiJ1Iiwixa0iOiJ1Iiwix5QiOiJ1Iiwiw7siOiJ1Iiwi4bm3IjoidSIsIsO8IjoidSIsIseYIjoidSIsIseaIjoidSIsIsecIjoidSIsIseWIjoidSIsIuG5syI6InUiLCLhu6UiOiJ1IiwixbEiOiJ1IiwiyJUiOiJ1Iiwiw7kiOiJ1Iiwi4bunIjoidSIsIsawIjoidSIsIuG7qSI6InUiLCLhu7EiOiJ1Iiwi4burIjoidSIsIuG7rSI6InUiLCLhu68iOiJ1IiwiyJciOiJ1IiwixasiOiJ1Iiwi4bm7IjoidSIsIsWzIjoidSIsIuG2mSI6InUiLCLFryI6InUiLCLFqSI6InUiLCLhubkiOiJ1Iiwi4bm1IjoidSIsIuG1qyI6InVlIiwi6p24IjoidW0iLCLisbQiOiJ2Iiwi6p2fIjoidiIsIuG5vyI6InYiLCLKiyI6InYiLCLhtowiOiJ2Iiwi4rGxIjoidiIsIuG5vSI6InYiLCLqnaEiOiJ2eSIsIuG6gyI6InciLCLFtSI6InciLCLhuoUiOiJ3Iiwi4bqHIjoidyIsIuG6iSI6InciLCLhuoEiOiJ3Iiwi4rGzIjoidyIsIuG6mCI6InciLCLhuo0iOiJ4Iiwi4bqLIjoieCIsIuG2jSI6IngiLCLDvSI6InkiLCLFtyI6InkiLCLDvyI6InkiLCLhuo8iOiJ5Iiwi4bu1IjoieSIsIuG7syI6InkiLCLGtCI6InkiLCLhu7ciOiJ5Iiwi4bu/IjoieSIsIsizIjoieSIsIuG6mSI6InkiLCLJjyI6InkiLCLhu7kiOiJ5IiwixboiOiJ6Iiwixb4iOiJ6Iiwi4bqRIjoieiIsIsqRIjoieiIsIuKxrCI6InoiLCLFvCI6InoiLCLhupMiOiJ6IiwiyKUiOiJ6Iiwi4bqVIjoieiIsIuG1tiI6InoiLCLhto4iOiJ6IiwiypAiOiJ6IiwixrYiOiJ6IiwiyYAiOiJ6Iiwi76yAIjoiZmYiLCLvrIMiOiJmZmkiLCLvrIQiOiJmZmwiLCLvrIEiOiJmaSIsIu+sgiI6ImZsIiwixLMiOiJpaiIsIsWTIjoib2UiLCLvrIYiOiJzdCIsIuKCkCI6ImEiLCLigpEiOiJlIiwi4bWiIjoiaSIsIuKxvCI6ImoiLCLigpIiOiJvIiwi4bWjIjoiciIsIuG1pCI6InUiLCLhtaUiOiJ2Iiwi4oKTIjoieCJ9";
var Latinise={};Latinise.latin_map=JSON.parse(decodeURIComponent(escape(atob(base64map))));
Вы можете использовать _.deburr
метод из библиотеки Lodash.
Он доступен в виде отдельного пакета NPM lodash.deburr
или как часть lodash
пакет.
let myStringWithAccent = 'Mon café est plein de caféïne';
let myStringWithoutAccent = deburr(myStringWithAccent);
Результатом будет: "Mon cafe est plein de cafeine"
Формат для нового RegExp
RegExp(something, 'modifiers');
Так вы бы хотели
accentsTidy = function(s){
var r=s.toLowerCase();
r = r.replace(new RegExp("\\s", 'g'),"");
r = r.replace(new RegExp("[àáâãäå]", 'g'),"a");
r = r.replace(new RegExp("æ", 'g'),"ae");
r = r.replace(new RegExp("ç", 'g'),"c");
r = r.replace(new RegExp("[èéêë]", 'g'),"e");
r = r.replace(new RegExp("[ìíîï]", 'g'),"i");
r = r.replace(new RegExp("ñ", 'g'),"n");
r = r.replace(new RegExp("[òóôõö]", 'g'),"o");
r = r.replace(new RegExp("œ", 'g'),"oe");
r = r.replace(new RegExp("[ùúûü]", 'g'),"u");
r = r.replace(new RegExp("[ýÿ]", 'g'),"y");
r = r.replace(new RegExp("\\W", 'g'),"");
return r;
};
В NPM есть пакет для этого: latinize
Это очень хороший пакет для решения этой проблемы.
Сокращенный код, основанный на превосходном решении Яна Эллиотта:
accentsTidy = function(s){
var r = s.toLowerCase();
non_asciis = {'a': '[àáâãäå]', 'ae': 'æ', 'c': 'ç', 'e': '[èéêë]', 'i': '[ìíîï]', 'n': 'ñ', 'o': '[òóôõö]', 'oe': 'œ', 'u': '[ùúûűü]', 'y': '[ýÿ]'};
for (i in non_asciis) { r = r.replace(new RegExp(non_asciis[i], 'g'), i); }
return r;
};
Редактировать: исправленный нерабочий код
Там много всего, но я думаю, что это просто и достаточно хорошо:
function remove_accents(strAccents) {
var strAccents = strAccents.split('');
var strAccentsOut = new Array();
var strAccentsLen = strAccents.length;
var accents = "ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž";
var accentsOut = "AAAAAAaaaaaaOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
for (var y = 0; y < strAccentsLen; y++) {
if (accents.indexOf(strAccents[y]) != -1) {
strAccentsOut[y] = accentsOut.substr(accents.indexOf(strAccents[y]), 1);
} else
strAccentsOut[y] = strAccents[y];
}
strAccentsOut = strAccentsOut.join('');
return strAccentsOut;
}
Если вы также хотите удалить специальные символы и преобразовать пробелы и дефисы в подчеркиваниях, сделайте следующее:
string = remove_accents(string);
string = string.replace(/[^a-z0-9\s]/gi, '').replace(/[-\s]/g, '_');
Одно из решений, которое кажется намного более быстрым с помощью данного теста: http://jsperf.com/diacritics/9
function removeDiacritics(str) {
return str.replace(/[^A-Za-z0-9\s]+/g, function(a){
return diacriticsMap[a] || a;
});
}
removeDiacritics(teste);
Рабочий пример: http://jsbin.com/sovorute/1/edit
Причина: одна из причин, по которой это происходит намного быстрее, заключается в том, что мы выполняем итерации только по специальным символам, выбранным с помощью шаблона отрицательных регулярных выражений. Самый быстрый из тестов (String Iteration без in) повторяет 1001 для данного текста, что означает каждый символ. Этот повторяется только 35 раз и выдает тот же результат. Имейте в виду, что это заменит только то, что указано на карте.
Классическая статья на эту тему: http://alistapart.com/article/accent-folding-for-auto-complete
Кредит: http://semplicewebsites.com/removing-accents-javascript, также предоставляет хорошую карту персонажей.
Более простой способ заменить диакритические знаки.
function replaceDiacritics(str){
var diacritics = [
{char: 'A', base: /[\300-\306]/g},
{char: 'a', base: /[\340-\346]/g},
{char: 'E', base: /[\310-\313]/g},
{char: 'e', base: /[\350-\353]/g},
{char: 'I', base: /[\314-\317]/g},
{char: 'i', base: /[\354-\357]/g},
{char: 'O', base: /[\322-\330]/g},
{char: 'o', base: /[\362-\370]/g},
{char: 'U', base: /[\331-\334]/g},
{char: 'u', base: /[\371-\374]/g},
{char: 'N', base: /[\321]/g},
{char: 'n', base: /[\361]/g},
{char: 'C', base: /[\307]/g},
{char: 'c', base: /[\347]/g}
]
diacritics.forEach(function(letter){
str = str.replace(letter.base, letter.char);
});
return str;
};
Я billy код billy http://jsfiddle.net/billybraga/UHmnf/ (из его поста) в следующее: http://jsfiddle.net/infralabs/dJX58/
Я исправил транскрипцию символов ſ и ß, а также добавил покрытие этих символов: Þþ, Ðð ,,, IJij, Œœ.
Модифицированный фрагмент ниже:
var defaultDiacriticsRemovalMap = [{
'base': "A",
'letters': /(A|Ⓐ|A|À|Á|Â|Ầ|Ấ|Ẫ|Ẩ|Ã|Ā|Ă|Ằ|Ắ|Ẵ|Ẳ|Ȧ|Ǡ|Ä|Ǟ|Ả|Å|Ǻ|Ǎ|Ȁ|Ȃ|Ạ|Ậ|Ặ|Ḁ|Ą|Ⱥ|Ɐ|[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F])/g
}, {
'base': "AA",
'letters': /(Ꜳ|[\uA732])/g
}, {
'base': "AE",
'letters': /(Æ|Ǽ|Ǣ|[\u00C6\u01FC\u01E2])/g
}, {
'base': "AO",
'letters': /(Ꜵ|[\uA734])/g
}, {
'base': "AU",
'letters': /(Ꜷ|[\uA736])/g
}, {
'base': "AV",
'letters': /(Ꜹ|Ꜻ|[\uA738\uA73A])/g
}, {
'base': "AY",
'letters': /(Ꜽ|[\uA73C])/g
}, {
'base': "B",
'letters': /(B|Ⓑ|B|Ḃ|Ḅ|Ḇ|Ƀ|Ƃ|Ɓ|[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181])/g
}, {
'base': "C",
'letters': /(C|Ⓒ|C|Ć|Ĉ|Ċ|Č|Ç|Ḉ|Ƈ|Ȼ|Ꜿ|[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E])/g
}, {
'base': "D",
'letters': /(D|Ⓓ|D|Ḋ|Ď|Ḍ|Ḑ|Ḓ|Ḏ|Đ|Ƌ|Ɗ|Ɖ|Ꝺ|Ð|[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0])/g
}, {
'base': "DZ",
'letters': /(DZ|DŽ|[\u01F1\u01C4])/g
}, {
'base': "Dz",
'letters': /(Dz|Dž|[\u01F2\u01C5])/g
}, {
'base': "E",
'letters': /(E|Ⓔ|E|È|É|Ê|Ề|Ế|Ễ|Ể|Ẽ|Ē|Ḕ|Ḗ|Ĕ|Ė|Ë|Ẻ|Ě|Ȅ|Ȇ|Ẹ|Ệ|Ȩ|Ḝ|Ę|Ḙ|Ḛ|Ɛ|Ǝ|[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E])/g
}, {
'base': "F",
'letters': /(F|Ⓕ|F|Ḟ|Ƒ|Ꝼ|[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B])/g
}, {
'base': "G",
'letters': /(G|Ⓖ|G|Ǵ|Ĝ|Ḡ|Ğ|Ġ|Ǧ|Ģ|Ǥ|Ɠ|Ꞡ|Ᵹ|Ꝿ|[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E])/g
}, {
'base': "H",
'letters': /(H|Ⓗ|H|Ĥ|Ḣ|Ḧ|Ȟ|Ḥ|Ḩ|Ḫ|Ħ|Ⱨ|Ⱶ|Ɥ|[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D])/g
}, {
'base': "I",
'letters': /(I|Ⓘ|I|Ì|Í|Î|Ĩ|Ī|Ĭ|İ|Ï|Ḯ|Ỉ|Ǐ|Ȉ|Ȋ|Ị|Į|Ḭ|Ɨ|[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197])/g
}, {
'base': "J",
'letters': /(J|Ⓙ|J|Ĵ|Ɉ|[\u004A\u24BF\uFF2A\u0134\u0248])/g
}, {
'base': "K",
'letters': /(K|Ⓚ|K|Ḱ|Ǩ|Ḳ|Ķ|Ḵ|Ƙ|Ⱪ|Ꝁ|Ꝃ|Ꝅ|Ꞣ|[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2])/g
}, {
'base': "L",
'letters': /(L|Ⓛ|L|Ŀ|Ĺ|Ľ|Ḷ|Ḹ|Ļ|Ḽ|Ḻ|Ł|Ƚ|Ɫ|Ⱡ|Ꝉ|Ꝇ|Ꞁ|[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780])/g
}, {
'base': "LJ",
'letters': /(LJ|[\u01C7])/g
}, {
'base': "Lj",
'letters': /(Lj|[\u01C8])/g
}, {
'base': "M",
'letters': /(M|Ⓜ|M|Ḿ|Ṁ|Ṃ|Ɱ|Ɯ|[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C])/g
}, {
'base': "N",
'letters': /(N|Ⓝ|N|Ǹ|Ń|Ñ|Ṅ|Ň|Ṇ|Ņ|Ṋ|Ṉ|Ƞ|Ɲ|Ꞑ|Ꞥ|Ŋ|[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4\u014A])/g
}, {
'base': "NJ",
'letters': /(NJ|[\u01CA])/g
}, {
'base': "Nj",
'letters': /(Nj|[\u01CB])/g
}, {
'base': "O",
'letters': /(O|Ⓞ|O|Ò|Ó|Ô|Ồ|Ố|Ỗ|Ổ|Õ|Ṍ|Ȭ|Ṏ|Ō|Ṑ|Ṓ|Ŏ|Ȯ|Ȱ|Ö|Ȫ|Ỏ|Ő|Ǒ|Ȍ|Ȏ|Ơ|Ờ|Ớ|Ỡ|Ở|Ợ|Ọ|Ộ|Ǫ|Ǭ|Ø|Ǿ|Ɔ|Ɵ|Ꝋ|Ꝍ|[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C])/g
}, {
'base': "OE",
'letters': /(Œ|[\u0152])/g
}, {
'base': "OI",
'letters': /(Ƣ|[\u01A2])/g
}, {
'base': "OO",
'letters': /(Ꝏ|[\uA74E])/g
}, {
'base': "OU",
'letters': /(Ȣ|[\u0222])/g
}, {
'base': "P",
'letters': /(P|Ⓟ|P|Ṕ|Ṗ|Ƥ|Ᵽ|Ꝑ|Ꝓ|Ꝕ|[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754])/g
}, {
'base': "Q",
'letters': /(Q|Ⓠ|Q|Ꝗ|Ꝙ|Ɋ|[\u0051\u24C6\uFF31\uA756\uA758\u024A])/g
}, {
'base': "R",
'letters': /(R|Ⓡ|R|Ŕ|Ṙ|Ř|Ȑ|Ȓ|Ṛ|Ṝ|Ŗ|Ṟ|Ɍ|Ɽ|Ꝛ|Ꞧ|Ꞃ|[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782])/g
}, {
'base': "S",
'letters': /(S|Ⓢ|S|ẞ|Ś|Ṥ|Ŝ|Ṡ|Š|Ṧ|Ṣ|Ṩ|Ș|Ş|Ȿ|Ꞩ|Ꞅ|[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784])/g
}, {
'base': "T",
'letters': /(T|Ⓣ|T|Ṫ|Ť|Ṭ|Ț|Ţ|Ṱ|Ṯ|Ŧ|Ƭ|Ʈ|Ⱦ|Ꞇ|[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786])/g
}, {
'base': "TH",
'letters': /(Þ|[\u00DE])/g
}, {
'base': "TZ",
'letters': /(Ꜩ|[\uA728])/g
}, {
'base': "U",
'letters': /(U|Ⓤ|U|Ù|Ú|Û|Ũ|Ṹ|Ū|Ṻ|Ŭ|Ü|Ǜ|Ǘ|Ǖ|Ǚ|Ủ|Ů|Ű|Ǔ|Ȕ|Ȗ|Ư|Ừ|Ứ|Ữ|Ử|Ự|Ụ|Ṳ|Ų|Ṷ|Ṵ|Ʉ|[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244])/g
}, {
'base': "V",
'letters': /(V|Ⓥ|V|Ṽ|Ṿ|Ʋ|Ꝟ|Ʌ|[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245])/g
}, {
'base': "VY",
'letters': /(Ꝡ|[\uA760])/g
}, {
'base': "W",
'letters': /(W|Ⓦ|W|Ẁ|Ẃ|Ŵ|Ẇ|Ẅ|Ẉ|Ⱳ|[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72])/g
}, {
'base': "X",
'letters': /(X|Ⓧ|X|Ẋ|Ẍ|[\u0058\u24CD\uFF38\u1E8A\u1E8C])/g
}, {
'base': "Y",
'letters': /(Y|Ⓨ|Y|Ỳ|Ý|Ŷ|Ỹ|Ȳ|Ẏ|Ÿ|Ỷ|Ỵ|Ƴ|Ɏ|Ỿ|[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE])/g
}, {
'base': "Z",
'letters': /(Z|Ⓩ|Z|Ź|Ẑ|Ż|Ž|Ẓ|Ẕ|Ƶ|Ȥ|Ɀ|Ⱬ|Ꝣ|[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762])/g
}, {
'base': "a",
'letters': /(a|ⓐ|a|ẚ|à|á|â|ầ|ấ|ẫ|ẩ|ã|ā|ă|ằ|ắ|ẵ|ẳ|ȧ|ǡ|ä|ǟ|ả|å|ǻ|ǎ|ȁ|ȃ|ạ|ậ|ặ|ḁ|ą|ⱥ|ɐ|[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250])/g
}, {
'base': "aa",
'letters': /(ꜳ|[\uA733])/g
}, {
'base': "ae",
'letters': /(æ|ǽ|ǣ|[\u00E6\u01FD\u01E3])/g
}, {
'base': "ao",
'letters': /(ꜵ|[\uA735])/g
}, {
'base': "au",
'letters': /(ꜷ|[\uA737])/g
}, {
'base': "av",
'letters': /(ꜹ|ꜻ|[\uA739\uA73B])/g
}, {
'base': "ay",
'letters': /(ꜽ|[\uA73D])/g
}, {
'base': "b",
'letters': /(b|ⓑ|b|ḃ|ḅ|ḇ|ƀ|ƃ|ɓ|[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253])/g
}, {
'base': "c",
'letters': /(c|ⓒ|c|ć|ĉ|ċ|č|ç|ḉ|ƈ|ȼ|ꜿ|ↄ|[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184])/g
}, {
'base': "d",
'letters': /(d|ⓓ|d|ḋ|ď|ḍ|ḑ|ḓ|ḏ|đ|ƌ|ɖ|ɗ|ꝺ|ð|[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A\u00F0])/g
}, {
'base': "dz",
'letters': /(dz|dž|[\u01F3\u01C6])/g
}, {
'base': "e",
'letters': /(e|ⓔ|e|è|é|ê|ề|ế|ễ|ể|ẽ|ē|ḕ|ḗ|ĕ|ė|ë|ẻ|ě|ȅ|ȇ|ẹ|ệ|ȩ|ḝ|ę|ḙ|ḛ|ɇ|ɛ|ǝ|[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD])/g
}, {
'base': "f",
'letters': /(f|ⓕ|f|ḟ|ƒ|ꝼ|[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C])/g
}, {
'base': "g",
'letters': /(g|ⓖ|g|ǵ|ĝ|ḡ|ğ|ġ|ǧ|ģ|ǥ|ɠ|ꞡ|ᵹ|ꝿ|[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F])/g
}, {
'base': "h",
'letters': /(h|ⓗ|h|ĥ|ḣ|ḧ|ȟ|ḥ|ḩ|ḫ|ẖ|ħ|ⱨ|ⱶ|ɥ|[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265])/g
}, {
'base': "hv",
'letters': /(ƕ|[\u0195])/g
}, {
'base': "i",
'letters': /(i|ⓘ|i|ì|í|î|ĩ|ī|ĭ|ï|ḯ|ỉ|ǐ|ȉ|ȋ|ị|į|ḭ|ɨ|ı|[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131])/g
}, {
'base': "ij",
'letters': /(ij|[\u0133])/g
}, {
'base': "j",
'letters': /(j|ⓙ|j|ĵ|ǰ|ɉ|[\u006A\u24D9\uFF4A\u0135\u01F0\u0249])/g
}, {
'base': "k",
'letters': /(k|ⓚ|k|ḱ|ǩ|ḳ|ķ|ḵ|ƙ|ⱪ|ꝁ|ꝃ|ꝅ|ꞣ|[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3])/g
}, {
'base': "l",
'letters': /(l|ⓛ|l|ŀ|ĺ|ľ|ḷ|ḹ|ļ|ḽ|ḻ|ł|ƚ|ɫ|ⱡ|ꝉ|ꞁ|ꝇ|[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u0142\u019A\u026B\u2C61\uA749\uA781\uA747])/g
}, {
'base': "lj",
'letters': /(lj|[\u01C9])/g
}, {
'base': "m",
'letters': /(m|ⓜ|m|ḿ|ṁ|ṃ|ɱ|ɯ|[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F])/g
}, {
'base': "n",
'letters': /(n|ⓝ|n|ǹ|ń|ñ|ṅ|ň|ṇ|ņ|ṋ|ṉ|ƞ|ɲ|ʼn|ꞑ|ꞥ|ŋ|[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5\u014B])/g
}, {
'base': "nj",
'letters': /(nj|[\u01CC])/g
}, {
'base': "o",
'letters': /(o|ⓞ|o|ò|ó|ô|ồ|ố|ỗ|ổ|õ|ṍ|ȭ|ṏ|ō|ṑ|ṓ|ŏ|ȯ|ȱ|ö|ȫ|ỏ|ő|ǒ|ȍ|ȏ|ơ|ờ|ớ|ỡ|ở|ợ|ọ|ộ|ǫ|ǭ|ø|ǿ|ɔ|ꝋ|ꝍ|ɵ|[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275])/g
}, {
'base': "oe",
'letters': /(œ|[\u0153])/g
}, {
'base': "oi",
'letters': /(ƣ|[\u01A3])/g
}, {
'base': "ou",
'letters': /(ȣ|[\u0223])/g
}, {
'base': "oo",
'letters': /(ꝏ|[\uA74F])/g
}, {
'base': "p",
'letters': /(p|ⓟ|p|ṕ|ṗ|ƥ|ᵽ|ꝑ|ꝓ|ꝕ|[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755])/g
}, {
'base': "q",
'letters': /(q|ⓠ|q|ɋ|ꝗ|ꝙ|[\u0071\u24E0\uFF51\u024B\uA757\uA759])/g
}, {
'base': "r",
'letters': /(r|ⓡ|r|ŕ|ṙ|ř|ȑ|ȓ|ṛ|ṝ|ŗ|ṟ|ɍ|ɽ|ꝛ|ꞧ|ꞃ|[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783])/g
}, {
'base': "s",
'letters': /(s|ⓢ|s|ś|ṥ|ŝ|ṡ|š|ṧ|ṣ|ṩ|ș|ş|ȿ|ꞩ|ꞅ|ẛ|ſ|[\u0073\u24E2\uFF53\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B\u017F])/g
}, {
'base': "ss",
'letters': /(ß|[\u00DF])/g
}, {
'base': "t",
'letters': /(t|ⓣ|t|ṫ|ẗ|ť|ṭ|ț|ţ|ṱ|ṯ|ŧ|ƭ|ʈ|ⱦ|ꞇ|[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787])/g
}, {
'base': "th",
'letters': /(þ|[\u00FE])/g
}, {
'base': "tz",
'letters': /(ꜩ|[\uA729])/g
}, {
'base': "u",
'letters': /(u|ⓤ|u|ù|ú|û|ũ|ṹ|ū|ṻ|ŭ|ü|ǜ|ǘ|ǖ|ǚ|ủ|ů|ű|ǔ|ȕ|ȗ|ư|ừ|ứ|ữ|ử|ự|ụ|ṳ|ų|ṷ|ṵ|ʉ|[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289])/g
}, {
'base': "v",
'letters': /(v|ⓥ|v|ṽ|ṿ|ʋ|ꝟ|ʌ|[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C])/g
}, {
'base': "vy",
'letters': /(ꝡ|[\uA761])/g
}, {
'base': "w",
'letters': /(w|ⓦ|w|ẁ|ẃ|ŵ|ẇ|ẅ|ẘ|ẉ|ⱳ|[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73])/g
}, {
'base': "x",
'letters': /(x|ⓧ|x|ẋ|ẍ|[\u0078\u24E7\uFF58\u1E8B\u1E8D])/g
}, {
'base': "y",
'letters': /(y|ⓨ|y|ỳ|ý|ŷ|ỹ|ȳ|ẏ|ÿ|ỷ|ẙ|ỵ|ƴ|ɏ|ỿ|[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF])/g
}, {
'base': "z",
'letters': /(z|ⓩ|z|ź|ẑ|ż|ž|ẓ|ẕ|ƶ|ȥ|ɀ|ⱬ|ꝣ|[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763])/g
}];
Я использовал этот репозиторий gitHub dundalek в качестве модуля узла: https://github.com/dundalek/latinize
Работает это просто так:
Вот моя модифицированная версия версии lehelk.com, которая также удаляет HTML-объекты, которые являются акцентами:
http://jsfiddle.net/billybraga/UHmnf/
Я все еще не знаю о производительности, хотя...
var defaultDiacriticsRemovalMap = [{
'base': "A",
'letters': /(A|Ⓐ|A|À|Á|Â|Ầ|Ấ|Ẫ|Ẩ|Ã|Ā|Ă|Ằ|Ắ|Ẵ|Ẳ|Ȧ|Ǡ|Ä|Ǟ|Ả|Å|Ǻ|Ǎ|Ȁ|Ȃ|Ạ|Ậ|Ặ|Ḁ|Ą|Ⱥ|Ɐ|[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F])/g},
{
'base': "AA",
'letters': /(Ꜳ|[\uA732])/g},
{
'base': "AE",
'letters': /(Æ|Ǽ|Ǣ|[\u00C6\u01FC\u01E2])/g},
{
'base': "AO",
'letters': /(Ꜵ|[\uA734])/g},
{
'base': "AU",
'letters': /(Ꜷ|[\uA736])/g},
{
'base': "AV",
'letters': /(Ꜹ|Ꜻ|[\uA738\uA73A])/g},
{
'base': "AY",
'letters': /(Ꜽ|[\uA73C])/g},
{
'base': "B",
'letters': /(B|Ⓑ|B|Ḃ|Ḅ|Ḇ|Ƀ|Ƃ|Ɓ|[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181])/g},
{
'base': "C",
'letters': /(C|Ⓒ|C|Ć|Ĉ|Ċ|Č|Ç|Ḉ|Ƈ|Ȼ|Ꜿ|[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E])/g},
{
'base': "D",
'letters': /(D|Ⓓ|D|Ḋ|Ď|Ḍ|Ḑ|Ḓ|Ḏ|Đ|Ƌ|Ɗ|Ɖ|Ꝺ|[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779])/g},
{
'base': "DZ",
'letters': /(DZ|DŽ|[\u01F1\u01C4])/g},
{
'base': "Dz",
'letters': /(Dz|Dž|[\u01F2\u01C5])/g},
{
'base': "E",
'letters': /(E|Ⓔ|E|È|É|Ê|Ề|Ế|Ễ|Ể|Ẽ|Ē|Ḕ|Ḗ|Ĕ|Ė|Ë|Ẻ|Ě|Ȅ|Ȇ|Ẹ|Ệ|Ȩ|Ḝ|Ę|Ḙ|Ḛ|Ɛ|Ǝ|[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E])/g},
{
'base': "F",
'letters': /(F|Ⓕ|F|Ḟ|Ƒ|Ꝼ|[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B])/g},
{
'base': "G",
'letters': /(G|Ⓖ|G|Ǵ|Ĝ|Ḡ|Ğ|Ġ|Ǧ|Ģ|Ǥ|Ɠ|Ꞡ|Ᵹ|Ꝿ|[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E])/g},
{
'base': "H",
'letters': /(H|Ⓗ|H|Ĥ|Ḣ|Ḧ|Ȟ|Ḥ|Ḩ|Ḫ|Ħ|Ⱨ|Ⱶ|Ɥ|[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D])/g},
{
'base': "I",
'letters': /(I|Ⓘ|I|Ì|Í|Î|Ĩ|Ī|Ĭ|İ|Ï|Ḯ|Ỉ|Ǐ|Ȉ|Ȋ|Ị|Į|Ḭ|Ɨ|[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197])/g},
{
'base': "J",
'letters': /(J|Ⓙ|J|Ĵ|Ɉ|[\u004A\u24BF\uFF2A\u0134\u0248])/g},
{
'base': "K",
'letters': /(K|Ⓚ|K|Ḱ|Ǩ|Ḳ|Ķ|Ḵ|Ƙ|Ⱪ|Ꝁ|Ꝃ|Ꝅ|Ꞣ|[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2])/g},
{
'base': "L",
'letters': /(L|Ⓛ|L|Ŀ|Ĺ|Ľ|Ḷ|Ḹ|Ļ|Ḽ|Ḻ|Ł|Ƚ|Ɫ|Ⱡ|Ꝉ|Ꝇ|Ꞁ|[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780])/g},
{
'base': "LJ",
'letters': /(LJ|[\u01C7])/g},
{
'base': "Lj",
'letters': /(Lj|[\u01C8])/g},
{
'base': "M",
'letters': /(M|Ⓜ|M|Ḿ|Ṁ|Ṃ|Ɱ|Ɯ|[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C])/g},
{
'base': "N",
'letters': /(N|Ⓝ|N|Ǹ|Ń|Ñ|Ṅ|Ň|Ṇ|Ņ|Ṋ|Ṉ|Ƞ|Ɲ|Ꞑ|Ꞥ|[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4])/g},
{
'base': "NJ",
'letters': /(NJ|[\u01CA])/g},
{
'base': "Nj",
'letters': /(Nj|[\u01CB])/g},
{
'base': "O",
'letters': /(O|Ⓞ|O|Ò|Ó|Ô|Ồ|Ố|Ỗ|Ổ|Õ|Ṍ|Ȭ|Ṏ|Ō|Ṑ|Ṓ|Ŏ|Ȯ|Ȱ|Ö|Ȫ|Ỏ|Ő|Ǒ|Ȍ|Ȏ|Ơ|Ờ|Ớ|Ỡ|Ở|Ợ|Ọ|Ộ|Ǫ|Ǭ|Ø|Ǿ|Ɔ|Ɵ|Ꝋ|Ꝍ|[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C])/g},
{
'base': "OI",
'letters': /(Ƣ|[\u01A2])/g},
{
'base': "OO",
'letters': /(Ꝏ|[\uA74E])/g},
{
'base': "OU",
'letters': /(Ȣ|[\u0222])/g},
{
'base': "P",
'letters': /(P|Ⓟ|P|Ṕ|Ṗ|Ƥ|Ᵽ|Ꝑ|Ꝓ|Ꝕ|[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754])/g},
{
'base': "Q",
'letters': /(Q|Ⓠ|Q|Ꝗ|Ꝙ|Ɋ|[\u0051\u24C6\uFF31\uA756\uA758\u024A])/g},
{
'base': "R",
'letters': /(R|Ⓡ|R|Ŕ|Ṙ|Ř|Ȑ|Ȓ|Ṛ|Ṝ|Ŗ|Ṟ|Ɍ|Ɽ|Ꝛ|Ꞧ|Ꞃ|[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782])/g},
{
'base': "S",
'letters': /(S|Ⓢ|S|ẞ|Ś|Ṥ|Ŝ|Ṡ|Š|Ṧ|Ṣ|Ṩ|Ș|Ş|Ȿ|Ꞩ|Ꞅ|[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784])/g},
{
'base': "T",
'letters': /(T|Ⓣ|T|Ṫ|Ť|Ṭ|Ț|Ţ|Ṱ|Ṯ|Ŧ|Ƭ|Ʈ|Ⱦ|Ꞇ|[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786])/g},
{
'base': "TZ",
'letters': /(Ꜩ|[\uA728])/g},
{
'base': "U",
'letters': /(U|Ⓤ|U|Ù|Ú|Û|Ũ|Ṹ|Ū|Ṻ|Ŭ|Ü|Ǜ|Ǘ|Ǖ|Ǚ|Ủ|Ů|Ű|Ǔ|Ȕ|Ȗ|Ư|Ừ|Ứ|Ữ|Ử|Ự|Ụ|Ṳ|Ų|Ṷ|Ṵ|Ʉ|[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244])/g},
{
'base': "V",
'letters': /(V|Ⓥ|V|Ṽ|Ṿ|Ʋ|Ꝟ|Ʌ|[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245])/g},
{
'base': "VY",
'letters': /(Ꝡ|[\uA760])/g},
{
'base': "W",
'letters': /(W|Ⓦ|W|Ẁ|Ẃ|Ŵ|Ẇ|Ẅ|Ẉ|Ⱳ|[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72])/g},
{
'base': "X",
'letters': /(X|Ⓧ|X|Ẋ|Ẍ|[\u0058\u24CD\uFF38\u1E8A\u1E8C])/g},
{
'base': "Y",
'letters': /(Y|Ⓨ|Y|Ỳ|Ý|Ŷ|Ỹ|Ȳ|Ẏ|Ÿ|Ỷ|Ỵ|Ƴ|Ɏ|Ỿ|[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE])/g},
{
'base': "Z",
'letters': /(Z|Ⓩ|Z|Ź|Ẑ|Ż|Ž|Ẓ|Ẕ|Ƶ|Ȥ|Ɀ|Ⱬ|Ꝣ|[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762])/g},
{
'base': "a",
'letters': /(a|ⓐ|a|ẚ|à|á|â|ầ|ấ|ẫ|ẩ|ã|ā|ă|ằ|ắ|ẵ|ẳ|ȧ|ǡ|ä|ǟ|ả|å|ǻ|ǎ|ȁ|ȃ|ạ|ậ|ặ|ḁ|ą|ⱥ|ɐ|[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250])/g},
{
'base': "aa",
'letters': /(ꜳ|[\uA733])/g},
{
'base': "ae",
'letters': /(æ|ǽ|ǣ|[\u00E6\u01FD\u01E3])/g},
{
'base': "ao",
'letters': /(ꜵ|[\uA735])/g},
{
'base': "au",
'letters': /(ꜷ|[\uA737])/g},
{
'base': "av",
'letters': /(ꜹ|ꜻ|[\uA739\uA73B])/g},
{
'base': "ay",
'letters': /(ꜽ|[\uA73D])/g},
{
'base': "b",
'letters': /(b|ⓑ|b|ḃ|ḅ|ḇ|ƀ|ƃ|ɓ|[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253])/g},
{
'base': "c",
'letters': /(c|ⓒ|c|ć|ĉ|ċ|č|ç|ḉ|ƈ|ȼ|ꜿ|ↄ|[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184])/g},
{
'base': "d",
'letters': /(d|ⓓ|d|ḋ|ď|ḍ|ḑ|ḓ|ḏ|đ|ƌ|ɖ|ɗ|ꝺ|[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A])/g},
{
'base': "dz",
'letters': /(dz|dž|[\u01F3\u01C6])/g},
{
'base': "e",
'letters': /(e|ⓔ|e|è|é|ê|ề|ế|ễ|ể|ẽ|ē|ḕ|ḗ|ĕ|ė|ë|ẻ|ě|ȅ|ȇ|ẹ|ệ|ȩ|ḝ|ę|ḙ|ḛ|ɇ|ɛ|ǝ|[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD])/g},
{
'base': "f",
'letters': /(f|ⓕ|f|ḟ|ƒ|ꝼ|[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C])/g},
{
'base': "g",
'letters': /(g|ⓖ|g|ǵ|ĝ|ḡ|ğ|ġ|ǧ|ģ|ǥ|ɠ|ꞡ|ᵹ|ꝿ|[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F])/g},
{
'base': "h",
'letters': /(h|ⓗ|h|ĥ|ḣ|ḧ|ȟ|ḥ|ḩ|ḫ|ẖ|ħ|ⱨ|ⱶ|ɥ|[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265])/g},
{
'base': "hv",
'letters': /(ƕ|[\u0195])/g},
{
'base': "i",
'letters': /(i|ⓘ|i|ì|í|î|ĩ|ī|ĭ|ï|ḯ|ỉ|ǐ|ȉ|ȋ|ị|į|ḭ|ɨ|ı|[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131])/g},
{
'base': "j",
'letters': /(j|ⓙ|j|ĵ|ǰ|ɉ|[\u006A\u24D9\uFF4A\u0135\u01F0\u0249])/g},
{
'base': "k",
'letters': /(k|ⓚ|k|ḱ|ǩ|ḳ|ķ|ḵ|ƙ|ⱪ|ꝁ|ꝃ|ꝅ|ꞣ|[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3])/g},
{
'base': "l",
'letters': /(l|ⓛ|l|ŀ|ĺ|ľ|ḷ|ḹ|ļ|ḽ|ḻ|ſ|ł|ƚ|ɫ|ⱡ|ꝉ|ꞁ|ꝇ|[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747])/g},
{
'base': "lj",
'letters': /(lj|[\u01C9])/g},
{
'base': "m",
'letters': /(m|ⓜ|m|ḿ|ṁ|ṃ|ɱ|ɯ|[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F])/g},
{
'base': "n",
'letters': /(n|ⓝ|n|ǹ|ń|ñ|ṅ|ň|ṇ|ņ|ṋ|ṉ|ƞ|ɲ|ʼn|ꞑ|ꞥ|[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5])/g},
{
'base': "nj",
'letters': /(nj|[\u01CC])/g},
{
'base': "o",
'letters': /(o|ⓞ|o|ò|ó|ô|ồ|ố|ỗ|ổ|õ|ṍ|ȭ|ṏ|ō|ṑ|ṓ|ŏ|ȯ|ȱ|ö|ȫ|ỏ|ő|ǒ|ȍ|ȏ|ơ|ờ|ớ|ỡ|ở|ợ|ọ|ộ|ǫ|ǭ|ø|ǿ|ɔ|ꝋ|ꝍ|ɵ|[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275])/g},
{
'base': "oi",
'letters': /(ƣ|[\u01A3])/g},
{
'base': "ou",
'letters': /(ȣ|[\u0223])/g},
{
'base': "oo",
'letters': /(ꝏ|[\uA74F])/g},
{
'base': "p",
'letters': /(p|ⓟ|p|ṕ|ṗ|ƥ|ᵽ|ꝑ|ꝓ|ꝕ|[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755])/g},
{
'base': "q",
'letters': /(q|ⓠ|q|ɋ|ꝗ|ꝙ|[\u0071\u24E0\uFF51\u024B\uA757\uA759])/g},
{
'base': "r",
'letters': /(r|ⓡ|r|ŕ|ṙ|ř|ȑ|ȓ|ṛ|ṝ|ŗ|ṟ|ɍ|ɽ|ꝛ|ꞧ|ꞃ|[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783])/g},
{
'base': "s",
'letters': /(s|ⓢ|s|ß|ś|ṥ|ŝ|ṡ|š|ṧ|ṣ|ṩ|ș|ş|ȿ|ꞩ|ꞅ|ẛ|[\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B])/g},
{
'base': "t",
'letters': /(t|ⓣ|t|ṫ|ẗ|ť|ṭ|ț|ţ|ṱ|ṯ|ŧ|ƭ|ʈ|ⱦ|ꞇ|[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787])/g},
{
'base': "tz",
'letters': /(ꜩ|[\uA729])/g},
{
'base': "u",
'letters': /(u|ⓤ|u|ù|ú|û|ũ|ṹ|ū|ṻ|ŭ|ü|ǜ|ǘ|ǖ|ǚ|ủ|ů|ű|ǔ|ȕ|ȗ|ư|ừ|ứ|ữ|ử|ự|ụ|ṳ|ų|ṷ|ṵ|ʉ|[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289])/g},
{
'base': "v",
'letters': /(v|ⓥ|v|ṽ|ṿ|ʋ|ꝟ|ʌ|[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C])/g},
{
'base': "vy",
'letters': /(ꝡ|[\uA761])/g},
{
'base': "w",
'letters': /(w|ⓦ|w|ẁ|ẃ|ŵ|ẇ|ẅ|ẘ|ẉ|ⱳ|[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73])/g},
{
'base': "x",
'letters': /(x|ⓧ|x|ẋ|ẍ|[\u0078\u24E7\uFF58\u1E8B\u1E8D])/g},
{
'base': "y",
'letters': /(y|ⓨ|y|ỳ|ý|ŷ|ỹ|ȳ|ẏ|ÿ|ỷ|ẙ|ỵ|ƴ|ɏ|ỿ|[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF])/g},
{
'base': "z",
'letters': /(z|ⓩ|z|ź|ẑ|ż|ž|ẓ|ẕ|ƶ|ȥ|ɀ|ⱬ|ꝣ|[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763])/g}];
function removeDiacritics(str) {
for (var i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
str = str.replace(defaultDiacriticsRemovalMap[i].letters, defaultDiacriticsRemovalMap[i].base);
}
return str;
}
Проверен самый быстрый способ удаления диакритических знаков из строки, которая охватывает практически все символы. нашел наибольшую производительность среди всех других методов
var diacriticsMap = {
'\u0041': 'A','\u24B6': 'A','\uFF21': 'A','\u00C0': 'A','\u00C1': 'A','\u00C2': 'A','\u1EA6': 'A','\u1EA4': 'A','\u1EAA': 'A','\u1EA8': 'A',
'\u00C3': 'A','\u0100': 'A','\u0102': 'A','\u1EB0': 'A','\u1EAE': 'A','\u1EB4': 'A','\u1EB2': 'A','\u0226': 'A','\u01E0': 'A','\u00C4': 'A',
'\u01DE': 'A','\u1EA2': 'A','\u00C5': 'A','\u01FA': 'A','\u01CD': 'A','\u0200': 'A','\u0202': 'A','\u1EA0': 'A','\u1EAC': 'A','\u1EB6': 'A',
'\u1E00': 'A','\u0104': 'A','\u023A': 'A','\u2C6F': 'A',
'\uA732': 'AA',
'\u00C6': 'AE','\u01FC': 'AE','\u01E2': 'AE',
'\uA734': 'AO',
'\uA736': 'AU',
'\uA738': 'AV','\uA73A': 'AV',
'\uA73C': 'AY',
'\u0042': 'B','\u24B7': 'B','\uFF22': 'B','\u1E02': 'B','\u1E04': 'B','\u1E06': 'B','\u0243': 'B','\u0182': 'B','\u0181': 'B',
'\u0043': 'C','\u24B8': 'C','\uFF23': 'C','\u0106': 'C','\u0108': 'C','\u010A': 'C','\u010C': 'C','\u00C7': 'C','\u1E08': 'C','\u0187': 'C',
'\u023B': 'C','\uA73E': 'C',
'\u0044': 'D','\u24B9': 'D','\uFF24': 'D','\u1E0A': 'D','\u010E': 'D','\u1E0C': 'D','\u1E10': 'D','\u1E12': 'D','\u1E0E': 'D','\u0110': 'D',
'\u018B': 'D','\u018A': 'D','\u0189': 'D','\uA779': 'D',
'\u01F1': 'DZ','\u01C4': 'DZ',
'\u01F2': 'Dz','\u01C5': 'Dz',
'\u0045': 'E','\u24BA': 'E','\uFF25': 'E','\u00C8': 'E','\u00C9': 'E','\u00CA': 'E','\u1EC0': 'E','\u1EBE': 'E','\u1EC4': 'E','\u1EC2': 'E',
'\u1EBC': 'E','\u0112': 'E','\u1E14': 'E','\u1E16': 'E','\u0114': 'E','\u0116': 'E','\u00CB': 'E','\u1EBA': 'E','\u011A': 'E','\u0204': 'E',
'\u0206': 'E','\u1EB8': 'E','\u1EC6': 'E','\u0228': 'E','\u1E1C': 'E','\u0118': 'E','\u1E18': 'E','\u1E1A': 'E','\u0190': 'E','\u018E': 'E',
'\u0046': 'F','\u24BB': 'F','\uFF26': 'F','\u1E1E': 'F','\u0191': 'F','\uA77B': 'F',
'\u0047': 'G','\u24BC': 'G','\uFF27': 'G','\u01F4': 'G','\u011C': 'G','\u1E20': 'G','\u011E': 'G','\u0120': 'G','\u01E6': 'G','\u0122': 'G',
'\u01E4': 'G','\u0193': 'G','\uA7A0': 'G','\uA77D': 'G','\uA77E': 'G',
'\u0048': 'H','\u24BD': 'H','\uFF28': 'H','\u0124': 'H','\u1E22': 'H','\u1E26': 'H','\u021E': 'H','\u1E24': 'H','\u1E28': 'H','\u1E2A': 'H',
'\u0126': 'H','\u2C67': 'H','\u2C75': 'H','\uA78D': 'H',
'\u0049': 'I','\u24BE': 'I','\uFF29': 'I','\u00CC': 'I','\u00CD': 'I','\u00CE': 'I','\u0128': 'I','\u012A': 'I','\u012C': 'I','\u0130': 'I',
'\u00CF': 'I','\u1E2E': 'I','\u1EC8': 'I','\u01CF': 'I','\u0208': 'I','\u020A': 'I','\u1ECA': 'I','\u012E': 'I','\u1E2C': 'I','\u0197': 'I',
'\u004A': 'J','\u24BF': 'J','\uFF2A': 'J','\u0134': 'J','\u0248': 'J',
'\u004B': 'K','\u24C0': 'K','\uFF2B': 'K','\u1E30': 'K','\u01E8': 'K','\u1E32': 'K','\u0136': 'K','\u1E34': 'K','\u0198': 'K','\u2C69': 'K',
'\uA740': 'K','\uA742': 'K','\uA744': 'K','\uA7A2': 'K',
'\u004C': 'L','\u24C1': 'L','\uFF2C': 'L','\u013F': 'L','\u0139': 'L','\u013D': 'L','\u1E36': 'L','\u1E38': 'L','\u013B': 'L','\u1E3C': 'L',
'\u1E3A': 'L','\u0141': 'L','\u023D': 'L','\u2C62': 'L','\u2C60': 'L','\uA748': 'L','\uA746': 'L','\uA780': 'L',
'\u01C7': 'LJ',
'\u01C8': 'Lj',
'\u004D': 'M','\u24C2': 'M','\uFF2D': 'M','\u1E3E': 'M','\u1E40': 'M','\u1E42': 'M','\u2C6E': 'M','\u019C': 'M',
'\u004E': 'N','\u24C3': 'N','\uFF2E': 'N','\u01F8': 'N','\u0143': 'N','\u00D1': 'N','\u1E44': 'N','\u0147': 'N','\u1E46': 'N','\u0145': 'N',
'\u1E4A': 'N','\u1E48': 'N','\u0220': 'N','\u019D': 'N','\uA790': 'N','\uA7A4': 'N',
'\u01CA': 'NJ',
'\u01CB': 'Nj',
'\u004F': 'O','\u24C4': 'O','\uFF2F': 'O','\u00D2': 'O','\u00D3': 'O','\u00D4': 'O','\u1ED2': 'O','\u1ED0': 'O','\u1ED6': 'O','\u1ED4': 'O',
'\u00D5': 'O','\u1E4C': 'O','\u022C': 'O','\u1E4E': 'O','\u014C': 'O','\u1E50': 'O','\u1E52': 'O','\u014E': 'O','\u022E': 'O','\u0230': 'O',
'\u00D6': 'O','\u022A': 'O','\u1ECE': 'O','\u0150': 'O','\u01D1': 'O','\u020C': 'O','\u020E': 'O','\u01A0': 'O','\u1EDC': 'O','\u1EDA': 'O',
'\u1EE0': 'O','\u1EDE': 'O','\u1EE2': 'O','\u1ECC': 'O','\u1ED8': 'O','\u01EA': 'O','\u01EC': 'O','\u00D8': 'O','\u01FE': 'O','\u0186': 'O',
'\u019F': 'O','\uA74A': 'O','\uA74C': 'O',
'\u01A2': 'OI',
'\uA74E': 'OO',
'\u0222': 'OU',
'\u0050': 'P','\u24C5': 'P','\uFF30': 'P','\u1E54': 'P','\u1E56': 'P','\u01A4': 'P','\u2C63': 'P','\uA750': 'P','\uA752': 'P','\uA754': 'P',
'\u0051': 'Q','\u24C6': 'Q','\uFF31': 'Q','\uA756': 'Q','\uA758': 'Q','\u024A': 'Q',
'\u0052': 'R','\u24C7': 'R','\uFF32': 'R','\u0154': 'R','\u1E58': 'R','\u0158': 'R','\u0210': 'R','\u0212': 'R','\u1E5A': 'R','\u1E5C': 'R',
'\u0156': 'R','\u1E5E': 'R','\u024C': 'R','\u2C64': 'R','\uA75A': 'R','\uA7A6': 'R','\uA782': 'R',
'\u0053': 'S','\u24C8': 'S','\uFF33': 'S','\u1E9E': 'S','\u015A': 'S','\u1E64': 'S','\u015C': 'S','\u1E60': 'S','\u0160': 'S','\u1E66': 'S',
'\u1E62': 'S','\u1E68': 'S','\u0218': 'S','\u015E': 'S','\u2C7E': 'S','\uA7A8': 'S','\uA784': 'S',
'\u0054': 'T','\u24C9': 'T','\uFF34': 'T','\u1E6A': 'T','\u0164': 'T','\u1E6C': 'T','\u021A': 'T','\u0162': 'T','\u1E70': 'T','\u1E6E': 'T',
'\u0166': 'T','\u01AC': 'T','\u01AE': 'T','\u023E': 'T','\uA786': 'T',
'\uA728': 'TZ',
'\u0055': 'U','\u24CA': 'U','\uFF35': 'U','\u00D9': 'U','\u00DA': 'U','\u00DB': 'U','\u0168': 'U','\u1E78': 'U','\u016A': 'U','\u1E7A': 'U',
'\u016C': 'U','\u00DC': 'U','\u01DB': 'U','\u01D7': 'U','\u01D5': 'U','\u01D9': 'U','\u1EE6': 'U','\u016E': 'U','\u0170': 'U','\u01D3': 'U',
'\u0214': 'U','\u0216': 'U','\u01AF': 'U','\u1EEA': 'U','\u1EE8': 'U','\u1EEE': 'U','\u1EEC': 'U','\u1EF0': 'U','\u1EE4': 'U','\u1E72': 'U',
'\u0172': 'U','\u1E76': 'U','\u1E74': 'U','\u0244': 'U',
'\u0056': 'V','\u24CB': 'V','\uFF36': 'V','\u1E7C': 'V','\u1E7E': 'V','\u01B2': 'V','\uA75E': 'V','\u0245': 'V',
'\uA760': 'VY',
'\u0057': 'W','\u24CC': 'W','\uFF37': 'W','\u1E80': 'W','\u1E82': 'W','\u0174': 'W','\u1E86': 'W','\u1E84': 'W','\u1E88': 'W','\u2C72': 'W',
'\u0058': 'X','\u24CD': 'X','\uFF38': 'X','\u1E8A': 'X','\u1E8C': 'X',
'\u0059': 'Y','\u24CE': 'Y','\uFF39': 'Y','\u1EF2': 'Y','\u00DD': 'Y','\u0176': 'Y','\u1EF8': 'Y','\u0232': 'Y','\u1E8E': 'Y','\u0178': 'Y',
'\u1EF6': 'Y','\u1EF4': 'Y','\u01B3': 'Y','\u024E': 'Y','\u1EFE': 'Y',
'\u005A': 'Z','\u24CF': 'Z','\uFF3A': 'Z','\u0179': 'Z','\u1E90': 'Z','\u017B': 'Z','\u017D': 'Z','\u1E92': 'Z','\u1E94': 'Z','\u01B5': 'Z',
'\u0224': 'Z','\u2C7F': 'Z','\u2C6B': 'Z','\uA762': 'Z',
'\u0061': 'a','\u24D0': 'a','\uFF41': 'a','\u1E9A': 'a','\u00E0': 'a','\u00E1': 'a','\u00E2': 'a','\u1EA7': 'a','\u1EA5': 'a','\u1EAB': 'a',
'\u1EA9': 'a','\u00E3': 'a','\u0101': 'a','\u0103': 'a','\u1EB1': 'a','\u1EAF': 'a','\u1EB5': 'a','\u1EB3': 'a','\u0227': 'a','\u01E1': 'a',
'\u00E4': 'a','\u01DF': 'a','\u1EA3': 'a','\u00E5': 'a','\u01FB': 'a','\u01CE': 'a','\u0201': 'a','\u0203': 'a','\u1EA1': 'a','\u1EAD': 'a',
'\u1EB7': 'a','\u1E01': 'a','\u0105': 'a','\u2C65': 'a','\u0250': 'a',
'\uA733': 'aa',
'\u00E6': 'ae','\u01FD': 'ae','\u01E3': 'ae',
'\uA735': 'ao',
'\uA737': 'au',
'\uA739': 'av','\uA73B': 'av',
'\uA73D': 'ay',
'\u0062': 'b','\u24D1': 'b','\uFF42': 'b','\u1E03': 'b','\u1E05': 'b','\u1E07': 'b','\u0180': 'b','\u0183': 'b','\u0253': 'b',
'\u0063': 'c','\u24D2': 'c','\uFF43': 'c','\u0107': 'c','\u0109': 'c','\u010B': 'c','\u010D': 'c','\u00E7': 'c','\u1E09': 'c','\u0188': 'c',
'\u023C': 'c','\uA73F': 'c','\u2184': 'c',
'\u0064': 'd','\u24D3': 'd','\uFF44': 'd','\u1E0B': 'd','\u010F': 'd','\u1E0D': 'd','\u1E11': 'd','\u1E13': 'd','\u1E0F': 'd','\u0111': 'd',
'\u018C': 'd','\u0256': 'd','\u0257': 'd','\uA77A': 'd',
'\u01F3': 'dz','\u01C6': 'dz',
'\u0065': 'e','\u24D4': 'e','\uFF45': 'e','\u00E8': 'e','\u00E9': 'e','\u00EA': 'e','\u1EC1': 'e','\u1EBF': 'e','\u1EC5': 'e','\u1EC3': 'e',
'\u1EBD': 'e','\u0113': 'e','\u1E15': 'e','\u1E17': 'e','\u0115': 'e','\u0117': 'e','\u00EB': 'e','\u1EBB': 'e','\u011B': 'e','\u0205': 'e',
'\u0207': 'e','\u1EB9': 'e','\u1EC7': 'e','\u0229': 'e','\u1E1D': 'e','\u0119': 'e','\u1E19': 'e','\u1E1B': 'e','\u0247': 'e','\u025B': 'e',
'\u01DD': 'e',
'\u0066': 'f','\u24D5': 'f','\uFF46': 'f','\u1E1F': 'f','\u0192': 'f','\uA77C': 'f',
'\u0067': 'g','\u24D6': 'g','\uFF47': 'g','\u01F5': 'g','\u011D': 'g','\u1E21': 'g','\u011F': 'g','\u0121': 'g','\u01E7': 'g','\u0123': 'g',
'\u01E5': 'g','\u0260': 'g','\uA7A1': 'g','\u1D79': 'g','\uA77F': 'g',
'\u0068': 'h','\u24D7': 'h','\uFF48': 'h','\u0125': 'h','\u1E23': 'h','\u1E27': 'h','\u021F': 'h','\u1E25': 'h','\u1E29': 'h','\u1E2B': 'h',
'\u1E96': 'h','\u0127': 'h','\u2C68': 'h','\u2C76': 'h','\u0265': 'h',
'\u0195': 'hv',
'\u0069': 'i','\u24D8': 'i','\uFF49': 'i','\u00EC': 'i','\u00ED': 'i','\u00EE': 'i','\u0129': 'i','\u012B': 'i','\u012D': 'i','\u00EF': 'i',
'\u1E2F': 'i','\u1EC9': 'i','\u01D0': 'i','\u0209': 'i','\u020B': 'i','\u1ECB': 'i','\u012F': 'i','\u1E2D': 'i','\u0268': 'i','\u0131': 'i',
'\u006A': 'j','\u24D9': 'j','\uFF4A': 'j','\u0135': 'j','\u01F0': 'j','\u0249': 'j',
'\u006B': 'k','\u24DA': 'k','\uFF4B': 'k','\u1E31': 'k','\u01E9': 'k','\u1E33': 'k','\u0137': 'k','\u1E35': 'k','\u0199': 'k','\u2C6A': 'k',
'\uA741': 'k','\uA743': 'k','\uA745': 'k','\uA7A3': 'k',
'\u006C': 'l','\u24DB': 'l','\uFF4C': 'l','\u0140': 'l','\u013A': 'l','\u013E': 'l','\u1E37': 'l','\u1E39': 'l','\u013C': 'l','\u1E3D': 'l',
'\u1E3B': 'l','\u017F': 'l','\u0142': 'l','\u019A': 'l','\u026B': 'l','\u2C61': 'l','\uA749': 'l','\uA781': 'l','\uA747': 'l',
'\u01C9': 'lj',
'\u006D': 'm','\u24DC': 'm','\uFF4D': 'm','\u1E3F': 'm','\u1E41': 'm','\u1E43': 'm','\u0271': 'm','\u026F': 'm',
'\u006E': 'n','\u24DD': 'n','\uFF4E': 'n','\u01F9': 'n','\u0144': 'n','\u00F1': 'n','\u1E45': 'n','\u0148': 'n','\u1E47': 'n','\u0146': 'n',
'\u1E4B': 'n','\u1E49': 'n','\u019E': 'n','\u0272': 'n','\u0149': 'n','\uA791': 'n','\uA7A5': 'n',
'\u01CC': 'nj',
'\u006F': 'o','\u24DE': 'o','\uFF4F': 'o','\u00F2': 'o','\u00F3': 'o','\u00F4': 'o','\u1ED3': 'o','\u1ED1': 'o','\u1ED7': 'o','\u1ED5': 'o',
'\u00F5': 'o','\u1E4D': 'o','\u022D': 'o','\u1E4F': 'o','\u014D': 'o','\u1E51': 'o','\u1E53': 'o','\u014F': 'o','\u022F': 'o','\u0231': 'o',
'\u00F6': 'o','\u022B': 'o','\u1ECF': 'o','\u0151': 'o','\u01D2': 'o','\u020D': 'o','\u020F': 'o','\u01A1': 'o','\u1EDD': 'o','\u1EDB': 'o',
'\u1EE1': 'o','\u1EDF': 'o','\u1EE3': 'o','\u1ECD': 'o','\u1ED9': 'o','\u01EB': 'o','\u01ED': 'o','\u00F8': 'o','\u01FF': 'o','\u0254': 'o',
'\uA74B': 'o','\uA74D': 'o','\u0275': 'o',
'\u01A3': 'oi',
'\u0223': 'ou',
'\uA74F': 'oo',
'\u0070': 'p','\u24DF': 'p','\uFF50': 'p','\u1E55': 'p','\u1E57': 'p','\u01A5': 'p','\u1D7D': 'p','\uA751': 'p','\uA753': 'p','\uA755': 'p',
'\u0071': 'q','\u24E0': 'q','\uFF51': 'q','\u024B': 'q','\uA757': 'q','\uA759': 'q',
'\u0072': 'r','\u24E1': 'r','\uFF52': 'r','\u0155': 'r','\u1E59': 'r','\u0159': 'r','\u0211': 'r','\u0213': 'r','\u1E5B': 'r','\u1E5D': 'r',
'\u0157': 'r','\u1E5F': 'r','\u024D': 'r','\u027D': 'r','\uA75B': 'r','\uA7A7': 'r','\uA783': 'r',
'\u0073': 's','\u24E2': 's','\uFF53': 's','\u00DF': 's','\u015B': 's','\u1E65': 's','\u015D': 's','\u1E61': 's','\u0161': 's','\u1E67': 's',
'\u1E63': 's','\u1E69': 's','\u0219': 's','\u015F': 's','\u023F': 's','\uA7A9': 's','\uA785': 's','\u1E9B': 's',
'\u0074': 't','\u24E3': 't','\uFF54': 't','\u1E6B': 't','\u1E97': 't','\u0165': 't','\u1E6D': 't','\u021B': 't','\u0163': 't','\u1E71': 't',
'\u1E6F': 't','\u0167': 't','\u01AD': 't','\u0288': 't','\u2C66': 't','\uA787': 't',
'\uA729': 'tz',
'\u0075': 'u','\u24E4': 'u','\uFF55': 'u','\u00F9': 'u','\u00FA': 'u','\u00FB': 'u','\u0169': 'u','\u1E79': 'u','\u016B': 'u','\u1E7B': 'u',
'\u016D': 'u','\u00FC': 'u','\u01DC': 'u','\u01D8': 'u','\u01D6': 'u','\u01DA': 'u','\u1EE7': 'u','\u016F': 'u','\u0171': 'u','\u01D4': 'u',
'\u0215': 'u','\u0217': 'u','\u01B0': 'u','\u1EEB': 'u','\u1EE9': 'u','\u1EEF': 'u','\u1EED': 'u','\u1EF1': 'u','\u1EE5': 'u','\u1E73': 'u',
'\u0173': 'u','\u1E77': 'u','\u1E75': 'u','\u0289': 'u',
'\u0076': 'v','\u24E5': 'v','\uFF56': 'v','\u1E7D': 'v','\u1E7F': 'v','\u028B': 'v','\uA75F': 'v','\u028C': 'v',
'\uA761': 'vy',
'\u0077': 'w','\u24E6': 'w','\uFF57': 'w','\u1E81': 'w','\u1E83': 'w','\u0175': 'w','\u1E87': 'w','\u1E85': 'w','\u1E98': 'w','\u1E89': 'w',
'\u2C73': 'w',
'\u0078': 'x','\u24E7': 'x','\uFF58': 'x','\u1E8B': 'x','\u1E8D': 'x',
'\u0079': 'y','\u24E8': 'y','\uFF59': 'y','\u1EF3': 'y','\u00FD': 'y','\u0177': 'y','\u1EF9': 'y','\u0233': 'y','\u1E8F': 'y','\u00FF': 'y',
'\u1EF7': 'y','\u1E99': 'y','\u1EF5': 'y','\u01B4': 'y','\u024F': 'y','\u1EFF': 'y',
'\u007A': 'z','\u24E9': 'z','\uFF5A': 'z','\u017A': 'z','\u1E91': 'z','\u017C': 'z','\u017E': 'z','\u1E93': 'z','\u1E95': 'z','\u01B6': 'z',
'\u0225': 'z','\u0240': 'z','\u2C6C': 'z','\uA763': 'z',
};
function removeDicretics(str) {
return str.replace(/[^\u0000-\u007E]/g, function (weirdo) {
return diacriticsMap[weirdo] || weirdo;
});
}
// Call
var cleanStr = removeDicretics("bułłśhìt"); // bullshit
normalize-diacritics очень полезен
const { normalize } = require('normalize-diacritics');
/** Assuming top-level await is enabled... */
await normalize('söme stüff with áccènts'); // 'some stuff with accents'
Вот очень простое решение без слишком большого кода, использующее очень простую карту диакритических знаков, которая включает некоторые или все сопоставления с эквивалентами ascii, содержащими более одного символа, т.е. Æ => AE, ffi => ffi и т. Д.... Также включены некоторые очень основные функциональные тесты
var diacriticsMap = {
'\u00C0': 'A', // À => A
'\u00C1': 'A', // Á => A
'\u00C2': 'A', // Â => A
'\u00C3': 'A', // Ã => A
'\u00C4': 'A', // Ä => A
'\u00C5': 'A', // Å => A
'\u00C6': 'AE', // Æ => AE
'\u00C7': 'C', // Ç => C
'\u00C8': 'E', // È => E
'\u00C9': 'E', // É => E
'\u00CA': 'E', // Ê => E
'\u00CB': 'E', // Ë => E
'\u00CC': 'I', // Ì => I
'\u00CD': 'I', // Í => I
'\u00CE': 'I', // Î => I
'\u00CF': 'I', // Ï => I
'\u0132': 'IJ', // IJ => IJ
'\u00D0': 'D', // Ð => D
'\u00D1': 'N', // Ñ => N
'\u00D2': 'O', // Ò => O
'\u00D3': 'O', // Ó => O
'\u00D4': 'O', // Ô => O
'\u00D5': 'O', // Õ => O
'\u00D6': 'O', // Ö => O
'\u00D8': 'O', // Ø => O
'\u0152': 'OE', // Œ => OE
'\u00DE': 'TH', // Þ => TH
'\u00D9': 'U', // Ù => U
'\u00DA': 'U', // Ú => U
'\u00DB': 'U', // Û => U
'\u00DC': 'U', // Ü => U
'\u00DD': 'Y', // Ý => Y
'\u0178': 'Y', // Ÿ => Y
'\u00E0': 'a', // à => a
'\u00E1': 'a', // á => a
'\u00E2': 'a', // â => a
'\u00E3': 'a', // ã => a
'\u00E4': 'a', // ä => a
'\u00E5': 'a', // å => a
'\u00E6': 'ae', // æ => ae
'\u00E7': 'c', // ç => c
'\u00E8': 'e', // è => e
'\u00E9': 'e', // é => e
'\u00EA': 'e', // ê => e
'\u00EB': 'e', // ë => e
'\u00EC': 'i', // ì => i
'\u00ED': 'i', // í => i
'\u00EE': 'i', // î => i
'\u00EF': 'i', // ï => i
'\u0133': 'ij', // ij => ij
'\u00F0': 'd', // ð => d
'\u00F1': 'n', // ñ => n
'\u00F2': 'o', // ò => o
'\u00F3': 'o', // ó => o
'\u00F4': 'o', // ô => o
'\u00F5': 'o', // õ => o
'\u00F6': 'o', // ö => o
'\u00F8': 'o', // ø => o
'\u0153': 'oe', // œ => oe
'\u00DF': 'ss', // ß => ss
'\u00FE': 'th', // þ => th
'\u00F9': 'u', // ù => u
'\u00FA': 'u', // ú => u
'\u00FB': 'u', // û => u
'\u00FC': 'u', // ü => u
'\u00FD': 'y', // ý => y
'\u00FF': 'y', // ÿ => y
'\uFB00': 'ff', // ff => ff
'\uFB01': 'fi', // fi => fi
'\uFB02': 'fl', // fl => fl
'\uFB03': 'ffi', // ffi => ffi
'\uFB04': 'ffl', // ffl => ffl
'\uFB05': 'ft', // ſt => ft
'\uFB06': 'st' // st => st
};
function replaceDiacritics(str) {
var returnStr = '';
if(str) {
for (var i = 0; i < str.length; i++) {
if (diacriticsMap[str[i]]) {
returnStr += diacriticsMap[str[i]];
} else {
returnStr += str[i];
}
}
}
return returnStr;
}
function testStripDiacritics(input, expected) {
var coChar = replaceDiacritics(input);
console.log('The character passed in was ' + input);
console.log('The character that came out was ' + coChar);
console.log('The character expected was' + expected);
}
testStripDiacritics('À','A');
testStripDiacritics('A','A');
testStripDiacritics('Æ','AE');
testStripDiacritics('AE','AE');
testStripDiacritics('ÇhÀrlËšYŸZŽ','ChArlEsYYZZ');
Спасибо всем
Я использую эту версию и говорю почему (потому что я пропускаю эти объяснения в начале, поэтому я пытаюсь помочь следующему читателю, если он такой же скучный, как я...)
Замечание: я хотел эффективное решение, поэтому:
- только одна компиляция регулярных выражений (при необходимости)
- только одна строка сканирования для каждой строки
- эффективный способ найти переведенные символы и т.д...
Моя версия:
(в нем нет нового технического трюка, только некоторые из них + объяснения почему)
makeSortString = (function() {
var translate_re = /[¹²³áàâãäåaaaÀÁÂÃÄÅAAAÆccç©CCÇÐÐèéê?ëeeeeeÈÊË?EEEEE€gGiìíîïìiiiÌÍÎÏ?ÌIIIlLnnñNNÑòóôõöoooøÒÓÔÕÖOOOØŒr®Ršs?ߊS?ùúûüuuuuÙÚÛÜUUUUýÿÝŸžzzŽZZ]/g;
var translate = {
"¹":"1","²":"2","³":"3","á":"a","à":"a","â":"a","ã":"a","ä":"a","å":"a","a":"a","a":"a","a":"a","À":"a","Á":"a","Â":"a","Ã":"a","Ä":"a","Å":"a","A":"a","A":"a",
"A":"a","Æ":"a","c":"c","c":"c","ç":"c","©":"c","C":"c","C":"c","Ç":"c","Ð":"d","Ð":"d","è":"e","é":"e","ê":"e","?":"e","ë":"e","e":"e","e":"e","e":"e","e":"e",
"e":"e","È":"e","Ê":"e","Ë":"e","?":"e","E":"e","E":"e","E":"e","E":"e","E":"e","€":"e","g":"g","G":"g","i":"i","ì":"i","í":"i","î":"i","ï":"i","ì":"i","i":"i",
"i":"i","i":"i","Ì":"i","Í":"i","Î":"i","Ï":"i","?":"i","Ì":"i","I":"i","I":"i","I":"i","l":"l","L":"l","n":"n","n":"n","ñ":"n","N":"n","N":"n","Ñ":"n","ò":"o",
"ó":"o","ô":"o","õ":"o","ö":"o","o":"o","o":"o","o":"o","ø":"o","Ò":"o","Ó":"o","Ô":"o","Õ":"o","Ö":"o","O":"o","O":"o","O":"o","Ø":"o","Œ":"o","r":"r","®":"r",
"R":"r","š":"s","s":"s","?":"s","ß":"s","Š":"s","S":"s","?":"s","ù":"u","ú":"u","û":"u","ü":"u","u":"u","u":"u","u":"u","u":"u","Ù":"u","Ú":"u","Û":"u","Ü":"u",
"U":"u","U":"u","U":"u","U":"u","ý":"y","ÿ":"y","Ý":"y","Ÿ":"y","ž":"z","z":"z","z":"z","Ž":"z","Z":"z","Z":"z"
};
return function(s) {
return(s.replace(translate_re, function(match){return translate[match];}) );
}
})();
и я использую это так:
var without_accents = makeSortString("wïthêüÄTrèsBïgüeAk100t");
// I let you guess the result,
// no I was kidding you : I give you the result : witheuatresbigueak100t
Комментарии:
- Инструкция внутри нее выполняется один раз (после makeSortString!= Undefined)
- function () {...} сохраняется один раз в makeSortString, поэтому "большие" объекты translate_re и translate сохраняются один раз
- Когда вы вызываете makeSortString("что-то"), он вызывает непосредственно внутреннюю функцию, которая вызывает только s.replace(...): это эффективно
- s.replace использует regexp (специальный синтаксис var translate_re= .... фактически эквивалентен var translate_re = new RegExp("[¹....Z]","g"); но компиляция регулярного выражения выполняется один раз для всех, и проверка строки s выполняется один раз для вызова функции (не для каждого символа, как это было бы в цикле)
- Для каждого найденного символа s.replace вызывает функцию (match), где параметр match содержит найденный символ и вызывает соответствующий переведенный символ (translate[match]).
- Translate[match], вероятно, также эффективен, так как объект перевода javascript, вероятно, реализован javascript с хеш-таблицей или чем-то аналогичным и позволяет программе находить переведенный символ почти напрямую, а не, например, через цикл в массиве всех символов, чтобы найти правильный (который был бы ужасно неэффективным).
Я предоставил этот ответ на аналогичный вопрос. Он основан на быстрой замене таблицы для выбранных символов (латинский 1+2), один на один (невозможно изменить немецкий ü на "ue"), но хорошо работает для базовой "нормализации" до 7-битного ASCII.
TAB_00C0 = "AAAAAAACEEEEIIII" +
"DNOOOOO*OUUUUYIs" +
"aaaaaaaceeeeiiii" +
"?nooooo/ouuuuy?y" +
"AaAaAaCcCcCcCcDd" +
"DdEeEeEeEeEeGgGg" +
"GgGgHhHhIiIiIiIi" +
"IiJjJjKkkLlLlLlL" +
"lLlNnNnNnnNnOoOo" +
"OoOoRrRrRrSsSsSs" +
"SsTtTtTtUuUuUuUu" +
"UuUuWwYyYZzZzZzF";
function stripDiacritics(source) {
var result = source.split('');
for (var i = 0; i < result.length; i++) {
var c = source.charCodeAt(i);
if (c >= 0x00c0 && c <= 0x017f) {
result[i] = String.fromCharCode(TAB_00C0.charCodeAt(c - 0x00c0));
} else if (c > 127) {
result[i] = '?';
}
}
return result.join('');
}
stripDiacritics("Šupa, čo? ľšťčžýæøåℌð")
Любые другие символы конвертируются в?, То есть результат определенно является 7-битным ASCII. Нет регулярных выражений, нет магии, простая работа с массивами символов.
Я использовал метод latinise() для string.js, который позволяет вам делать это так:
var output = S(input).latinise().toString();
Я нашел все это немного неуклюжим, и я не слишком разбираюсь в регулярных выражениях, так что вот более простая версия. Было бы довольно легко перевести его на ваш любимый серверный язык, предполагая, что строка уже в Unicode:
// String containing replacement characters for stripping accents
var stripstring =
'AAAAAAACEEEEIIII'+
'DNOOOOO.OUUUUY..'+
'aaaaaaaceeeeiiii'+
'dnooooo.ouuuuy.y'+
'AaAaAaCcCcCcCcDd'+
'DdEeEeEeEeEeGgGg'+
'GgGgHhHhIiIiIiIi'+
'IiIiJjKkkLlLlLlL'+
'lJlNnNnNnnNnOoOo'+
'OoOoRrRrRrSsSsSs'+
'SsTtTtTtUuUuUuUu'+
'UuUuWwYyYZzZzZz.';
function stripaccents(str){
var answer='';
for(var i=0;i<str.length;i++){
var ch=str[i];
var chindex=ch.charCodeAt(0)-192; // Index of character code in the strip string
if(chindex>=0 && chindex<stripstring.length){
// Character is within our table, so we can strip the accent...
var outch=stripstring.charAt(chindex);
// ...unless it was shown as a '.'
if(outch!='.')ch=outch;
}
answer+=ch;
}
return answer;
}
Передайте пользовательскую функцию Array.sort()
метод, и в этой пользовательской функции использовать String.localeCompare()
function myCompareFunction(a, b) {
return a.localeCompare(b);
}
var values = ["pêches", "épinards", "tomates", "fraises"];
// WRONG: ["fraises", "pêches", "tomates", "épinards"]
values.sort();
// **GOOD**: ["épinards", "fraises", "pêches", "tomates"]
values.sort(myCompareFunction);
Предполагая, что вы знаете, что делаете, я подозреваю, что IE6 неправильно интерпретирует кодировку файла и, следовательно, не распознает символы не-ASCII в файле:
- Убедитесь, что файл сохранен как UTF-8 (скажем)
- Используйте Fiddler или другой инструмент для проверки того, что веб-сервер отправляет правильный HTTP-заголовок Content-Encoding.
(Это "пахнет" неправильно, хотя, я бы посмотрел на выполнение сортировки, скажем, на сервере, используя что-то, что с учетом языка... но в любом случае...)
Если вы открыты для использования библиотеки, вы можете использовать String.js latinize: http://stringjs.com/
Я знаю, что это "обман" для этого на стороне сервера, но на прошлой неделе у меня была похожая задача в Javascript, и я представил простой Java-сервлет и удалил акценты в Java. Это было действительно быстро:)
Пакет NPM remove- accents представляет собой довольно простой способ решения таких проблем:
var input = 'ÀÁÂÃÄÅ';
var output = removeAccents(input);
console.log(output); // AAAAAA
Удаление макронов из языка те рео маори Для всех, кто пришел сюда, чтобы удалить макроны из языка те рео маори. Примечание. Stackblitz удалил одинарные кавычки из ключей карты, когда я форматировал код — они должны быть уверены, что все в порядке. Подмножество этого кода, спасибо
Это сделало это для меня. JavaScript, скрипты Google Apps, GAS
function normalizetext(text) {
var weird = 'öüóőúéáàűíÖÜÓŐÚÉÁÀŰÍçÇ!@£$%^&*()_+?/*."';
var normalized = 'ouooueaauiOUOOUEAAUIcC ';
var idoff = -1,new_text = '';
var lentext = text.toString().length -1
for (i = 0; i <= lentext; i++) {
idoff = weird.search(text.charAt(i));
if (idoff == -1) {
new_text = new_text + text.charAt(i);
} else {
new_text = new_text + normalized.charAt(idoff);
}
}
return new_text;
}
Если вам также интересно преобразовать греческие символы в латинские, я включил греческий alhpabet вместе с некоторыми диграфами; он также может использоваться для сочетания орфографического и фонетического греческого языка.
Кроме того, эта версия принимает дополнительные параметры с:
override
Object {replace: string -> search: regex,...}
Он отменяет замены по умолчаниюignore
Массив <строка | регулярное выражение> | строка
Игнорирует элементы списка, которые могут быть строками или регулярными выражениями
const defaultDiacriticsRemovalMap = {
'1' : /[\u00B9]/g,
'2' : /[\u00B2]/g,
'3' : /[\u00B3]/g,
'AU': /\u0391\u03A5|\u0386\u03A5/g,
'EU': /\u0395\u03A5|\u0388\u03A5/g,
'OU': /\u039F\u03A5|\u039F\u038E/g,
'au': /\u03B1\u03C5|\u03AC\u03C5/g,
'eu': /\u03B5\u03C5|\u03AD\u03C5/g,
'ou': /\u03BF\u03C5|\u03BF\u03CD/g,
'Au': /\u0391\u03C5|\u0386\u03C5/g,
'Eu': /\u0395\u03C5|\u0388\u03C5/g,
'Ou': /\u039F\u03C5|\u039F\u03CD/g,
'A' : /[\u0391\u0386\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g,
'AA': /[\uA732]/g,
'AE': /[\u00C6\u01FC\u01E2]/g,
'AF': /[\u00C6\u01FC\u01E2]/g,
'AO': /[\uA734]/g,
'AU': /[\uA736]/g,
'AV': /[\uA738\uA73A]/g,
'AY': /[\uA73C]/g,
'B' : /[\u0392\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181]/g,
'C' : /[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E]/g,
'D' : /[\u00D0\u0394\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779]/g,
'DZ': /[\u01F1\u01C4]/g,
'Dz': /[\u01F2\u01C5]/g,
'E' : /[\u0395\u0388\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E]/g,
'F' : /[\u03A6\u0046\u24BB\uFF26\u1E1E\u0191\uA77B]/g,
'G' : /[\u0393\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E]/g,
'H' : /[\u0397\u0389\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D]/g,
'I' : /[\u0399\u038A\u03AA\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197]/g,
'J' : /[\u004A\u24BF\uFF2A\u0134\u0248]/g,
'K' : /[\u039A\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2]/g,
'KS': /[\u039E]/g,
'L' : /[\u039B\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780]/g,
'LJ': /[\u01C7]/g,
'Lj': /[\u01C8]/g,
'M' : /[\u039C\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C]/g,
'N' : /[\u039D\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4]/g,
'NJ': /[\u01CA]/g,
'Nj': /[\u01CB]/g,
'O' : /[\u038C\u039F\u03A9\u038F\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g,
'OE': /[\u0152]/g,
'OI': /[\u01A2]/g,
'OO': /[\uA74E]/g,
'OU': /[\u0222]/g,
'P' : /[\u03A0\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754]/g,
'PS': /[\u03A8]/g,
'Q' : /[\u0051\u24C6\uFF31\uA756\uA758\u024A]/g,
'R' : /[\u03A1\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782]/g,
'S' : /[\u03A3\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784]/g,
'T' : /[\u03A4\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786]/g,
'TZ': /[\uA728]/g,
'TH': /[\u0398]/g,
'U' : /[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244]/g,
'V' : /[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245]/g,
'VY': /[\uA760]/g,
'W' : /[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72]/g,
'X' : /[\u03A7\u0058\u24CD\uFF38\u1E8A\u1E8C]/g,
'Y' : /[\u03A5\u038E\u03AB\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE]/g,
'Z' : /[\u0396\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762]/g,
'a' : /[\u03B1\u03AC\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250]/g,
'aa': /[\uA733]/g,
'ae': /[\u00E6\u01FD\u01E3]/g,
'ao': /[\uA735]/g,
'au': /[\uA737]/g,
'av': /[\uA739\uA73B]/g,
'ay': /[\uA73D]/g,
'b' : /[\u03B2\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253]/g,
'c' : /[\u00A9\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184]/g,
'd' : /[\u03B4\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A]/g,
'dz': /[\u01F3\u01C6]/g,
'e' : /[\u20AC\u00F0\u03B5\u03AD\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD]/g,
'f' : /[\u03C6\u0066\u24D5\uFF46\u1E1F\u0192\uA77C]/g,
'g' : /[\u03B3\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F]/g,
'h' : /[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265]/g,
'hv': /[\u0195]/g,
'i' : /[\u03B9\u03AF\u03CA\u0390\u03B7\u03AE\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131]/g,
'j' : /[\u006A\u24D9\uFF4A\u0135\u01F0\u0249]/g,
'k' : /[\u03BA\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3]/g,
'ks': /[\u03BE]/g,
'l' : /[\u03BB\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747]/g,
'lj': /[\u01C9]/g,
'm' : /[\u03BC\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F]/g,
'n' : /[\u03BD\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g,
'nj': /[\u01CC]/g,
'o' : /[\u03C9\u03CE\u03CC\u03BF\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g,
'oe': /[\u0153]/g,
'oi': /[\u01A3]/g,
'ou': /[\u0223]/g,
'oo': /[\uA74F]/g,
'p' : /[\u03C0\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755]/g,
'ps': /[\u03C8]/g,
'q' : /[\u0071\u24E0\uFF51\u024B\uA757\uA759]/g,
'r' : /[\u00AE\u03C1\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783]/g,
's' : /[\u03C2\u03C3\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B]/g,
't' : /[\u03C4\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787]/g,
'tz': /[\uA729]/g,
'th': /[\u03B8]/g,
'u' : /[\u00B5\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289]/g,
'v' : /[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C]/g,
'vy': /[\uA761]/g,
'w' : /[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73]/g,
'x' : /[\u03C7\u0078\u24E7\uFF58\u1E8B\u1E8D]/g,
'y' : /[\u03C5\u03CD\u03CB\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF]/g,
'z' : /[\u03B6\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g,
};
function removeAccent(str, {
override,
ignore
} = {}) {
// Function to convert strings to unicode char code escape strings
const toHex = s => s.split('').map(c => `\\u${c.charCodeAt().toString(16).padStart(4, "0")}`).join('');
const useToIgnore = ignore && typeof(ignore) === 'string' ? ignore.split('') : ignore;
// For every ignore item, replace it with {\uXXXX} so we'll get it back when we finish
let useStr = useToIgnore ?
useToIgnore.reduce((acc, cur) =>
acc.replace(cur, m => `{${toHex(m)}}`), str) :
str;
// Loop through overrides first
if (override) {
for (const base in override) {
useStr = useStr.replace(override[base], base);
}
}
// Loop through the defaults
for (const base in defaultDiacriticsRemovalMap) {
useStr = useStr.replace(defaultDiacriticsRemovalMap[base], base);
}
// Restore ignored mappings
return useToIgnore ?
useStr.replace(
/\{?(\\u([0-9a-f]*))\}?/g,
(m, g1, hex) => String.fromCharCode(parseInt(hex, 16))
) :
useStr;
}
// Helper function to convert and print the subjects
RegExp.prototype.toJSON = RegExp.prototype.toString;
function print([str, options]) {
const subjects = document.getElementById('subjects');
const result = document.createElement('div');
result.className = 'result';
if (options) {
const opts = document.createElement('div');
opts.textContent = JSON.stringify(options) + "\n";
result.appendChild(opts);
}
const texts = document.createTextNode(`› ${str}\n‹ ${removeAccent(str, options)}\n`);
result.appendChild(texts);
subjects.appendChild(result);
}
// The subjects with some options
const subjects = [
["Mon café est plein de caféïne"],
["Τι αξία έχει ο γάμος στην αγάπη μας μπροστά, είναι κι άλλοι παντρεμένοι κι όμως ζούνε χωριστά. - Ζαμπέτας"],
["ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜ ùúûüÑñŠšŸÿýŽž"],
["¹²³áàâãäåaaaÀÁÂÃÄÅAAAÆccç©CCÇÐÐèéêëeeeeeÈÊËEEEEE€g GiìíîïìiiiÌÍÎÏÌIIIlLnnñNNÑòóôõöoooøÒÓÔÕÖOOOØŒr®RšsߊS ùúûüuuuuÙÚÛÜUUUUýÿÝŸžzzŽZZ", {
ignore: ['€', 'Ð', 'Æ']
}],
[
"Ο λούτσος του Αυγούστου", {
ignore: [/\u03BF\u03C5|\u03BF\u03CD/g]
}],
["Η χθεσινή της Κυριακής"],
["Η χθεσινή της Κυριακής", {
override: { 'H': /[ΗΉ]/g, 'h': /[ηή]/g }
}]
];
// Print all subjects
subjects.forEach(print);
.result {
font-family: monospace;
white-space: pre-wrap;
margin-top: 10px;
padding-bottom: 10px;
border-bottom: 1px dashed lightgrey;
}
.result:last-child {
border:0;
}
.result > div {
color: blue;
margin-bottom: 4px;
}
<div id="subjects"></div>
Мой дубль за удаление всех акцентов и специальных символов. Я смешал элегантный метод удаления акцента Льюиса Даймонда с быстрой заменой остальных персонажей вики сообщества. Результатом является строка Ascii.
function convertToAscii(string) {
const unicodeToAsciiMap = {'Ⱥ':'A','Æ':'AE','Ꜻ':'AV','Ɓ':'B','Ƀ':'B','Ƃ':'B','Ƈ':'C','Ȼ':'C','Ɗ':'D','Dz':'D','Dž':'D','Đ':'D','Ƌ':'D','DŽ':'DZ','Ɇ':'E','Ꝫ':'ET','Ƒ':'F','Ɠ':'G','Ǥ':'G','Ⱨ':'H','Ħ':'H','Ɨ':'I','Ꝺ':'D','Ꝼ':'F','Ᵹ':'G','Ꞃ':'R','Ꞅ':'S','Ꞇ':'T','Ꝭ':'IS','Ɉ':'J','Ⱪ':'K','Ꝃ':'K','Ƙ':'K','Ꝁ':'K','Ꝅ':'K','Ƚ':'L','Ⱡ':'L','Ꝉ':'L','Ŀ':'L','Ɫ':'L','Lj':'L','Ł':'L','Ɱ':'M','Ɲ':'N','Ƞ':'N','Nj':'N','Ꝋ':'O','Ꝍ':'O','Ɵ':'O','Ø':'O','Ƣ':'OI','Ɛ':'E','Ɔ':'O','Ȣ':'OU','Ꝓ':'P','Ƥ':'P','Ꝕ':'P','Ᵽ':'P','Ꝑ':'P','Ꝙ':'Q','Ꝗ':'Q','Ɍ':'R','Ɽ':'R','Ꜿ':'C','Ǝ':'E','Ⱦ':'T','Ƭ':'T','Ʈ':'T','Ŧ':'T','Ɐ':'A','Ꞁ':'L','Ɯ':'M','Ʌ':'V','Ꝟ':'V','Ʋ':'V','Ⱳ':'W','Ƴ':'Y','Ỿ':'Y','Ɏ':'Y','Ⱬ':'Z','Ȥ':'Z','Ƶ':'Z','Œ':'OE','ᴀ':'A','ᴁ':'AE','ʙ':'B','ᴃ':'B','ᴄ':'C','ᴅ':'D','ᴇ':'E','ꜰ':'F','ɢ':'G','ʛ':'G','ʜ':'H','ɪ':'I','ʁ':'R','ᴊ':'J','ᴋ':'K','ʟ':'L','ᴌ':'L','ᴍ':'M','ɴ':'N','ᴏ':'O','ɶ':'OE','ᴐ':'O','ᴕ':'OU','ᴘ':'P','ʀ':'R','ᴎ':'N','ᴙ':'R','ꜱ':'S','ᴛ':'T','ⱻ':'E','ᴚ':'R','ᴜ':'U','ᴠ':'V','ᴡ':'W','ʏ':'Y','ᴢ':'Z','ᶏ':'a','ẚ':'a','ⱥ':'a','æ':'ae','ꜻ':'av','ɓ':'b','ᵬ':'b','ᶀ':'b','ƀ':'b','ƃ':'b','ɵ':'o','ɕ':'c','ƈ':'c','ȼ':'c','ȡ':'d','ɗ':'d','ᶑ':'d','ᵭ':'d','ᶁ':'d','đ':'d','ɖ':'d','ƌ':'d','ı':'i','ȷ':'j','ɟ':'j','ʄ':'j','dž':'dz','ⱸ':'e','ᶒ':'e','ɇ':'e','ꝫ':'et','ƒ':'f','ᵮ':'f','ᶂ':'f','ɠ':'g','ᶃ':'g','ǥ':'g','ⱨ':'h','ɦ':'h','ħ':'h','ƕ':'hv','ᶖ':'i','ɨ':'i','ꝺ':'d','ꝼ':'f','ᵹ':'g','ꞃ':'r','ꞅ':'s','ꞇ':'t','ꝭ':'is','ʝ':'j','ɉ':'j','ⱪ':'k','ꝃ':'k','ƙ':'k','ᶄ':'k','ꝁ':'k','ꝅ':'k','ƚ':'l','ɬ':'l','ȴ':'l','ⱡ':'l','ꝉ':'l','ŀ':'l','ɫ':'l','ᶅ':'l','ɭ':'l','ł':'l','ſ':'s','ẜ':'s','ẝ':'s','ɱ':'m','ᵯ':'m','ᶆ':'m','ȵ':'n','ɲ':'n','ƞ':'n','ᵰ':'n','ᶇ':'n','ɳ':'n','ꝋ':'o','ꝍ':'o','ⱺ':'o','ø':'o','ƣ':'oi','ɛ':'e','ᶓ':'e','ɔ':'o','ᶗ':'o','ȣ':'ou','ꝓ':'p','ƥ':'p','ᵱ':'p','ᶈ':'p','ꝕ':'p','ᵽ':'p','ꝑ':'p','ꝙ':'q','ʠ':'q','ɋ':'q','ꝗ':'q','ɾ':'r','ᵳ':'r','ɼ':'r','ᵲ':'r','ᶉ':'r','ɍ':'r','ɽ':'r','ↄ':'c','ꜿ':'c','ɘ':'e','ɿ':'r','ʂ':'s','ᵴ':'s','ᶊ':'s','ȿ':'s','ɡ':'g','ᴑ':'o','ᴓ':'o','ᴝ':'u','ȶ':'t','ⱦ':'t','ƭ':'t','ᵵ':'t','ƫ':'t','ʈ':'t','ŧ':'t','ᵺ':'th','ɐ':'a','ᴂ':'ae','ǝ':'e','ᵷ':'g','ɥ':'h','ʮ':'h','ʯ':'h','ᴉ':'i','ʞ':'k','ꞁ':'l','ɯ':'m','ɰ':'m','ᴔ':'oe','ɹ':'r','ɻ':'r','ɺ':'r','ⱹ':'r','ʇ':'t','ʌ':'v','ʍ':'w','ʎ':'y','ᶙ':'u','ᵫ':'ue','ꝸ':'um','ⱴ':'v','ꝟ':'v','ʋ':'v','ᶌ':'v','ⱱ':'v','ⱳ':'w','ᶍ':'x','ƴ':'y','ỿ':'y','ɏ':'y','ʑ':'z','ⱬ':'z','ȥ':'z','ᵶ':'z','ᶎ':'z','ʐ':'z','ƶ':'z','ɀ':'z','œ':'oe','ₓ':'x'};
const stringWithoutAccents = string.normalize("NFD").replace(/[\u0300-\u036f]/g, '');
return stringWithoutAccents.replace(/[^\u0000-\u007E]/g, character => unicodeToAsciiMap[character] || '');
}