| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
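"""Language name to language code mapping.

Auto-generated from ``docs/lang_id_name_map.tsv``. Provides ``LANG_NAME_TO_ID``
(for resolving lowercase language names to codes), ``LANG_NAMES`` and
``LANG_IDS`` (the sets of supported names and codes), and
``lang_display_name()`` (for formatting a name for display). Codes are ISO 639
identifiers: a two-letter code for many languages (e.g. ``en``) and a
three-letter ISO 639-3 code for the rest (e.g. ``yue``). Used by
``OmniVoice.generate()`` to resolve user-provided language names.
"""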
|
|
| |
| |
|
|
# Lowercase language name -> language code (two- or three-letter).
# The module docstring states this table is auto-generated from
# ``docs/lang_id_name_map.tsv``; prefer regenerating it over hand-editing.
# Lookups are exact-match, so callers must pass the name exactly as spelled
# here (lowercase, including accents, hyphens and parenthesised qualifiers).
LANG_NAME_TO_ID = {
    "abadi": "kbt",
    "abkhazian": "ab",
    "abron": "abr",
    "abua": "abn",
    "adamawa fulfulde": "fub",
    "adyghe": "ady",
    "afade": "aal",
    "afrikaans": "af",
    "agwagwune": "yay",
    "aja (benin)": "ajg",
    "akebu": "keu",
    "alago": "ala",
    "albanian": "sq",
    "algerian arabic": "arq",
    "algerian saharan arabic": "aao",
    "ambo-pasco quechua": "qva",
    "ambonese malay": "abs",
    "amdo tibetan": "adx",
    "amharic": "am",
    "anaang": "anw",
    "angika": "anp",
    "antankarana malagasy": "xmv",
    "aragonese": "an",
    "arbëreshë albanian": "aae",
    "arequipa-la unión quechua": "qxu",
    "armenian": "hy",
    "ashe": "ahs",
    "ashéninka perené": "prq",
    "askopan": "eiv",
    "assamese": "as",
    "asturian": "ast",
    "atayal": "tay",
    "awak": "awo",
    "ayacucho quechua": "quy",
    "azerbaijani": "az",
    "baatonum": "bba",
    "bacama": "bcy",
    "bade": "bde",
    "bafia": "ksf",
    "bafut": "bfd",
    "bagirmi fulfulde": "fui",
    "bago-kusuntu": "bqg",
    "baharna arabic": "abv",
    "bakoko": "bkh",
    "balanta-ganja": "bjt",
    "balti": "bft",
    "bamenyam": "bce",
    "bamun": "bax",
    "bangwinji": "bsj",
    "banjar": "bjn",
    "bankon": "abb",
    "baoulé": "bci",
    "bara malagasy": "bhr",
    "barok": "bjk",
    "basa (cameroon)": "bas",
    "basa (nigeria)": "bzw",
    "bashkir": "ba",
    "basque": "eu",
    "batak mandailing": "btm",
    "batanga": "bnm",
    "bateri": "btv",
    "bats": "bbl",
    "bayot": "bda",
    "bebele": "beb",
    "belarusian": "be",
    "bengali": "bn",
    "betawi": "bew",
    "bhili": "bhb",
    "bhojpuri": "bho",
    "bilur": "bxf",
    "bima": "bhp",
    "bodo": "brx",
    "boghom": "bux",
    "bokyi": "bky",
    "bomu": "bmq",
    "bondei": "bou",
    "borgu fulfulde": "fue",
    "bosnian": "bs",
    "brahui": "brh",
    "braj": "bra",
    "breton": "br",
    "buduma": "bdm",
    "buginese": "bug",
    "bukharic": "bhh",
    "bulgarian": "bg",
    "bulu (cameroon)": "bum",
    "bundeli": "bns",
    "bunun": "bnn",
    "bura-pabir": "bwr",
    "burak": "bys",
    "burmese": "my",
    "burushaski": "bsk",
    "cacaloxtepec mixtec": "miu",
    "cajatambo north lima quechua": "qvl",
    "cakfem-mushere": "cky",
    "cameroon pidgin": "wes",
    "campidanese sardinian": "sro",
    "cantonese": "yue",
    "catalan": "ca",
    "cebuano": "ceb",
    "cen": "cen",
    "central kurdish": "ckb",
    "central nahuatl": "nhn",
    "central pame": "pbs",
    "central pashto": "pst",
    "central puebla nahuatl": "ncx",
    "central tarahumara": "tar",
    "central yupik": "esu",
    "central-eastern niger fulfulde": "fuq",
    "chadian arabic": "shu",
    "chichewa": "ny",
    "chichicapan zapotec": "zpv",
    "chiga": "cgg",
    "chimalapa zoque": "zoh",
    "chimborazo highland quichua": "qug",
    "chinese": "zh",
    "chiquián ancash quechua": "qxa",
    "chitwania tharu": "the",
    "chokwe": "cjk",
    "chuvash": "cv",
    "cibak": "ckl",
    "coastal konjo": "kjc",
    "copainalá zoque": "zoc",
    "cornish": "kw",
    "corongo ancash quechua": "qwa",
    "croatian": "hr",
    "cross river mbembe": "mfn",
    "cuyamecalco mixtec": "xtu",
    "czech": "cs",
    "dadiya": "dbd",
    "dagbani": "dag",
    "dameli": "dml",
    "danish": "da",
    "dargwa": "dar",
    "dazaga": "dzg",
    "deccan": "dcc",
    "degema": "deg",
    "dera (nigeria)": "kna",
    "dghwede": "dgh",
    "dhatki": "mki",
    "dhivehi": "dv",
    "dhofari arabic": "adf",
    "dijim-bwilim": "cfa",
    "dogri": "dgo",
    "domaaki": "dmk",
    "dotyali": "dty",
    "duala": "dua",
    "dutch": "nl",
    # NOTE(review): this key sorts between "dutch" and "dyula", which suggests
    # the "ũ" is stored as "u" + U+0303 (combining tilde) rather than the
    # precomposed U+0169 -- confirm, and keep the exact characters when editing.
    "dũya": "ldb",
    "dyula": "dyu",
    "eastern balochi": "bgp",
    "eastern bolivian guaraní": "gui",
    "eastern egyptian bedawi arabic": "avl",
    "eastern krahn": "kqo",
    "eastern mari": "mhr",
    "eastern yiddish": "ydd",
    "ebrié": "ebr",
    "eggon": "ego",
    "egyptian arabic": "arz",
    "ejagham": "etu",
    "eleme": "elm",
    "eloyi": "afo",
    "embu": "ebu",
    "english": "en",
    "erzya": "myv",
    "esan": "ish",
    "esperanto": "eo",
    "estonian": "et",
    "eton (cameroon)": "eto",
    "ewondo": "ewo",
    "extremaduran": "ext",
    "fang (equatorial guinea)": "fan",
    "fanti": "fat",
    "farefare": "gur",
    "fe'fe'": "fmp",
    "filipino": "fil",
    "filomena mata-coahuitlán totonac": "tlp",
    "finnish": "fi",
    "fipa": "fip",
    "french": "fr",
    "fulah": "ff",
    "galician": "gl",
    "gambian wolof": "wof",
    "ganda": "lg",
    "garhwali": "gbm",
    "gawar-bati": "gwt",
    "gawri": "gwc",
    "gbagyi": "gbr",
    "gbari": "gby",
    "geji": "gyz",
    "gen": "gej",
    "georgian": "ka",
    "german": "de",
    "geser-gorom": "ges",
    "gheg albanian": "aln",
    "ghomálá'": "bbj",
    "gidar": "gid",
    "glavda": "glw",
    "goan konkani": "gom",
    "goaria": "gig",
    "goemai": "ank",
    "gola": "gol",
    "greek": "el",
    "guarani": "gn",
    "guduf-gava": "gdf",
    "guerrero amuzgo": "amu",
    "gujarati": "gu",
    "gujari": "gju",
    "gulf arabic": "afb",
    "gurgula": "ggg",
    "gusii": "guz",
    "gusilay": "gsl",
    "gweno": "gwe",
    "güilá zapotec": "ztu",
    "hadothi": "hoj",
    "hahon": "hah",
    "haitian": "ht",
    "hakha chin": "cnh",
    "hakö": "hao",
    "halia": "hla",
    "hausa": "ha",
    "hawaiian": "haw",
    "hazaragi": "haz",
    "hebrew": "he",
    "hemba": "hem",
    "herero": "hz",
    "highland konjo": "kjk",
    "hijazi arabic": "acw",
    "hindi": "hi",
    "huarijio": "var",
    "huautla mazatec": "mau",
    "huaxcaleca nahuatl": "nhq",
    "huba": "hbb",
    "huitepec mixtec": "mxs",
    "hula": "hul",
    "hungarian": "hu",
    "hunjara-kaina ke": "hkk",
    "hwana": "hwo",
    "ibibio": "ibb",
    "icelandic": "is",
    "idakho-isukha-tiriki": "ida",
    "idoma": "idu",
    "igbo": "ig",
    "igo": "ahl",
    "ikposo": "kpo",
    "ikwere": "ikw",
    "imbabura highland quichua": "qvi",
    "indonesian": "id",
    "indus kohistani": "mvy",
    "interlingua (international auxiliary language association)": "ia",
    "inupiaq": "ik",
    "irish": "ga",
    "iron ossetic": "os",
    "isekiri": "its",
    "isoko": "iso",
    "italian": "it",
    "ito": "itw",
    "itzá": "itz",
    "ixtayutla mixtec": "vmj",
    "izon": "ijc",
    "jambi malay": "jax",
    "japanese": "ja",
    "jaqaru": "jqr",
    "jauja wanca quechua": "qxw",
    "jaunsari": "jns",
    "javanese": "jv",
    "jiba": "juo",
    "jju": "kaj",
    "judeo-moroccan arabic": "aju",
    "juxtlahuaca mixtec": "vmc",
    "kabardian": "kbd",
    "kabras": "lkb",
    "kabuverdianu": "kea",
    "kabyle": "kab",
    "kachi koli": "gjk",
    "kairak": "ckr",
    "kalabari": "ijn",
    "kalasha": "kls",
    "kalenjin": "kln",
    "kalkoti": "xka",
    "kamba": "kam",
    "kamo": "kcq",
    "kanauji": "bjj",
    "kanembu": "kbl",
    "kannada": "kn",
    "karekare": "kai",
    "kashmiri": "ks",
    "kathoriya tharu": "tkt",
    "kati": "bsh",
    "kazakh": "kk",
    "keiyo": "eyo",
    "khams tibetan": "khg",
    "khana": "ogo",
    "khetrani": "xhe",
    "khmer": "km",
    "khowar": "khw",
    "kinga": "zga",
    "kinnauri": "kfk",
    "kinyarwanda": "rw",
    "kirghiz": "ky",
    "kirya-konzəl": "fkk",
    "kochila tharu": "thq",
    "kohistani shina": "plk",
    "kohumono": "bcs",
    "kok borok": "trp",
    "kol (papua new guinea)": "kol",
    "kom (cameroon)": "bkm",
    "koma": "kmy",
    "konkani": "knn",
    "konzo": "koo",
    "korean": "ko",
    "korwa": "kfp",
    "kota (india)": "kfe",
    "koti": "eko",
    "kuanua": "ksd",
    "kuanyama": "kj",
    "kui (india)": "uki",
    "kulung (nigeria)": "bbu",
    "kuot": "kto",
    "kushi": "kuh",
    "kwambi": "kwm",
    "kwasio": "nmg",
    "lala-roba": "lla",
    "lamang": "hia",
    "lao": "lo",
    "larike-wakasihu": "alo",
    "lasi": "lss",
    "latgalian": "ltg",
    "latvian": "lv",
    "levantine arabic": "apc",
    "liana-seti": "ste",
    "liberia kpelle": "xpe",
    "liberian english": "lir",
    "libyan arabic": "ayl",
    "ligurian": "lij",
    "lijili": "mgi",
    "lingala": "ln",
    "lithuanian": "lt",
    "loarki": "lrk",
    "logooli": "rag",
    "logudorese sardinian": "src",
    "loja highland quichua": "qvj",
    "loloda": "loa",
    "longuda": "lnu",
    "loxicha zapotec": "ztp",
    "luba-lulua": "lua",
    "luo": "luo",
    "lushai": "lus",
    "luxembourgish": "lb",
    "maasina fulfulde": "ffm",
    "maba (chad)": "mde",
    "macedo-romanian": "rup",
    "macedonian": "mk",
    "mada (cameroon)": "mxu",
    "mafa": "maf",
    "maithili": "mai",
    "malay": "ms",
    "malayalam": "ml",
    "mali": "gcc",
    "malinaltepec me'phaa": "tcf",
    "maltese": "mt",
    "mandara": "tbf",
    "mandjak": "mfv",
    "manggarai": "mqy",
    "manipuri": "mni",
    "mansoanka": "msw",
    "manx": "gv",
    "maori": "mi",
    "marathi": "mr",
    "marghi central": "mrt",
    "marghi south": "mfm",
    "maria (india)": "mrr",
    "marwari (pakistan)": "mve",
    "masana": "mcn",
    "masikoro malagasy": "msh",
    "matsés": "mcf",
    "mazaltepec zapotec": "zpy",
    "mazatlán mazatec": "vmz",
    "mazatlán mixe": "mzl",
    "mbe": "mfo",
    "mbo (cameroon)": "mbo",
    "mbum": "mdd",
    "medumba": "byv",
    "mekeo": "mek",
    "meru": "mer",
    "mesopotamian arabic": "acm",
    "mewari": "mtr",
    "min nan chinese": "nan",
    "mingrelian": "xmf",
    "mitlatongo mixtec": "vmm",
    "miya": "mkf",
    "mokpwe": "bri",
    "moksha": "mdf",
    "mom jango": "ver",
    "mongolian": "mn",
    "moroccan arabic": "ary",
    "motu": "meu",
    "mpiemo": "mcx",
    "mpumpong": "mgg",
    "mundang": "mua",
    "mungaka": "mhk",
    "musey": "mse",
    "musgu": "mug",
    "musi": "mui",
    "naba": "mne",
    "najdi arabic": "ars",
    "nalik": "nal",
    "nawdm": "nmz",
    "ndonga": "ng",
    "neapolitan": "nap",
    "nepali": "npi",
    "ngamo": "nbh",
    "ngas": "anc",
    "ngiemboon": "nnh",
    "ngizim": "ngi",
    "ngomba": "jgo",
    "ngombale": "nla",
    "nigerian fulfulde": "fuv",
    "nigerian pidgin": "pcm",
    "nimadi": "noe",
    "nobiin": "fia",
    "north mesopotamian arabic": "ayp",
    "north moluccan malay": "max",
    "northern betsimisaraka malagasy": "bmm",
    "northern hindko": "hno",
    "northern kurdish": "kmr",
    "northern pame": "pmq",
    "northern pashto": "pbu",
    "northern uzbek": "uzn",
    "northwest gbaya": "gya",
    "norwegian": "no",
    "norwegian bokmål": "nb",
    "norwegian nynorsk": "nn",
    "notsi": "ncf",
    "nyankpa": "yes",
    "nyungwe": "nyu",
    "nzanyi": "nja",
    "nüpode huitoto": "hux",
    "occitan": "oc",
    "od": "odk",
    "odia": "ory",
    "odual": "odu",
    "omani arabic": "acx",
    "orizaba nahuatl": "nlv",
    "orma": "orc",
    "ormuri": "oru",
    "oromo": "om",
    "pahari-potwari": "phr",
    "paiwan": "pwn",
    "panjabi": "pa",
    "papuan malay": "pmy",
    "parkari koli": "kvx",
    "pedi": "nso",
    "pero": "pip",
    "persian": "fa",
    "petats": "pex",
    "phalura": "phl",
    "piemontese": "pms",
    "piya-kwonci": "piy",
    "plateau malagasy": "plt",
    "polish": "pl",
    "poqomam": "poc",
    "portuguese": "pt",
    "pulaar": "fuc",
    "pular": "fuf",
    "puno quechua": "qxp",
    "pushto": "ps",
    "pökoot": "pko",
    "qaqet": "byx",
    "quiotepec chinantec": "chq",
    "rana tharu": "thr",
    "rangi": "lag",
    "rapoisi": "kyx",
    "ratahan": "rth",
    "rayón zoque": "zor",
    "romanian": "ro",
    "romansh": "rm",
    "rombo": "rof",
    "rotokas": "roo",
    "rukai": "dru",
    "russian": "ru",
    "sacapulteco": "quv",
    "saidi arabic": "aec",
    "sakalava malagasy": "skg",
    "sakizaya": "szy",
    "saleman": "sau",
    "samba daka": "ccg",
    "samba leko": "ndi",
    "san felipe otlaltepec popoloca": "pow",
    "san francisco del mar huave": "hue",
    "san juan atzingo popoloca": "poe",
    "san martín itunyoso triqui": "trq",
    "san miguel el grande mixtec": "mig",
    "sansi": "ssi",
    "sanskrit": "sa",
    "santa ana de tusi pasco quechua": "qxt",
    "santa catarina albarradas zapotec": "ztn",
    "santali": "sat",
    "santiago del estero quichua": "qus",
    "saposa": "sps",
    "saraiki": "skr",
    "sardinian": "sc",
    "saya": "say",
    "sediq": "trv",
    "serbian": "sr",
    "seri": "sei",
    "shina": "scl",
    "shona": "sn",
    "siar-lak": "sjr",
    "sibe": "nco",
    "sicilian": "scn",
    "sihuas ancash quechua": "qws",
    "sikkimese": "sip",
    "sinaugoro": "snc",
    "sindhi": "sd",
    "sindhi bhil": "sbn",
    "sinhala": "si",
    "sinicahua mixtec": "xti",
    "sipacapense": "qum",
    "siwai": "siw",
    "slovak": "sk",
    "slovenian": "sl",
    "solos": "sol",
    "somali": "so",
    "soninke": "snk",
    "south giziga": "giz",
    "south ucayali ashéninka": "cpy",
    "southeastern nochixtlán mixtec": "mxy",
    "southern betsimisaraka malagasy": "bzc",
    "southern pashto": "pbt",
    "southern pastaza quechua": "qup",
    "soyaltepec mazatec": "vmp",
    "spanish": "es",
    "standard arabic": "arb",
    "standard moroccan tamazight": "zgh",
    "sudanese arabic": "apd",
    "sulka": "sua",
    "svan": "sva",
    "swahili": "sw",
    "swedish": "sv",
    "tae'": "rob",
    "tahaggart tamahaq": "thv",
    "taita": "dav",
    "tajik": "tg",
    "tamil": "ta",
    "tandroy-mahafaly malagasy": "tdx",
    "tangale": "tan",
    "tanosy malagasy": "txy",
    "tarok": "yer",
    "tatar": "tt",
    "tedaga": "tuq",
    "telugu": "te",
    "tem": "kdh",
    "teop": "tio",
    "tepeuxila cuicatec": "cux",
    "tepinapa chinantec": "cte",
    "tera": "ttr",
    "terei": "buo",
    "termanu": "twu",
    "tesaka malagasy": "tkg",
    "tetelcingo nahuatl": "nhg",
    "teutila cuicatec": "cut",
    "thai": "th",
    "tibetan": "bo",
    "tidaá mixtec": "mtx",
    "tidore": "tvo",
    "tigak": "tgc",
    "tigre": "tig",
    "tigrinya": "ti",
    "tilquiapan zapotec": "zts",
    "tinputz": "tpz",
    "tlacoapa me'phaa": "tpl",
    "tlacoatzintepec chinantec": "ctl",
    "tlingit": "tli",
    "toki pona": "tok",
    "tomoip": "tqp",
    "tondano": "tdn",
    "tonsea": "txs",
    "tooro": "ttj",
    "torau": "ttu",
    "torwali": "trw",
    "tsimihety malagasy": "xmw",
    "tsotso": "lto",
    "tswana": "tn",
    "tugen": "tuy",
    "tuki": "bag",
    "tula": "tul",
    "tulu": "tcy",
    "tunen": "tvu",
    "tungag": "lcm",
    "tunisian arabic": "aeb",
    "tupuri": "tui",
    "turkana": "tuv",
    "turkish": "tr",
    "turkmen": "tk",
    "tututepec mixtec": "mtu",
    "twi": "tw",
    "ubaghara": "byc",
    "uighur": "ug",
    "ukrainian": "uk",
    "umbundu": "umb",
    "upper sorbian": "hsb",
    "urdu": "ur",
    "ushojo": "ush",
    "uzbek": "uz",
    "vai": "vai",
    "vietnamese": "vi",
    "votic": "vot",
    "võro": "vro",
    "waci gbe": "wci",
    "wadiyara koli": "kxp",
    "waja": "wja",
    "wakhi": "wbl",
    "wanga": "lwg",
    "wapan": "juk",
    "warji": "wji",
    "welsh": "cy",
    "wemale": "weo",
    "western frisian": "fy",
    "western highland purepecha": "pua",
    "western juxtlahuaca mixtec": "jmx",
    "western maninkakan": "mlq",
    "western mari": "mrj",
    "western niger fulfulde": "fuh",
    "western panjabi": "pnb",
    "wolof": "wo",
    "wuzlam": "udl",
    "xanaguía zapotec": "ztg",
    "xhosa": "xh",
    "yace": "ekr",
    "yakut": "sah",
    "yalahatan": "jal",
    "yanahuanca pasco quechua": "qur",
    "yangben": "yav",
    "yaqui": "yaq",
    "yauyos quechua": "qux",
    "yekhee": "ets",
    "yiddish": "yi",
    "yidgha": "ydg",
    "yoruba": "yo",
    "yutanduchi mixtec": "mab",
    "zacatlán-ahuacatlán-tepetzintla nahuatl": "nhi",
    "zarma": "dje",
    "zaza": "zza",
    "zulu": "zu",
    "ömie": "aom",
}
|
|
# All supported lowercase language names (the keys of LANG_NAME_TO_ID);
# iterating a dict yields its keys, so no explicit ``.keys()`` is needed.
LANG_NAMES = set(LANG_NAME_TO_ID)
# All supported language codes (the distinct values of LANG_NAME_TO_ID).
LANG_IDS = set(LANG_NAME_TO_ID.values())
|
|
| |
# Hand-written display forms for names that ``str.title()`` gets wrong:
# it capitalises the letter after an apostrophe ("Fe'Fe'") and the small
# words "de"/"del". Keys are the lowercase names used in LANG_NAME_TO_ID.
# NOTE(review): the "dũya" entry looks like it guards against "ũ" being stored
# as "u" + U+0303 (combining tilde), which ``str.title()`` would turn into
# "DũYa" -- confirm, and keep the exact characters when editing.
_TITLE_EXCEPTIONS = {
    "fe'fe'": "Fe'fe'",
    "dũya": "Dũya",
    "santiago del estero quichua": "Santiago del Estero Quichua",
    "santa ana de tusi pasco quechua": "Santa Ana de Tusi Pasco Quechua",
    "malinaltepec me'phaa": "Malinaltepec Me'phaa",
    "tlacoapa me'phaa": "Tlacoapa Me'phaa",
}


def lang_display_name(name: str) -> str:
    """Return a display-friendly version of a language name.

    Most names are formatted with ``str.title()``. Names listed in
    ``_TITLE_EXCEPTIONS`` use their hand-written form instead, because
    ``str.title()`` mishandles apostrophes and small words (de, del) that
    should stay lowercase.

    The exception lookup ignores case, so a name that is already in display
    form (e.g. ``"Fe'fe'"``) maps to itself rather than being re-titled into
    ``"Fe'Fe'"``. Results for lowercase input are unchanged.

    Args:
        name: Language name, normally a lowercase key of ``LANG_NAME_TO_ID``.

    Returns:
        The name formatted for display.
    """
    titled = name.title()
    # Keys of the exception table are lowercase, so normalise before lookup.
    return _TITLE_EXCEPTIONS.get(name.lower(), titled)
|
|