// jeanma's picture
// Omnilingual ASR transcription demo
// ae238b3 verified
/**
 * Lookup table from a script code to display information about that script.
 *
 * Keys are four-letter, title-cased script codes (they appear to follow
 * ISO 15924 — confirm against the producer of these codes). Each entry has:
 *  - `name`: the human-readable script name shown to users;
 *  - `description` (optional): a short run of sample characters written in
 *    that script. Some entries (Afak, Berf, Kpel, Leke, Loma, Nagm, Nkdb,
 *    Sunu, Toto, Tutg) carry no sample at all.
 *
 * Characters outside the Basic Multilingual Plane are spelled as UTF-16
 * surrogate-pair escapes (`\ud8xx\udcxx`).
 *
 * The index signature types every string key as an entry, but a code that is
 * not listed here yields `undefined` at runtime.
 */
const SCRIPTS: {
[code: string]: { name: string; description?: string };
} = {
Adlm: {
name: "Adlam",
// NOTE(review): the first four separators are tabs and the last is a space,
// unlike the space-separated samples elsewhere — confirm this is intended.
description:
"\ud83a\udd00\t\ud83a\udd01\t\ud83a\udd02\t\ud83a\udd03\t\ud83a\udd04 \ud83a\udd05",
},
Afak: {
name: "Afaka",
},
Arab: {
name: "Arabic or Arabic-based",
description: "\u0684 \u075d \u0620 \u0641 \u076d",
},
Armn: {
name: "Armenian",
description: "\u0531 \u0532 \u0533 \u0534 \u0535",
},
Bali: {
name: "Balinese",
description: "\u1b07 \u1b0a \u1b0b \u1b0e \u1b45",
},
Bamu: {
name: "Bamun",
description: "\ua6c8 \ua6ca \ua6cb \ua6cc \ua6cd",
},
Bass: {
name: "Bassa Vah",
description:
"\ud81a\uded0 \ud81a\uded1 \ud81a\uded2 \ud81a\uded3 \ud81a\uded4 \ud81a\uded5",
},
Batk: {
name: "Batak",
description: "\u1bd5 \u1bd6 \u1bd7 \u1bd8 \u1bd9",
},
Beng: {
name: "Bengali (Bangla)",
description: "\u09a0 \u09a1 \u09a2 \u09a3 \u09a4",
},
Berf: {
name: "Beria Erfe",
},
Bopo: {
name: "Bopomofo",
description: "\u3113 \u310a \u312b \u310d \u3108",
},
Brah: {
name: "Brahmi",
description:
"\ud804\udc30 \ud804\udc31 \ud804\udc32 \ud804\udc33 \ud804\udc34",
},
Bugi: {
name: "Buginese",
description: "\u1a01 \u1a02 \u1a03 \u1a05 \u1a15",
},
Buhd: {
name: "Buhid",
description: "\u1741 \u1742 \u1743 \u1744 \u1745 \u1746",
},
Cakm: {
name: "Chakma",
description:
"\ud804\udd07 \ud804\udd08 \ud804\udd0e \ud804\udd25 \ud804\udd26 \ud804\udd04",
},
Cans: {
name: "Unified Canadian Aboriginal Syllabics",
description: "\u142b \u1444 \u145c \u1573 \u15c6 \u1614 \u1608 \u1578",
},
Cham: {
name: "Cham",
description: "\uaa07 \uaa19 \uaa24 \uaa42 \uaa52 \uaa51",
},
Cher: {
name: "Cherokee",
description: "\u13b6 \u13e4 \u13e5 \u13d4 \u13d5 \u13b2",
},
Copt: {
name: "Coptic",
description: "\u2c85 \u2c87 \u2caf \u2ccb \u2ce3 \u2cbf",
},
Cyrl: {
name: "Cyrillic or Cyrillic-based",
description: "\u0433 \u0434 \u0435 \u0436 \u0437",
},
Deva: {
name: "Devanagari (Nagari)",
description: "\u0914 \u0915 \u0916 \u0932 \u0939 \u091d",
},
Ethi: {
name: "Ethiopic (Ge\u02bbez)",
description: "\u1214 \u1235 \u1254 \u1296 \u12b8 \u1315",
},
Geor: {
name: "Georgian (Mkhedruli and Mtavruli)",
description: "\u10e0 \u10e1 \u10e2 \u10e3 \u10e4 \u10e5 \u10e6",
},
Gran: {
name: "Grantha",
description:
"\ud804\udf22 \ud804\udf23 \ud804\udf24 \ud804\udf25 \ud804\udf26 \ud804\udf08",
},
Grek: {
name: "Greek",
description: "\u03b2 \u03b3 \u03b4 \u03b5 \u03b6 \u03b8",
},
Gujr: {
name: "Gujarati",
description: "\u0a94 \u0a95 \u0a96 \u0ab6 \u0ab7 \u0ae1 \u0a8a",
},
Guru: {
name: "Gurmukhi",
description: "\u0a18 \u0a1b \u0a1c \u0a1d \u0a28 \u0a07",
},
Hang: {
name: "Hangul (Hang\u016dl, Hangeul)",
description: "\u3145 \u3153 \u314e \u3136 \u315e \u3140",
},
Hani: {
name: "Han (Hanzi, Kanji, Hanja)",
description: "\u6f22\u5b57\u5b87\u5b99\u4e16\u754c",
},
Hano: {
name: "Hanunoo (Hanun\u00f3o)",
description: "\u1721 \u1722 \u1723 \u1724 \u1728 \u172a",
},
Hans: {
name: "Simplified Han script",
description: "\u7b80\u4f53\u5b57\u793a\u8303",
},
Hant: {
name: "Traditional Han script",
description: "\u7e41\u9ad4\u5b57\u793a\u7bc4",
},
Hebr: {
name: "Hebrew",
description: "\u05d0 \u05d1 \u05d2 \u05d3 \u05d4",
},
Hmng: {
name: "Pahawh Hmong",
description:
"\ud81a\udf03 \ud81a\udf05 \ud81a\udf19 \ud81a\udf1a \ud81a\udf28 \ud81a\udf29",
},
Java: {
name: "Javanese",
description: "\ua993 \ua999 \ua9a2 \ua9c2 \ua9d5 \ua9d6",
},
Jpan: {
name: "Japanese (Han, Hiragana, Katakana)",
description: "\u30ab\u30ca\u3068\u6f22\u5b57",
},
Kali: {
name: "Kayah Li",
description: "\ua910 \ua911 \ua912 \ua913 \ua90a \ua90b",
},
Kana: {
name: "Katakana",
description: "\u30ab\u30bf\u30ab\u30ca",
},
Khmr: {
name: "Khmer",
description: "\u1780 \u1781 \u1782 \u1783 \u17a0 \u17a6",
},
Khoj: {
name: "Khojki",
description:
"\ud804\ude06 \ud804\ude07 \ud804\ude08 \ud804\ude23 \ud804\ude24 \ud804\ude25",
},
Knda: {
name: "Kannada",
description: "\u0ca3 \u0ca4 \u0ca5 \u0c87 \u0c88 \u0c89",
},
Kore: {
name: "Korean (Hangul, Han)",
description: "\ud55c\uae00\uacfc\u6f22\u5b57",
},
Kpel: {
name: "Kpelle",
},
Kthi: {
name: "Kaithi",
description:
"\ud804\udc92 \ud804\udc93 \ud804\udc94 \ud804\udc95 \ud804\udc96 \ud804\udc9c",
},
Lana: {
name: "Tai Tham (Lanna)",
description: "\u1a3a \u1a47 \u1a4d \u1a52 \u1a42 \u1a24",
},
Laoo: {
name: "Lao",
description: "\u0eaa \u0eab \u0e9d \u0ed5 \u0ec6 \u0ec2",
},
Latn: {
name: "Latin or Latin-based (a\u2026z)",
// NOTE(review): the final sample character U+03AD is in the Greek and Coptic
// block, not a Latin letter — confirm it is intended.
description: "a \u00f4 \u00f1 \u010d \u015f \u00fc \u0254 \u03ad",
},
Leke: {
name: "Leke",
},
Lepc: {
name: "Lepcha (R\u00f3ng)",
description: "\u1c4e \u1c4f \u1c0f \u1c12 \u1c03 \u1c05",
},
Limb: {
name: "Limbu",
description: "\u1907 \u1908 \u1909 \u190a \u1915 \u1916",
},
Lisu: {
name: "Lisu (Fraser)",
description: "\ua4e7 \ua4e8 \ua4e9 \ua4f5 \ua4db \ua4de",
},
Loma: {
name: "Loma",
},
Mand: {
name: "Mandaic, Mandaean",
description: "\u084b\u0849\u0854\u0840\u084d\u0840",
},
Marc: {
name: "Marchen",
description: "\ud807\udc72\ud807\udc8a\ud807\udc8f\ud807\udc70\ud807\udc75",
},
Mend: {
name: "Mende Kikakui",
// NOTE(review): the sample embeds U+200E (left-to-right mark) after the first
// character — presumably for display direction; confirm.
description:
"\ud83a\udc86\u200e\ud83a\udc87\ud83a\udcbb\ud83a\udc5a\ud83a\udc01",
},
Mlym: {
name: "Malayalam",
description: "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02",
},
Modi: {
name: "Modi, Mo\u1e0d\u012b",
// NOTE(review): the sample starts with four Devanagari code points
// (U+092E U+094B U+0921 U+0940) before the Modi-block characters — confirm.
description:
"\u092e\u094b\u0921\u0940\ud805\ude26\ud805\ude3b\ud805\ude1a\ud805\ude32",
},
Mong: {
name: "Mongolian",
description: "\u182a\u1822\u1834\u1822\u182d",
},
Mroo: {
name: "Mro, Mru",
description: "\ud81a\ude43\ud81a\ude44\ud81a\ude45\ud81a\ude46\ud81a\ude47",
},
Mtei: {
name: "Meitei Mayek (Meithei, Meetei)",
description: "\uabc3\uabe9\uabc7\uabe9\uabc3\uabcc\uabe6\uabdb",
},
Mymr: {
name: "Myanmar (Burmese)",
description:
"\u1019\u103c\u1014\u103a\u1019\u102c\u1021\u1000\u1039\u1001\u101b\u102c",
},
Nagm: {
name: "Nag Mundari",
},
Newa: {
name: "Newa, Newar, Newari, Nep\u0101la lipi",
description:
"\ud805\udc0e \ud805\udc14 \ud805\udc1a \ud805\udc1f \ud805\udc25",
},
Nkdb: {
name: "Naxi Dongba (na\u00b2\u00b9\u0255i\u00b3\u00b3 to\u00b3\u00b3ba\u00b2\u00b9, Nakhi Tomba)",
},
Nkoo: {
name: "NKo, N\u2019Ko",
// NOTE(review): this sample begins with a space, unlike the other entries —
// confirm whether the leading space is intentional.
description: " \u07d0 \u07cf \u07ce \u07cd \u07cc",
},
Olck: {
name: "Ol Chiki (Ol Cemet\u2019, Ol, Santali)",
description: "\u1c5a \u1c5b \u1c5c \u1c5d \u1c5e",
},
Orya: {
name: "Oriya (Odia)",
description: "\u0b15 \u0b16 \u0b17 \u0b18 \u0b19",
},
Osge: {
name: "Osage",
description:
"\ud801\udcbe \ud801\udcc1 \ud801\udcca \ud801\udcd0 \ud801\udcc0",
},
Pauc: {
name: "Pau Cin Hau",
description:
"\ud806\udec0 \ud806\udec6 \ud806\udec7 \ud806\udeca \ud806\udecf",
},
Phag: {
name: "Phags-pa",
description: "\ua840 \ua841 \ua843 \ua845 \ua84b",
},
Plrd: {
name: "Miao (Pollard)",
description:
"\ud81b\udf01 \ud81b\udf0b \ud81b\udf15 \ud81b\udf1f \ud81b\udf38",
},
Rjng: {
name: "Rejang (Redjang, Kaganga)",
description: "\ua930 \ua932 \ua935 \ua938 \ua943",
},
Rohg: {
name: "Hanifi Rohingya",
description:
"\ud803\udd03 \ud803\udd0b \ud803\udd1c \ud803\udd11 \ud803\udd1e \ud803\udd1d\ud803\udd24",
},
Saur: {
name: "Saurashtra",
description: "\ua893 \ua8a6 \ua8aa \ua88d \ua8a0",
},
Sinh: {
name: "Sinhala",
description: "\u0da3 \u0db4 \u0d8b \u0db0 \u0d9a\u0dca\u200d\u0dc2",
},
Sora: {
name: "Sora Sompeng",
description:
"\ud804\udcd3 \ud804\udcd7 \ud804\udcdb \ud804\udce5 \ud804\udce0",
},
Sunu: {
name: "Sunuwar",
},
Sylo: {
name: "Syloti Nagri",
description: "\ua807 \ua80e \ua813 \ua81f \ua81d",
},
Syrc: {
name: "Syriac",
description: "\u0712 \u0717 \u071b \u0723 \u072b",
},
Tagb: {
name: "Tagbanwa",
description: "\u1765 \u1769\u1772 \u176c \u176b\u1773 \u1770",
},
Takr: {
name: "Takri, \u1e6c\u0101kr\u012b, \u1e6c\u0101\u1e45kr\u012b",
description:
"\ud805\ude83 \ud805\ude89 \ud805\ude8f \ud805\ude95 \ud805\udea5",
},
Tale: {
name: "Tai Le",
description: "\u1955 \u195a \u195b \u196a \u1970",
},
Talu: {
name: "New Tai Lue",
// NOTE(review): one separator here is a tab (between U+19A7 and U+19A8) while
// the rest are spaces — confirm this is intended.
description: "\u19a6 \u19a7\t\u19a8 \u19a9 \u19aa \u19ab",
},
Taml: {
name: "Tamil",
description:
"\u0ba3\u0bcd \u0b9e\u0bcd \u0bb1\u0bcd \u0bb9\u0bcd \u0b95\u0bcd\u0bb7\u0bcd \u0b90",
},
Tavt: {
name: "Tai Viet",
description: "\uaa80 \uaa86 \uaa88 \uaa90 \uaaa2",
},
Telu: {
name: "Telugu",
description: "\u0c60 \u0c21\u0c4d\u0c1d \u0c0f \u0c14 \u0c0a",
},
Tfng: {
name: "Tifinagh (Berber)",
description: "\u2d31 \u2d32 \u2d33 \u2d34 \u2d35",
},
Thaa: {
name: "Thaana",
description: "\u0780 \u0781 \u0782 \u0783 \u0784 \u0785 \u0786",
},
Thai: {
name: "Thai",
description: "\u0e12 \u0e13 \u0e14 \u0e15 \u0e16",
},
Tibt: {
name: "Tibetan",
description: "\u0f00 \u0f01 \u0f02 \u0f03 \u0f04 \u0f05 \u0f06",
},
Toto: {
name: "Toto",
},
Tutg: {
name: "Tulu-Tigalari",
},
Vaii: {
name: "Vai",
description: "\ua559\ua524\ua552\ua578\ua59d",
},
Wara: {
name: "Warang Citi (Varang Kshiti)",
description: "\ud806\udcb9\ud806\udcd7\ud806\udcc1\ud806\udcdc\ud806\udcca",
},
Wcho: {
name: "Wancho",
description: "\ud838\uded2\ud838\udec0\ud838\udec9\ud838\udec3\ud838\uded5",
},
Yiii: {
name: "Yi",
description: "\ua016\ua038\ua056\ua076\ua091",
},
};
// Helper functions for working with scripts
/**
 * Resolves a script code to its human-readable name.
 *
 * @param code - Script code to look up (a key of `SCRIPTS`, e.g. `"Latn"`).
 * @returns The entry's `name`, or `code` itself when the table has no entry
 *   for it (or the entry's name is empty), so callers always get a
 *   displayable string.
 */
export function getScriptName(code: string): string {
  // Only accept keys defined on the table itself. A bare `SCRIPTS[code]`
  // also resolves members inherited from Object.prototype, so a code such as
  // "constructor" would otherwise yield the built-in function's name
  // ("Object") instead of falling back to the code.
  if (!Object.prototype.hasOwnProperty.call(SCRIPTS, code)) {
    return code;
  }
  // `||` (not `??`) is kept deliberately: an empty name also falls back to
  // the code, exactly as before.
  return SCRIPTS[code]?.name || code;
}
/**
 * Looks up the optional `description` string stored for a script code.
 *
 * @param code - Script code to look up in `SCRIPTS`.
 * @returns The entry's `description`, or `undefined` when the code has no
 *   entry or the entry carries no description.
 */
export function getScriptDescription(code: string): string | undefined {
  const entry = SCRIPTS[code];
  if (entry == null) {
    return undefined;
  }
  return entry.description;
}
/**
 * Returns the full table entry for a script code.
 *
 * @param code - Script code to look up in `SCRIPTS`.
 * @returns The entry object (`name` plus optional `description`) stored in
 *   the table — the same object, not a copy — or `null` when the table has
 *   no entry for `code`.
 */
export function getScriptInfo(
  code: string,
): { name: string; description?: string } | null {
  // Guard against inherited members: without this, codes like "constructor"
  // or "toString" resolve to built-in functions on Object.prototype, which
  // are truthy and would be returned as if they were script entries.
  if (!Object.prototype.hasOwnProperty.call(SCRIPTS, code)) {
    return null;
  }
  return SCRIPTS[code] || null;
}
/**
 * Flattens the `SCRIPTS` table into a list, one element per entry.
 *
 * @returns A new array in the table's own key order; each element is a fresh
 *   object holding the entry's `code` followed by the entry's own fields
 *   (`name`, and `description` when present).
 */
export function getAllScripts(): Array<{
  code: string;
  name: string;
  description?: string;
}> {
  const listing: Array<{
    code: string;
    name: string;
    description?: string;
  }> = [];
  for (const [code, info] of Object.entries(SCRIPTS)) {
    // `code` goes first so the entry's fields are spread after it.
    listing.push({ code, ...info });
  }
  return listing;
}
/**
 * Builds a comma-separated display string for a list of script codes.
 *
 * @param codes - Script codes, in the order they should appear.
 * @returns The result of `getScriptName` for each code, joined with `", "`;
 *   an empty string for an empty list.
 */
export function formatScriptDisplay(codes: string[]): string {
  const names: string[] = [];
  for (const code of codes) {
    names.push(getScriptName(code));
  }
  return names.join(", ");
}
// Default export: the raw lookup table itself (the same object the helper
// functions in this module read from).
export default SCRIPTS;