{ "total_toponyms": 66924548, "in_training_namespaces": 57593810, "with_ipa": 31113585, "with_panphon_embedding": 31113585, "panphon_coverage_pct": 54.02244616218305, "from_db_cache": 31113562, "from_precomputed": 2, "from_epitran": 21, "by_script": { "CYRILLIC": 3614762, "LATIN": 55617677, "CJK": 2973525, "ARABIC": 2098089, "HEBREW": 151960, "KATAKANA": 340555, "MALAYALAM": 68176, "HIRAGANA": 151980, "OTHER": 342642, "GEORGIAN": 105902, "GREEK": 217997, "DEVANAGARI": 166957, "ARMENIAN": 153467, "THAI": 251458, "KANNADA": 43155, "HANGUL": 393996, "GUJARATI": 21428, "BENGALI": 106896, "TAMIL": 52486, "TELUGU": 51440 }, "by_script_lang_ipa": { "LATIN:en": 8039689, "LATIN:fr": 2311837, "LATIN:nl": 2292068, "LATIN:de": 2063027, "LATIN:sv": 1715947, "LATIN:es": 1518527, "CJK:zh": 1306961, "LATIN:id": 931192, "LATIN:tr": 843744, "LATIN:it": 815130, "CYRILLIC:ru": 803734, "LATIN:pt": 711346, "LATIN:pl": 655880, "LATIN:cs": 593385, "ARABIC:fa": 576743, "LATIN:fi": 532010, "CYRILLIC:uk": 435644, "LATIN:no": 428240, "ARABIC:ar": 412306, "LATIN:ro": 375292, "KATAKANA:ja": 310410, "LATIN:da": 297142, "LATIN:ms": 285578, "LATIN:vi": 267432, "LATIN:hu": 247134, "CYRILLIC:bg": 235749, "CYRILLIC:sr": 235582, "HANGUL:ko": 228523, "THAI:th": 210310, "GREEK:el": 168827, "ARMENIAN:hy": 143819, "HEBREW:he": 127337, "LATIN:sw": 113131, "ARABIC:ur": 109688, "GEORGIAN:ka": 86021, "BENGALI:bn": 77935, "LATIN:la": 77703, "CYRILLIC:mk": 61607, "DEVANAGARI:hi": 60800, "MALAYALAM:ml": 53546, "CJK:wuu": 48883, "TAMIL:ta": 47700, "TELUGU:te": 47617, "HIRAGANA:ja": 47533, "CJK:gan": 37097, "CJK:yue": 31345, "DEVANAGARI:mr": 24452, "KANNADA:kn": 20962, "GUJARATI:gu": 20329, "LATIN:yue": 13719, "DEVANAGARI:ne": 10249, "CJK:ko": 2060, "LATIN:wuu": 254, "KATAKANA:zh": 69, "LATIN:gan": 60, "CYRILLIC:zh": 53, "OTHER:zh": 51, "OTHER:ko": 21, "HIRAGANA:yue": 15, "ARABIC:yue": 14, "HANGUL:zh": 14, "HIRAGANA:zh": 14, "ARABIC:zh": 13, "OTHER:he": 10, "GREEK:zh": 9, "THAI:yue": 7, "THAI:zh": 7, "CYRILLIC:yue": 7, "OTHER:yue": 5, "KATAKANA:ko": 4, "HEBREW:yue": 4, "THAI:wuu": 3, "CYRILLIC:ko": 3, "BENGALI:yue": 3, "DEVANAGARI:zh": 3, "KATAKANA:yue": 3, "TAMIL:yue": 2, "ARMENIAN:yue": 2, "TELUGU:zh": 2, "HANGUL:yue": 2, "CYRILLIC:wuu": 2, "GREEK:ko": 1, "OTHER:wuu": 1, "ARABIC:ko": 1, "GEORGIAN:yue": 1, "HEBREW:zh": 1, "OTHER:gan": 1, "TAMIL:zh": 1 }, "training_namespaces": [ "gn", "wd", "tgn" ], "num_workers": 62, "db_engine": "DuckDB", "ipa_backends": [ "epitran", "phonikud", "charsiu_g2p" ] }