File size: 2,986 Bytes
4558539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
{
  "total_toponyms": 66924548,
  "in_training_namespaces": 57593810,
  "with_ipa": 31113585,
  "with_panphon_embedding": 31113585,
  "panphon_coverage_pct": 54.02244616218305,
  "from_db_cache": 31113562,
  "from_precomputed": 2,
  "from_epitran": 21,
  "by_script": {
    "CYRILLIC": 3614762,
    "LATIN": 55617677,
    "CJK": 2973525,
    "ARABIC": 2098089,
    "HEBREW": 151960,
    "KATAKANA": 340555,
    "MALAYALAM": 68176,
    "HIRAGANA": 151980,
    "OTHER": 342642,
    "GEORGIAN": 105902,
    "GREEK": 217997,
    "DEVANAGARI": 166957,
    "ARMENIAN": 153467,
    "THAI": 251458,
    "KANNADA": 43155,
    "HANGUL": 393996,
    "GUJARATI": 21428,
    "BENGALI": 106896,
    "TAMIL": 52486,
    "TELUGU": 51440
  },
  "by_script_lang_ipa": {
    "LATIN:en": 8039689,
    "LATIN:fr": 2311837,
    "LATIN:nl": 2292068,
    "LATIN:de": 2063027,
    "LATIN:sv": 1715947,
    "LATIN:es": 1518527,
    "CJK:zh": 1306961,
    "LATIN:id": 931192,
    "LATIN:tr": 843744,
    "LATIN:it": 815130,
    "CYRILLIC:ru": 803734,
    "LATIN:pt": 711346,
    "LATIN:pl": 655880,
    "LATIN:cs": 593385,
    "ARABIC:fa": 576743,
    "LATIN:fi": 532010,
    "CYRILLIC:uk": 435644,
    "LATIN:no": 428240,
    "ARABIC:ar": 412306,
    "LATIN:ro": 375292,
    "KATAKANA:ja": 310410,
    "LATIN:da": 297142,
    "LATIN:ms": 285578,
    "LATIN:vi": 267432,
    "LATIN:hu": 247134,
    "CYRILLIC:bg": 235749,
    "CYRILLIC:sr": 235582,
    "HANGUL:ko": 228523,
    "THAI:th": 210310,
    "GREEK:el": 168827,
    "ARMENIAN:hy": 143819,
    "HEBREW:he": 127337,
    "LATIN:sw": 113131,
    "ARABIC:ur": 109688,
    "GEORGIAN:ka": 86021,
    "BENGALI:bn": 77935,
    "LATIN:la": 77703,
    "CYRILLIC:mk": 61607,
    "DEVANAGARI:hi": 60800,
    "MALAYALAM:ml": 53546,
    "CJK:wuu": 48883,
    "TAMIL:ta": 47700,
    "TELUGU:te": 47617,
    "HIRAGANA:ja": 47533,
    "CJK:gan": 37097,
    "CJK:yue": 31345,
    "DEVANAGARI:mr": 24452,
    "KANNADA:kn": 20962,
    "GUJARATI:gu": 20329,
    "LATIN:yue": 13719,
    "DEVANAGARI:ne": 10249,
    "CJK:ko": 2060,
    "LATIN:wuu": 254,
    "KATAKANA:zh": 69,
    "LATIN:gan": 60,
    "CYRILLIC:zh": 53,
    "OTHER:zh": 51,
    "OTHER:ko": 21,
    "HIRAGANA:yue": 15,
    "ARABIC:yue": 14,
    "HANGUL:zh": 14,
    "HIRAGANA:zh": 14,
    "ARABIC:zh": 13,
    "OTHER:he": 10,
    "GREEK:zh": 9,
    "THAI:yue": 7,
    "THAI:zh": 7,
    "CYRILLIC:yue": 7,
    "OTHER:yue": 5,
    "KATAKANA:ko": 4,
    "HEBREW:yue": 4,
    "THAI:wuu": 3,
    "CYRILLIC:ko": 3,
    "BENGALI:yue": 3,
    "DEVANAGARI:zh": 3,
    "KATAKANA:yue": 3,
    "TAMIL:yue": 2,
    "ARMENIAN:yue": 2,
    "TELUGU:zh": 2,
    "HANGUL:yue": 2,
    "CYRILLIC:wuu": 2,
    "GREEK:ko": 1,
    "OTHER:wuu": 1,
    "ARABIC:ko": 1,
    "GEORGIAN:yue": 1,
    "HEBREW:zh": 1,
    "OTHER:gan": 1,
    "TAMIL:zh": 1
  },
  "training_namespaces": [
    "gn",
    "wd",
    "tgn"
  ],
  "num_workers": 62,
  "db_engine": "DuckDB",
  "ipa_backends": [
    "epitran",
    "phonikud",
    "charsiu_g2p"
  ]
}