updated
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- data/Abkhaz-Adyghe.json +27 -27
- data/Afro-Asiatic.json +0 -0
- data/Algic.json +179 -179
- data/Amto-Musan.json +9 -9
- data/Andamanese.json +57 -57
- data/Arafundi.json +12 -12
- data/Arai (Left May).json +21 -21
- data/Arauan.json +24 -24
- data/Australian.json +0 -0
- data/Austro-Asiatic.json +0 -0
- data/Austronesian.json +0 -0
- data/Aymaran.json +18 -18
- data/Barbacoan.json +18 -18
- data/Bayono-Awbono.json +9 -9
- data/Border.json +57 -57
- data/Bororoan.json +12 -12
- data/Botocudoan.json +6 -6
- data/Caddoan.json +27 -27
- data/Cahuapanan.json +9 -9
- data/Cariban.json +159 -159
- data/Central Solomons.json +15 -15
- data/Chapacuran.json +21 -21
- data/Chibchan.json +108 -108
- data/Chimakuan.json +9 -9
- data/Chinookan.json +12 -12
- data/Chipaya-Uru.json +9 -9
- data/Chocoan.json +33 -33
- data/Cholonan.json +9 -9
- data/Chon.json +12 -12
- data/Chukotko-Kamchatkan.json +30 -30
- data/Chumashan.json +27 -27
- data/Cochimí-Yuman.json +45 -45
- data/Comecrudan.json +18 -18
- data/Constructed language.json +6 -6
- data/Coosan.json +9 -9
- data/Creole.json +639 -1185
- data/Dravidian.json +562 -891
- data/East Bird’s Head-Sentani.json +48 -48
- data/East Geelvink Bay.json +42 -42
- data/East New Britain.json +30 -30
- data/Eastern Trans-Fly.json +15 -15
- data/Eskimo-Aleut.json +51 -51
- data/Eyak-Athabaskan.json +186 -186
- data/Fas.json +9 -9
- data/Guajiboan.json +21 -21
- data/Guaykuruan.json +24 -24
- data/Gum.json +3 -3
- data/Haida.json +9 -9
- data/Harákmbut.json +9 -9
- data/Hmong-Mien.json +153 -153
data/Abkhaz-Adyghe.json
CHANGED
|
@@ -2,101 +2,101 @@
|
|
| 2 |
"name": "Abkhaz-Adyghe",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Abkhaz-Abazin",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Abkhaz",
|
| 15 |
"iso_1_code": "ab",
|
| 16 |
"iso_3_code": "abk",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "2",
|
|
|
|
| 20 |
"scripts": [
|
| 21 |
"Cyrl"
|
| 22 |
-
]
|
| 23 |
-
"own_tokenizer": false
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"name": "Abaza",
|
| 27 |
"iso_1_code": null,
|
| 28 |
"iso_3_code": "abq",
|
| 29 |
-
"tokenizers": {},
|
| 30 |
"children": [],
|
|
|
|
| 31 |
"node_i": "3",
|
|
|
|
| 32 |
"scripts": [
|
| 33 |
"Cyrl"
|
| 34 |
-
]
|
| 35 |
-
"own_tokenizer": false
|
| 36 |
}
|
| 37 |
],
|
|
|
|
| 38 |
"node_i": "1",
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"name": "Circassian",
|
| 44 |
"iso_1_code": null,
|
| 45 |
"iso_3_code": null,
|
| 46 |
-
"tokenizers": {},
|
| 47 |
"children": [
|
| 48 |
{
|
| 49 |
"name": "Adyghe",
|
| 50 |
"iso_1_code": null,
|
| 51 |
"iso_3_code": "ady",
|
| 52 |
-
"tokenizers": {},
|
| 53 |
"children": [],
|
|
|
|
| 54 |
"node_i": "5",
|
|
|
|
| 55 |
"scripts": [
|
| 56 |
"Cyrl"
|
| 57 |
-
]
|
| 58 |
-
"own_tokenizer": false
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"name": "Kabardian",
|
| 62 |
"iso_1_code": null,
|
| 63 |
"iso_3_code": "kbd",
|
| 64 |
-
"tokenizers": {},
|
| 65 |
"children": [],
|
|
|
|
| 66 |
"node_i": "6",
|
|
|
|
| 67 |
"scripts": [
|
| 68 |
"Cyrl"
|
| 69 |
-
]
|
| 70 |
-
"own_tokenizer": false
|
| 71 |
}
|
| 72 |
],
|
|
|
|
| 73 |
"node_i": "4",
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"name": "Ubyx",
|
| 79 |
"iso_1_code": null,
|
| 80 |
"iso_3_code": null,
|
| 81 |
-
"tokenizers": {},
|
| 82 |
"children": [
|
| 83 |
{
|
| 84 |
"name": "Ubykh",
|
| 85 |
"iso_1_code": null,
|
| 86 |
"iso_3_code": "uby",
|
| 87 |
-
"tokenizers": {},
|
| 88 |
"children": [],
|
|
|
|
| 89 |
"node_i": "8",
|
| 90 |
-
"
|
| 91 |
-
"
|
| 92 |
}
|
| 93 |
],
|
|
|
|
| 94 |
"node_i": "7",
|
| 95 |
-
"
|
| 96 |
-
"
|
| 97 |
}
|
| 98 |
],
|
|
|
|
| 99 |
"node_i": "0",
|
| 100 |
-
"
|
| 101 |
-
"
|
| 102 |
}
|
|
|
|
| 2 |
"name": "Abkhaz-Adyghe",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Abkhaz-Abazin",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Abkhaz",
|
| 13 |
"iso_1_code": "ab",
|
| 14 |
"iso_3_code": "abk",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "2",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
"scripts": [
|
| 20 |
"Cyrl"
|
| 21 |
+
]
|
|
|
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Abaza",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "abq",
|
|
|
|
| 27 |
"children": [],
|
| 28 |
+
"tokenizers": {},
|
| 29 |
"node_i": "3",
|
| 30 |
+
"native_tokenizers": [],
|
| 31 |
"scripts": [
|
| 32 |
"Cyrl"
|
| 33 |
+
]
|
|
|
|
| 34 |
}
|
| 35 |
],
|
| 36 |
+
"tokenizers": {},
|
| 37 |
"node_i": "1",
|
| 38 |
+
"native_tokenizers": [],
|
| 39 |
+
"scripts": []
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "Circassian",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": null,
|
|
|
|
| 45 |
"children": [
|
| 46 |
{
|
| 47 |
"name": "Adyghe",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "ady",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "5",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
"scripts": [
|
| 55 |
"Cyrl"
|
| 56 |
+
]
|
|
|
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"name": "Kabardian",
|
| 60 |
"iso_1_code": null,
|
| 61 |
"iso_3_code": "kbd",
|
|
|
|
| 62 |
"children": [],
|
| 63 |
+
"tokenizers": {},
|
| 64 |
"node_i": "6",
|
| 65 |
+
"native_tokenizers": [],
|
| 66 |
"scripts": [
|
| 67 |
"Cyrl"
|
| 68 |
+
]
|
|
|
|
| 69 |
}
|
| 70 |
],
|
| 71 |
+
"tokenizers": {},
|
| 72 |
"node_i": "4",
|
| 73 |
+
"native_tokenizers": [],
|
| 74 |
+
"scripts": []
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"name": "Ubyx",
|
| 78 |
"iso_1_code": null,
|
| 79 |
"iso_3_code": null,
|
|
|
|
| 80 |
"children": [
|
| 81 |
{
|
| 82 |
"name": "Ubykh",
|
| 83 |
"iso_1_code": null,
|
| 84 |
"iso_3_code": "uby",
|
|
|
|
| 85 |
"children": [],
|
| 86 |
+
"tokenizers": {},
|
| 87 |
"node_i": "8",
|
| 88 |
+
"native_tokenizers": [],
|
| 89 |
+
"scripts": []
|
| 90 |
}
|
| 91 |
],
|
| 92 |
+
"tokenizers": {},
|
| 93 |
"node_i": "7",
|
| 94 |
+
"native_tokenizers": [],
|
| 95 |
+
"scripts": []
|
| 96 |
}
|
| 97 |
],
|
| 98 |
+
"tokenizers": {},
|
| 99 |
"node_i": "0",
|
| 100 |
+
"native_tokenizers": [],
|
| 101 |
+
"scripts": []
|
| 102 |
}
|
data/Afro-Asiatic.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/Algic.json
CHANGED
|
@@ -2,630 +2,630 @@
|
|
| 2 |
"name": "Algic",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Algonquian",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Blackfoot",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "bla",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "558",
|
|
|
|
| 20 |
"scripts": [
|
| 21 |
"Latn"
|
| 22 |
-
]
|
| 23 |
-
"own_tokenizer": false
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"name": "Cheyenne",
|
| 27 |
"iso_1_code": null,
|
| 28 |
"iso_3_code": "chy",
|
| 29 |
-
"tokenizers": {},
|
| 30 |
"children": [],
|
|
|
|
| 31 |
"node_i": "559",
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"name": "Menominee",
|
| 37 |
"iso_1_code": null,
|
| 38 |
"iso_3_code": "mez",
|
| 39 |
-
"tokenizers": {},
|
| 40 |
"children": [],
|
|
|
|
| 41 |
"node_i": "560",
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"name": "Miami",
|
| 47 |
"iso_1_code": null,
|
| 48 |
"iso_3_code": "mia",
|
| 49 |
-
"tokenizers": {},
|
| 50 |
"children": [],
|
|
|
|
| 51 |
"node_i": "561",
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"name": "Nawathinehena",
|
| 57 |
"iso_1_code": null,
|
| 58 |
"iso_3_code": "nwa",
|
| 59 |
-
"tokenizers": {},
|
| 60 |
"children": [],
|
|
|
|
| 61 |
"node_i": "562",
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
},
|
| 65 |
{
|
| 66 |
"name": "Shawnee",
|
| 67 |
"iso_1_code": null,
|
| 68 |
"iso_3_code": "sjw",
|
| 69 |
-
"tokenizers": {},
|
| 70 |
"children": [],
|
|
|
|
| 71 |
"node_i": "563",
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"name": "Arapaho",
|
| 77 |
"iso_1_code": null,
|
| 78 |
"iso_3_code": null,
|
| 79 |
-
"tokenizers": {},
|
| 80 |
"children": [
|
| 81 |
{
|
| 82 |
"name": "Arapaho",
|
| 83 |
"iso_1_code": null,
|
| 84 |
"iso_3_code": "arp",
|
| 85 |
-
"tokenizers": {},
|
| 86 |
"children": [],
|
|
|
|
| 87 |
"node_i": "565",
|
|
|
|
| 88 |
"scripts": [
|
| 89 |
"Latn"
|
| 90 |
-
]
|
| 91 |
-
"own_tokenizer": false
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"name": "Gros Ventre",
|
| 95 |
"iso_1_code": null,
|
| 96 |
"iso_3_code": "ats",
|
| 97 |
-
"tokenizers": {},
|
| 98 |
"children": [],
|
|
|
|
| 99 |
"node_i": "566",
|
| 100 |
-
"
|
| 101 |
-
"
|
| 102 |
}
|
| 103 |
],
|
|
|
|
| 104 |
"node_i": "564",
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"name": "Cree-Montagnais",
|
| 110 |
"iso_1_code": null,
|
| 111 |
"iso_3_code": null,
|
| 112 |
-
"tokenizers": {},
|
| 113 |
"children": [
|
| 114 |
{
|
| 115 |
"name": "Atikamekw",
|
| 116 |
"iso_1_code": null,
|
| 117 |
"iso_3_code": "atj",
|
| 118 |
-
"tokenizers": {},
|
| 119 |
"children": [],
|
|
|
|
| 120 |
"node_i": "568",
|
|
|
|
| 121 |
"scripts": [
|
| 122 |
"Latn"
|
| 123 |
-
]
|
| 124 |
-
"own_tokenizer": false
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"name": "Cree, Southern East",
|
| 128 |
"iso_1_code": "cr",
|
| 129 |
"iso_3_code": "crj",
|
| 130 |
-
"tokenizers": {},
|
| 131 |
"children": [],
|
|
|
|
| 132 |
"node_i": "569",
|
|
|
|
| 133 |
"scripts": [
|
| 134 |
"Cans"
|
| 135 |
-
]
|
| 136 |
-
"own_tokenizer": false
|
| 137 |
},
|
| 138 |
{
|
| 139 |
"name": "Cree, Plains",
|
| 140 |
"iso_1_code": "cr",
|
| 141 |
"iso_3_code": "crk",
|
| 142 |
-
"tokenizers": {},
|
| 143 |
"children": [],
|
|
|
|
| 144 |
"node_i": "570",
|
|
|
|
| 145 |
"scripts": [
|
| 146 |
"Latn",
|
| 147 |
"Cans"
|
| 148 |
-
]
|
| 149 |
-
"own_tokenizer": false
|
| 150 |
},
|
| 151 |
{
|
| 152 |
"name": "Cree, Northern East",
|
| 153 |
"iso_1_code": "cr",
|
| 154 |
"iso_3_code": "crl",
|
| 155 |
-
"tokenizers": {},
|
| 156 |
"children": [],
|
|
|
|
| 157 |
"node_i": "571",
|
|
|
|
| 158 |
"scripts": [
|
| 159 |
"Cans"
|
| 160 |
-
]
|
| 161 |
-
"own_tokenizer": false
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"name": "Cree, Moose",
|
| 165 |
"iso_1_code": "cr",
|
| 166 |
"iso_3_code": "crm",
|
| 167 |
-
"tokenizers": {},
|
| 168 |
"children": [],
|
|
|
|
| 169 |
"node_i": "572",
|
|
|
|
| 170 |
"scripts": [
|
| 171 |
"Cans"
|
| 172 |
-
]
|
| 173 |
-
"own_tokenizer": false
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"name": "Cree, Swampy",
|
| 177 |
"iso_1_code": "cr",
|
| 178 |
"iso_3_code": "csw",
|
| 179 |
-
"tokenizers": {},
|
| 180 |
"children": [],
|
|
|
|
| 181 |
"node_i": "573",
|
|
|
|
| 182 |
"scripts": [
|
| 183 |
"Latn"
|
| 184 |
-
]
|
| 185 |
-
"own_tokenizer": false
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"name": "Cree, Woods",
|
| 189 |
"iso_1_code": "cr",
|
| 190 |
"iso_3_code": "cwd",
|
| 191 |
-
"tokenizers": {},
|
| 192 |
"children": [],
|
|
|
|
| 193 |
"node_i": "574",
|
|
|
|
| 194 |
"scripts": [
|
| 195 |
"Cans"
|
| 196 |
-
]
|
| 197 |
-
"own_tokenizer": false
|
| 198 |
},
|
| 199 |
{
|
| 200 |
"name": "Innu",
|
| 201 |
"iso_1_code": null,
|
| 202 |
"iso_3_code": "moe",
|
| 203 |
-
"tokenizers": {},
|
| 204 |
"children": [],
|
|
|
|
| 205 |
"node_i": "575",
|
| 206 |
-
"
|
| 207 |
-
"
|
| 208 |
},
|
| 209 |
{
|
| 210 |
"name": "Naskapi",
|
| 211 |
"iso_1_code": null,
|
| 212 |
"iso_3_code": "nsk",
|
| 213 |
-
"tokenizers": {},
|
| 214 |
"children": [],
|
|
|
|
| 215 |
"node_i": "576",
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
}
|
| 219 |
],
|
|
|
|
| 220 |
"node_i": "567",
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
},
|
| 224 |
{
|
| 225 |
"name": "Eastern Algonquian",
|
| 226 |
"iso_1_code": null,
|
| 227 |
"iso_3_code": null,
|
| 228 |
-
"tokenizers": {},
|
| 229 |
"children": [
|
| 230 |
{
|
| 231 |
"name": "Carolina Algonquian",
|
| 232 |
"iso_1_code": null,
|
| 233 |
"iso_3_code": "crr",
|
| 234 |
-
"tokenizers": {},
|
| 235 |
"children": [],
|
|
|
|
| 236 |
"node_i": "578",
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
},
|
| 240 |
{
|
| 241 |
"name": "Etchemin",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": "etc",
|
| 244 |
-
"tokenizers": {},
|
| 245 |
"children": [],
|
|
|
|
| 246 |
"node_i": "579",
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"name": "Mi\u2019kmaq",
|
| 252 |
"iso_1_code": null,
|
| 253 |
"iso_3_code": "mic",
|
| 254 |
-
"tokenizers": {},
|
| 255 |
"children": [],
|
|
|
|
| 256 |
"node_i": "580",
|
|
|
|
| 257 |
"scripts": [
|
| 258 |
"Latn"
|
| 259 |
-
]
|
| 260 |
-
"own_tokenizer": false
|
| 261 |
},
|
| 262 |
{
|
| 263 |
"name": "Powhatan",
|
| 264 |
"iso_1_code": null,
|
| 265 |
"iso_3_code": "pim",
|
| 266 |
-
"tokenizers": {},
|
| 267 |
"children": [],
|
|
|
|
| 268 |
"node_i": "581",
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"name": "Malecite-Passamaquoddy",
|
| 274 |
"iso_1_code": null,
|
| 275 |
"iso_3_code": "pqm",
|
| 276 |
-
"tokenizers": {},
|
| 277 |
"children": [],
|
|
|
|
| 278 |
"node_i": "582",
|
| 279 |
-
"
|
| 280 |
-
"
|
| 281 |
},
|
| 282 |
{
|
| 283 |
"name": "Quiripi",
|
| 284 |
"iso_1_code": null,
|
| 285 |
"iso_3_code": "qyp",
|
| 286 |
-
"tokenizers": {},
|
| 287 |
"children": [],
|
|
|
|
| 288 |
"node_i": "583",
|
| 289 |
-
"
|
| 290 |
-
"
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"name": "Wampanoag",
|
| 294 |
"iso_1_code": null,
|
| 295 |
"iso_3_code": "wam",
|
| 296 |
-
"tokenizers": {},
|
| 297 |
"children": [],
|
|
|
|
| 298 |
"node_i": "584",
|
| 299 |
-
"
|
| 300 |
-
"
|
| 301 |
},
|
| 302 |
{
|
| 303 |
"name": "Loup B",
|
| 304 |
"iso_1_code": null,
|
| 305 |
"iso_3_code": "xlb",
|
| 306 |
-
"tokenizers": {},
|
| 307 |
"children": [],
|
|
|
|
| 308 |
"node_i": "585",
|
| 309 |
-
"
|
| 310 |
-
"
|
| 311 |
},
|
| 312 |
{
|
| 313 |
"name": "Loup A",
|
| 314 |
"iso_1_code": null,
|
| 315 |
"iso_3_code": "xlo",
|
| 316 |
-
"tokenizers": {},
|
| 317 |
"children": [],
|
|
|
|
| 318 |
"node_i": "586",
|
| 319 |
-
"
|
| 320 |
-
"
|
| 321 |
},
|
| 322 |
{
|
| 323 |
"name": "Narragansett",
|
| 324 |
"iso_1_code": null,
|
| 325 |
"iso_3_code": "xnt",
|
| 326 |
-
"tokenizers": {},
|
| 327 |
"children": [],
|
|
|
|
| 328 |
"node_i": "587",
|
| 329 |
-
"
|
| 330 |
-
"
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"name": "Mohegan-Pequot",
|
| 334 |
"iso_1_code": null,
|
| 335 |
"iso_3_code": "xpq",
|
| 336 |
-
"tokenizers": {},
|
| 337 |
"children": [],
|
|
|
|
| 338 |
"node_i": "588",
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
},
|
| 342 |
{
|
| 343 |
"name": "Abenaki",
|
| 344 |
"iso_1_code": null,
|
| 345 |
"iso_3_code": null,
|
| 346 |
-
"tokenizers": {},
|
| 347 |
"children": [
|
| 348 |
{
|
| 349 |
"name": "Abenaki, Eastern",
|
| 350 |
"iso_1_code": null,
|
| 351 |
"iso_3_code": "aaq",
|
| 352 |
-
"tokenizers": {},
|
| 353 |
"children": [],
|
|
|
|
| 354 |
"node_i": "590",
|
| 355 |
-
"
|
| 356 |
-
"
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"name": "Abenaki, Western",
|
| 360 |
"iso_1_code": null,
|
| 361 |
"iso_3_code": "abe",
|
| 362 |
-
"tokenizers": {},
|
| 363 |
"children": [],
|
|
|
|
| 364 |
"node_i": "591",
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
}
|
| 368 |
],
|
|
|
|
| 369 |
"node_i": "589",
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
},
|
| 373 |
{
|
| 374 |
"name": "Delaware",
|
| 375 |
"iso_1_code": null,
|
| 376 |
"iso_3_code": null,
|
| 377 |
-
"tokenizers": {},
|
| 378 |
"children": [
|
| 379 |
{
|
| 380 |
"name": "Mahican",
|
| 381 |
"iso_1_code": null,
|
| 382 |
"iso_3_code": "mjy",
|
| 383 |
-
"tokenizers": {},
|
| 384 |
"children": [],
|
|
|
|
| 385 |
"node_i": "593",
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
},
|
| 389 |
{
|
| 390 |
"name": "Munsee",
|
| 391 |
"iso_1_code": null,
|
| 392 |
"iso_3_code": "umu",
|
| 393 |
-
"tokenizers": {},
|
| 394 |
"children": [],
|
|
|
|
| 395 |
"node_i": "594",
|
| 396 |
-
"
|
| 397 |
-
"
|
| 398 |
},
|
| 399 |
{
|
| 400 |
"name": "Unami",
|
| 401 |
"iso_1_code": null,
|
| 402 |
"iso_3_code": "unm",
|
| 403 |
-
"tokenizers": {},
|
| 404 |
"children": [],
|
|
|
|
| 405 |
"node_i": "595",
|
| 406 |
-
"
|
| 407 |
-
"
|
| 408 |
}
|
| 409 |
],
|
|
|
|
| 410 |
"node_i": "592",
|
| 411 |
-
"
|
| 412 |
-
"
|
| 413 |
},
|
| 414 |
{
|
| 415 |
"name": "Nanticoke-Conoy",
|
| 416 |
"iso_1_code": null,
|
| 417 |
"iso_3_code": null,
|
| 418 |
-
"tokenizers": {},
|
| 419 |
"children": [
|
| 420 |
{
|
| 421 |
"name": "Nanticoke",
|
| 422 |
"iso_1_code": null,
|
| 423 |
"iso_3_code": "nnt",
|
| 424 |
-
"tokenizers": {},
|
| 425 |
"children": [],
|
|
|
|
| 426 |
"node_i": "597",
|
| 427 |
-
"
|
| 428 |
-
"
|
| 429 |
},
|
| 430 |
{
|
| 431 |
"name": "Piscataway",
|
| 432 |
"iso_1_code": null,
|
| 433 |
"iso_3_code": "psy",
|
| 434 |
-
"tokenizers": {},
|
| 435 |
"children": [],
|
|
|
|
| 436 |
"node_i": "598",
|
| 437 |
-
"
|
| 438 |
-
"
|
| 439 |
}
|
| 440 |
],
|
|
|
|
| 441 |
"node_i": "596",
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
}
|
| 445 |
],
|
|
|
|
| 446 |
"node_i": "577",
|
| 447 |
-
"
|
| 448 |
-
"
|
| 449 |
},
|
| 450 |
{
|
| 451 |
"name": "Fox",
|
| 452 |
"iso_1_code": null,
|
| 453 |
"iso_3_code": null,
|
| 454 |
-
"tokenizers": {},
|
| 455 |
"children": [
|
| 456 |
{
|
| 457 |
"name": "Kickapoo",
|
| 458 |
"iso_1_code": null,
|
| 459 |
"iso_3_code": "kic",
|
| 460 |
-
"tokenizers": {},
|
| 461 |
"children": [],
|
|
|
|
| 462 |
"node_i": "600",
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
},
|
| 466 |
{
|
| 467 |
"name": "Meskwaki",
|
| 468 |
"iso_1_code": null,
|
| 469 |
"iso_3_code": "sac",
|
| 470 |
-
"tokenizers": {},
|
| 471 |
"children": [],
|
|
|
|
| 472 |
"node_i": "601",
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
}
|
| 476 |
],
|
|
|
|
| 477 |
"node_i": "599",
|
| 478 |
-
"
|
| 479 |
-
"
|
| 480 |
},
|
| 481 |
{
|
| 482 |
"name": "Ojibwa-Potawatomi",
|
| 483 |
"iso_1_code": null,
|
| 484 |
"iso_3_code": null,
|
| 485 |
-
"tokenizers": {},
|
| 486 |
"children": [
|
| 487 |
{
|
| 488 |
"name": "Algonquin",
|
| 489 |
"iso_1_code": null,
|
| 490 |
"iso_3_code": "alq",
|
| 491 |
-
"tokenizers": {},
|
| 492 |
"children": [],
|
|
|
|
| 493 |
"node_i": "603",
|
|
|
|
| 494 |
"scripts": [
|
| 495 |
"Latn"
|
| 496 |
-
]
|
| 497 |
-
"own_tokenizer": false
|
| 498 |
},
|
| 499 |
{
|
| 500 |
"name": "Chippewa",
|
| 501 |
"iso_1_code": "oj",
|
| 502 |
"iso_3_code": "ciw",
|
| 503 |
-
"tokenizers": {},
|
| 504 |
"children": [],
|
|
|
|
| 505 |
"node_i": "604",
|
| 506 |
-
"
|
| 507 |
-
"
|
| 508 |
},
|
| 509 |
{
|
| 510 |
"name": "Ojibwa, Northwestern",
|
| 511 |
"iso_1_code": "oj",
|
| 512 |
"iso_3_code": "ojb",
|
| 513 |
-
"tokenizers": {},
|
| 514 |
"children": [],
|
|
|
|
| 515 |
"node_i": "605",
|
|
|
|
| 516 |
"scripts": [
|
| 517 |
-
"
|
| 518 |
-
"
|
| 519 |
-
]
|
| 520 |
-
"own_tokenizer": false
|
| 521 |
},
|
| 522 |
{
|
| 523 |
"name": "Ojibwa, Central",
|
| 524 |
"iso_1_code": "oj",
|
| 525 |
"iso_3_code": "ojc",
|
| 526 |
-
"tokenizers": {},
|
| 527 |
"children": [],
|
|
|
|
| 528 |
"node_i": "606",
|
| 529 |
-
"
|
| 530 |
-
"
|
| 531 |
},
|
| 532 |
{
|
| 533 |
"name": "Ojibwa, Eastern",
|
| 534 |
"iso_1_code": "oj",
|
| 535 |
"iso_3_code": "ojg",
|
| 536 |
-
"tokenizers": {},
|
| 537 |
"children": [],
|
|
|
|
| 538 |
"node_i": "607",
|
| 539 |
-
"
|
| 540 |
-
"
|
| 541 |
},
|
| 542 |
{
|
| 543 |
"name": "Oji-Cree",
|
| 544 |
"iso_1_code": "oj",
|
| 545 |
"iso_3_code": "ojs",
|
| 546 |
-
"tokenizers": {},
|
| 547 |
"children": [],
|
|
|
|
| 548 |
"node_i": "608",
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
},
|
| 552 |
{
|
| 553 |
"name": "Ojibwa, Western",
|
| 554 |
"iso_1_code": "oj",
|
| 555 |
"iso_3_code": "ojw",
|
| 556 |
-
"tokenizers": {},
|
| 557 |
"children": [],
|
|
|
|
| 558 |
"node_i": "609",
|
| 559 |
-
"
|
| 560 |
-
"
|
| 561 |
},
|
| 562 |
{
|
| 563 |
"name": "Ottawa",
|
| 564 |
"iso_1_code": "oj",
|
| 565 |
"iso_3_code": "otw",
|
| 566 |
-
"tokenizers": {},
|
| 567 |
"children": [],
|
|
|
|
| 568 |
"node_i": "610",
|
|
|
|
| 569 |
"scripts": [
|
| 570 |
"Latn"
|
| 571 |
-
]
|
| 572 |
-
"own_tokenizer": false
|
| 573 |
},
|
| 574 |
{
|
| 575 |
"name": "Potawatomi",
|
| 576 |
"iso_1_code": null,
|
| 577 |
"iso_3_code": "pot",
|
| 578 |
-
"tokenizers": {},
|
| 579 |
"children": [],
|
|
|
|
| 580 |
"node_i": "611",
|
|
|
|
| 581 |
"scripts": [
|
| 582 |
"Latn"
|
| 583 |
-
]
|
| 584 |
-
"own_tokenizer": false
|
| 585 |
}
|
| 586 |
],
|
|
|
|
| 587 |
"node_i": "602",
|
| 588 |
-
"
|
| 589 |
-
"
|
| 590 |
}
|
| 591 |
],
|
|
|
|
| 592 |
"node_i": "557",
|
| 593 |
-
"
|
| 594 |
-
"
|
| 595 |
},
|
| 596 |
{
|
| 597 |
"name": "Ritwan",
|
| 598 |
"iso_1_code": null,
|
| 599 |
"iso_3_code": null,
|
| 600 |
-
"tokenizers": {},
|
| 601 |
"children": [
|
| 602 |
{
|
| 603 |
"name": "Wiyot",
|
| 604 |
"iso_1_code": null,
|
| 605 |
"iso_3_code": "wiy",
|
| 606 |
-
"tokenizers": {},
|
| 607 |
"children": [],
|
|
|
|
| 608 |
"node_i": "613",
|
| 609 |
-
"
|
| 610 |
-
"
|
| 611 |
},
|
| 612 |
{
|
| 613 |
"name": "Yurok",
|
| 614 |
"iso_1_code": null,
|
| 615 |
"iso_3_code": "yur",
|
| 616 |
-
"tokenizers": {},
|
| 617 |
"children": [],
|
|
|
|
| 618 |
"node_i": "614",
|
| 619 |
-
"
|
| 620 |
-
"
|
| 621 |
}
|
| 622 |
],
|
|
|
|
| 623 |
"node_i": "612",
|
| 624 |
-
"
|
| 625 |
-
"
|
| 626 |
}
|
| 627 |
],
|
|
|
|
| 628 |
"node_i": "556",
|
| 629 |
-
"
|
| 630 |
-
"
|
| 631 |
}
|
|
|
|
| 2 |
"name": "Algic",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Algonquian",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Blackfoot",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "bla",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "558",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
"scripts": [
|
| 20 |
"Latn"
|
| 21 |
+
]
|
|
|
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Cheyenne",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "chy",
|
|
|
|
| 27 |
"children": [],
|
| 28 |
+
"tokenizers": {},
|
| 29 |
"node_i": "559",
|
| 30 |
+
"native_tokenizers": [],
|
| 31 |
+
"scripts": []
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Menominee",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": "mez",
|
|
|
|
| 37 |
"children": [],
|
| 38 |
+
"tokenizers": {},
|
| 39 |
"node_i": "560",
|
| 40 |
+
"native_tokenizers": [],
|
| 41 |
+
"scripts": []
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"name": "Miami",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "mia",
|
|
|
|
| 47 |
"children": [],
|
| 48 |
+
"tokenizers": {},
|
| 49 |
"node_i": "561",
|
| 50 |
+
"native_tokenizers": [],
|
| 51 |
+
"scripts": []
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Nawathinehena",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "nwa",
|
|
|
|
| 57 |
"children": [],
|
| 58 |
+
"tokenizers": {},
|
| 59 |
"node_i": "562",
|
| 60 |
+
"native_tokenizers": [],
|
| 61 |
+
"scripts": []
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"name": "Shawnee",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": "sjw",
|
|
|
|
| 67 |
"children": [],
|
| 68 |
+
"tokenizers": {},
|
| 69 |
"node_i": "563",
|
| 70 |
+
"native_tokenizers": [],
|
| 71 |
+
"scripts": []
|
| 72 |
},
|
| 73 |
{
|
| 74 |
"name": "Arapaho",
|
| 75 |
"iso_1_code": null,
|
| 76 |
"iso_3_code": null,
|
|
|
|
| 77 |
"children": [
|
| 78 |
{
|
| 79 |
"name": "Arapaho",
|
| 80 |
"iso_1_code": null,
|
| 81 |
"iso_3_code": "arp",
|
|
|
|
| 82 |
"children": [],
|
| 83 |
+
"tokenizers": {},
|
| 84 |
"node_i": "565",
|
| 85 |
+
"native_tokenizers": [],
|
| 86 |
"scripts": [
|
| 87 |
"Latn"
|
| 88 |
+
]
|
|
|
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"name": "Gros Ventre",
|
| 92 |
"iso_1_code": null,
|
| 93 |
"iso_3_code": "ats",
|
|
|
|
| 94 |
"children": [],
|
| 95 |
+
"tokenizers": {},
|
| 96 |
"node_i": "566",
|
| 97 |
+
"native_tokenizers": [],
|
| 98 |
+
"scripts": []
|
| 99 |
}
|
| 100 |
],
|
| 101 |
+
"tokenizers": {},
|
| 102 |
"node_i": "564",
|
| 103 |
+
"native_tokenizers": [],
|
| 104 |
+
"scripts": []
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"name": "Cree-Montagnais",
|
| 108 |
"iso_1_code": null,
|
| 109 |
"iso_3_code": null,
|
|
|
|
| 110 |
"children": [
|
| 111 |
{
|
| 112 |
"name": "Atikamekw",
|
| 113 |
"iso_1_code": null,
|
| 114 |
"iso_3_code": "atj",
|
|
|
|
| 115 |
"children": [],
|
| 116 |
+
"tokenizers": {},
|
| 117 |
"node_i": "568",
|
| 118 |
+
"native_tokenizers": [],
|
| 119 |
"scripts": [
|
| 120 |
"Latn"
|
| 121 |
+
]
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"name": "Cree, Southern East",
|
| 125 |
"iso_1_code": "cr",
|
| 126 |
"iso_3_code": "crj",
|
|
|
|
| 127 |
"children": [],
|
| 128 |
+
"tokenizers": {},
|
| 129 |
"node_i": "569",
|
| 130 |
+
"native_tokenizers": [],
|
| 131 |
"scripts": [
|
| 132 |
"Cans"
|
| 133 |
+
]
|
|
|
|
| 134 |
},
|
| 135 |
{
|
| 136 |
"name": "Cree, Plains",
|
| 137 |
"iso_1_code": "cr",
|
| 138 |
"iso_3_code": "crk",
|
|
|
|
| 139 |
"children": [],
|
| 140 |
+
"tokenizers": {},
|
| 141 |
"node_i": "570",
|
| 142 |
+
"native_tokenizers": [],
|
| 143 |
"scripts": [
|
| 144 |
"Latn",
|
| 145 |
"Cans"
|
| 146 |
+
]
|
|
|
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"name": "Cree, Northern East",
|
| 150 |
"iso_1_code": "cr",
|
| 151 |
"iso_3_code": "crl",
|
|
|
|
| 152 |
"children": [],
|
| 153 |
+
"tokenizers": {},
|
| 154 |
"node_i": "571",
|
| 155 |
+
"native_tokenizers": [],
|
| 156 |
"scripts": [
|
| 157 |
"Cans"
|
| 158 |
+
]
|
|
|
|
| 159 |
},
|
| 160 |
{
|
| 161 |
"name": "Cree, Moose",
|
| 162 |
"iso_1_code": "cr",
|
| 163 |
"iso_3_code": "crm",
|
|
|
|
| 164 |
"children": [],
|
| 165 |
+
"tokenizers": {},
|
| 166 |
"node_i": "572",
|
| 167 |
+
"native_tokenizers": [],
|
| 168 |
"scripts": [
|
| 169 |
"Cans"
|
| 170 |
+
]
|
|
|
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"name": "Cree, Swampy",
|
| 174 |
"iso_1_code": "cr",
|
| 175 |
"iso_3_code": "csw",
|
|
|
|
| 176 |
"children": [],
|
| 177 |
+
"tokenizers": {},
|
| 178 |
"node_i": "573",
|
| 179 |
+
"native_tokenizers": [],
|
| 180 |
"scripts": [
|
| 181 |
"Latn"
|
| 182 |
+
]
|
|
|
|
| 183 |
},
|
| 184 |
{
|
| 185 |
"name": "Cree, Woods",
|
| 186 |
"iso_1_code": "cr",
|
| 187 |
"iso_3_code": "cwd",
|
|
|
|
| 188 |
"children": [],
|
| 189 |
+
"tokenizers": {},
|
| 190 |
"node_i": "574",
|
| 191 |
+
"native_tokenizers": [],
|
| 192 |
"scripts": [
|
| 193 |
"Cans"
|
| 194 |
+
]
|
|
|
|
| 195 |
},
|
| 196 |
{
|
| 197 |
"name": "Innu",
|
| 198 |
"iso_1_code": null,
|
| 199 |
"iso_3_code": "moe",
|
|
|
|
| 200 |
"children": [],
|
| 201 |
+
"tokenizers": {},
|
| 202 |
"node_i": "575",
|
| 203 |
+
"native_tokenizers": [],
|
| 204 |
+
"scripts": []
|
| 205 |
},
|
| 206 |
{
|
| 207 |
"name": "Naskapi",
|
| 208 |
"iso_1_code": null,
|
| 209 |
"iso_3_code": "nsk",
|
|
|
|
| 210 |
"children": [],
|
| 211 |
+
"tokenizers": {},
|
| 212 |
"node_i": "576",
|
| 213 |
+
"native_tokenizers": [],
|
| 214 |
+
"scripts": []
|
| 215 |
}
|
| 216 |
],
|
| 217 |
+
"tokenizers": {},
|
| 218 |
"node_i": "567",
|
| 219 |
+
"native_tokenizers": [],
|
| 220 |
+
"scripts": []
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"name": "Eastern Algonquian",
|
| 224 |
"iso_1_code": null,
|
| 225 |
"iso_3_code": null,
|
|
|
|
| 226 |
"children": [
|
| 227 |
{
|
| 228 |
"name": "Carolina Algonquian",
|
| 229 |
"iso_1_code": null,
|
| 230 |
"iso_3_code": "crr",
|
|
|
|
| 231 |
"children": [],
|
| 232 |
+
"tokenizers": {},
|
| 233 |
"node_i": "578",
|
| 234 |
+
"native_tokenizers": [],
|
| 235 |
+
"scripts": []
|
| 236 |
},
|
| 237 |
{
|
| 238 |
"name": "Etchemin",
|
| 239 |
"iso_1_code": null,
|
| 240 |
"iso_3_code": "etc",
|
|
|
|
| 241 |
"children": [],
|
| 242 |
+
"tokenizers": {},
|
| 243 |
"node_i": "579",
|
| 244 |
+
"native_tokenizers": [],
|
| 245 |
+
"scripts": []
|
| 246 |
},
|
| 247 |
{
|
| 248 |
"name": "Mi\u2019kmaq",
|
| 249 |
"iso_1_code": null,
|
| 250 |
"iso_3_code": "mic",
|
|
|
|
| 251 |
"children": [],
|
| 252 |
+
"tokenizers": {},
|
| 253 |
"node_i": "580",
|
| 254 |
+
"native_tokenizers": [],
|
| 255 |
"scripts": [
|
| 256 |
"Latn"
|
| 257 |
+
]
|
|
|
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"name": "Powhatan",
|
| 261 |
"iso_1_code": null,
|
| 262 |
"iso_3_code": "pim",
|
|
|
|
| 263 |
"children": [],
|
| 264 |
+
"tokenizers": {},
|
| 265 |
"node_i": "581",
|
| 266 |
+
"native_tokenizers": [],
|
| 267 |
+
"scripts": []
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"name": "Malecite-Passamaquoddy",
|
| 271 |
"iso_1_code": null,
|
| 272 |
"iso_3_code": "pqm",
|
|
|
|
| 273 |
"children": [],
|
| 274 |
+
"tokenizers": {},
|
| 275 |
"node_i": "582",
|
| 276 |
+
"native_tokenizers": [],
|
| 277 |
+
"scripts": []
|
| 278 |
},
|
| 279 |
{
|
| 280 |
"name": "Quiripi",
|
| 281 |
"iso_1_code": null,
|
| 282 |
"iso_3_code": "qyp",
|
|
|
|
| 283 |
"children": [],
|
| 284 |
+
"tokenizers": {},
|
| 285 |
"node_i": "583",
|
| 286 |
+
"native_tokenizers": [],
|
| 287 |
+
"scripts": []
|
| 288 |
},
|
| 289 |
{
|
| 290 |
"name": "Wampanoag",
|
| 291 |
"iso_1_code": null,
|
| 292 |
"iso_3_code": "wam",
|
|
|
|
| 293 |
"children": [],
|
| 294 |
+
"tokenizers": {},
|
| 295 |
"node_i": "584",
|
| 296 |
+
"native_tokenizers": [],
|
| 297 |
+
"scripts": []
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"name": "Loup B",
|
| 301 |
"iso_1_code": null,
|
| 302 |
"iso_3_code": "xlb",
|
|
|
|
| 303 |
"children": [],
|
| 304 |
+
"tokenizers": {},
|
| 305 |
"node_i": "585",
|
| 306 |
+
"native_tokenizers": [],
|
| 307 |
+
"scripts": []
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"name": "Loup A",
|
| 311 |
"iso_1_code": null,
|
| 312 |
"iso_3_code": "xlo",
|
|
|
|
| 313 |
"children": [],
|
| 314 |
+
"tokenizers": {},
|
| 315 |
"node_i": "586",
|
| 316 |
+
"native_tokenizers": [],
|
| 317 |
+
"scripts": []
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"name": "Narragansett",
|
| 321 |
"iso_1_code": null,
|
| 322 |
"iso_3_code": "xnt",
|
|
|
|
| 323 |
"children": [],
|
| 324 |
+
"tokenizers": {},
|
| 325 |
"node_i": "587",
|
| 326 |
+
"native_tokenizers": [],
|
| 327 |
+
"scripts": []
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"name": "Mohegan-Pequot",
|
| 331 |
"iso_1_code": null,
|
| 332 |
"iso_3_code": "xpq",
|
|
|
|
| 333 |
"children": [],
|
| 334 |
+
"tokenizers": {},
|
| 335 |
"node_i": "588",
|
| 336 |
+
"native_tokenizers": [],
|
| 337 |
+
"scripts": []
|
| 338 |
},
|
| 339 |
{
|
| 340 |
"name": "Abenaki",
|
| 341 |
"iso_1_code": null,
|
| 342 |
"iso_3_code": null,
|
|
|
|
| 343 |
"children": [
|
| 344 |
{
|
| 345 |
"name": "Abenaki, Eastern",
|
| 346 |
"iso_1_code": null,
|
| 347 |
"iso_3_code": "aaq",
|
|
|
|
| 348 |
"children": [],
|
| 349 |
+
"tokenizers": {},
|
| 350 |
"node_i": "590",
|
| 351 |
+
"native_tokenizers": [],
|
| 352 |
+
"scripts": []
|
| 353 |
},
|
| 354 |
{
|
| 355 |
"name": "Abenaki, Western",
|
| 356 |
"iso_1_code": null,
|
| 357 |
"iso_3_code": "abe",
|
|
|
|
| 358 |
"children": [],
|
| 359 |
+
"tokenizers": {},
|
| 360 |
"node_i": "591",
|
| 361 |
+
"native_tokenizers": [],
|
| 362 |
+
"scripts": []
|
| 363 |
}
|
| 364 |
],
|
| 365 |
+
"tokenizers": {},
|
| 366 |
"node_i": "589",
|
| 367 |
+
"native_tokenizers": [],
|
| 368 |
+
"scripts": []
|
| 369 |
},
|
| 370 |
{
|
| 371 |
"name": "Delaware",
|
| 372 |
"iso_1_code": null,
|
| 373 |
"iso_3_code": null,
|
|
|
|
| 374 |
"children": [
|
| 375 |
{
|
| 376 |
"name": "Mahican",
|
| 377 |
"iso_1_code": null,
|
| 378 |
"iso_3_code": "mjy",
|
|
|
|
| 379 |
"children": [],
|
| 380 |
+
"tokenizers": {},
|
| 381 |
"node_i": "593",
|
| 382 |
+
"native_tokenizers": [],
|
| 383 |
+
"scripts": []
|
| 384 |
},
|
| 385 |
{
|
| 386 |
"name": "Munsee",
|
| 387 |
"iso_1_code": null,
|
| 388 |
"iso_3_code": "umu",
|
|
|
|
| 389 |
"children": [],
|
| 390 |
+
"tokenizers": {},
|
| 391 |
"node_i": "594",
|
| 392 |
+
"native_tokenizers": [],
|
| 393 |
+
"scripts": []
|
| 394 |
},
|
| 395 |
{
|
| 396 |
"name": "Unami",
|
| 397 |
"iso_1_code": null,
|
| 398 |
"iso_3_code": "unm",
|
|
|
|
| 399 |
"children": [],
|
| 400 |
+
"tokenizers": {},
|
| 401 |
"node_i": "595",
|
| 402 |
+
"native_tokenizers": [],
|
| 403 |
+
"scripts": []
|
| 404 |
}
|
| 405 |
],
|
| 406 |
+
"tokenizers": {},
|
| 407 |
"node_i": "592",
|
| 408 |
+
"native_tokenizers": [],
|
| 409 |
+
"scripts": []
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"name": "Nanticoke-Conoy",
|
| 413 |
"iso_1_code": null,
|
| 414 |
"iso_3_code": null,
|
|
|
|
| 415 |
"children": [
|
| 416 |
{
|
| 417 |
"name": "Nanticoke",
|
| 418 |
"iso_1_code": null,
|
| 419 |
"iso_3_code": "nnt",
|
|
|
|
| 420 |
"children": [],
|
| 421 |
+
"tokenizers": {},
|
| 422 |
"node_i": "597",
|
| 423 |
+
"native_tokenizers": [],
|
| 424 |
+
"scripts": []
|
| 425 |
},
|
| 426 |
{
|
| 427 |
"name": "Piscataway",
|
| 428 |
"iso_1_code": null,
|
| 429 |
"iso_3_code": "psy",
|
|
|
|
| 430 |
"children": [],
|
| 431 |
+
"tokenizers": {},
|
| 432 |
"node_i": "598",
|
| 433 |
+
"native_tokenizers": [],
|
| 434 |
+
"scripts": []
|
| 435 |
}
|
| 436 |
],
|
| 437 |
+
"tokenizers": {},
|
| 438 |
"node_i": "596",
|
| 439 |
+
"native_tokenizers": [],
|
| 440 |
+
"scripts": []
|
| 441 |
}
|
| 442 |
],
|
| 443 |
+
"tokenizers": {},
|
| 444 |
"node_i": "577",
|
| 445 |
+
"native_tokenizers": [],
|
| 446 |
+
"scripts": []
|
| 447 |
},
|
| 448 |
{
|
| 449 |
"name": "Fox",
|
| 450 |
"iso_1_code": null,
|
| 451 |
"iso_3_code": null,
|
|
|
|
| 452 |
"children": [
|
| 453 |
{
|
| 454 |
"name": "Kickapoo",
|
| 455 |
"iso_1_code": null,
|
| 456 |
"iso_3_code": "kic",
|
|
|
|
| 457 |
"children": [],
|
| 458 |
+
"tokenizers": {},
|
| 459 |
"node_i": "600",
|
| 460 |
+
"native_tokenizers": [],
|
| 461 |
+
"scripts": []
|
| 462 |
},
|
| 463 |
{
|
| 464 |
"name": "Meskwaki",
|
| 465 |
"iso_1_code": null,
|
| 466 |
"iso_3_code": "sac",
|
|
|
|
| 467 |
"children": [],
|
| 468 |
+
"tokenizers": {},
|
| 469 |
"node_i": "601",
|
| 470 |
+
"native_tokenizers": [],
|
| 471 |
+
"scripts": []
|
| 472 |
}
|
| 473 |
],
|
| 474 |
+
"tokenizers": {},
|
| 475 |
"node_i": "599",
|
| 476 |
+
"native_tokenizers": [],
|
| 477 |
+
"scripts": []
|
| 478 |
},
|
| 479 |
{
|
| 480 |
"name": "Ojibwa-Potawatomi",
|
| 481 |
"iso_1_code": null,
|
| 482 |
"iso_3_code": null,
|
|
|
|
| 483 |
"children": [
|
| 484 |
{
|
| 485 |
"name": "Algonquin",
|
| 486 |
"iso_1_code": null,
|
| 487 |
"iso_3_code": "alq",
|
|
|
|
| 488 |
"children": [],
|
| 489 |
+
"tokenizers": {},
|
| 490 |
"node_i": "603",
|
| 491 |
+
"native_tokenizers": [],
|
| 492 |
"scripts": [
|
| 493 |
"Latn"
|
| 494 |
+
]
|
|
|
|
| 495 |
},
|
| 496 |
{
|
| 497 |
"name": "Chippewa",
|
| 498 |
"iso_1_code": "oj",
|
| 499 |
"iso_3_code": "ciw",
|
|
|
|
| 500 |
"children": [],
|
| 501 |
+
"tokenizers": {},
|
| 502 |
"node_i": "604",
|
| 503 |
+
"native_tokenizers": [],
|
| 504 |
+
"scripts": []
|
| 505 |
},
|
| 506 |
{
|
| 507 |
"name": "Ojibwa, Northwestern",
|
| 508 |
"iso_1_code": "oj",
|
| 509 |
"iso_3_code": "ojb",
|
|
|
|
| 510 |
"children": [],
|
| 511 |
+
"tokenizers": {},
|
| 512 |
"node_i": "605",
|
| 513 |
+
"native_tokenizers": [],
|
| 514 |
"scripts": [
|
| 515 |
+
"Cans",
|
| 516 |
+
"Latn"
|
| 517 |
+
]
|
|
|
|
| 518 |
},
|
| 519 |
{
|
| 520 |
"name": "Ojibwa, Central",
|
| 521 |
"iso_1_code": "oj",
|
| 522 |
"iso_3_code": "ojc",
|
|
|
|
| 523 |
"children": [],
|
| 524 |
+
"tokenizers": {},
|
| 525 |
"node_i": "606",
|
| 526 |
+
"native_tokenizers": [],
|
| 527 |
+
"scripts": []
|
| 528 |
},
|
| 529 |
{
|
| 530 |
"name": "Ojibwa, Eastern",
|
| 531 |
"iso_1_code": "oj",
|
| 532 |
"iso_3_code": "ojg",
|
|
|
|
| 533 |
"children": [],
|
| 534 |
+
"tokenizers": {},
|
| 535 |
"node_i": "607",
|
| 536 |
+
"native_tokenizers": [],
|
| 537 |
+
"scripts": []
|
| 538 |
},
|
| 539 |
{
|
| 540 |
"name": "Oji-Cree",
|
| 541 |
"iso_1_code": "oj",
|
| 542 |
"iso_3_code": "ojs",
|
|
|
|
| 543 |
"children": [],
|
| 544 |
+
"tokenizers": {},
|
| 545 |
"node_i": "608",
|
| 546 |
+
"native_tokenizers": [],
|
| 547 |
+
"scripts": []
|
| 548 |
},
|
| 549 |
{
|
| 550 |
"name": "Ojibwa, Western",
|
| 551 |
"iso_1_code": "oj",
|
| 552 |
"iso_3_code": "ojw",
|
|
|
|
| 553 |
"children": [],
|
| 554 |
+
"tokenizers": {},
|
| 555 |
"node_i": "609",
|
| 556 |
+
"native_tokenizers": [],
|
| 557 |
+
"scripts": []
|
| 558 |
},
|
| 559 |
{
|
| 560 |
"name": "Ottawa",
|
| 561 |
"iso_1_code": "oj",
|
| 562 |
"iso_3_code": "otw",
|
|
|
|
| 563 |
"children": [],
|
| 564 |
+
"tokenizers": {},
|
| 565 |
"node_i": "610",
|
| 566 |
+
"native_tokenizers": [],
|
| 567 |
"scripts": [
|
| 568 |
"Latn"
|
| 569 |
+
]
|
|
|
|
| 570 |
},
|
| 571 |
{
|
| 572 |
"name": "Potawatomi",
|
| 573 |
"iso_1_code": null,
|
| 574 |
"iso_3_code": "pot",
|
|
|
|
| 575 |
"children": [],
|
| 576 |
+
"tokenizers": {},
|
| 577 |
"node_i": "611",
|
| 578 |
+
"native_tokenizers": [],
|
| 579 |
"scripts": [
|
| 580 |
"Latn"
|
| 581 |
+
]
|
|
|
|
| 582 |
}
|
| 583 |
],
|
| 584 |
+
"tokenizers": {},
|
| 585 |
"node_i": "602",
|
| 586 |
+
"native_tokenizers": [],
|
| 587 |
+
"scripts": []
|
| 588 |
}
|
| 589 |
],
|
| 590 |
+
"tokenizers": {},
|
| 591 |
"node_i": "557",
|
| 592 |
+
"native_tokenizers": [],
|
| 593 |
+
"scripts": []
|
| 594 |
},
|
| 595 |
{
|
| 596 |
"name": "Ritwan",
|
| 597 |
"iso_1_code": null,
|
| 598 |
"iso_3_code": null,
|
|
|
|
| 599 |
"children": [
|
| 600 |
{
|
| 601 |
"name": "Wiyot",
|
| 602 |
"iso_1_code": null,
|
| 603 |
"iso_3_code": "wiy",
|
|
|
|
| 604 |
"children": [],
|
| 605 |
+
"tokenizers": {},
|
| 606 |
"node_i": "613",
|
| 607 |
+
"native_tokenizers": [],
|
| 608 |
+
"scripts": []
|
| 609 |
},
|
| 610 |
{
|
| 611 |
"name": "Yurok",
|
| 612 |
"iso_1_code": null,
|
| 613 |
"iso_3_code": "yur",
|
|
|
|
| 614 |
"children": [],
|
| 615 |
+
"tokenizers": {},
|
| 616 |
"node_i": "614",
|
| 617 |
+
"native_tokenizers": [],
|
| 618 |
+
"scripts": []
|
| 619 |
}
|
| 620 |
],
|
| 621 |
+
"tokenizers": {},
|
| 622 |
"node_i": "612",
|
| 623 |
+
"native_tokenizers": [],
|
| 624 |
+
"scripts": []
|
| 625 |
}
|
| 626 |
],
|
| 627 |
+
"tokenizers": {},
|
| 628 |
"node_i": "556",
|
| 629 |
+
"native_tokenizers": [],
|
| 630 |
+
"scripts": []
|
| 631 |
}
|
data/Amto-Musan.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Amto-Musan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Amto",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "amt",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "616",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Siawi",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "mmp",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "617",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "615",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Amto-Musan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Amto",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "amt",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "616",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Siawi",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "mmp",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "617",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "615",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Andamanese.json
CHANGED
|
@@ -2,194 +2,194 @@
|
|
| 2 |
"name": "Andamanese",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Great Andamanese",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Great Andamanese, Mixed",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "gac",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "620",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Central",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": null,
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [
|
| 29 |
{
|
| 30 |
"name": "Aka-Bea",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": "abj",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "622",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Akar-Bale",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "acl",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "623",
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Aka-Kede",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": "akx",
|
| 53 |
-
"tokenizers": {},
|
| 54 |
"children": [],
|
|
|
|
| 55 |
"node_i": "624",
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"name": "Aka-Kol",
|
| 61 |
"iso_1_code": null,
|
| 62 |
"iso_3_code": "aky",
|
| 63 |
-
"tokenizers": {},
|
| 64 |
"children": [],
|
|
|
|
| 65 |
"node_i": "625",
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"name": "A-Pucikwar",
|
| 71 |
"iso_1_code": null,
|
| 72 |
"iso_3_code": "apq",
|
| 73 |
-
"tokenizers": {},
|
| 74 |
"children": [],
|
|
|
|
| 75 |
"node_i": "626",
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"name": "Oko-Juwoi",
|
| 81 |
"iso_1_code": null,
|
| 82 |
"iso_3_code": "okj",
|
| 83 |
-
"tokenizers": {},
|
| 84 |
"children": [],
|
|
|
|
| 85 |
"node_i": "627",
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
}
|
| 89 |
],
|
|
|
|
| 90 |
"node_i": "621",
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
},
|
| 94 |
{
|
| 95 |
"name": "Northern",
|
| 96 |
"iso_1_code": null,
|
| 97 |
"iso_3_code": null,
|
| 98 |
-
"tokenizers": {},
|
| 99 |
"children": [
|
| 100 |
{
|
| 101 |
"name": "Aka-Cari",
|
| 102 |
"iso_1_code": null,
|
| 103 |
"iso_3_code": "aci",
|
| 104 |
-
"tokenizers": {},
|
| 105 |
"children": [],
|
|
|
|
| 106 |
"node_i": "629",
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"name": "Aka-Kora",
|
| 112 |
"iso_1_code": null,
|
| 113 |
"iso_3_code": "ack",
|
| 114 |
-
"tokenizers": {},
|
| 115 |
"children": [],
|
|
|
|
| 116 |
"node_i": "630",
|
| 117 |
-
"
|
| 118 |
-
"
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"name": "Aka-Jeru",
|
| 122 |
"iso_1_code": null,
|
| 123 |
"iso_3_code": "akj",
|
| 124 |
-
"tokenizers": {},
|
| 125 |
"children": [],
|
|
|
|
| 126 |
"node_i": "631",
|
| 127 |
-
"
|
| 128 |
-
"
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"name": "Aka-Bo",
|
| 132 |
"iso_1_code": null,
|
| 133 |
"iso_3_code": "akm",
|
| 134 |
-
"tokenizers": {},
|
| 135 |
"children": [],
|
|
|
|
| 136 |
"node_i": "632",
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
}
|
| 140 |
],
|
|
|
|
| 141 |
"node_i": "628",
|
| 142 |
-
"
|
| 143 |
-
"
|
| 144 |
}
|
| 145 |
],
|
|
|
|
| 146 |
"node_i": "619",
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"name": "South Andamanese",
|
| 152 |
"iso_1_code": null,
|
| 153 |
"iso_3_code": null,
|
| 154 |
-
"tokenizers": {},
|
| 155 |
"children": [
|
| 156 |
{
|
| 157 |
"name": "Jarawa",
|
| 158 |
"iso_1_code": null,
|
| 159 |
"iso_3_code": "anq",
|
| 160 |
-
"tokenizers": {},
|
| 161 |
"children": [],
|
|
|
|
| 162 |
"node_i": "634",
|
| 163 |
-
"
|
| 164 |
-
"
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "\u00d6\u00f1ge",
|
| 168 |
"iso_1_code": null,
|
| 169 |
"iso_3_code": "oon",
|
| 170 |
-
"tokenizers": {},
|
| 171 |
"children": [],
|
|
|
|
| 172 |
"node_i": "635",
|
| 173 |
-
"
|
| 174 |
-
"
|
| 175 |
},
|
| 176 |
{
|
| 177 |
"name": "Sentinel",
|
| 178 |
"iso_1_code": null,
|
| 179 |
"iso_3_code": "std",
|
| 180 |
-
"tokenizers": {},
|
| 181 |
"children": [],
|
|
|
|
| 182 |
"node_i": "636",
|
| 183 |
-
"
|
| 184 |
-
"
|
| 185 |
}
|
| 186 |
],
|
|
|
|
| 187 |
"node_i": "633",
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
}
|
| 191 |
],
|
|
|
|
| 192 |
"node_i": "618",
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
}
|
|
|
|
| 2 |
"name": "Andamanese",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Great Andamanese",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Great Andamanese, Mixed",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "gac",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "620",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Central",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": null,
|
|
|
|
| 25 |
"children": [
|
| 26 |
{
|
| 27 |
"name": "Aka-Bea",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "abj",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "622",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Akar-Bale",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "acl",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "623",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Aka-Kede",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "akx",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "624",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"name": "Aka-Kol",
|
| 58 |
"iso_1_code": null,
|
| 59 |
"iso_3_code": "aky",
|
|
|
|
| 60 |
"children": [],
|
| 61 |
+
"tokenizers": {},
|
| 62 |
"node_i": "625",
|
| 63 |
+
"native_tokenizers": [],
|
| 64 |
+
"scripts": []
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"name": "A-Pucikwar",
|
| 68 |
"iso_1_code": null,
|
| 69 |
"iso_3_code": "apq",
|
|
|
|
| 70 |
"children": [],
|
| 71 |
+
"tokenizers": {},
|
| 72 |
"node_i": "626",
|
| 73 |
+
"native_tokenizers": [],
|
| 74 |
+
"scripts": []
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"name": "Oko-Juwoi",
|
| 78 |
"iso_1_code": null,
|
| 79 |
"iso_3_code": "okj",
|
|
|
|
| 80 |
"children": [],
|
| 81 |
+
"tokenizers": {},
|
| 82 |
"node_i": "627",
|
| 83 |
+
"native_tokenizers": [],
|
| 84 |
+
"scripts": []
|
| 85 |
}
|
| 86 |
],
|
| 87 |
+
"tokenizers": {},
|
| 88 |
"node_i": "621",
|
| 89 |
+
"native_tokenizers": [],
|
| 90 |
+
"scripts": []
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"name": "Northern",
|
| 94 |
"iso_1_code": null,
|
| 95 |
"iso_3_code": null,
|
|
|
|
| 96 |
"children": [
|
| 97 |
{
|
| 98 |
"name": "Aka-Cari",
|
| 99 |
"iso_1_code": null,
|
| 100 |
"iso_3_code": "aci",
|
|
|
|
| 101 |
"children": [],
|
| 102 |
+
"tokenizers": {},
|
| 103 |
"node_i": "629",
|
| 104 |
+
"native_tokenizers": [],
|
| 105 |
+
"scripts": []
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"name": "Aka-Kora",
|
| 109 |
"iso_1_code": null,
|
| 110 |
"iso_3_code": "ack",
|
|
|
|
| 111 |
"children": [],
|
| 112 |
+
"tokenizers": {},
|
| 113 |
"node_i": "630",
|
| 114 |
+
"native_tokenizers": [],
|
| 115 |
+
"scripts": []
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"name": "Aka-Jeru",
|
| 119 |
"iso_1_code": null,
|
| 120 |
"iso_3_code": "akj",
|
|
|
|
| 121 |
"children": [],
|
| 122 |
+
"tokenizers": {},
|
| 123 |
"node_i": "631",
|
| 124 |
+
"native_tokenizers": [],
|
| 125 |
+
"scripts": []
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"name": "Aka-Bo",
|
| 129 |
"iso_1_code": null,
|
| 130 |
"iso_3_code": "akm",
|
|
|
|
| 131 |
"children": [],
|
| 132 |
+
"tokenizers": {},
|
| 133 |
"node_i": "632",
|
| 134 |
+
"native_tokenizers": [],
|
| 135 |
+
"scripts": []
|
| 136 |
}
|
| 137 |
],
|
| 138 |
+
"tokenizers": {},
|
| 139 |
"node_i": "628",
|
| 140 |
+
"native_tokenizers": [],
|
| 141 |
+
"scripts": []
|
| 142 |
}
|
| 143 |
],
|
| 144 |
+
"tokenizers": {},
|
| 145 |
"node_i": "619",
|
| 146 |
+
"native_tokenizers": [],
|
| 147 |
+
"scripts": []
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"name": "South Andamanese",
|
| 151 |
"iso_1_code": null,
|
| 152 |
"iso_3_code": null,
|
|
|
|
| 153 |
"children": [
|
| 154 |
{
|
| 155 |
"name": "Jarawa",
|
| 156 |
"iso_1_code": null,
|
| 157 |
"iso_3_code": "anq",
|
|
|
|
| 158 |
"children": [],
|
| 159 |
+
"tokenizers": {},
|
| 160 |
"node_i": "634",
|
| 161 |
+
"native_tokenizers": [],
|
| 162 |
+
"scripts": []
|
| 163 |
},
|
| 164 |
{
|
| 165 |
"name": "\u00d6\u00f1ge",
|
| 166 |
"iso_1_code": null,
|
| 167 |
"iso_3_code": "oon",
|
|
|
|
| 168 |
"children": [],
|
| 169 |
+
"tokenizers": {},
|
| 170 |
"node_i": "635",
|
| 171 |
+
"native_tokenizers": [],
|
| 172 |
+
"scripts": []
|
| 173 |
},
|
| 174 |
{
|
| 175 |
"name": "Sentinel",
|
| 176 |
"iso_1_code": null,
|
| 177 |
"iso_3_code": "std",
|
|
|
|
| 178 |
"children": [],
|
| 179 |
+
"tokenizers": {},
|
| 180 |
"node_i": "636",
|
| 181 |
+
"native_tokenizers": [],
|
| 182 |
+
"scripts": []
|
| 183 |
}
|
| 184 |
],
|
| 185 |
+
"tokenizers": {},
|
| 186 |
"node_i": "633",
|
| 187 |
+
"native_tokenizers": [],
|
| 188 |
+
"scripts": []
|
| 189 |
}
|
| 190 |
],
|
| 191 |
+
"tokenizers": {},
|
| 192 |
"node_i": "618",
|
| 193 |
+
"native_tokenizers": [],
|
| 194 |
+
"scripts": []
|
| 195 |
}
|
data/Arafundi.json
CHANGED
|
@@ -2,40 +2,40 @@
|
|
| 2 |
"name": "Arafundi",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Andai",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "afd",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "638",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Nanubae",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "afk",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "639",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "Tapei",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": "afp",
|
| 31 |
-
"tokenizers": {},
|
| 32 |
"children": [],
|
|
|
|
| 33 |
"node_i": "640",
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
}
|
| 37 |
],
|
|
|
|
| 38 |
"node_i": "637",
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
}
|
|
|
|
| 2 |
"name": "Arafundi",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Andai",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "afd",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "638",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Nanubae",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "afk",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "639",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Tapei",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "afp",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "640",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
}
|
| 36 |
],
|
| 37 |
+
"tokenizers": {},
|
| 38 |
"node_i": "637",
|
| 39 |
+
"native_tokenizers": [],
|
| 40 |
+
"scripts": []
|
| 41 |
}
|
data/Arai (Left May).json
CHANGED
|
@@ -2,72 +2,72 @@
|
|
| 2 |
"name": "Arai (Left May)",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Sawiyanu",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "amm",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "642",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Bo",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "bpw",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "643",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"name": "Yawuno Teneyo",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": "itr",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "644",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Nakwi",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "nax",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "645",
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Nimo",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": "niw",
|
| 53 |
-
"tokenizers": {},
|
| 54 |
"children": [],
|
|
|
|
| 55 |
"node_i": "646",
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"name": "Owiniga",
|
| 61 |
"iso_1_code": null,
|
| 62 |
"iso_3_code": "owi",
|
| 63 |
-
"tokenizers": {},
|
| 64 |
"children": [],
|
|
|
|
| 65 |
"node_i": "647",
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
}
|
| 69 |
],
|
|
|
|
| 70 |
"node_i": "641",
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
}
|
|
|
|
| 2 |
"name": "Arai (Left May)",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Sawiyanu",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "amm",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "642",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Bo",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "bpw",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "643",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
+
"scripts": []
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"name": "Yawuno Teneyo",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": "itr",
|
|
|
|
| 32 |
"children": [],
|
| 33 |
+
"tokenizers": {},
|
| 34 |
"node_i": "644",
|
| 35 |
+
"native_tokenizers": [],
|
| 36 |
+
"scripts": []
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"name": "Nakwi",
|
| 40 |
"iso_1_code": null,
|
| 41 |
"iso_3_code": "nax",
|
|
|
|
| 42 |
"children": [],
|
| 43 |
+
"tokenizers": {},
|
| 44 |
"node_i": "645",
|
| 45 |
+
"native_tokenizers": [],
|
| 46 |
+
"scripts": []
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"name": "Nimo",
|
| 50 |
"iso_1_code": null,
|
| 51 |
"iso_3_code": "niw",
|
|
|
|
| 52 |
"children": [],
|
| 53 |
+
"tokenizers": {},
|
| 54 |
"node_i": "646",
|
| 55 |
+
"native_tokenizers": [],
|
| 56 |
+
"scripts": []
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"name": "Owiniga",
|
| 60 |
"iso_1_code": null,
|
| 61 |
"iso_3_code": "owi",
|
|
|
|
| 62 |
"children": [],
|
| 63 |
+
"tokenizers": {},
|
| 64 |
"node_i": "647",
|
| 65 |
+
"native_tokenizers": [],
|
| 66 |
+
"scripts": []
|
| 67 |
}
|
| 68 |
],
|
| 69 |
+
"tokenizers": {},
|
| 70 |
"node_i": "641",
|
| 71 |
+
"native_tokenizers": [],
|
| 72 |
+
"scripts": []
|
| 73 |
}
|
data/Arauan.json
CHANGED
|
@@ -2,87 +2,87 @@
|
|
| 2 |
"name": "Arauan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Aru\u00e1",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "aru",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "649",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Kulina",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "cul",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "650",
|
|
|
|
| 24 |
"scripts": [
|
| 25 |
"Latn"
|
| 26 |
-
]
|
| 27 |
-
"own_tokenizer": false
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"name": "Den\u00ed",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": "dny",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "651",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Paumar\u00ed",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "pad",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "652",
|
|
|
|
| 46 |
"scripts": [
|
| 47 |
"Latn"
|
| 48 |
-
]
|
| 49 |
-
"own_tokenizer": false
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"name": "Suruah\u00e1",
|
| 53 |
"iso_1_code": null,
|
| 54 |
"iso_3_code": "swx",
|
| 55 |
-
"tokenizers": {},
|
| 56 |
"children": [],
|
|
|
|
| 57 |
"node_i": "653",
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"name": "Jamamadi",
|
| 63 |
"iso_1_code": null,
|
| 64 |
"iso_3_code": null,
|
| 65 |
-
"tokenizers": {},
|
| 66 |
"children": [
|
| 67 |
{
|
| 68 |
"name": "Jamamad\u00ed",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "jaa",
|
| 71 |
-
"tokenizers": {},
|
| 72 |
"children": [],
|
|
|
|
| 73 |
"node_i": "655",
|
|
|
|
| 74 |
"scripts": [
|
| 75 |
"Latn"
|
| 76 |
-
]
|
| 77 |
-
"own_tokenizer": false
|
| 78 |
}
|
| 79 |
],
|
|
|
|
| 80 |
"node_i": "654",
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
}
|
| 84 |
],
|
|
|
|
| 85 |
"node_i": "648",
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
}
|
|
|
|
| 2 |
"name": "Arauan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Aru\u00e1",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "aru",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "649",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Kulina",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "cul",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "650",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
"scripts": [
|
| 25 |
"Latn"
|
| 26 |
+
]
|
|
|
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"name": "Den\u00ed",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": "dny",
|
|
|
|
| 32 |
"children": [],
|
| 33 |
+
"tokenizers": {},
|
| 34 |
"node_i": "651",
|
| 35 |
+
"native_tokenizers": [],
|
| 36 |
+
"scripts": []
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"name": "Paumar\u00ed",
|
| 40 |
"iso_1_code": null,
|
| 41 |
"iso_3_code": "pad",
|
|
|
|
| 42 |
"children": [],
|
| 43 |
+
"tokenizers": {},
|
| 44 |
"node_i": "652",
|
| 45 |
+
"native_tokenizers": [],
|
| 46 |
"scripts": [
|
| 47 |
"Latn"
|
| 48 |
+
]
|
|
|
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"name": "Suruah\u00e1",
|
| 52 |
"iso_1_code": null,
|
| 53 |
"iso_3_code": "swx",
|
|
|
|
| 54 |
"children": [],
|
| 55 |
+
"tokenizers": {},
|
| 56 |
"node_i": "653",
|
| 57 |
+
"native_tokenizers": [],
|
| 58 |
+
"scripts": []
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"name": "Jamamadi",
|
| 62 |
"iso_1_code": null,
|
| 63 |
"iso_3_code": null,
|
|
|
|
| 64 |
"children": [
|
| 65 |
{
|
| 66 |
"name": "Jamamad\u00ed",
|
| 67 |
"iso_1_code": null,
|
| 68 |
"iso_3_code": "jaa",
|
|
|
|
| 69 |
"children": [],
|
| 70 |
+
"tokenizers": {},
|
| 71 |
"node_i": "655",
|
| 72 |
+
"native_tokenizers": [],
|
| 73 |
"scripts": [
|
| 74 |
"Latn"
|
| 75 |
+
]
|
|
|
|
| 76 |
}
|
| 77 |
],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "654",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
}
|
| 83 |
],
|
| 84 |
+
"tokenizers": {},
|
| 85 |
"node_i": "648",
|
| 86 |
+
"native_tokenizers": [],
|
| 87 |
+
"scripts": []
|
| 88 |
}
|
data/Australian.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/Austro-Asiatic.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/Austronesian.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/Aymaran.json
CHANGED
|
@@ -2,64 +2,64 @@
|
|
| 2 |
"name": "Aymaran",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Aymara",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Aymara, Southern",
|
| 15 |
"iso_1_code": "ay",
|
| 16 |
"iso_3_code": "ayc",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3257",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Aymara, Central",
|
| 25 |
"iso_1_code": "ay",
|
| 26 |
"iso_3_code": "ayr",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3258",
|
|
|
|
| 30 |
"scripts": [
|
| 31 |
"Latn"
|
| 32 |
-
]
|
| 33 |
-
"own_tokenizer": false
|
| 34 |
}
|
| 35 |
],
|
|
|
|
| 36 |
"node_i": "3256",
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"name": "Tupe",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": null,
|
| 44 |
-
"tokenizers": {},
|
| 45 |
"children": [
|
| 46 |
{
|
| 47 |
"name": "Jaqaru",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "jqr",
|
| 50 |
-
"tokenizers": {},
|
| 51 |
"children": [],
|
|
|
|
| 52 |
"node_i": "3260",
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
}
|
| 56 |
],
|
|
|
|
| 57 |
"node_i": "3259",
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
}
|
| 61 |
],
|
|
|
|
| 62 |
"node_i": "3255",
|
| 63 |
-
"
|
| 64 |
-
"
|
| 65 |
}
|
|
|
|
| 2 |
"name": "Aymaran",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Aymara",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Aymara, Southern",
|
| 13 |
"iso_1_code": "ay",
|
| 14 |
"iso_3_code": "ayc",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3257",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Aymara, Central",
|
| 23 |
"iso_1_code": "ay",
|
| 24 |
"iso_3_code": "ayr",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3258",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
"scripts": [
|
| 30 |
"Latn"
|
| 31 |
+
]
|
|
|
|
| 32 |
}
|
| 33 |
],
|
| 34 |
+
"tokenizers": {},
|
| 35 |
"node_i": "3256",
|
| 36 |
+
"native_tokenizers": [],
|
| 37 |
+
"scripts": []
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Tupe",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": null,
|
|
|
|
| 43 |
"children": [
|
| 44 |
{
|
| 45 |
"name": "Jaqaru",
|
| 46 |
"iso_1_code": null,
|
| 47 |
"iso_3_code": "jqr",
|
|
|
|
| 48 |
"children": [],
|
| 49 |
+
"tokenizers": {},
|
| 50 |
"node_i": "3260",
|
| 51 |
+
"native_tokenizers": [],
|
| 52 |
+
"scripts": []
|
| 53 |
}
|
| 54 |
],
|
| 55 |
+
"tokenizers": {},
|
| 56 |
"node_i": "3259",
|
| 57 |
+
"native_tokenizers": [],
|
| 58 |
+
"scripts": []
|
| 59 |
}
|
| 60 |
],
|
| 61 |
+
"tokenizers": {},
|
| 62 |
"node_i": "3255",
|
| 63 |
+
"native_tokenizers": [],
|
| 64 |
+
"scripts": []
|
| 65 |
}
|
data/Barbacoan.json
CHANGED
|
@@ -2,68 +2,68 @@
|
|
| 2 |
"name": "Barbacoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Northern",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Awa-Cuaiquer",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "kwi",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3263",
|
|
|
|
| 20 |
"scripts": [
|
| 21 |
"Latn"
|
| 22 |
-
]
|
| 23 |
-
"own_tokenizer": false
|
| 24 |
}
|
| 25 |
],
|
|
|
|
| 26 |
"node_i": "3262",
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"name": "Southern",
|
| 32 |
"iso_1_code": null,
|
| 33 |
"iso_3_code": null,
|
| 34 |
-
"tokenizers": {},
|
| 35 |
"children": [
|
| 36 |
{
|
| 37 |
"name": "Chachi",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "cbi",
|
| 40 |
-
"tokenizers": {},
|
| 41 |
"children": [],
|
|
|
|
| 42 |
"node_i": "3265",
|
|
|
|
| 43 |
"scripts": [
|
| 44 |
"Latn"
|
| 45 |
-
]
|
| 46 |
-
"own_tokenizer": false
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"name": "Tsafiki",
|
| 50 |
"iso_1_code": null,
|
| 51 |
"iso_3_code": "cof",
|
| 52 |
-
"tokenizers": {},
|
| 53 |
"children": [],
|
|
|
|
| 54 |
"node_i": "3266",
|
|
|
|
| 55 |
"scripts": [
|
| 56 |
"Latn"
|
| 57 |
-
]
|
| 58 |
-
"own_tokenizer": false
|
| 59 |
}
|
| 60 |
],
|
|
|
|
| 61 |
"node_i": "3264",
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
}
|
| 65 |
],
|
|
|
|
| 66 |
"node_i": "3261",
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
}
|
|
|
|
| 2 |
"name": "Barbacoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Northern",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Awa-Cuaiquer",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "kwi",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3263",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
"scripts": [
|
| 20 |
"Latn"
|
| 21 |
+
]
|
|
|
|
| 22 |
}
|
| 23 |
],
|
| 24 |
+
"tokenizers": {},
|
| 25 |
"node_i": "3262",
|
| 26 |
+
"native_tokenizers": [],
|
| 27 |
+
"scripts": []
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"name": "Southern",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": null,
|
|
|
|
| 33 |
"children": [
|
| 34 |
{
|
| 35 |
"name": "Chachi",
|
| 36 |
"iso_1_code": null,
|
| 37 |
"iso_3_code": "cbi",
|
|
|
|
| 38 |
"children": [],
|
| 39 |
+
"tokenizers": {},
|
| 40 |
"node_i": "3265",
|
| 41 |
+
"native_tokenizers": [],
|
| 42 |
"scripts": [
|
| 43 |
"Latn"
|
| 44 |
+
]
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Tsafiki",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "cof",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3266",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
"scripts": [
|
| 55 |
"Latn"
|
| 56 |
+
]
|
|
|
|
| 57 |
}
|
| 58 |
],
|
| 59 |
+
"tokenizers": {},
|
| 60 |
"node_i": "3264",
|
| 61 |
+
"native_tokenizers": [],
|
| 62 |
+
"scripts": []
|
| 63 |
}
|
| 64 |
],
|
| 65 |
+
"tokenizers": {},
|
| 66 |
"node_i": "3261",
|
| 67 |
+
"native_tokenizers": [],
|
| 68 |
+
"scripts": []
|
| 69 |
}
|
data/Bayono-Awbono.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Bayono-Awbono",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Awbono",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "awh",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3268",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Bayono",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "byl",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3269",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3267",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Bayono-Awbono",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Awbono",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "awh",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3268",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Bayono",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "byl",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3269",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3267",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Border.json
CHANGED
|
@@ -2,197 +2,197 @@
|
|
| 2 |
"name": "Border",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Bewani",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Ainbai",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "aic",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3272",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Kilmeri",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "kih",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3273",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Ningera",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": "nby",
|
| 37 |
-
"tokenizers": {},
|
| 38 |
"children": [],
|
|
|
|
| 39 |
"node_i": "3274",
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"name": "Pagi",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "pgi",
|
| 47 |
-
"tokenizers": {},
|
| 48 |
"children": [],
|
|
|
|
| 49 |
"node_i": "3275",
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Umeda",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "upi",
|
| 57 |
-
"tokenizers": {},
|
| 58 |
"children": [],
|
|
|
|
| 59 |
"node_i": "3276",
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
}
|
| 63 |
],
|
|
|
|
| 64 |
"node_i": "3271",
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Taikat",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": null,
|
| 72 |
-
"tokenizers": {},
|
| 73 |
"children": [
|
| 74 |
{
|
| 75 |
"name": "Taikat",
|
| 76 |
"iso_1_code": null,
|
| 77 |
"iso_3_code": "aos",
|
| 78 |
-
"tokenizers": {},
|
| 79 |
"children": [],
|
|
|
|
| 80 |
"node_i": "3278",
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"name": "Awyi",
|
| 86 |
"iso_1_code": null,
|
| 87 |
"iso_3_code": "auw",
|
| 88 |
-
"tokenizers": {},
|
| 89 |
"children": [],
|
|
|
|
| 90 |
"node_i": "3279",
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
}
|
| 94 |
],
|
|
|
|
| 95 |
"node_i": "3277",
|
| 96 |
-
"
|
| 97 |
-
"
|
| 98 |
},
|
| 99 |
{
|
| 100 |
"name": "Waris",
|
| 101 |
"iso_1_code": null,
|
| 102 |
"iso_3_code": null,
|
| 103 |
-
"tokenizers": {},
|
| 104 |
"children": [
|
| 105 |
{
|
| 106 |
"name": "Amanab",
|
| 107 |
"iso_1_code": null,
|
| 108 |
"iso_3_code": "amn",
|
| 109 |
-
"tokenizers": {},
|
| 110 |
"children": [],
|
|
|
|
| 111 |
"node_i": "3281",
|
|
|
|
| 112 |
"scripts": [
|
| 113 |
"Latn"
|
| 114 |
-
]
|
| 115 |
-
"own_tokenizer": false
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"name": "Daonda",
|
| 119 |
"iso_1_code": null,
|
| 120 |
"iso_3_code": "dnd",
|
| 121 |
-
"tokenizers": {},
|
| 122 |
"children": [],
|
|
|
|
| 123 |
"node_i": "3282",
|
| 124 |
-
"
|
| 125 |
-
"
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"name": "Imonda",
|
| 129 |
"iso_1_code": null,
|
| 130 |
"iso_3_code": "imn",
|
| 131 |
-
"tokenizers": {},
|
| 132 |
"children": [],
|
|
|
|
| 133 |
"node_i": "3283",
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
},
|
| 137 |
{
|
| 138 |
"name": "Manem",
|
| 139 |
"iso_1_code": null,
|
| 140 |
"iso_3_code": "jet",
|
| 141 |
-
"tokenizers": {},
|
| 142 |
"children": [],
|
|
|
|
| 143 |
"node_i": "3284",
|
| 144 |
-
"
|
| 145 |
-
"
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"name": "Auwe",
|
| 149 |
"iso_1_code": null,
|
| 150 |
"iso_3_code": "smf",
|
| 151 |
-
"tokenizers": {},
|
| 152 |
"children": [],
|
|
|
|
| 153 |
"node_i": "3285",
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"name": "Viid",
|
| 159 |
"iso_1_code": null,
|
| 160 |
"iso_3_code": "snu",
|
| 161 |
-
"tokenizers": {},
|
| 162 |
"children": [],
|
|
|
|
| 163 |
"node_i": "3286",
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"name": "Sowanda",
|
| 169 |
"iso_1_code": null,
|
| 170 |
"iso_3_code": "sow",
|
| 171 |
-
"tokenizers": {},
|
| 172 |
"children": [],
|
|
|
|
| 173 |
"node_i": "3287",
|
| 174 |
-
"
|
| 175 |
-
"
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"name": "Waris",
|
| 179 |
"iso_1_code": null,
|
| 180 |
"iso_3_code": "wrs",
|
| 181 |
-
"tokenizers": {},
|
| 182 |
"children": [],
|
|
|
|
| 183 |
"node_i": "3288",
|
|
|
|
| 184 |
"scripts": [
|
| 185 |
"Latn"
|
| 186 |
-
]
|
| 187 |
-
"own_tokenizer": false
|
| 188 |
}
|
| 189 |
],
|
|
|
|
| 190 |
"node_i": "3280",
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
}
|
| 194 |
],
|
|
|
|
| 195 |
"node_i": "3270",
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
}
|
|
|
|
| 2 |
"name": "Border",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Bewani",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Ainbai",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "aic",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3272",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Kilmeri",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "kih",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3273",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Ningera",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": "nby",
|
|
|
|
| 35 |
"children": [],
|
| 36 |
+
"tokenizers": {},
|
| 37 |
"node_i": "3274",
|
| 38 |
+
"native_tokenizers": [],
|
| 39 |
+
"scripts": []
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "Pagi",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": "pgi",
|
|
|
|
| 45 |
"children": [],
|
| 46 |
+
"tokenizers": {},
|
| 47 |
"node_i": "3275",
|
| 48 |
+
"native_tokenizers": [],
|
| 49 |
+
"scripts": []
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"name": "Umeda",
|
| 53 |
"iso_1_code": null,
|
| 54 |
"iso_3_code": "upi",
|
|
|
|
| 55 |
"children": [],
|
| 56 |
+
"tokenizers": {},
|
| 57 |
"node_i": "3276",
|
| 58 |
+
"native_tokenizers": [],
|
| 59 |
+
"scripts": []
|
| 60 |
}
|
| 61 |
],
|
| 62 |
+
"tokenizers": {},
|
| 63 |
"node_i": "3271",
|
| 64 |
+
"native_tokenizers": [],
|
| 65 |
+
"scripts": []
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"name": "Taikat",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": null,
|
|
|
|
| 71 |
"children": [
|
| 72 |
{
|
| 73 |
"name": "Taikat",
|
| 74 |
"iso_1_code": null,
|
| 75 |
"iso_3_code": "aos",
|
|
|
|
| 76 |
"children": [],
|
| 77 |
+
"tokenizers": {},
|
| 78 |
"node_i": "3278",
|
| 79 |
+
"native_tokenizers": [],
|
| 80 |
+
"scripts": []
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"name": "Awyi",
|
| 84 |
"iso_1_code": null,
|
| 85 |
"iso_3_code": "auw",
|
|
|
|
| 86 |
"children": [],
|
| 87 |
+
"tokenizers": {},
|
| 88 |
"node_i": "3279",
|
| 89 |
+
"native_tokenizers": [],
|
| 90 |
+
"scripts": []
|
| 91 |
}
|
| 92 |
],
|
| 93 |
+
"tokenizers": {},
|
| 94 |
"node_i": "3277",
|
| 95 |
+
"native_tokenizers": [],
|
| 96 |
+
"scripts": []
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"name": "Waris",
|
| 100 |
"iso_1_code": null,
|
| 101 |
"iso_3_code": null,
|
|
|
|
| 102 |
"children": [
|
| 103 |
{
|
| 104 |
"name": "Amanab",
|
| 105 |
"iso_1_code": null,
|
| 106 |
"iso_3_code": "amn",
|
|
|
|
| 107 |
"children": [],
|
| 108 |
+
"tokenizers": {},
|
| 109 |
"node_i": "3281",
|
| 110 |
+
"native_tokenizers": [],
|
| 111 |
"scripts": [
|
| 112 |
"Latn"
|
| 113 |
+
]
|
|
|
|
| 114 |
},
|
| 115 |
{
|
| 116 |
"name": "Daonda",
|
| 117 |
"iso_1_code": null,
|
| 118 |
"iso_3_code": "dnd",
|
|
|
|
| 119 |
"children": [],
|
| 120 |
+
"tokenizers": {},
|
| 121 |
"node_i": "3282",
|
| 122 |
+
"native_tokenizers": [],
|
| 123 |
+
"scripts": []
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"name": "Imonda",
|
| 127 |
"iso_1_code": null,
|
| 128 |
"iso_3_code": "imn",
|
|
|
|
| 129 |
"children": [],
|
| 130 |
+
"tokenizers": {},
|
| 131 |
"node_i": "3283",
|
| 132 |
+
"native_tokenizers": [],
|
| 133 |
+
"scripts": []
|
| 134 |
},
|
| 135 |
{
|
| 136 |
"name": "Manem",
|
| 137 |
"iso_1_code": null,
|
| 138 |
"iso_3_code": "jet",
|
|
|
|
| 139 |
"children": [],
|
| 140 |
+
"tokenizers": {},
|
| 141 |
"node_i": "3284",
|
| 142 |
+
"native_tokenizers": [],
|
| 143 |
+
"scripts": []
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"name": "Auwe",
|
| 147 |
"iso_1_code": null,
|
| 148 |
"iso_3_code": "smf",
|
|
|
|
| 149 |
"children": [],
|
| 150 |
+
"tokenizers": {},
|
| 151 |
"node_i": "3285",
|
| 152 |
+
"native_tokenizers": [],
|
| 153 |
+
"scripts": []
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"name": "Viid",
|
| 157 |
"iso_1_code": null,
|
| 158 |
"iso_3_code": "snu",
|
|
|
|
| 159 |
"children": [],
|
| 160 |
+
"tokenizers": {},
|
| 161 |
"node_i": "3286",
|
| 162 |
+
"native_tokenizers": [],
|
| 163 |
+
"scripts": []
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"name": "Sowanda",
|
| 167 |
"iso_1_code": null,
|
| 168 |
"iso_3_code": "sow",
|
|
|
|
| 169 |
"children": [],
|
| 170 |
+
"tokenizers": {},
|
| 171 |
"node_i": "3287",
|
| 172 |
+
"native_tokenizers": [],
|
| 173 |
+
"scripts": []
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"name": "Waris",
|
| 177 |
"iso_1_code": null,
|
| 178 |
"iso_3_code": "wrs",
|
|
|
|
| 179 |
"children": [],
|
| 180 |
+
"tokenizers": {},
|
| 181 |
"node_i": "3288",
|
| 182 |
+
"native_tokenizers": [],
|
| 183 |
"scripts": [
|
| 184 |
"Latn"
|
| 185 |
+
]
|
|
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
+
"tokenizers": {},
|
| 189 |
"node_i": "3280",
|
| 190 |
+
"native_tokenizers": [],
|
| 191 |
+
"scripts": []
|
| 192 |
}
|
| 193 |
],
|
| 194 |
+
"tokenizers": {},
|
| 195 |
"node_i": "3270",
|
| 196 |
+
"native_tokenizers": [],
|
| 197 |
+
"scripts": []
|
| 198 |
}
|
data/Bororoan.json
CHANGED
|
@@ -2,42 +2,42 @@
|
|
| 2 |
"name": "Bororoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Bor\u00f4ro",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "bor",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3290",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Otuke",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "otu",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3291",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"name": "Umot\u00edna",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": "umo",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "3292",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
}
|
| 39 |
],
|
|
|
|
| 40 |
"node_i": "3289",
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
}
|
|
|
|
| 2 |
"name": "Bororoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Bor\u00f4ro",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "bor",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3290",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Otuke",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "otu",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3291",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
+
"scripts": []
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"name": "Umot\u00edna",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": "umo",
|
|
|
|
| 32 |
"children": [],
|
| 33 |
+
"tokenizers": {},
|
| 34 |
"node_i": "3292",
|
| 35 |
+
"native_tokenizers": [],
|
| 36 |
+
"scripts": []
|
| 37 |
}
|
| 38 |
],
|
| 39 |
+
"tokenizers": {},
|
| 40 |
"node_i": "3289",
|
| 41 |
+
"native_tokenizers": [],
|
| 42 |
+
"scripts": []
|
| 43 |
}
|
data/Botocudoan.json
CHANGED
|
@@ -2,20 +2,20 @@
|
|
| 2 |
"name": "Botocudoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Krenak",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "kqq",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3294",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
}
|
| 17 |
],
|
|
|
|
| 18 |
"node_i": "3293",
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
}
|
|
|
|
| 2 |
"name": "Botocudoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Krenak",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "kqq",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3294",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
}
|
| 16 |
],
|
| 17 |
+
"tokenizers": {},
|
| 18 |
"node_i": "3293",
|
| 19 |
+
"native_tokenizers": [],
|
| 20 |
+
"scripts": []
|
| 21 |
}
|
data/Caddoan.json
CHANGED
|
@@ -2,93 +2,93 @@
|
|
| 2 |
"name": "Caddoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Caddo",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "cad",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3296",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Northern Caddoan",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": null,
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Wichita",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "wic",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3298",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Kitsai-Proto-Pawnee",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": null,
|
| 37 |
-
"tokenizers": {},
|
| 38 |
"children": [
|
| 39 |
{
|
| 40 |
"name": "Kitsai",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "kii",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "3300",
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Proto-Pawnee",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": null,
|
| 53 |
-
"tokenizers": {},
|
| 54 |
"children": [
|
| 55 |
{
|
| 56 |
"name": "Arikara",
|
| 57 |
"iso_1_code": null,
|
| 58 |
"iso_3_code": "ari",
|
| 59 |
-
"tokenizers": {},
|
| 60 |
"children": [],
|
|
|
|
| 61 |
"node_i": "3302",
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
},
|
| 65 |
{
|
| 66 |
"name": "Pawnee",
|
| 67 |
"iso_1_code": null,
|
| 68 |
"iso_3_code": "paw",
|
| 69 |
-
"tokenizers": {},
|
| 70 |
"children": [],
|
|
|
|
| 71 |
"node_i": "3303",
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
}
|
| 75 |
],
|
|
|
|
| 76 |
"node_i": "3301",
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
}
|
| 80 |
],
|
|
|
|
| 81 |
"node_i": "3299",
|
| 82 |
-
"
|
| 83 |
-
"
|
| 84 |
}
|
| 85 |
],
|
|
|
|
| 86 |
"node_i": "3297",
|
| 87 |
-
"
|
| 88 |
-
"
|
| 89 |
}
|
| 90 |
],
|
|
|
|
| 91 |
"node_i": "3295",
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
}
|
|
|
|
| 2 |
"name": "Caddoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Caddo",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "cad",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3296",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Northern Caddoan",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": null,
|
|
|
|
| 20 |
"children": [
|
| 21 |
{
|
| 22 |
"name": "Wichita",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "wic",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3298",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Kitsai-Proto-Pawnee",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": null,
|
|
|
|
| 35 |
"children": [
|
| 36 |
{
|
| 37 |
"name": "Kitsai",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "kii",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3300",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Proto-Pawnee",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": null,
|
|
|
|
| 50 |
"children": [
|
| 51 |
{
|
| 52 |
"name": "Arikara",
|
| 53 |
"iso_1_code": null,
|
| 54 |
"iso_3_code": "ari",
|
|
|
|
| 55 |
"children": [],
|
| 56 |
+
"tokenizers": {},
|
| 57 |
"node_i": "3302",
|
| 58 |
+
"native_tokenizers": [],
|
| 59 |
+
"scripts": []
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"name": "Pawnee",
|
| 63 |
"iso_1_code": null,
|
| 64 |
"iso_3_code": "paw",
|
|
|
|
| 65 |
"children": [],
|
| 66 |
+
"tokenizers": {},
|
| 67 |
"node_i": "3303",
|
| 68 |
+
"native_tokenizers": [],
|
| 69 |
+
"scripts": []
|
| 70 |
}
|
| 71 |
],
|
| 72 |
+
"tokenizers": {},
|
| 73 |
"node_i": "3301",
|
| 74 |
+
"native_tokenizers": [],
|
| 75 |
+
"scripts": []
|
| 76 |
}
|
| 77 |
],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3299",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
}
|
| 83 |
],
|
| 84 |
+
"tokenizers": {},
|
| 85 |
"node_i": "3297",
|
| 86 |
+
"native_tokenizers": [],
|
| 87 |
+
"scripts": []
|
| 88 |
}
|
| 89 |
],
|
| 90 |
+
"tokenizers": {},
|
| 91 |
"node_i": "3295",
|
| 92 |
+
"native_tokenizers": [],
|
| 93 |
+
"scripts": []
|
| 94 |
}
|
data/Cahuapanan.json
CHANGED
|
@@ -2,32 +2,32 @@
|
|
| 2 |
"name": "Cahuapanan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Shawi",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "cbt",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3305",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Jebero",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "jeb",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3306",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
}
|
| 29 |
],
|
|
|
|
| 30 |
"node_i": "3304",
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
}
|
|
|
|
| 2 |
"name": "Cahuapanan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Shawi",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "cbt",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3305",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Jebero",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "jeb",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3306",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
+
"scripts": []
|
| 27 |
}
|
| 28 |
],
|
| 29 |
+
"tokenizers": {},
|
| 30 |
"node_i": "3304",
|
| 31 |
+
"native_tokenizers": [],
|
| 32 |
+
"scripts": []
|
| 33 |
}
|
data/Cariban.json
CHANGED
|
@@ -2,569 +2,569 @@
|
|
| 2 |
"name": "Cariban",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Carib",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "car",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3308",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Central",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": null,
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [
|
| 25 |
{
|
| 26 |
"name": "Apala\u00ed",
|
| 27 |
"iso_1_code": null,
|
| 28 |
"iso_3_code": "apy",
|
| 29 |
-
"tokenizers": {},
|
| 30 |
"children": [],
|
|
|
|
| 31 |
"node_i": "3310",
|
|
|
|
| 32 |
"scripts": [
|
| 33 |
"Latn"
|
| 34 |
-
]
|
| 35 |
-
"own_tokenizer": false
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Cuman\u00e1",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": null,
|
| 41 |
-
"tokenizers": {},
|
| 42 |
"children": [
|
| 43 |
{
|
| 44 |
"name": "Chaima",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "ciy",
|
| 47 |
-
"tokenizers": {},
|
| 48 |
"children": [],
|
|
|
|
| 49 |
"node_i": "3312",
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Cumanagoto",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "cuo",
|
| 57 |
-
"tokenizers": {},
|
| 58 |
"children": [],
|
|
|
|
| 59 |
"node_i": "3313",
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
}
|
| 63 |
],
|
|
|
|
| 64 |
"node_i": "3311",
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Makiritare",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": null,
|
| 72 |
-
"tokenizers": {},
|
| 73 |
"children": [
|
| 74 |
{
|
| 75 |
"name": "Maquiritari",
|
| 76 |
"iso_1_code": null,
|
| 77 |
"iso_3_code": "mch",
|
| 78 |
-
"tokenizers": {},
|
| 79 |
"children": [],
|
|
|
|
| 80 |
"node_i": "3315",
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
}
|
| 84 |
],
|
|
|
|
| 85 |
"node_i": "3314",
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"name": "Mapoyo-Yavarana",
|
| 91 |
"iso_1_code": null,
|
| 92 |
"iso_3_code": null,
|
| 93 |
-
"tokenizers": {},
|
| 94 |
"children": [
|
| 95 |
{
|
| 96 |
"name": "Mapoyo",
|
| 97 |
"iso_1_code": null,
|
| 98 |
"iso_3_code": "mcg",
|
| 99 |
-
"tokenizers": {},
|
| 100 |
"children": [],
|
|
|
|
| 101 |
"node_i": "3317",
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"name": "P\u00e9mono",
|
| 107 |
"iso_1_code": null,
|
| 108 |
"iso_3_code": "pev",
|
| 109 |
-
"tokenizers": {},
|
| 110 |
"children": [],
|
|
|
|
| 111 |
"node_i": "3318",
|
| 112 |
-
"
|
| 113 |
-
"
|
| 114 |
},
|
| 115 |
{
|
| 116 |
"name": "Tamanaku",
|
| 117 |
"iso_1_code": null,
|
| 118 |
"iso_3_code": "tmz",
|
| 119 |
-
"tokenizers": {},
|
| 120 |
"children": [],
|
|
|
|
| 121 |
"node_i": "3319",
|
| 122 |
-
"
|
| 123 |
-
"
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"name": "Yabarana",
|
| 127 |
"iso_1_code": null,
|
| 128 |
"iso_3_code": "yar",
|
| 129 |
-
"tokenizers": {},
|
| 130 |
"children": [],
|
|
|
|
| 131 |
"node_i": "3320",
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
}
|
| 135 |
],
|
|
|
|
| 136 |
"node_i": "3316",
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"name": "Wayana",
|
| 142 |
"iso_1_code": null,
|
| 143 |
"iso_3_code": null,
|
| 144 |
-
"tokenizers": {},
|
| 145 |
"children": [
|
| 146 |
{
|
| 147 |
"name": "Kaxui\u00e2na",
|
| 148 |
"iso_1_code": null,
|
| 149 |
"iso_3_code": "kbb",
|
| 150 |
-
"tokenizers": {},
|
| 151 |
"children": [],
|
|
|
|
| 152 |
"node_i": "3322",
|
| 153 |
-
"
|
| 154 |
-
"
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"name": "Wayana",
|
| 158 |
"iso_1_code": null,
|
| 159 |
"iso_3_code": "way",
|
| 160 |
-
"tokenizers": {},
|
| 161 |
"children": [],
|
|
|
|
| 162 |
"node_i": "3323",
|
|
|
|
| 163 |
"scripts": [
|
| 164 |
"Latn"
|
| 165 |
-
]
|
| 166 |
-
"own_tokenizer": false
|
| 167 |
}
|
| 168 |
],
|
|
|
|
| 169 |
"node_i": "3321",
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
}
|
| 173 |
],
|
|
|
|
| 174 |
"node_i": "3309",
|
| 175 |
-
"
|
| 176 |
-
"
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"name": "Kashuyana",
|
| 180 |
"iso_1_code": null,
|
| 181 |
"iso_3_code": null,
|
| 182 |
-
"tokenizers": {},
|
| 183 |
"children": [
|
| 184 |
{
|
| 185 |
"name": "Sikiana",
|
| 186 |
"iso_1_code": null,
|
| 187 |
"iso_3_code": "sik",
|
| 188 |
-
"tokenizers": {},
|
| 189 |
"children": [],
|
|
|
|
| 190 |
"node_i": "3325",
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
}
|
| 194 |
],
|
|
|
|
| 195 |
"node_i": "3324",
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
},
|
| 199 |
{
|
| 200 |
"name": "North Amazonian",
|
| 201 |
"iso_1_code": null,
|
| 202 |
"iso_3_code": null,
|
| 203 |
-
"tokenizers": {},
|
| 204 |
"children": [
|
| 205 |
{
|
| 206 |
"name": "Pem\u00f3n",
|
| 207 |
"iso_1_code": null,
|
| 208 |
"iso_3_code": null,
|
| 209 |
-
"tokenizers": {},
|
| 210 |
"children": [
|
| 211 |
{
|
| 212 |
"name": "Pem\u00f3n proper",
|
| 213 |
"iso_1_code": null,
|
| 214 |
"iso_3_code": null,
|
| 215 |
-
"tokenizers": {},
|
| 216 |
"children": [
|
| 217 |
{
|
| 218 |
"name": "Pemon",
|
| 219 |
"iso_1_code": null,
|
| 220 |
"iso_3_code": "aoc",
|
| 221 |
-
"tokenizers": {},
|
| 222 |
"children": [],
|
|
|
|
| 223 |
"node_i": "3329",
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
},
|
| 227 |
{
|
| 228 |
"name": "Macushi",
|
| 229 |
"iso_1_code": null,
|
| 230 |
"iso_3_code": "mbc",
|
| 231 |
-
"tokenizers": {},
|
| 232 |
"children": [],
|
|
|
|
| 233 |
"node_i": "3330",
|
|
|
|
| 234 |
"scripts": [
|
| 235 |
"Latn"
|
| 236 |
-
]
|
| 237 |
-
"own_tokenizer": false
|
| 238 |
},
|
| 239 |
{
|
| 240 |
"name": "Kapong",
|
| 241 |
"iso_1_code": null,
|
| 242 |
"iso_3_code": null,
|
| 243 |
-
"tokenizers": {},
|
| 244 |
"children": [
|
| 245 |
{
|
| 246 |
"name": "Akawaio",
|
| 247 |
"iso_1_code": null,
|
| 248 |
"iso_3_code": "ake",
|
| 249 |
-
"tokenizers": {},
|
| 250 |
"children": [],
|
|
|
|
| 251 |
"node_i": "3332",
|
|
|
|
| 252 |
"scripts": [
|
| 253 |
"Latn"
|
| 254 |
-
]
|
| 255 |
-
"own_tokenizer": false
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"name": "Patamona",
|
| 259 |
"iso_1_code": null,
|
| 260 |
"iso_3_code": "pbc",
|
| 261 |
-
"tokenizers": {},
|
| 262 |
"children": [],
|
|
|
|
| 263 |
"node_i": "3333",
|
|
|
|
| 264 |
"scripts": [
|
| 265 |
"Latn"
|
| 266 |
-
]
|
| 267 |
-
"own_tokenizer": false
|
| 268 |
}
|
| 269 |
],
|
|
|
|
| 270 |
"node_i": "3331",
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
}
|
| 274 |
],
|
|
|
|
| 275 |
"node_i": "3328",
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
}
|
| 279 |
],
|
|
|
|
| 280 |
"node_i": "3327",
|
| 281 |
-
"
|
| 282 |
-
"
|
| 283 |
},
|
| 284 |
{
|
| 285 |
"name": "Yawaper\u00ed",
|
| 286 |
"iso_1_code": null,
|
| 287 |
"iso_3_code": null,
|
| 288 |
-
"tokenizers": {},
|
| 289 |
"children": [
|
| 290 |
{
|
| 291 |
"name": "Waimiri-Atroar\u00ed",
|
| 292 |
"iso_1_code": null,
|
| 293 |
"iso_3_code": "atr",
|
| 294 |
-
"tokenizers": {},
|
| 295 |
"children": [],
|
|
|
|
| 296 |
"node_i": "3335",
|
| 297 |
-
"
|
| 298 |
-
"
|
| 299 |
}
|
| 300 |
],
|
|
|
|
| 301 |
"node_i": "3334",
|
| 302 |
-
"
|
| 303 |
-
"
|
| 304 |
}
|
| 305 |
],
|
|
|
|
| 306 |
"node_i": "3326",
|
| 307 |
-
"
|
| 308 |
-
"
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"name": "South Amazonian",
|
| 312 |
"iso_1_code": null,
|
| 313 |
"iso_3_code": null,
|
| 314 |
-
"tokenizers": {},
|
| 315 |
"children": [
|
| 316 |
{
|
| 317 |
"name": "E\u2019\u00f1apa Woromaipu",
|
| 318 |
"iso_1_code": null,
|
| 319 |
"iso_3_code": "pbh",
|
| 320 |
-
"tokenizers": {},
|
| 321 |
"children": [],
|
|
|
|
| 322 |
"node_i": "3337",
|
| 323 |
-
"
|
| 324 |
-
"
|
| 325 |
},
|
| 326 |
{
|
| 327 |
"name": "Arara",
|
| 328 |
"iso_1_code": null,
|
| 329 |
"iso_3_code": null,
|
| 330 |
-
"tokenizers": {},
|
| 331 |
"children": [
|
| 332 |
{
|
| 333 |
"name": "Arara, Par\u00e1",
|
| 334 |
"iso_1_code": null,
|
| 335 |
"iso_3_code": "aap",
|
| 336 |
-
"tokenizers": {},
|
| 337 |
"children": [],
|
|
|
|
| 338 |
"node_i": "3339",
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
},
|
| 342 |
{
|
| 343 |
"name": "Ikpeng",
|
| 344 |
"iso_1_code": null,
|
| 345 |
"iso_3_code": "txi",
|
| 346 |
-
"tokenizers": {},
|
| 347 |
"children": [],
|
|
|
|
| 348 |
"node_i": "3340",
|
| 349 |
-
"
|
| 350 |
-
"
|
| 351 |
}
|
| 352 |
],
|
|
|
|
| 353 |
"node_i": "3338",
|
| 354 |
-
"
|
| 355 |
-
"
|
| 356 |
},
|
| 357 |
{
|
| 358 |
"name": "Bakair\u00ed",
|
| 359 |
"iso_1_code": null,
|
| 360 |
"iso_3_code": null,
|
| 361 |
-
"tokenizers": {},
|
| 362 |
"children": [
|
| 363 |
{
|
| 364 |
"name": "Bakair\u00ed",
|
| 365 |
"iso_1_code": null,
|
| 366 |
"iso_3_code": "bkq",
|
| 367 |
-
"tokenizers": {},
|
| 368 |
"children": [],
|
|
|
|
| 369 |
"node_i": "3342",
|
|
|
|
| 370 |
"scripts": [
|
| 371 |
"Latn"
|
| 372 |
-
]
|
| 373 |
-
"own_tokenizer": false
|
| 374 |
},
|
| 375 |
{
|
| 376 |
"name": "Amonap",
|
| 377 |
"iso_1_code": null,
|
| 378 |
"iso_3_code": null,
|
| 379 |
-
"tokenizers": {},
|
| 380 |
"children": [
|
| 381 |
{
|
| 382 |
"name": "Kuik\u00faro-Kalap\u00e1lo",
|
| 383 |
"iso_1_code": null,
|
| 384 |
"iso_3_code": "kui",
|
| 385 |
-
"tokenizers": {},
|
| 386 |
"children": [],
|
|
|
|
| 387 |
"node_i": "3344",
|
| 388 |
-
"
|
| 389 |
-
"
|
| 390 |
},
|
| 391 |
{
|
| 392 |
"name": "Matipuhy",
|
| 393 |
"iso_1_code": null,
|
| 394 |
"iso_3_code": "mzo",
|
| 395 |
-
"tokenizers": {},
|
| 396 |
"children": [],
|
|
|
|
| 397 |
"node_i": "3345",
|
| 398 |
-
"
|
| 399 |
-
"
|
| 400 |
}
|
| 401 |
],
|
|
|
|
| 402 |
"node_i": "3343",
|
| 403 |
-
"
|
| 404 |
-
"
|
| 405 |
}
|
| 406 |
],
|
|
|
|
| 407 |
"node_i": "3341",
|
| 408 |
-
"
|
| 409 |
-
"
|
| 410 |
}
|
| 411 |
],
|
|
|
|
| 412 |
"node_i": "3336",
|
| 413 |
-
"
|
| 414 |
-
"
|
| 415 |
},
|
| 416 |
{
|
| 417 |
"name": "Tiriy\u00f3",
|
| 418 |
"iso_1_code": null,
|
| 419 |
"iso_3_code": null,
|
| 420 |
-
"tokenizers": {},
|
| 421 |
"children": [
|
| 422 |
{
|
| 423 |
"name": "Salum\u00e1",
|
| 424 |
"iso_1_code": null,
|
| 425 |
"iso_3_code": "slj",
|
| 426 |
-
"tokenizers": {},
|
| 427 |
"children": [],
|
|
|
|
| 428 |
"node_i": "3347",
|
| 429 |
-
"
|
| 430 |
-
"
|
| 431 |
},
|
| 432 |
{
|
| 433 |
"name": "Karihona",
|
| 434 |
"iso_1_code": null,
|
| 435 |
"iso_3_code": null,
|
| 436 |
-
"tokenizers": {},
|
| 437 |
"children": [
|
| 438 |
{
|
| 439 |
"name": "Carijona",
|
| 440 |
"iso_1_code": null,
|
| 441 |
"iso_3_code": "cbd",
|
| 442 |
-
"tokenizers": {},
|
| 443 |
"children": [],
|
|
|
|
| 444 |
"node_i": "3349",
|
| 445 |
-
"
|
| 446 |
-
"
|
| 447 |
}
|
| 448 |
],
|
|
|
|
| 449 |
"node_i": "3348",
|
| 450 |
-
"
|
| 451 |
-
"
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"name": "Tiriy\u00f3",
|
| 455 |
"iso_1_code": null,
|
| 456 |
"iso_3_code": null,
|
| 457 |
-
"tokenizers": {},
|
| 458 |
"children": [
|
| 459 |
{
|
| 460 |
"name": "Akurio",
|
| 461 |
"iso_1_code": null,
|
| 462 |
"iso_3_code": "ako",
|
| 463 |
-
"tokenizers": {},
|
| 464 |
"children": [],
|
|
|
|
| 465 |
"node_i": "3351",
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
},
|
| 469 |
{
|
| 470 |
"name": "Tri\u00f3",
|
| 471 |
"iso_1_code": null,
|
| 472 |
"iso_3_code": "tri",
|
| 473 |
-
"tokenizers": {},
|
| 474 |
"children": [],
|
|
|
|
| 475 |
"node_i": "3352",
|
| 476 |
-
"
|
| 477 |
-
"
|
| 478 |
}
|
| 479 |
],
|
|
|
|
| 480 |
"node_i": "3350",
|
| 481 |
-
"
|
| 482 |
-
"
|
| 483 |
}
|
| 484 |
],
|
|
|
|
| 485 |
"node_i": "3346",
|
| 486 |
-
"
|
| 487 |
-
"
|
| 488 |
},
|
| 489 |
{
|
| 490 |
"name": "Waiwai",
|
| 491 |
"iso_1_code": null,
|
| 492 |
"iso_3_code": null,
|
| 493 |
-
"tokenizers": {},
|
| 494 |
"children": [
|
| 495 |
{
|
| 496 |
"name": "Hixkary\u00e1na",
|
| 497 |
"iso_1_code": null,
|
| 498 |
"iso_3_code": "hix",
|
| 499 |
-
"tokenizers": {},
|
| 500 |
"children": [],
|
|
|
|
| 501 |
"node_i": "3354",
|
|
|
|
| 502 |
"scripts": [
|
| 503 |
"Latn"
|
| 504 |
-
]
|
| 505 |
-
"own_tokenizer": false
|
| 506 |
},
|
| 507 |
{
|
| 508 |
"name": "Waiwai",
|
| 509 |
"iso_1_code": null,
|
| 510 |
"iso_3_code": "waw",
|
| 511 |
-
"tokenizers": {},
|
| 512 |
"children": [],
|
|
|
|
| 513 |
"node_i": "3355",
|
| 514 |
-
"
|
| 515 |
-
"
|
| 516 |
}
|
| 517 |
],
|
|
|
|
| 518 |
"node_i": "3353",
|
| 519 |
-
"
|
| 520 |
-
"
|
| 521 |
},
|
| 522 |
{
|
| 523 |
"name": "Yukpa",
|
| 524 |
"iso_1_code": null,
|
| 525 |
"iso_3_code": null,
|
| 526 |
-
"tokenizers": {},
|
| 527 |
"children": [
|
| 528 |
{
|
| 529 |
"name": "Yucpa-Yapreria",
|
| 530 |
"iso_1_code": null,
|
| 531 |
"iso_3_code": null,
|
| 532 |
-
"tokenizers": {},
|
| 533 |
"children": [
|
| 534 |
{
|
| 535 |
"name": "Japreria",
|
| 536 |
"iso_1_code": null,
|
| 537 |
"iso_3_code": "jru",
|
| 538 |
-
"tokenizers": {},
|
| 539 |
"children": [],
|
|
|
|
| 540 |
"node_i": "3358",
|
| 541 |
-
"
|
| 542 |
-
"
|
| 543 |
},
|
| 544 |
{
|
| 545 |
"name": "Yukpa",
|
| 546 |
"iso_1_code": null,
|
| 547 |
"iso_3_code": "yup",
|
| 548 |
-
"tokenizers": {},
|
| 549 |
"children": [],
|
|
|
|
| 550 |
"node_i": "3359",
|
|
|
|
| 551 |
"scripts": [
|
| 552 |
"Latn"
|
| 553 |
-
]
|
| 554 |
-
"own_tokenizer": false
|
| 555 |
}
|
| 556 |
],
|
|
|
|
| 557 |
"node_i": "3357",
|
| 558 |
-
"
|
| 559 |
-
"
|
| 560 |
}
|
| 561 |
],
|
|
|
|
| 562 |
"node_i": "3356",
|
| 563 |
-
"
|
| 564 |
-
"
|
| 565 |
}
|
| 566 |
],
|
|
|
|
| 567 |
"node_i": "3307",
|
| 568 |
-
"
|
| 569 |
-
"
|
| 570 |
}
|
|
|
|
| 2 |
"name": "Cariban",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Carib",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "car",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3308",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Central",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": null,
|
|
|
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Apala\u00ed",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "apy",
|
|
|
|
| 27 |
"children": [],
|
| 28 |
+
"tokenizers": {},
|
| 29 |
"node_i": "3310",
|
| 30 |
+
"native_tokenizers": [],
|
| 31 |
"scripts": [
|
| 32 |
"Latn"
|
| 33 |
+
]
|
|
|
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"name": "Cuman\u00e1",
|
| 37 |
"iso_1_code": null,
|
| 38 |
"iso_3_code": null,
|
|
|
|
| 39 |
"children": [
|
| 40 |
{
|
| 41 |
"name": "Chaima",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": "ciy",
|
|
|
|
| 44 |
"children": [],
|
| 45 |
+
"tokenizers": {},
|
| 46 |
"node_i": "3312",
|
| 47 |
+
"native_tokenizers": [],
|
| 48 |
+
"scripts": []
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"name": "Cumanagoto",
|
| 52 |
"iso_1_code": null,
|
| 53 |
"iso_3_code": "cuo",
|
|
|
|
| 54 |
"children": [],
|
| 55 |
+
"tokenizers": {},
|
| 56 |
"node_i": "3313",
|
| 57 |
+
"native_tokenizers": [],
|
| 58 |
+
"scripts": []
|
| 59 |
}
|
| 60 |
],
|
| 61 |
+
"tokenizers": {},
|
| 62 |
"node_i": "3311",
|
| 63 |
+
"native_tokenizers": [],
|
| 64 |
+
"scripts": []
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"name": "Makiritare",
|
| 68 |
"iso_1_code": null,
|
| 69 |
"iso_3_code": null,
|
|
|
|
| 70 |
"children": [
|
| 71 |
{
|
| 72 |
"name": "Maquiritari",
|
| 73 |
"iso_1_code": null,
|
| 74 |
"iso_3_code": "mch",
|
|
|
|
| 75 |
"children": [],
|
| 76 |
+
"tokenizers": {},
|
| 77 |
"node_i": "3315",
|
| 78 |
+
"native_tokenizers": [],
|
| 79 |
+
"scripts": []
|
| 80 |
}
|
| 81 |
],
|
| 82 |
+
"tokenizers": {},
|
| 83 |
"node_i": "3314",
|
| 84 |
+
"native_tokenizers": [],
|
| 85 |
+
"scripts": []
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"name": "Mapoyo-Yavarana",
|
| 89 |
"iso_1_code": null,
|
| 90 |
"iso_3_code": null,
|
|
|
|
| 91 |
"children": [
|
| 92 |
{
|
| 93 |
"name": "Mapoyo",
|
| 94 |
"iso_1_code": null,
|
| 95 |
"iso_3_code": "mcg",
|
|
|
|
| 96 |
"children": [],
|
| 97 |
+
"tokenizers": {},
|
| 98 |
"node_i": "3317",
|
| 99 |
+
"native_tokenizers": [],
|
| 100 |
+
"scripts": []
|
| 101 |
},
|
| 102 |
{
|
| 103 |
"name": "P\u00e9mono",
|
| 104 |
"iso_1_code": null,
|
| 105 |
"iso_3_code": "pev",
|
|
|
|
| 106 |
"children": [],
|
| 107 |
+
"tokenizers": {},
|
| 108 |
"node_i": "3318",
|
| 109 |
+
"native_tokenizers": [],
|
| 110 |
+
"scripts": []
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"name": "Tamanaku",
|
| 114 |
"iso_1_code": null,
|
| 115 |
"iso_3_code": "tmz",
|
|
|
|
| 116 |
"children": [],
|
| 117 |
+
"tokenizers": {},
|
| 118 |
"node_i": "3319",
|
| 119 |
+
"native_tokenizers": [],
|
| 120 |
+
"scripts": []
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"name": "Yabarana",
|
| 124 |
"iso_1_code": null,
|
| 125 |
"iso_3_code": "yar",
|
|
|
|
| 126 |
"children": [],
|
| 127 |
+
"tokenizers": {},
|
| 128 |
"node_i": "3320",
|
| 129 |
+
"native_tokenizers": [],
|
| 130 |
+
"scripts": []
|
| 131 |
}
|
| 132 |
],
|
| 133 |
+
"tokenizers": {},
|
| 134 |
"node_i": "3316",
|
| 135 |
+
"native_tokenizers": [],
|
| 136 |
+
"scripts": []
|
| 137 |
},
|
| 138 |
{
|
| 139 |
"name": "Wayana",
|
| 140 |
"iso_1_code": null,
|
| 141 |
"iso_3_code": null,
|
|
|
|
| 142 |
"children": [
|
| 143 |
{
|
| 144 |
"name": "Kaxui\u00e2na",
|
| 145 |
"iso_1_code": null,
|
| 146 |
"iso_3_code": "kbb",
|
|
|
|
| 147 |
"children": [],
|
| 148 |
+
"tokenizers": {},
|
| 149 |
"node_i": "3322",
|
| 150 |
+
"native_tokenizers": [],
|
| 151 |
+
"scripts": []
|
| 152 |
},
|
| 153 |
{
|
| 154 |
"name": "Wayana",
|
| 155 |
"iso_1_code": null,
|
| 156 |
"iso_3_code": "way",
|
|
|
|
| 157 |
"children": [],
|
| 158 |
+
"tokenizers": {},
|
| 159 |
"node_i": "3323",
|
| 160 |
+
"native_tokenizers": [],
|
| 161 |
"scripts": [
|
| 162 |
"Latn"
|
| 163 |
+
]
|
|
|
|
| 164 |
}
|
| 165 |
],
|
| 166 |
+
"tokenizers": {},
|
| 167 |
"node_i": "3321",
|
| 168 |
+
"native_tokenizers": [],
|
| 169 |
+
"scripts": []
|
| 170 |
}
|
| 171 |
],
|
| 172 |
+
"tokenizers": {},
|
| 173 |
"node_i": "3309",
|
| 174 |
+
"native_tokenizers": [],
|
| 175 |
+
"scripts": []
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"name": "Kashuyana",
|
| 179 |
"iso_1_code": null,
|
| 180 |
"iso_3_code": null,
|
|
|
|
| 181 |
"children": [
|
| 182 |
{
|
| 183 |
"name": "Sikiana",
|
| 184 |
"iso_1_code": null,
|
| 185 |
"iso_3_code": "sik",
|
|
|
|
| 186 |
"children": [],
|
| 187 |
+
"tokenizers": {},
|
| 188 |
"node_i": "3325",
|
| 189 |
+
"native_tokenizers": [],
|
| 190 |
+
"scripts": []
|
| 191 |
}
|
| 192 |
],
|
| 193 |
+
"tokenizers": {},
|
| 194 |
"node_i": "3324",
|
| 195 |
+
"native_tokenizers": [],
|
| 196 |
+
"scripts": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"name": "North Amazonian",
|
| 200 |
"iso_1_code": null,
|
| 201 |
"iso_3_code": null,
|
|
|
|
| 202 |
"children": [
|
| 203 |
{
|
| 204 |
"name": "Pem\u00f3n",
|
| 205 |
"iso_1_code": null,
|
| 206 |
"iso_3_code": null,
|
|
|
|
| 207 |
"children": [
|
| 208 |
{
|
| 209 |
"name": "Pem\u00f3n proper",
|
| 210 |
"iso_1_code": null,
|
| 211 |
"iso_3_code": null,
|
|
|
|
| 212 |
"children": [
|
| 213 |
{
|
| 214 |
"name": "Pemon",
|
| 215 |
"iso_1_code": null,
|
| 216 |
"iso_3_code": "aoc",
|
|
|
|
| 217 |
"children": [],
|
| 218 |
+
"tokenizers": {},
|
| 219 |
"node_i": "3329",
|
| 220 |
+
"native_tokenizers": [],
|
| 221 |
+
"scripts": []
|
| 222 |
},
|
| 223 |
{
|
| 224 |
"name": "Macushi",
|
| 225 |
"iso_1_code": null,
|
| 226 |
"iso_3_code": "mbc",
|
|
|
|
| 227 |
"children": [],
|
| 228 |
+
"tokenizers": {},
|
| 229 |
"node_i": "3330",
|
| 230 |
+
"native_tokenizers": [],
|
| 231 |
"scripts": [
|
| 232 |
"Latn"
|
| 233 |
+
]
|
|
|
|
| 234 |
},
|
| 235 |
{
|
| 236 |
"name": "Kapong",
|
| 237 |
"iso_1_code": null,
|
| 238 |
"iso_3_code": null,
|
|
|
|
| 239 |
"children": [
|
| 240 |
{
|
| 241 |
"name": "Akawaio",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": "ake",
|
|
|
|
| 244 |
"children": [],
|
| 245 |
+
"tokenizers": {},
|
| 246 |
"node_i": "3332",
|
| 247 |
+
"native_tokenizers": [],
|
| 248 |
"scripts": [
|
| 249 |
"Latn"
|
| 250 |
+
]
|
|
|
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"name": "Patamona",
|
| 254 |
"iso_1_code": null,
|
| 255 |
"iso_3_code": "pbc",
|
|
|
|
| 256 |
"children": [],
|
| 257 |
+
"tokenizers": {},
|
| 258 |
"node_i": "3333",
|
| 259 |
+
"native_tokenizers": [],
|
| 260 |
"scripts": [
|
| 261 |
"Latn"
|
| 262 |
+
]
|
|
|
|
| 263 |
}
|
| 264 |
],
|
| 265 |
+
"tokenizers": {},
|
| 266 |
"node_i": "3331",
|
| 267 |
+
"native_tokenizers": [],
|
| 268 |
+
"scripts": []
|
| 269 |
}
|
| 270 |
],
|
| 271 |
+
"tokenizers": {},
|
| 272 |
"node_i": "3328",
|
| 273 |
+
"native_tokenizers": [],
|
| 274 |
+
"scripts": []
|
| 275 |
}
|
| 276 |
],
|
| 277 |
+
"tokenizers": {},
|
| 278 |
"node_i": "3327",
|
| 279 |
+
"native_tokenizers": [],
|
| 280 |
+
"scripts": []
|
| 281 |
},
|
| 282 |
{
|
| 283 |
"name": "Yawaper\u00ed",
|
| 284 |
"iso_1_code": null,
|
| 285 |
"iso_3_code": null,
|
|
|
|
| 286 |
"children": [
|
| 287 |
{
|
| 288 |
"name": "Waimiri-Atroar\u00ed",
|
| 289 |
"iso_1_code": null,
|
| 290 |
"iso_3_code": "atr",
|
|
|
|
| 291 |
"children": [],
|
| 292 |
+
"tokenizers": {},
|
| 293 |
"node_i": "3335",
|
| 294 |
+
"native_tokenizers": [],
|
| 295 |
+
"scripts": []
|
| 296 |
}
|
| 297 |
],
|
| 298 |
+
"tokenizers": {},
|
| 299 |
"node_i": "3334",
|
| 300 |
+
"native_tokenizers": [],
|
| 301 |
+
"scripts": []
|
| 302 |
}
|
| 303 |
],
|
| 304 |
+
"tokenizers": {},
|
| 305 |
"node_i": "3326",
|
| 306 |
+
"native_tokenizers": [],
|
| 307 |
+
"scripts": []
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"name": "South Amazonian",
|
| 311 |
"iso_1_code": null,
|
| 312 |
"iso_3_code": null,
|
|
|
|
| 313 |
"children": [
|
| 314 |
{
|
| 315 |
"name": "E\u2019\u00f1apa Woromaipu",
|
| 316 |
"iso_1_code": null,
|
| 317 |
"iso_3_code": "pbh",
|
|
|
|
| 318 |
"children": [],
|
| 319 |
+
"tokenizers": {},
|
| 320 |
"node_i": "3337",
|
| 321 |
+
"native_tokenizers": [],
|
| 322 |
+
"scripts": []
|
| 323 |
},
|
| 324 |
{
|
| 325 |
"name": "Arara",
|
| 326 |
"iso_1_code": null,
|
| 327 |
"iso_3_code": null,
|
|
|
|
| 328 |
"children": [
|
| 329 |
{
|
| 330 |
"name": "Arara, Par\u00e1",
|
| 331 |
"iso_1_code": null,
|
| 332 |
"iso_3_code": "aap",
|
|
|
|
| 333 |
"children": [],
|
| 334 |
+
"tokenizers": {},
|
| 335 |
"node_i": "3339",
|
| 336 |
+
"native_tokenizers": [],
|
| 337 |
+
"scripts": []
|
| 338 |
},
|
| 339 |
{
|
| 340 |
"name": "Ikpeng",
|
| 341 |
"iso_1_code": null,
|
| 342 |
"iso_3_code": "txi",
|
|
|
|
| 343 |
"children": [],
|
| 344 |
+
"tokenizers": {},
|
| 345 |
"node_i": "3340",
|
| 346 |
+
"native_tokenizers": [],
|
| 347 |
+
"scripts": []
|
| 348 |
}
|
| 349 |
],
|
| 350 |
+
"tokenizers": {},
|
| 351 |
"node_i": "3338",
|
| 352 |
+
"native_tokenizers": [],
|
| 353 |
+
"scripts": []
|
| 354 |
},
|
| 355 |
{
|
| 356 |
"name": "Bakair\u00ed",
|
| 357 |
"iso_1_code": null,
|
| 358 |
"iso_3_code": null,
|
|
|
|
| 359 |
"children": [
|
| 360 |
{
|
| 361 |
"name": "Bakair\u00ed",
|
| 362 |
"iso_1_code": null,
|
| 363 |
"iso_3_code": "bkq",
|
|
|
|
| 364 |
"children": [],
|
| 365 |
+
"tokenizers": {},
|
| 366 |
"node_i": "3342",
|
| 367 |
+
"native_tokenizers": [],
|
| 368 |
"scripts": [
|
| 369 |
"Latn"
|
| 370 |
+
]
|
|
|
|
| 371 |
},
|
| 372 |
{
|
| 373 |
"name": "Amonap",
|
| 374 |
"iso_1_code": null,
|
| 375 |
"iso_3_code": null,
|
|
|
|
| 376 |
"children": [
|
| 377 |
{
|
| 378 |
"name": "Kuik\u00faro-Kalap\u00e1lo",
|
| 379 |
"iso_1_code": null,
|
| 380 |
"iso_3_code": "kui",
|
|
|
|
| 381 |
"children": [],
|
| 382 |
+
"tokenizers": {},
|
| 383 |
"node_i": "3344",
|
| 384 |
+
"native_tokenizers": [],
|
| 385 |
+
"scripts": []
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"name": "Matipuhy",
|
| 389 |
"iso_1_code": null,
|
| 390 |
"iso_3_code": "mzo",
|
|
|
|
| 391 |
"children": [],
|
| 392 |
+
"tokenizers": {},
|
| 393 |
"node_i": "3345",
|
| 394 |
+
"native_tokenizers": [],
|
| 395 |
+
"scripts": []
|
| 396 |
}
|
| 397 |
],
|
| 398 |
+
"tokenizers": {},
|
| 399 |
"node_i": "3343",
|
| 400 |
+
"native_tokenizers": [],
|
| 401 |
+
"scripts": []
|
| 402 |
}
|
| 403 |
],
|
| 404 |
+
"tokenizers": {},
|
| 405 |
"node_i": "3341",
|
| 406 |
+
"native_tokenizers": [],
|
| 407 |
+
"scripts": []
|
| 408 |
}
|
| 409 |
],
|
| 410 |
+
"tokenizers": {},
|
| 411 |
"node_i": "3336",
|
| 412 |
+
"native_tokenizers": [],
|
| 413 |
+
"scripts": []
|
| 414 |
},
|
| 415 |
{
|
| 416 |
"name": "Tiriy\u00f3",
|
| 417 |
"iso_1_code": null,
|
| 418 |
"iso_3_code": null,
|
|
|
|
| 419 |
"children": [
|
| 420 |
{
|
| 421 |
"name": "Salum\u00e1",
|
| 422 |
"iso_1_code": null,
|
| 423 |
"iso_3_code": "slj",
|
|
|
|
| 424 |
"children": [],
|
| 425 |
+
"tokenizers": {},
|
| 426 |
"node_i": "3347",
|
| 427 |
+
"native_tokenizers": [],
|
| 428 |
+
"scripts": []
|
| 429 |
},
|
| 430 |
{
|
| 431 |
"name": "Karihona",
|
| 432 |
"iso_1_code": null,
|
| 433 |
"iso_3_code": null,
|
|
|
|
| 434 |
"children": [
|
| 435 |
{
|
| 436 |
"name": "Carijona",
|
| 437 |
"iso_1_code": null,
|
| 438 |
"iso_3_code": "cbd",
|
|
|
|
| 439 |
"children": [],
|
| 440 |
+
"tokenizers": {},
|
| 441 |
"node_i": "3349",
|
| 442 |
+
"native_tokenizers": [],
|
| 443 |
+
"scripts": []
|
| 444 |
}
|
| 445 |
],
|
| 446 |
+
"tokenizers": {},
|
| 447 |
"node_i": "3348",
|
| 448 |
+
"native_tokenizers": [],
|
| 449 |
+
"scripts": []
|
| 450 |
},
|
| 451 |
{
|
| 452 |
"name": "Tiriy\u00f3",
|
| 453 |
"iso_1_code": null,
|
| 454 |
"iso_3_code": null,
|
|
|
|
| 455 |
"children": [
|
| 456 |
{
|
| 457 |
"name": "Akurio",
|
| 458 |
"iso_1_code": null,
|
| 459 |
"iso_3_code": "ako",
|
|
|
|
| 460 |
"children": [],
|
| 461 |
+
"tokenizers": {},
|
| 462 |
"node_i": "3351",
|
| 463 |
+
"native_tokenizers": [],
|
| 464 |
+
"scripts": []
|
| 465 |
},
|
| 466 |
{
|
| 467 |
"name": "Tri\u00f3",
|
| 468 |
"iso_1_code": null,
|
| 469 |
"iso_3_code": "tri",
|
|
|
|
| 470 |
"children": [],
|
| 471 |
+
"tokenizers": {},
|
| 472 |
"node_i": "3352",
|
| 473 |
+
"native_tokenizers": [],
|
| 474 |
+
"scripts": []
|
| 475 |
}
|
| 476 |
],
|
| 477 |
+
"tokenizers": {},
|
| 478 |
"node_i": "3350",
|
| 479 |
+
"native_tokenizers": [],
|
| 480 |
+
"scripts": []
|
| 481 |
}
|
| 482 |
],
|
| 483 |
+
"tokenizers": {},
|
| 484 |
"node_i": "3346",
|
| 485 |
+
"native_tokenizers": [],
|
| 486 |
+
"scripts": []
|
| 487 |
},
|
| 488 |
{
|
| 489 |
"name": "Waiwai",
|
| 490 |
"iso_1_code": null,
|
| 491 |
"iso_3_code": null,
|
|
|
|
| 492 |
"children": [
|
| 493 |
{
|
| 494 |
"name": "Hixkary\u00e1na",
|
| 495 |
"iso_1_code": null,
|
| 496 |
"iso_3_code": "hix",
|
|
|
|
| 497 |
"children": [],
|
| 498 |
+
"tokenizers": {},
|
| 499 |
"node_i": "3354",
|
| 500 |
+
"native_tokenizers": [],
|
| 501 |
"scripts": [
|
| 502 |
"Latn"
|
| 503 |
+
]
|
|
|
|
| 504 |
},
|
| 505 |
{
|
| 506 |
"name": "Waiwai",
|
| 507 |
"iso_1_code": null,
|
| 508 |
"iso_3_code": "waw",
|
|
|
|
| 509 |
"children": [],
|
| 510 |
+
"tokenizers": {},
|
| 511 |
"node_i": "3355",
|
| 512 |
+
"native_tokenizers": [],
|
| 513 |
+
"scripts": []
|
| 514 |
}
|
| 515 |
],
|
| 516 |
+
"tokenizers": {},
|
| 517 |
"node_i": "3353",
|
| 518 |
+
"native_tokenizers": [],
|
| 519 |
+
"scripts": []
|
| 520 |
},
|
| 521 |
{
|
| 522 |
"name": "Yukpa",
|
| 523 |
"iso_1_code": null,
|
| 524 |
"iso_3_code": null,
|
|
|
|
| 525 |
"children": [
|
| 526 |
{
|
| 527 |
"name": "Yucpa-Yapreria",
|
| 528 |
"iso_1_code": null,
|
| 529 |
"iso_3_code": null,
|
|
|
|
| 530 |
"children": [
|
| 531 |
{
|
| 532 |
"name": "Japreria",
|
| 533 |
"iso_1_code": null,
|
| 534 |
"iso_3_code": "jru",
|
|
|
|
| 535 |
"children": [],
|
| 536 |
+
"tokenizers": {},
|
| 537 |
"node_i": "3358",
|
| 538 |
+
"native_tokenizers": [],
|
| 539 |
+
"scripts": []
|
| 540 |
},
|
| 541 |
{
|
| 542 |
"name": "Yukpa",
|
| 543 |
"iso_1_code": null,
|
| 544 |
"iso_3_code": "yup",
|
|
|
|
| 545 |
"children": [],
|
| 546 |
+
"tokenizers": {},
|
| 547 |
"node_i": "3359",
|
| 548 |
+
"native_tokenizers": [],
|
| 549 |
"scripts": [
|
| 550 |
"Latn"
|
| 551 |
+
]
|
|
|
|
| 552 |
}
|
| 553 |
],
|
| 554 |
+
"tokenizers": {},
|
| 555 |
"node_i": "3357",
|
| 556 |
+
"native_tokenizers": [],
|
| 557 |
+
"scripts": []
|
| 558 |
}
|
| 559 |
],
|
| 560 |
+
"tokenizers": {},
|
| 561 |
"node_i": "3356",
|
| 562 |
+
"native_tokenizers": [],
|
| 563 |
+
"scripts": []
|
| 564 |
}
|
| 565 |
],
|
| 566 |
+
"tokenizers": {},
|
| 567 |
"node_i": "3307",
|
| 568 |
+
"native_tokenizers": [],
|
| 569 |
+
"scripts": []
|
| 570 |
}
|
data/Central Solomons.json
CHANGED
|
@@ -2,50 +2,50 @@
|
|
| 2 |
"name": "Central Solomons",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Bilua",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "blb",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3361",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Lavukaleve",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "lvk",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3362",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "Savosavo",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": "svs",
|
| 31 |
-
"tokenizers": {},
|
| 32 |
"children": [],
|
|
|
|
| 33 |
"node_i": "3363",
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Touo",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": "tqu",
|
| 41 |
-
"tokenizers": {},
|
| 42 |
"children": [],
|
|
|
|
| 43 |
"node_i": "3364",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
}
|
| 47 |
],
|
|
|
|
| 48 |
"node_i": "3360",
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
}
|
|
|
|
| 2 |
"name": "Central Solomons",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Bilua",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "blb",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3361",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Lavukaleve",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "lvk",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3362",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Savosavo",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "svs",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3363",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Touo",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "tqu",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3364",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
}
|
| 46 |
],
|
| 47 |
+
"tokenizers": {},
|
| 48 |
"node_i": "3360",
|
| 49 |
+
"native_tokenizers": [],
|
| 50 |
+
"scripts": []
|
| 51 |
}
|
data/Chapacuran.json
CHANGED
|
@@ -2,72 +2,72 @@
|
|
| 2 |
"name": "Chapacuran",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Itene",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Itene",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "ite",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3367",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Tor\u00e1",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "trz",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3368",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
}
|
| 33 |
],
|
|
|
|
| 34 |
"node_i": "3366",
|
| 35 |
-
"
|
| 36 |
-
"
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"name": "Wari",
|
| 40 |
"iso_1_code": null,
|
| 41 |
"iso_3_code": null,
|
| 42 |
-
"tokenizers": {},
|
| 43 |
"children": [
|
| 44 |
{
|
| 45 |
"name": "Oro Win",
|
| 46 |
"iso_1_code": null,
|
| 47 |
"iso_3_code": "orw",
|
| 48 |
-
"tokenizers": {},
|
| 49 |
"children": [],
|
|
|
|
| 50 |
"node_i": "3370",
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"name": "Paka\u00e1snovos",
|
| 56 |
"iso_1_code": null,
|
| 57 |
"iso_3_code": "pav",
|
| 58 |
-
"tokenizers": {},
|
| 59 |
"children": [],
|
|
|
|
| 60 |
"node_i": "3371",
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
}
|
| 64 |
],
|
|
|
|
| 65 |
"node_i": "3369",
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
}
|
| 69 |
],
|
|
|
|
| 70 |
"node_i": "3365",
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
}
|
|
|
|
| 2 |
"name": "Chapacuran",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Itene",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Itene",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "ite",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3367",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Tor\u00e1",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "trz",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3368",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
}
|
| 31 |
],
|
| 32 |
+
"tokenizers": {},
|
| 33 |
"node_i": "3366",
|
| 34 |
+
"native_tokenizers": [],
|
| 35 |
+
"scripts": []
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Wari",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": null,
|
|
|
|
| 41 |
"children": [
|
| 42 |
{
|
| 43 |
"name": "Oro Win",
|
| 44 |
"iso_1_code": null,
|
| 45 |
"iso_3_code": "orw",
|
|
|
|
| 46 |
"children": [],
|
| 47 |
+
"tokenizers": {},
|
| 48 |
"node_i": "3370",
|
| 49 |
+
"native_tokenizers": [],
|
| 50 |
+
"scripts": []
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"name": "Paka\u00e1snovos",
|
| 54 |
"iso_1_code": null,
|
| 55 |
"iso_3_code": "pav",
|
|
|
|
| 56 |
"children": [],
|
| 57 |
+
"tokenizers": {},
|
| 58 |
"node_i": "3371",
|
| 59 |
+
"native_tokenizers": [],
|
| 60 |
+
"scripts": []
|
| 61 |
}
|
| 62 |
],
|
| 63 |
+
"tokenizers": {},
|
| 64 |
"node_i": "3369",
|
| 65 |
+
"native_tokenizers": [],
|
| 66 |
+
"scripts": []
|
| 67 |
}
|
| 68 |
],
|
| 69 |
+
"tokenizers": {},
|
| 70 |
"node_i": "3365",
|
| 71 |
+
"native_tokenizers": [],
|
| 72 |
+
"scripts": []
|
| 73 |
}
|
data/Chibchan.json
CHANGED
|
@@ -2,392 +2,392 @@
|
|
| 2 |
"name": "Chibchan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Chibchan A",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Boruca",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "brn",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3374",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Teribe",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "tfr",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3375",
|
|
|
|
| 30 |
"scripts": [
|
| 31 |
"Latn"
|
| 32 |
-
]
|
| 33 |
-
"own_tokenizer": false
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"name": "Guaymi\u00edc",
|
| 37 |
"iso_1_code": null,
|
| 38 |
"iso_3_code": null,
|
| 39 |
-
"tokenizers": {},
|
| 40 |
"children": [
|
| 41 |
{
|
| 42 |
"name": "Ng\u00e4bere",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": "gym",
|
| 45 |
-
"tokenizers": {},
|
| 46 |
"children": [],
|
|
|
|
| 47 |
"node_i": "3377",
|
|
|
|
| 48 |
"scripts": [
|
| 49 |
"Latn"
|
| 50 |
-
]
|
| 51 |
-
"own_tokenizer": false
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Buglere",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "sab",
|
| 57 |
-
"tokenizers": {},
|
| 58 |
"children": [],
|
|
|
|
| 59 |
"node_i": "3378",
|
|
|
|
| 60 |
"scripts": [
|
| 61 |
"Latn"
|
| 62 |
-
]
|
| 63 |
-
"own_tokenizer": false
|
| 64 |
}
|
| 65 |
],
|
|
|
|
| 66 |
"node_i": "3376",
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"name": "Viceitic",
|
| 72 |
"iso_1_code": null,
|
| 73 |
"iso_3_code": null,
|
| 74 |
-
"tokenizers": {},
|
| 75 |
"children": [
|
| 76 |
{
|
| 77 |
"name": "Bribri",
|
| 78 |
"iso_1_code": null,
|
| 79 |
"iso_3_code": "bzd",
|
| 80 |
-
"tokenizers": {},
|
| 81 |
"children": [],
|
|
|
|
| 82 |
"node_i": "3380",
|
|
|
|
| 83 |
"scripts": [
|
| 84 |
"Latn"
|
| 85 |
-
]
|
| 86 |
-
"own_tokenizer": false
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"name": "Cab\u00e9car",
|
| 90 |
"iso_1_code": null,
|
| 91 |
"iso_3_code": "cjp",
|
| 92 |
-
"tokenizers": {},
|
| 93 |
"children": [],
|
|
|
|
| 94 |
"node_i": "3381",
|
|
|
|
| 95 |
"scripts": [
|
| 96 |
"Latn"
|
| 97 |
-
]
|
| 98 |
-
"own_tokenizer": false
|
| 99 |
}
|
| 100 |
],
|
|
|
|
| 101 |
"node_i": "3379",
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
}
|
| 105 |
],
|
|
|
|
| 106 |
"node_i": "3373",
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"name": "Chibchan B",
|
| 112 |
"iso_1_code": null,
|
| 113 |
"iso_3_code": null,
|
| 114 |
-
"tokenizers": {},
|
| 115 |
"children": [
|
| 116 |
{
|
| 117 |
"name": "Pech",
|
| 118 |
"iso_1_code": null,
|
| 119 |
"iso_3_code": "pay",
|
| 120 |
-
"tokenizers": {},
|
| 121 |
"children": [],
|
|
|
|
| 122 |
"node_i": "3383",
|
| 123 |
-
"
|
| 124 |
-
"
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"name": "Eastern Chibchan",
|
| 128 |
"iso_1_code": null,
|
| 129 |
"iso_3_code": null,
|
| 130 |
-
"tokenizers": {},
|
| 131 |
"children": [
|
| 132 |
{
|
| 133 |
"name": "Colombian",
|
| 134 |
"iso_1_code": null,
|
| 135 |
"iso_3_code": null,
|
| 136 |
-
"tokenizers": {},
|
| 137 |
"children": [
|
| 138 |
{
|
| 139 |
"name": "Northern Colombian",
|
| 140 |
"iso_1_code": null,
|
| 141 |
"iso_3_code": null,
|
| 142 |
-
"tokenizers": {},
|
| 143 |
"children": [
|
| 144 |
{
|
| 145 |
"name": "Chimila",
|
| 146 |
"iso_1_code": null,
|
| 147 |
"iso_3_code": "cbg",
|
| 148 |
-
"tokenizers": {},
|
| 149 |
"children": [],
|
|
|
|
| 150 |
"node_i": "3387",
|
| 151 |
-
"
|
| 152 |
-
"
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"name": "Arhuacan",
|
| 156 |
"iso_1_code": null,
|
| 157 |
"iso_3_code": null,
|
| 158 |
-
"tokenizers": {},
|
| 159 |
"children": [
|
| 160 |
{
|
| 161 |
"name": "Kogi",
|
| 162 |
"iso_1_code": null,
|
| 163 |
"iso_3_code": "kog",
|
| 164 |
-
"tokenizers": {},
|
| 165 |
"children": [],
|
|
|
|
| 166 |
"node_i": "3389",
|
|
|
|
| 167 |
"scripts": [
|
| 168 |
"Latn"
|
| 169 |
-
]
|
| 170 |
-
"own_tokenizer": false
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"name": "Southern and Eastern Arhuacan",
|
| 174 |
"iso_1_code": null,
|
| 175 |
"iso_3_code": null,
|
| 176 |
-
"tokenizers": {},
|
| 177 |
"children": [
|
| 178 |
{
|
| 179 |
"name": "Arhuaco",
|
| 180 |
"iso_1_code": null,
|
| 181 |
"iso_3_code": "arh",
|
| 182 |
-
"tokenizers": {},
|
| 183 |
"children": [],
|
|
|
|
| 184 |
"node_i": "3391",
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"name": "Guamaca-Atanque",
|
| 190 |
"iso_1_code": null,
|
| 191 |
"iso_3_code": null,
|
| 192 |
-
"tokenizers": {},
|
| 193 |
"children": [
|
| 194 |
{
|
| 195 |
"name": "Sanka",
|
| 196 |
"iso_1_code": null,
|
| 197 |
"iso_3_code": "mbp",
|
| 198 |
-
"tokenizers": {},
|
| 199 |
"children": [],
|
|
|
|
| 200 |
"node_i": "3393",
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
}
|
| 204 |
],
|
|
|
|
| 205 |
"node_i": "3392",
|
| 206 |
-
"
|
| 207 |
-
"
|
| 208 |
}
|
| 209 |
],
|
|
|
|
| 210 |
"node_i": "3390",
|
| 211 |
-
"
|
| 212 |
-
"
|
| 213 |
}
|
| 214 |
],
|
|
|
|
| 215 |
"node_i": "3388",
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
}
|
| 219 |
],
|
|
|
|
| 220 |
"node_i": "3386",
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
},
|
| 224 |
{
|
| 225 |
"name": "Southern Colombian",
|
| 226 |
"iso_1_code": null,
|
| 227 |
"iso_3_code": null,
|
| 228 |
-
"tokenizers": {},
|
| 229 |
"children": [
|
| 230 |
{
|
| 231 |
"name": "Bar\u00ed",
|
| 232 |
"iso_1_code": null,
|
| 233 |
"iso_3_code": "mot",
|
| 234 |
-
"tokenizers": {},
|
| 235 |
"children": [],
|
|
|
|
| 236 |
"node_i": "3395",
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
},
|
| 240 |
{
|
| 241 |
"name": "Cundicocuyese",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": null,
|
| 244 |
-
"tokenizers": {},
|
| 245 |
"children": [
|
| 246 |
{
|
| 247 |
"name": "Chibcha",
|
| 248 |
"iso_1_code": null,
|
| 249 |
"iso_3_code": "chb",
|
| 250 |
-
"tokenizers": {},
|
| 251 |
"children": [],
|
|
|
|
| 252 |
"node_i": "3397",
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
},
|
| 256 |
{
|
| 257 |
"name": "Tunebo, Barro Negro",
|
| 258 |
"iso_1_code": null,
|
| 259 |
"iso_3_code": "tbn",
|
| 260 |
-
"tokenizers": {},
|
| 261 |
"children": [],
|
|
|
|
| 262 |
"node_i": "3398",
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
},
|
| 266 |
{
|
| 267 |
"name": "Tunebo, Western",
|
| 268 |
"iso_1_code": null,
|
| 269 |
"iso_3_code": "tnb",
|
| 270 |
-
"tokenizers": {},
|
| 271 |
"children": [],
|
|
|
|
| 272 |
"node_i": "3399",
|
| 273 |
-
"
|
| 274 |
-
"
|
| 275 |
},
|
| 276 |
{
|
| 277 |
"name": "Tunebo, Angosturas",
|
| 278 |
"iso_1_code": null,
|
| 279 |
"iso_3_code": "tnd",
|
| 280 |
-
"tokenizers": {},
|
| 281 |
"children": [],
|
|
|
|
| 282 |
"node_i": "3400",
|
| 283 |
-
"
|
| 284 |
-
"
|
| 285 |
},
|
| 286 |
{
|
| 287 |
"name": "Tunebo, Central",
|
| 288 |
"iso_1_code": null,
|
| 289 |
"iso_3_code": "tuf",
|
| 290 |
-
"tokenizers": {},
|
| 291 |
"children": [],
|
|
|
|
| 292 |
"node_i": "3401",
|
|
|
|
| 293 |
"scripts": [
|
| 294 |
"Latn"
|
| 295 |
-
]
|
| 296 |
-
"own_tokenizer": false
|
| 297 |
}
|
| 298 |
],
|
|
|
|
| 299 |
"node_i": "3396",
|
| 300 |
-
"
|
| 301 |
-
"
|
| 302 |
}
|
| 303 |
],
|
|
|
|
| 304 |
"node_i": "3394",
|
| 305 |
-
"
|
| 306 |
-
"
|
| 307 |
}
|
| 308 |
],
|
|
|
|
| 309 |
"node_i": "3385",
|
| 310 |
-
"
|
| 311 |
-
"
|
| 312 |
},
|
| 313 |
{
|
| 314 |
"name": "Cuna",
|
| 315 |
"iso_1_code": null,
|
| 316 |
"iso_3_code": null,
|
| 317 |
-
"tokenizers": {},
|
| 318 |
"children": [
|
| 319 |
{
|
| 320 |
"name": "Kuna, San Blas",
|
| 321 |
"iso_1_code": null,
|
| 322 |
"iso_3_code": "cuk",
|
| 323 |
-
"tokenizers": {},
|
| 324 |
"children": [],
|
|
|
|
| 325 |
"node_i": "3403",
|
|
|
|
| 326 |
"scripts": [
|
| 327 |
"Latn"
|
| 328 |
-
]
|
| 329 |
-
"own_tokenizer": false
|
| 330 |
},
|
| 331 |
{
|
| 332 |
"name": "Kuna, Border",
|
| 333 |
"iso_1_code": null,
|
| 334 |
"iso_3_code": "kvn",
|
| 335 |
-
"tokenizers": {},
|
| 336 |
"children": [],
|
|
|
|
| 337 |
"node_i": "3404",
|
|
|
|
| 338 |
"scripts": [
|
| 339 |
"Latn"
|
| 340 |
-
]
|
| 341 |
-
"own_tokenizer": false
|
| 342 |
}
|
| 343 |
],
|
|
|
|
| 344 |
"node_i": "3402",
|
| 345 |
-
"
|
| 346 |
-
"
|
| 347 |
}
|
| 348 |
],
|
|
|
|
| 349 |
"node_i": "3384",
|
| 350 |
-
"
|
| 351 |
-
"
|
| 352 |
},
|
| 353 |
{
|
| 354 |
"name": "Votic",
|
| 355 |
"iso_1_code": null,
|
| 356 |
"iso_3_code": null,
|
| 357 |
-
"tokenizers": {},
|
| 358 |
"children": [
|
| 359 |
{
|
| 360 |
"name": "Mal\u00e9ku Ja\u00edka",
|
| 361 |
"iso_1_code": null,
|
| 362 |
"iso_3_code": "gut",
|
| 363 |
-
"tokenizers": {},
|
| 364 |
"children": [],
|
|
|
|
| 365 |
"node_i": "3406",
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"name": "Rama",
|
| 371 |
"iso_1_code": null,
|
| 372 |
"iso_3_code": "rma",
|
| 373 |
-
"tokenizers": {},
|
| 374 |
"children": [],
|
|
|
|
| 375 |
"node_i": "3407",
|
| 376 |
-
"
|
| 377 |
-
"
|
| 378 |
}
|
| 379 |
],
|
|
|
|
| 380 |
"node_i": "3405",
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
}
|
| 384 |
],
|
|
|
|
| 385 |
"node_i": "3382",
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
}
|
| 389 |
],
|
|
|
|
| 390 |
"node_i": "3372",
|
| 391 |
-
"
|
| 392 |
-
"
|
| 393 |
}
|
|
|
|
| 2 |
"name": "Chibchan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Chibchan A",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Boruca",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "brn",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3374",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Teribe",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "tfr",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3375",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
"scripts": [
|
| 30 |
"Latn"
|
| 31 |
+
]
|
|
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Guaymi\u00edc",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": null,
|
|
|
|
| 37 |
"children": [
|
| 38 |
{
|
| 39 |
"name": "Ng\u00e4bere",
|
| 40 |
"iso_1_code": null,
|
| 41 |
"iso_3_code": "gym",
|
|
|
|
| 42 |
"children": [],
|
| 43 |
+
"tokenizers": {},
|
| 44 |
"node_i": "3377",
|
| 45 |
+
"native_tokenizers": [],
|
| 46 |
"scripts": [
|
| 47 |
"Latn"
|
| 48 |
+
]
|
|
|
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"name": "Buglere",
|
| 52 |
"iso_1_code": null,
|
| 53 |
"iso_3_code": "sab",
|
|
|
|
| 54 |
"children": [],
|
| 55 |
+
"tokenizers": {},
|
| 56 |
"node_i": "3378",
|
| 57 |
+
"native_tokenizers": [],
|
| 58 |
"scripts": [
|
| 59 |
"Latn"
|
| 60 |
+
]
|
|
|
|
| 61 |
}
|
| 62 |
],
|
| 63 |
+
"tokenizers": {},
|
| 64 |
"node_i": "3376",
|
| 65 |
+
"native_tokenizers": [],
|
| 66 |
+
"scripts": []
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Viceitic",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": null,
|
|
|
|
| 72 |
"children": [
|
| 73 |
{
|
| 74 |
"name": "Bribri",
|
| 75 |
"iso_1_code": null,
|
| 76 |
"iso_3_code": "bzd",
|
|
|
|
| 77 |
"children": [],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3380",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
"scripts": [
|
| 82 |
"Latn"
|
| 83 |
+
]
|
|
|
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"name": "Cab\u00e9car",
|
| 87 |
"iso_1_code": null,
|
| 88 |
"iso_3_code": "cjp",
|
|
|
|
| 89 |
"children": [],
|
| 90 |
+
"tokenizers": {},
|
| 91 |
"node_i": "3381",
|
| 92 |
+
"native_tokenizers": [],
|
| 93 |
"scripts": [
|
| 94 |
"Latn"
|
| 95 |
+
]
|
|
|
|
| 96 |
}
|
| 97 |
],
|
| 98 |
+
"tokenizers": {},
|
| 99 |
"node_i": "3379",
|
| 100 |
+
"native_tokenizers": [],
|
| 101 |
+
"scripts": []
|
| 102 |
}
|
| 103 |
],
|
| 104 |
+
"tokenizers": {},
|
| 105 |
"node_i": "3373",
|
| 106 |
+
"native_tokenizers": [],
|
| 107 |
+
"scripts": []
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"name": "Chibchan B",
|
| 111 |
"iso_1_code": null,
|
| 112 |
"iso_3_code": null,
|
|
|
|
| 113 |
"children": [
|
| 114 |
{
|
| 115 |
"name": "Pech",
|
| 116 |
"iso_1_code": null,
|
| 117 |
"iso_3_code": "pay",
|
|
|
|
| 118 |
"children": [],
|
| 119 |
+
"tokenizers": {},
|
| 120 |
"node_i": "3383",
|
| 121 |
+
"native_tokenizers": [],
|
| 122 |
+
"scripts": []
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"name": "Eastern Chibchan",
|
| 126 |
"iso_1_code": null,
|
| 127 |
"iso_3_code": null,
|
|
|
|
| 128 |
"children": [
|
| 129 |
{
|
| 130 |
"name": "Colombian",
|
| 131 |
"iso_1_code": null,
|
| 132 |
"iso_3_code": null,
|
|
|
|
| 133 |
"children": [
|
| 134 |
{
|
| 135 |
"name": "Northern Colombian",
|
| 136 |
"iso_1_code": null,
|
| 137 |
"iso_3_code": null,
|
|
|
|
| 138 |
"children": [
|
| 139 |
{
|
| 140 |
"name": "Chimila",
|
| 141 |
"iso_1_code": null,
|
| 142 |
"iso_3_code": "cbg",
|
|
|
|
| 143 |
"children": [],
|
| 144 |
+
"tokenizers": {},
|
| 145 |
"node_i": "3387",
|
| 146 |
+
"native_tokenizers": [],
|
| 147 |
+
"scripts": []
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"name": "Arhuacan",
|
| 151 |
"iso_1_code": null,
|
| 152 |
"iso_3_code": null,
|
|
|
|
| 153 |
"children": [
|
| 154 |
{
|
| 155 |
"name": "Kogi",
|
| 156 |
"iso_1_code": null,
|
| 157 |
"iso_3_code": "kog",
|
|
|
|
| 158 |
"children": [],
|
| 159 |
+
"tokenizers": {},
|
| 160 |
"node_i": "3389",
|
| 161 |
+
"native_tokenizers": [],
|
| 162 |
"scripts": [
|
| 163 |
"Latn"
|
| 164 |
+
]
|
|
|
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "Southern and Eastern Arhuacan",
|
| 168 |
"iso_1_code": null,
|
| 169 |
"iso_3_code": null,
|
|
|
|
| 170 |
"children": [
|
| 171 |
{
|
| 172 |
"name": "Arhuaco",
|
| 173 |
"iso_1_code": null,
|
| 174 |
"iso_3_code": "arh",
|
|
|
|
| 175 |
"children": [],
|
| 176 |
+
"tokenizers": {},
|
| 177 |
"node_i": "3391",
|
| 178 |
+
"native_tokenizers": [],
|
| 179 |
+
"scripts": []
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"name": "Guamaca-Atanque",
|
| 183 |
"iso_1_code": null,
|
| 184 |
"iso_3_code": null,
|
|
|
|
| 185 |
"children": [
|
| 186 |
{
|
| 187 |
"name": "Sanka",
|
| 188 |
"iso_1_code": null,
|
| 189 |
"iso_3_code": "mbp",
|
|
|
|
| 190 |
"children": [],
|
| 191 |
+
"tokenizers": {},
|
| 192 |
"node_i": "3393",
|
| 193 |
+
"native_tokenizers": [],
|
| 194 |
+
"scripts": []
|
| 195 |
}
|
| 196 |
],
|
| 197 |
+
"tokenizers": {},
|
| 198 |
"node_i": "3392",
|
| 199 |
+
"native_tokenizers": [],
|
| 200 |
+
"scripts": []
|
| 201 |
}
|
| 202 |
],
|
| 203 |
+
"tokenizers": {},
|
| 204 |
"node_i": "3390",
|
| 205 |
+
"native_tokenizers": [],
|
| 206 |
+
"scripts": []
|
| 207 |
}
|
| 208 |
],
|
| 209 |
+
"tokenizers": {},
|
| 210 |
"node_i": "3388",
|
| 211 |
+
"native_tokenizers": [],
|
| 212 |
+
"scripts": []
|
| 213 |
}
|
| 214 |
],
|
| 215 |
+
"tokenizers": {},
|
| 216 |
"node_i": "3386",
|
| 217 |
+
"native_tokenizers": [],
|
| 218 |
+
"scripts": []
|
| 219 |
},
|
| 220 |
{
|
| 221 |
"name": "Southern Colombian",
|
| 222 |
"iso_1_code": null,
|
| 223 |
"iso_3_code": null,
|
|
|
|
| 224 |
"children": [
|
| 225 |
{
|
| 226 |
"name": "Bar\u00ed",
|
| 227 |
"iso_1_code": null,
|
| 228 |
"iso_3_code": "mot",
|
|
|
|
| 229 |
"children": [],
|
| 230 |
+
"tokenizers": {},
|
| 231 |
"node_i": "3395",
|
| 232 |
+
"native_tokenizers": [],
|
| 233 |
+
"scripts": []
|
| 234 |
},
|
| 235 |
{
|
| 236 |
"name": "Cundicocuyese",
|
| 237 |
"iso_1_code": null,
|
| 238 |
"iso_3_code": null,
|
|
|
|
| 239 |
"children": [
|
| 240 |
{
|
| 241 |
"name": "Chibcha",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": "chb",
|
|
|
|
| 244 |
"children": [],
|
| 245 |
+
"tokenizers": {},
|
| 246 |
"node_i": "3397",
|
| 247 |
+
"native_tokenizers": [],
|
| 248 |
+
"scripts": []
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"name": "Tunebo, Barro Negro",
|
| 252 |
"iso_1_code": null,
|
| 253 |
"iso_3_code": "tbn",
|
|
|
|
| 254 |
"children": [],
|
| 255 |
+
"tokenizers": {},
|
| 256 |
"node_i": "3398",
|
| 257 |
+
"native_tokenizers": [],
|
| 258 |
+
"scripts": []
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"name": "Tunebo, Western",
|
| 262 |
"iso_1_code": null,
|
| 263 |
"iso_3_code": "tnb",
|
|
|
|
| 264 |
"children": [],
|
| 265 |
+
"tokenizers": {},
|
| 266 |
"node_i": "3399",
|
| 267 |
+
"native_tokenizers": [],
|
| 268 |
+
"scripts": []
|
| 269 |
},
|
| 270 |
{
|
| 271 |
"name": "Tunebo, Angosturas",
|
| 272 |
"iso_1_code": null,
|
| 273 |
"iso_3_code": "tnd",
|
|
|
|
| 274 |
"children": [],
|
| 275 |
+
"tokenizers": {},
|
| 276 |
"node_i": "3400",
|
| 277 |
+
"native_tokenizers": [],
|
| 278 |
+
"scripts": []
|
| 279 |
},
|
| 280 |
{
|
| 281 |
"name": "Tunebo, Central",
|
| 282 |
"iso_1_code": null,
|
| 283 |
"iso_3_code": "tuf",
|
|
|
|
| 284 |
"children": [],
|
| 285 |
+
"tokenizers": {},
|
| 286 |
"node_i": "3401",
|
| 287 |
+
"native_tokenizers": [],
|
| 288 |
"scripts": [
|
| 289 |
"Latn"
|
| 290 |
+
]
|
|
|
|
| 291 |
}
|
| 292 |
],
|
| 293 |
+
"tokenizers": {},
|
| 294 |
"node_i": "3396",
|
| 295 |
+
"native_tokenizers": [],
|
| 296 |
+
"scripts": []
|
| 297 |
}
|
| 298 |
],
|
| 299 |
+
"tokenizers": {},
|
| 300 |
"node_i": "3394",
|
| 301 |
+
"native_tokenizers": [],
|
| 302 |
+
"scripts": []
|
| 303 |
}
|
| 304 |
],
|
| 305 |
+
"tokenizers": {},
|
| 306 |
"node_i": "3385",
|
| 307 |
+
"native_tokenizers": [],
|
| 308 |
+
"scripts": []
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"name": "Cuna",
|
| 312 |
"iso_1_code": null,
|
| 313 |
"iso_3_code": null,
|
|
|
|
| 314 |
"children": [
|
| 315 |
{
|
| 316 |
"name": "Kuna, San Blas",
|
| 317 |
"iso_1_code": null,
|
| 318 |
"iso_3_code": "cuk",
|
|
|
|
| 319 |
"children": [],
|
| 320 |
+
"tokenizers": {},
|
| 321 |
"node_i": "3403",
|
| 322 |
+
"native_tokenizers": [],
|
| 323 |
"scripts": [
|
| 324 |
"Latn"
|
| 325 |
+
]
|
|
|
|
| 326 |
},
|
| 327 |
{
|
| 328 |
"name": "Kuna, Border",
|
| 329 |
"iso_1_code": null,
|
| 330 |
"iso_3_code": "kvn",
|
|
|
|
| 331 |
"children": [],
|
| 332 |
+
"tokenizers": {},
|
| 333 |
"node_i": "3404",
|
| 334 |
+
"native_tokenizers": [],
|
| 335 |
"scripts": [
|
| 336 |
"Latn"
|
| 337 |
+
]
|
|
|
|
| 338 |
}
|
| 339 |
],
|
| 340 |
+
"tokenizers": {},
|
| 341 |
"node_i": "3402",
|
| 342 |
+
"native_tokenizers": [],
|
| 343 |
+
"scripts": []
|
| 344 |
}
|
| 345 |
],
|
| 346 |
+
"tokenizers": {},
|
| 347 |
"node_i": "3384",
|
| 348 |
+
"native_tokenizers": [],
|
| 349 |
+
"scripts": []
|
| 350 |
},
|
| 351 |
{
|
| 352 |
"name": "Votic",
|
| 353 |
"iso_1_code": null,
|
| 354 |
"iso_3_code": null,
|
|
|
|
| 355 |
"children": [
|
| 356 |
{
|
| 357 |
"name": "Mal\u00e9ku Ja\u00edka",
|
| 358 |
"iso_1_code": null,
|
| 359 |
"iso_3_code": "gut",
|
|
|
|
| 360 |
"children": [],
|
| 361 |
+
"tokenizers": {},
|
| 362 |
"node_i": "3406",
|
| 363 |
+
"native_tokenizers": [],
|
| 364 |
+
"scripts": []
|
| 365 |
},
|
| 366 |
{
|
| 367 |
"name": "Rama",
|
| 368 |
"iso_1_code": null,
|
| 369 |
"iso_3_code": "rma",
|
|
|
|
| 370 |
"children": [],
|
| 371 |
+
"tokenizers": {},
|
| 372 |
"node_i": "3407",
|
| 373 |
+
"native_tokenizers": [],
|
| 374 |
+
"scripts": []
|
| 375 |
}
|
| 376 |
],
|
| 377 |
+
"tokenizers": {},
|
| 378 |
"node_i": "3405",
|
| 379 |
+
"native_tokenizers": [],
|
| 380 |
+
"scripts": []
|
| 381 |
}
|
| 382 |
],
|
| 383 |
+
"tokenizers": {},
|
| 384 |
"node_i": "3382",
|
| 385 |
+
"native_tokenizers": [],
|
| 386 |
+
"scripts": []
|
| 387 |
}
|
| 388 |
],
|
| 389 |
+
"tokenizers": {},
|
| 390 |
"node_i": "3372",
|
| 391 |
+
"native_tokenizers": [],
|
| 392 |
+
"scripts": []
|
| 393 |
}
|
data/Chimakuan.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Chimakuan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Quileute",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "qui",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3409",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Chemakum",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "xch",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3410",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3408",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Chimakuan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Quileute",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "qui",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3409",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Chemakum",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "xch",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3410",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3408",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Chinookan.json
CHANGED
|
@@ -2,41 +2,41 @@
|
|
| 2 |
"name": "Chinookan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Chinook",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "chh",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3412",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Upper Chinookan",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": null,
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Wasco-Wishram",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "wac",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3414",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
}
|
| 33 |
],
|
|
|
|
| 34 |
"node_i": "3413",
|
| 35 |
-
"
|
| 36 |
-
"
|
| 37 |
}
|
| 38 |
],
|
|
|
|
| 39 |
"node_i": "3411",
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
}
|
|
|
|
| 2 |
"name": "Chinookan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Chinook",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "chh",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3412",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Upper Chinookan",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": null,
|
|
|
|
| 20 |
"children": [
|
| 21 |
{
|
| 22 |
"name": "Wasco-Wishram",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "wac",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3414",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
}
|
| 31 |
],
|
| 32 |
+
"tokenizers": {},
|
| 33 |
"node_i": "3413",
|
| 34 |
+
"native_tokenizers": [],
|
| 35 |
+
"scripts": []
|
| 36 |
}
|
| 37 |
],
|
| 38 |
+
"tokenizers": {},
|
| 39 |
"node_i": "3411",
|
| 40 |
+
"native_tokenizers": [],
|
| 41 |
+
"scripts": []
|
| 42 |
}
|
data/Chipaya-Uru.json
CHANGED
|
@@ -2,32 +2,32 @@
|
|
| 2 |
"name": "Chipaya-Uru",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Chipaya",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "cap",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3416",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Uru",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "ure",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3417",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
}
|
| 29 |
],
|
|
|
|
| 30 |
"node_i": "3415",
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
}
|
|
|
|
| 2 |
"name": "Chipaya-Uru",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Chipaya",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "cap",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3416",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Uru",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "ure",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3417",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
+
"scripts": []
|
| 27 |
}
|
| 28 |
],
|
| 29 |
+
"tokenizers": {},
|
| 30 |
"node_i": "3415",
|
| 31 |
+
"native_tokenizers": [],
|
| 32 |
+
"scripts": []
|
| 33 |
}
|
data/Chocoan.json
CHANGED
|
@@ -2,121 +2,121 @@
|
|
| 2 |
"name": "Chocoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Woun Meu",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "noa",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3419",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Ember\u00e1",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": null,
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [
|
| 25 |
{
|
| 26 |
"name": "Northern Ember\u00e1",
|
| 27 |
"iso_1_code": null,
|
| 28 |
"iso_3_code": null,
|
| 29 |
-
"tokenizers": {},
|
| 30 |
"children": [
|
| 31 |
{
|
| 32 |
"name": "Embera Cat\u00edo",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": "cto",
|
| 35 |
-
"tokenizers": {},
|
| 36 |
"children": [],
|
|
|
|
| 37 |
"node_i": "3422",
|
|
|
|
| 38 |
"scripts": [
|
| 39 |
"Latn"
|
| 40 |
-
]
|
| 41 |
-
"own_tokenizer": false
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"name": "Ember\u00e1, Northern",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "emp",
|
| 47 |
-
"tokenizers": {},
|
| 48 |
"children": [],
|
|
|
|
| 49 |
"node_i": "3423",
|
|
|
|
| 50 |
"scripts": [
|
| 51 |
"Latn"
|
| 52 |
-
]
|
| 53 |
-
"own_tokenizer": false
|
| 54 |
}
|
| 55 |
],
|
|
|
|
| 56 |
"node_i": "3421",
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"name": "Southern Ember\u00e1",
|
| 62 |
"iso_1_code": null,
|
| 63 |
"iso_3_code": null,
|
| 64 |
-
"tokenizers": {},
|
| 65 |
"children": [
|
| 66 |
{
|
| 67 |
"name": "Embera Baud\u00f3",
|
| 68 |
"iso_1_code": null,
|
| 69 |
"iso_3_code": "bdc",
|
| 70 |
-
"tokenizers": {},
|
| 71 |
"children": [],
|
|
|
|
| 72 |
"node_i": "3425",
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"name": "Embera Cham\u00ed",
|
| 78 |
"iso_1_code": null,
|
| 79 |
"iso_3_code": "cmi",
|
| 80 |
-
"tokenizers": {},
|
| 81 |
"children": [],
|
|
|
|
| 82 |
"node_i": "3426",
|
| 83 |
-
"
|
| 84 |
-
"
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "Epena",
|
| 88 |
"iso_1_code": null,
|
| 89 |
"iso_3_code": "sja",
|
| 90 |
-
"tokenizers": {},
|
| 91 |
"children": [],
|
|
|
|
| 92 |
"node_i": "3427",
|
|
|
|
| 93 |
"scripts": [
|
| 94 |
"Latn"
|
| 95 |
-
]
|
| 96 |
-
"own_tokenizer": false
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"name": "Embera Tad\u00f3",
|
| 100 |
"iso_1_code": null,
|
| 101 |
"iso_3_code": "tdc",
|
| 102 |
-
"tokenizers": {},
|
| 103 |
"children": [],
|
|
|
|
| 104 |
"node_i": "3428",
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
}
|
| 108 |
],
|
|
|
|
| 109 |
"node_i": "3424",
|
| 110 |
-
"
|
| 111 |
-
"
|
| 112 |
}
|
| 113 |
],
|
|
|
|
| 114 |
"node_i": "3420",
|
| 115 |
-
"
|
| 116 |
-
"
|
| 117 |
}
|
| 118 |
],
|
|
|
|
| 119 |
"node_i": "3418",
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
}
|
|
|
|
| 2 |
"name": "Chocoan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Woun Meu",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "noa",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3419",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Ember\u00e1",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": null,
|
|
|
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Northern Ember\u00e1",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": null,
|
|
|
|
| 27 |
"children": [
|
| 28 |
{
|
| 29 |
"name": "Embera Cat\u00edo",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": "cto",
|
|
|
|
| 32 |
"children": [],
|
| 33 |
+
"tokenizers": {},
|
| 34 |
"node_i": "3422",
|
| 35 |
+
"native_tokenizers": [],
|
| 36 |
"scripts": [
|
| 37 |
"Latn"
|
| 38 |
+
]
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"name": "Ember\u00e1, Northern",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": "emp",
|
|
|
|
| 44 |
"children": [],
|
| 45 |
+
"tokenizers": {},
|
| 46 |
"node_i": "3423",
|
| 47 |
+
"native_tokenizers": [],
|
| 48 |
"scripts": [
|
| 49 |
"Latn"
|
| 50 |
+
]
|
|
|
|
| 51 |
}
|
| 52 |
],
|
| 53 |
+
"tokenizers": {},
|
| 54 |
"node_i": "3421",
|
| 55 |
+
"native_tokenizers": [],
|
| 56 |
+
"scripts": []
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"name": "Southern Ember\u00e1",
|
| 60 |
"iso_1_code": null,
|
| 61 |
"iso_3_code": null,
|
|
|
|
| 62 |
"children": [
|
| 63 |
{
|
| 64 |
"name": "Embera Baud\u00f3",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": "bdc",
|
|
|
|
| 67 |
"children": [],
|
| 68 |
+
"tokenizers": {},
|
| 69 |
"node_i": "3425",
|
| 70 |
+
"native_tokenizers": [],
|
| 71 |
+
"scripts": []
|
| 72 |
},
|
| 73 |
{
|
| 74 |
"name": "Embera Cham\u00ed",
|
| 75 |
"iso_1_code": null,
|
| 76 |
"iso_3_code": "cmi",
|
|
|
|
| 77 |
"children": [],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3426",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
},
|
| 83 |
{
|
| 84 |
"name": "Epena",
|
| 85 |
"iso_1_code": null,
|
| 86 |
"iso_3_code": "sja",
|
|
|
|
| 87 |
"children": [],
|
| 88 |
+
"tokenizers": {},
|
| 89 |
"node_i": "3427",
|
| 90 |
+
"native_tokenizers": [],
|
| 91 |
"scripts": [
|
| 92 |
"Latn"
|
| 93 |
+
]
|
|
|
|
| 94 |
},
|
| 95 |
{
|
| 96 |
"name": "Embera Tad\u00f3",
|
| 97 |
"iso_1_code": null,
|
| 98 |
"iso_3_code": "tdc",
|
|
|
|
| 99 |
"children": [],
|
| 100 |
+
"tokenizers": {},
|
| 101 |
"node_i": "3428",
|
| 102 |
+
"native_tokenizers": [],
|
| 103 |
+
"scripts": []
|
| 104 |
}
|
| 105 |
],
|
| 106 |
+
"tokenizers": {},
|
| 107 |
"node_i": "3424",
|
| 108 |
+
"native_tokenizers": [],
|
| 109 |
+
"scripts": []
|
| 110 |
}
|
| 111 |
],
|
| 112 |
+
"tokenizers": {},
|
| 113 |
"node_i": "3420",
|
| 114 |
+
"native_tokenizers": [],
|
| 115 |
+
"scripts": []
|
| 116 |
}
|
| 117 |
],
|
| 118 |
+
"tokenizers": {},
|
| 119 |
"node_i": "3418",
|
| 120 |
+
"native_tokenizers": [],
|
| 121 |
+
"scripts": []
|
| 122 |
}
|
data/Cholonan.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Cholonan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Chol\u00f3n",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "cht",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3430",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Hibito",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "hib",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3431",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3429",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Cholonan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Chol\u00f3n",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "cht",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3430",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Hibito",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "hib",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3431",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3429",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Chon.json
CHANGED
|
@@ -2,41 +2,41 @@
|
|
| 2 |
"name": "Chon",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Tehuelche",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "teh",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3433",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Island Chon",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": null,
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Ona",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "ona",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3435",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
}
|
| 33 |
],
|
|
|
|
| 34 |
"node_i": "3434",
|
| 35 |
-
"
|
| 36 |
-
"
|
| 37 |
}
|
| 38 |
],
|
|
|
|
| 39 |
"node_i": "3432",
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
}
|
|
|
|
| 2 |
"name": "Chon",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Tehuelche",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "teh",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3433",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Island Chon",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": null,
|
|
|
|
| 20 |
"children": [
|
| 21 |
{
|
| 22 |
"name": "Ona",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "ona",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3435",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
}
|
| 31 |
],
|
| 32 |
+
"tokenizers": {},
|
| 33 |
"node_i": "3434",
|
| 34 |
+
"native_tokenizers": [],
|
| 35 |
+
"scripts": []
|
| 36 |
}
|
| 37 |
],
|
| 38 |
+
"tokenizers": {},
|
| 39 |
"node_i": "3432",
|
| 40 |
+
"native_tokenizers": [],
|
| 41 |
+
"scripts": []
|
| 42 |
}
|
data/Chukotko-Kamchatkan.json
CHANGED
|
@@ -2,108 +2,108 @@
|
|
| 2 |
"name": "Chukotko-Kamchatkan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Northern",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Chukot",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": null,
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [
|
| 19 |
{
|
| 20 |
"name": "Chukchi",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "ckt",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3439",
|
|
|
|
| 26 |
"scripts": [
|
| 27 |
"Cyrl"
|
| 28 |
-
]
|
| 29 |
-
"own_tokenizer": false
|
| 30 |
}
|
| 31 |
],
|
|
|
|
| 32 |
"node_i": "3438",
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Koryak-Alyutor",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": null,
|
| 40 |
-
"tokenizers": {},
|
| 41 |
"children": [
|
| 42 |
{
|
| 43 |
"name": "Alutor",
|
| 44 |
"iso_1_code": null,
|
| 45 |
"iso_3_code": "alr",
|
| 46 |
-
"tokenizers": {},
|
| 47 |
"children": [],
|
|
|
|
| 48 |
"node_i": "3441",
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"name": "Koryak",
|
| 54 |
"iso_1_code": null,
|
| 55 |
"iso_3_code": "kpy",
|
| 56 |
-
"tokenizers": {},
|
| 57 |
"children": [],
|
|
|
|
| 58 |
"node_i": "3442",
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"name": "Kerek",
|
| 64 |
"iso_1_code": null,
|
| 65 |
"iso_3_code": "krk",
|
| 66 |
-
"tokenizers": {},
|
| 67 |
"children": [],
|
|
|
|
| 68 |
"node_i": "3443",
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
}
|
| 72 |
],
|
|
|
|
| 73 |
"node_i": "3440",
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
}
|
| 77 |
],
|
|
|
|
| 78 |
"node_i": "3437",
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"name": "Southern",
|
| 84 |
"iso_1_code": null,
|
| 85 |
"iso_3_code": null,
|
| 86 |
-
"tokenizers": {},
|
| 87 |
"children": [
|
| 88 |
{
|
| 89 |
"name": "Itelmen",
|
| 90 |
"iso_1_code": null,
|
| 91 |
"iso_3_code": "itl",
|
| 92 |
-
"tokenizers": {},
|
| 93 |
"children": [],
|
|
|
|
| 94 |
"node_i": "3445",
|
|
|
|
| 95 |
"scripts": [
|
| 96 |
"Cyrl"
|
| 97 |
-
]
|
| 98 |
-
"own_tokenizer": false
|
| 99 |
}
|
| 100 |
],
|
|
|
|
| 101 |
"node_i": "3444",
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
}
|
| 105 |
],
|
|
|
|
| 106 |
"node_i": "3436",
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
}
|
|
|
|
| 2 |
"name": "Chukotko-Kamchatkan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Northern",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Chukot",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": null,
|
|
|
|
| 15 |
"children": [
|
| 16 |
{
|
| 17 |
"name": "Chukchi",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "ckt",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3439",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
"scripts": [
|
| 25 |
"Cyrl"
|
| 26 |
+
]
|
|
|
|
| 27 |
}
|
| 28 |
],
|
| 29 |
+
"tokenizers": {},
|
| 30 |
"node_i": "3438",
|
| 31 |
+
"native_tokenizers": [],
|
| 32 |
+
"scripts": []
|
| 33 |
},
|
| 34 |
{
|
| 35 |
"name": "Koryak-Alyutor",
|
| 36 |
"iso_1_code": null,
|
| 37 |
"iso_3_code": null,
|
|
|
|
| 38 |
"children": [
|
| 39 |
{
|
| 40 |
"name": "Alutor",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "alr",
|
|
|
|
| 43 |
"children": [],
|
| 44 |
+
"tokenizers": {},
|
| 45 |
"node_i": "3441",
|
| 46 |
+
"native_tokenizers": [],
|
| 47 |
+
"scripts": []
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Koryak",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": "kpy",
|
|
|
|
| 53 |
"children": [],
|
| 54 |
+
"tokenizers": {},
|
| 55 |
"node_i": "3442",
|
| 56 |
+
"native_tokenizers": [],
|
| 57 |
+
"scripts": []
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"name": "Kerek",
|
| 61 |
"iso_1_code": null,
|
| 62 |
"iso_3_code": "krk",
|
|
|
|
| 63 |
"children": [],
|
| 64 |
+
"tokenizers": {},
|
| 65 |
"node_i": "3443",
|
| 66 |
+
"native_tokenizers": [],
|
| 67 |
+
"scripts": []
|
| 68 |
}
|
| 69 |
],
|
| 70 |
+
"tokenizers": {},
|
| 71 |
"node_i": "3440",
|
| 72 |
+
"native_tokenizers": [],
|
| 73 |
+
"scripts": []
|
| 74 |
}
|
| 75 |
],
|
| 76 |
+
"tokenizers": {},
|
| 77 |
"node_i": "3437",
|
| 78 |
+
"native_tokenizers": [],
|
| 79 |
+
"scripts": []
|
| 80 |
},
|
| 81 |
{
|
| 82 |
"name": "Southern",
|
| 83 |
"iso_1_code": null,
|
| 84 |
"iso_3_code": null,
|
|
|
|
| 85 |
"children": [
|
| 86 |
{
|
| 87 |
"name": "Itelmen",
|
| 88 |
"iso_1_code": null,
|
| 89 |
"iso_3_code": "itl",
|
|
|
|
| 90 |
"children": [],
|
| 91 |
+
"tokenizers": {},
|
| 92 |
"node_i": "3445",
|
| 93 |
+
"native_tokenizers": [],
|
| 94 |
"scripts": [
|
| 95 |
"Cyrl"
|
| 96 |
+
]
|
|
|
|
| 97 |
}
|
| 98 |
],
|
| 99 |
+
"tokenizers": {},
|
| 100 |
"node_i": "3444",
|
| 101 |
+
"native_tokenizers": [],
|
| 102 |
+
"scripts": []
|
| 103 |
}
|
| 104 |
],
|
| 105 |
+
"tokenizers": {},
|
| 106 |
"node_i": "3436",
|
| 107 |
+
"native_tokenizers": [],
|
| 108 |
+
"scripts": []
|
| 109 |
}
|
data/Chumashan.json
CHANGED
|
@@ -2,92 +2,92 @@
|
|
| 2 |
"name": "Chumashan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Obispe\u00f1o",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "obi",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3447",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Central Chumash",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": null,
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Barbare\u00f1o",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "boi",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3449",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Inese\u00f1o",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": "inz",
|
| 37 |
-
"tokenizers": {},
|
| 38 |
"children": [],
|
|
|
|
| 39 |
"node_i": "3450",
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"name": "Purisime\u00f1o",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "puy",
|
| 47 |
-
"tokenizers": {},
|
| 48 |
"children": [],
|
|
|
|
| 49 |
"node_i": "3451",
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Venture\u00f1o",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "veo",
|
| 57 |
-
"tokenizers": {},
|
| 58 |
"children": [],
|
|
|
|
| 59 |
"node_i": "3452",
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
}
|
| 63 |
],
|
|
|
|
| 64 |
"node_i": "3448",
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Island Chumash",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": null,
|
| 72 |
-
"tokenizers": {},
|
| 73 |
"children": [
|
| 74 |
{
|
| 75 |
"name": "Cruze\u00f1o",
|
| 76 |
"iso_1_code": null,
|
| 77 |
"iso_3_code": "crz",
|
| 78 |
-
"tokenizers": {},
|
| 79 |
"children": [],
|
|
|
|
| 80 |
"node_i": "3454",
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
}
|
| 84 |
],
|
|
|
|
| 85 |
"node_i": "3453",
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
}
|
| 89 |
],
|
|
|
|
| 90 |
"node_i": "3446",
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
}
|
|
|
|
| 2 |
"name": "Chumashan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Obispe\u00f1o",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "obi",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3447",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Central Chumash",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": null,
|
|
|
|
| 20 |
"children": [
|
| 21 |
{
|
| 22 |
"name": "Barbare\u00f1o",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "boi",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3449",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Inese\u00f1o",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": "inz",
|
|
|
|
| 35 |
"children": [],
|
| 36 |
+
"tokenizers": {},
|
| 37 |
"node_i": "3450",
|
| 38 |
+
"native_tokenizers": [],
|
| 39 |
+
"scripts": []
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "Purisime\u00f1o",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": "puy",
|
|
|
|
| 45 |
"children": [],
|
| 46 |
+
"tokenizers": {},
|
| 47 |
"node_i": "3451",
|
| 48 |
+
"native_tokenizers": [],
|
| 49 |
+
"scripts": []
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"name": "Venture\u00f1o",
|
| 53 |
"iso_1_code": null,
|
| 54 |
"iso_3_code": "veo",
|
|
|
|
| 55 |
"children": [],
|
| 56 |
+
"tokenizers": {},
|
| 57 |
"node_i": "3452",
|
| 58 |
+
"native_tokenizers": [],
|
| 59 |
+
"scripts": []
|
| 60 |
}
|
| 61 |
],
|
| 62 |
+
"tokenizers": {},
|
| 63 |
"node_i": "3448",
|
| 64 |
+
"native_tokenizers": [],
|
| 65 |
+
"scripts": []
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"name": "Island Chumash",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": null,
|
|
|
|
| 71 |
"children": [
|
| 72 |
{
|
| 73 |
"name": "Cruze\u00f1o",
|
| 74 |
"iso_1_code": null,
|
| 75 |
"iso_3_code": "crz",
|
|
|
|
| 76 |
"children": [],
|
| 77 |
+
"tokenizers": {},
|
| 78 |
"node_i": "3454",
|
| 79 |
+
"native_tokenizers": [],
|
| 80 |
+
"scripts": []
|
| 81 |
}
|
| 82 |
],
|
| 83 |
+
"tokenizers": {},
|
| 84 |
"node_i": "3453",
|
| 85 |
+
"native_tokenizers": [],
|
| 86 |
+
"scripts": []
|
| 87 |
}
|
| 88 |
],
|
| 89 |
+
"tokenizers": {},
|
| 90 |
"node_i": "3446",
|
| 91 |
+
"native_tokenizers": [],
|
| 92 |
+
"scripts": []
|
| 93 |
}
|
data/Cochimí-Yuman.json
CHANGED
|
@@ -2,155 +2,155 @@
|
|
| 2 |
"name": "Cochim\u00ed-Yuman",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Yuman",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Cochimi",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "coj",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3457",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Kiliwa",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "klb",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3458",
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Delta-California",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": null,
|
| 37 |
-
"tokenizers": {},
|
| 38 |
"children": [
|
| 39 |
{
|
| 40 |
"name": "Cocopa",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "coc",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "3460",
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Kumiai",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": "dih",
|
| 53 |
-
"tokenizers": {},
|
| 54 |
"children": [],
|
|
|
|
| 55 |
"node_i": "3461",
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
}
|
| 59 |
],
|
|
|
|
| 60 |
"node_i": "3459",
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"name": "Pai",
|
| 66 |
"iso_1_code": null,
|
| 67 |
"iso_3_code": null,
|
| 68 |
-
"tokenizers": {},
|
| 69 |
"children": [
|
| 70 |
{
|
| 71 |
"name": "Paipai",
|
| 72 |
"iso_1_code": null,
|
| 73 |
"iso_3_code": "ppi",
|
| 74 |
-
"tokenizers": {},
|
| 75 |
"children": [],
|
|
|
|
| 76 |
"node_i": "3463",
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "Havasupai-Walapai-Yavapai",
|
| 82 |
"iso_1_code": null,
|
| 83 |
"iso_3_code": "yuf",
|
| 84 |
-
"tokenizers": {},
|
| 85 |
"children": [],
|
|
|
|
| 86 |
"node_i": "3464",
|
| 87 |
-
"
|
| 88 |
-
"
|
| 89 |
}
|
| 90 |
],
|
|
|
|
| 91 |
"node_i": "3462",
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
},
|
| 95 |
{
|
| 96 |
"name": "River",
|
| 97 |
"iso_1_code": null,
|
| 98 |
"iso_3_code": null,
|
| 99 |
-
"tokenizers": {},
|
| 100 |
"children": [
|
| 101 |
{
|
| 102 |
"name": "Mojave",
|
| 103 |
"iso_1_code": null,
|
| 104 |
"iso_3_code": null,
|
| 105 |
-
"tokenizers": {},
|
| 106 |
"children": [
|
| 107 |
{
|
| 108 |
"name": "Mohave",
|
| 109 |
"iso_1_code": null,
|
| 110 |
"iso_3_code": "mov",
|
| 111 |
-
"tokenizers": {},
|
| 112 |
"children": [],
|
|
|
|
| 113 |
"node_i": "3467",
|
| 114 |
-
"
|
| 115 |
-
"
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"name": "Maricopa",
|
| 119 |
"iso_1_code": null,
|
| 120 |
"iso_3_code": "mrc",
|
| 121 |
-
"tokenizers": {},
|
| 122 |
"children": [],
|
|
|
|
| 123 |
"node_i": "3468",
|
| 124 |
-
"
|
| 125 |
-
"
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"name": "Quechan",
|
| 129 |
"iso_1_code": null,
|
| 130 |
"iso_3_code": "yum",
|
| 131 |
-
"tokenizers": {},
|
| 132 |
"children": [],
|
|
|
|
| 133 |
"node_i": "3469",
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
}
|
| 137 |
],
|
|
|
|
| 138 |
"node_i": "3466",
|
| 139 |
-
"
|
| 140 |
-
"
|
| 141 |
}
|
| 142 |
],
|
|
|
|
| 143 |
"node_i": "3465",
|
| 144 |
-
"
|
| 145 |
-
"
|
| 146 |
}
|
| 147 |
],
|
|
|
|
| 148 |
"node_i": "3456",
|
| 149 |
-
"
|
| 150 |
-
"
|
| 151 |
}
|
| 152 |
],
|
|
|
|
| 153 |
"node_i": "3455",
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
}
|
|
|
|
| 2 |
"name": "Cochim\u00ed-Yuman",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Yuman",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Cochimi",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "coj",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3457",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Kiliwa",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "klb",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3458",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Delta-California",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": null,
|
|
|
|
| 35 |
"children": [
|
| 36 |
{
|
| 37 |
"name": "Cocopa",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "coc",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3460",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Kumiai",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "dih",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3461",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
}
|
| 56 |
],
|
| 57 |
+
"tokenizers": {},
|
| 58 |
"node_i": "3459",
|
| 59 |
+
"native_tokenizers": [],
|
| 60 |
+
"scripts": []
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"name": "Pai",
|
| 64 |
"iso_1_code": null,
|
| 65 |
"iso_3_code": null,
|
|
|
|
| 66 |
"children": [
|
| 67 |
{
|
| 68 |
"name": "Paipai",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "ppi",
|
|
|
|
| 71 |
"children": [],
|
| 72 |
+
"tokenizers": {},
|
| 73 |
"node_i": "3463",
|
| 74 |
+
"native_tokenizers": [],
|
| 75 |
+
"scripts": []
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"name": "Havasupai-Walapai-Yavapai",
|
| 79 |
"iso_1_code": null,
|
| 80 |
"iso_3_code": "yuf",
|
|
|
|
| 81 |
"children": [],
|
| 82 |
+
"tokenizers": {},
|
| 83 |
"node_i": "3464",
|
| 84 |
+
"native_tokenizers": [],
|
| 85 |
+
"scripts": []
|
| 86 |
}
|
| 87 |
],
|
| 88 |
+
"tokenizers": {},
|
| 89 |
"node_i": "3462",
|
| 90 |
+
"native_tokenizers": [],
|
| 91 |
+
"scripts": []
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"name": "River",
|
| 95 |
"iso_1_code": null,
|
| 96 |
"iso_3_code": null,
|
|
|
|
| 97 |
"children": [
|
| 98 |
{
|
| 99 |
"name": "Mojave",
|
| 100 |
"iso_1_code": null,
|
| 101 |
"iso_3_code": null,
|
|
|
|
| 102 |
"children": [
|
| 103 |
{
|
| 104 |
"name": "Mohave",
|
| 105 |
"iso_1_code": null,
|
| 106 |
"iso_3_code": "mov",
|
|
|
|
| 107 |
"children": [],
|
| 108 |
+
"tokenizers": {},
|
| 109 |
"node_i": "3467",
|
| 110 |
+
"native_tokenizers": [],
|
| 111 |
+
"scripts": []
|
| 112 |
},
|
| 113 |
{
|
| 114 |
"name": "Maricopa",
|
| 115 |
"iso_1_code": null,
|
| 116 |
"iso_3_code": "mrc",
|
|
|
|
| 117 |
"children": [],
|
| 118 |
+
"tokenizers": {},
|
| 119 |
"node_i": "3468",
|
| 120 |
+
"native_tokenizers": [],
|
| 121 |
+
"scripts": []
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"name": "Quechan",
|
| 125 |
"iso_1_code": null,
|
| 126 |
"iso_3_code": "yum",
|
|
|
|
| 127 |
"children": [],
|
| 128 |
+
"tokenizers": {},
|
| 129 |
"node_i": "3469",
|
| 130 |
+
"native_tokenizers": [],
|
| 131 |
+
"scripts": []
|
| 132 |
}
|
| 133 |
],
|
| 134 |
+
"tokenizers": {},
|
| 135 |
"node_i": "3466",
|
| 136 |
+
"native_tokenizers": [],
|
| 137 |
+
"scripts": []
|
| 138 |
}
|
| 139 |
],
|
| 140 |
+
"tokenizers": {},
|
| 141 |
"node_i": "3465",
|
| 142 |
+
"native_tokenizers": [],
|
| 143 |
+
"scripts": []
|
| 144 |
}
|
| 145 |
],
|
| 146 |
+
"tokenizers": {},
|
| 147 |
"node_i": "3456",
|
| 148 |
+
"native_tokenizers": [],
|
| 149 |
+
"scripts": []
|
| 150 |
}
|
| 151 |
],
|
| 152 |
+
"tokenizers": {},
|
| 153 |
"node_i": "3455",
|
| 154 |
+
"native_tokenizers": [],
|
| 155 |
+
"scripts": []
|
| 156 |
}
|
data/Comecrudan.json
CHANGED
|
@@ -2,60 +2,60 @@
|
|
| 2 |
"name": "Comecrudan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Mamulique",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "emm",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3471",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Comecrudo",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "xcm",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3472",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "Cotoname",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": "xcn",
|
| 31 |
-
"tokenizers": {},
|
| 32 |
"children": [],
|
|
|
|
| 33 |
"node_i": "3473",
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Coahuilteco",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": "xcw",
|
| 41 |
-
"tokenizers": {},
|
| 42 |
"children": [],
|
|
|
|
| 43 |
"node_i": "3474",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"name": "Garza",
|
| 49 |
"iso_1_code": null,
|
| 50 |
"iso_3_code": "xgr",
|
| 51 |
-
"tokenizers": {},
|
| 52 |
"children": [],
|
|
|
|
| 53 |
"node_i": "3475",
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
}
|
| 57 |
],
|
|
|
|
| 58 |
"node_i": "3470",
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
}
|
|
|
|
| 2 |
"name": "Comecrudan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Mamulique",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "emm",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3471",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Comecrudo",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "xcm",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3472",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Cotoname",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "xcn",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3473",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Coahuilteco",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "xcw",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3474",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Garza",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "xgr",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3475",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
}
|
| 56 |
],
|
| 57 |
+
"tokenizers": {},
|
| 58 |
"node_i": "3470",
|
| 59 |
+
"native_tokenizers": [],
|
| 60 |
+
"scripts": []
|
| 61 |
}
|
data/Constructed language.json
CHANGED
|
@@ -2,22 +2,22 @@
|
|
| 2 |
"name": "Constructed language",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Esperanto",
|
| 9 |
"iso_1_code": "eo",
|
| 10 |
"iso_3_code": "epo",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3477",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
}
|
| 19 |
],
|
|
|
|
| 20 |
"node_i": "3476",
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
}
|
|
|
|
| 2 |
"name": "Constructed language",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Esperanto",
|
| 8 |
"iso_1_code": "eo",
|
| 9 |
"iso_3_code": "epo",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3477",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
}
|
| 18 |
],
|
| 19 |
+
"tokenizers": {},
|
| 20 |
"node_i": "3476",
|
| 21 |
+
"native_tokenizers": [],
|
| 22 |
+
"scripts": []
|
| 23 |
}
|
data/Coosan.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Coosan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Coos",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "csz",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3479",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Miluk",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "iml",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3480",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3478",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Coosan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Coos",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "csz",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3479",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Miluk",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "iml",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3480",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3478",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Creole.json
CHANGED
|
@@ -2,2288 +2,1742 @@
|
|
| 2 |
"name": "Creole",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {
|
| 6 |
-
"Arab": {
|
| 7 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 8 |
-
"original_lang_name": "malay",
|
| 9 |
-
"original_lang_code": "msa",
|
| 10 |
-
"scripts": [
|
| 11 |
-
"Latn",
|
| 12 |
-
"Arab",
|
| 13 |
-
"Thai"
|
| 14 |
-
],
|
| 15 |
-
"class_name": "SpaCyTokenizer",
|
| 16 |
-
"macrolanguage": true
|
| 17 |
-
},
|
| 18 |
-
"Latn": {
|
| 19 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 20 |
-
"original_lang_name": "malay",
|
| 21 |
-
"original_lang_code": "msa",
|
| 22 |
-
"scripts": [
|
| 23 |
-
"Latn",
|
| 24 |
-
"Arab",
|
| 25 |
-
"Thai"
|
| 26 |
-
],
|
| 27 |
-
"class_name": "SpaCyTokenizer",
|
| 28 |
-
"macrolanguage": true
|
| 29 |
-
},
|
| 30 |
-
"Thai": {
|
| 31 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 32 |
-
"original_lang_name": "malay",
|
| 33 |
-
"original_lang_code": "msa",
|
| 34 |
-
"scripts": [
|
| 35 |
-
"Latn",
|
| 36 |
-
"Arab",
|
| 37 |
-
"Thai"
|
| 38 |
-
],
|
| 39 |
-
"class_name": "SpaCyTokenizer",
|
| 40 |
-
"macrolanguage": true
|
| 41 |
-
}
|
| 42 |
-
},
|
| 43 |
"children": [
|
| 44 |
{
|
| 45 |
"name": "Afrikaans based",
|
| 46 |
"iso_1_code": null,
|
| 47 |
"iso_3_code": null,
|
| 48 |
-
"tokenizers": {},
|
| 49 |
"children": [
|
| 50 |
{
|
| 51 |
"name": "Flaaitaal",
|
| 52 |
"iso_1_code": null,
|
| 53 |
"iso_3_code": "fly",
|
| 54 |
-
"tokenizers": {},
|
| 55 |
"children": [],
|
|
|
|
| 56 |
"node_i": "3483",
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"name": "Oorlams",
|
| 62 |
"iso_1_code": null,
|
| 63 |
"iso_3_code": "oor",
|
| 64 |
-
"tokenizers": {},
|
| 65 |
"children": [],
|
|
|
|
| 66 |
"node_i": "3484",
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
}
|
| 70 |
],
|
|
|
|
| 71 |
"node_i": "3482",
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"name": "Arabic based",
|
| 77 |
"iso_1_code": null,
|
| 78 |
"iso_3_code": null,
|
| 79 |
-
"tokenizers": {
|
| 80 |
-
"Arab": {
|
| 81 |
-
"full_object": "SpaCyTokenizer(\"ar\")",
|
| 82 |
-
"original_lang_name": "arabic",
|
| 83 |
-
"original_lang_code": "ara",
|
| 84 |
-
"scripts": [
|
| 85 |
-
"Arab"
|
| 86 |
-
],
|
| 87 |
-
"class_name": "SpaCyTokenizer",
|
| 88 |
-
"macrolanguage": true
|
| 89 |
-
}
|
| 90 |
-
},
|
| 91 |
"children": [
|
| 92 |
{
|
| 93 |
"name": "Nubi",
|
| 94 |
"iso_1_code": null,
|
| 95 |
"iso_3_code": "kcn",
|
| 96 |
-
"tokenizers": {},
|
| 97 |
"children": [],
|
|
|
|
| 98 |
"node_i": "3486",
|
| 99 |
-
"
|
| 100 |
-
"
|
| 101 |
},
|
| 102 |
{
|
| 103 |
"name": "Arabic, Juba",
|
| 104 |
"iso_1_code": "ar",
|
| 105 |
"iso_3_code": "pga",
|
| 106 |
-
"tokenizers": {
|
| 107 |
-
"Arab": {
|
| 108 |
-
"full_object": "SpaCyTokenizer(\"ar\")",
|
| 109 |
-
"original_lang_name": "arabic",
|
| 110 |
-
"original_lang_code": "ara",
|
| 111 |
-
"scripts": [
|
| 112 |
-
"Arab"
|
| 113 |
-
],
|
| 114 |
-
"class_name": "SpaCyTokenizer",
|
| 115 |
-
"macrolanguage": true
|
| 116 |
-
}
|
| 117 |
-
},
|
| 118 |
"children": [],
|
|
|
|
| 119 |
"node_i": "3487",
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
}
|
| 123 |
],
|
|
|
|
| 124 |
"node_i": "3485",
|
| 125 |
-
"
|
| 126 |
-
"
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"name": "Assamese based",
|
| 130 |
"iso_1_code": null,
|
| 131 |
"iso_3_code": null,
|
| 132 |
-
"tokenizers": {},
|
| 133 |
"children": [
|
| 134 |
{
|
| 135 |
"name": "Nagamese",
|
| 136 |
"iso_1_code": null,
|
| 137 |
"iso_3_code": "nag",
|
| 138 |
-
"tokenizers": {},
|
| 139 |
"children": [],
|
|
|
|
| 140 |
"node_i": "3489",
|
| 141 |
-
"
|
| 142 |
-
"
|
| 143 |
}
|
| 144 |
],
|
|
|
|
| 145 |
"node_i": "3488",
|
| 146 |
-
"
|
| 147 |
-
"
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"name": "Dutch based",
|
| 151 |
"iso_1_code": null,
|
| 152 |
"iso_3_code": null,
|
| 153 |
-
"tokenizers": {},
|
| 154 |
"children": [
|
| 155 |
{
|
| 156 |
"name": "Berbice Dutch Creole",
|
| 157 |
"iso_1_code": null,
|
| 158 |
"iso_3_code": "brc",
|
| 159 |
-
"tokenizers": {},
|
| 160 |
"children": [],
|
|
|
|
| 161 |
"node_i": "3491",
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"name": "Negerhollands",
|
| 167 |
"iso_1_code": null,
|
| 168 |
"iso_3_code": "dcr",
|
| 169 |
-
"tokenizers": {},
|
| 170 |
"children": [],
|
|
|
|
| 171 |
"node_i": "3492",
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"name": "Javindo",
|
| 177 |
"iso_1_code": null,
|
| 178 |
"iso_3_code": "jvd",
|
| 179 |
-
"tokenizers": {},
|
| 180 |
"children": [],
|
|
|
|
| 181 |
"node_i": "3493",
|
| 182 |
-
"
|
| 183 |
-
"
|
| 184 |
},
|
| 185 |
{
|
| 186 |
"name": "Petjo",
|
| 187 |
"iso_1_code": null,
|
| 188 |
"iso_3_code": "pey",
|
| 189 |
-
"tokenizers": {},
|
| 190 |
"children": [],
|
|
|
|
| 191 |
"node_i": "3494",
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"name": "Skepi Dutch Creole",
|
| 197 |
"iso_1_code": null,
|
| 198 |
"iso_3_code": "skw",
|
| 199 |
-
"tokenizers": {},
|
| 200 |
"children": [],
|
|
|
|
| 201 |
"node_i": "3495",
|
| 202 |
-
"
|
| 203 |
-
"
|
| 204 |
}
|
| 205 |
],
|
|
|
|
| 206 |
"node_i": "3490",
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
},
|
| 210 |
{
|
| 211 |
"name": "English based",
|
| 212 |
"iso_1_code": null,
|
| 213 |
"iso_3_code": null,
|
| 214 |
-
"tokenizers": {
|
| 215 |
-
"Latn": {
|
| 216 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 217 |
-
"original_lang_name": "nigerian_pidgin",
|
| 218 |
-
"original_lang_code": "pcm",
|
| 219 |
-
"scripts": [
|
| 220 |
-
"Latn"
|
| 221 |
-
],
|
| 222 |
-
"class_name": "StanzaTokenizer",
|
| 223 |
-
"macrolanguage": false
|
| 224 |
-
}
|
| 225 |
-
},
|
| 226 |
"children": [
|
| 227 |
{
|
| 228 |
"name": "Saramaccan",
|
| 229 |
"iso_1_code": null,
|
| 230 |
"iso_3_code": "srm",
|
|
|
|
| 231 |
"tokenizers": {
|
| 232 |
"Latn": {
|
| 233 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 234 |
"original_lang_name": "nigerian_pidgin",
|
| 235 |
"original_lang_code": "pcm",
|
| 236 |
-
"
|
| 237 |
-
|
| 238 |
-
],
|
| 239 |
-
"class_name": "StanzaTokenizer",
|
| 240 |
-
"macrolanguage": false
|
| 241 |
}
|
| 242 |
},
|
| 243 |
-
"children": [],
|
| 244 |
"node_i": "3497",
|
|
|
|
| 245 |
"scripts": [
|
| 246 |
"Latn"
|
| 247 |
-
]
|
| 248 |
-
"own_tokenizer": false
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"name": "Atlantic",
|
| 252 |
"iso_1_code": null,
|
| 253 |
"iso_3_code": null,
|
| 254 |
-
"tokenizers": {
|
| 255 |
-
"Latn": {
|
| 256 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 257 |
-
"original_lang_name": "nigerian_pidgin",
|
| 258 |
-
"original_lang_code": "pcm",
|
| 259 |
-
"scripts": [
|
| 260 |
-
"Latn"
|
| 261 |
-
],
|
| 262 |
-
"class_name": "StanzaTokenizer",
|
| 263 |
-
"macrolanguage": false
|
| 264 |
-
}
|
| 265 |
-
},
|
| 266 |
"children": [
|
| 267 |
{
|
| 268 |
"name": "Eastern",
|
| 269 |
"iso_1_code": null,
|
| 270 |
"iso_3_code": null,
|
| 271 |
-
"tokenizers": {
|
| 272 |
-
"Latn": {
|
| 273 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 274 |
-
"original_lang_name": "nigerian_pidgin",
|
| 275 |
-
"original_lang_code": "pcm",
|
| 276 |
-
"scripts": [
|
| 277 |
-
"Latn"
|
| 278 |
-
],
|
| 279 |
-
"class_name": "StanzaTokenizer",
|
| 280 |
-
"macrolanguage": false
|
| 281 |
-
}
|
| 282 |
-
},
|
| 283 |
"children": [
|
| 284 |
{
|
| 285 |
"name": "Turks and Caicos English Creole",
|
| 286 |
"iso_1_code": null,
|
| 287 |
"iso_3_code": "tch",
|
| 288 |
-
"tokenizers": {},
|
| 289 |
"children": [],
|
|
|
|
| 290 |
"node_i": "3500",
|
| 291 |
-
"
|
| 292 |
-
"
|
| 293 |
},
|
| 294 |
{
|
| 295 |
"name": "Northern",
|
| 296 |
"iso_1_code": null,
|
| 297 |
"iso_3_code": null,
|
| 298 |
-
"tokenizers": {
|
| 299 |
-
"Latn": {
|
| 300 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 301 |
-
"original_lang_name": "nigerian_pidgin",
|
| 302 |
-
"original_lang_code": "pcm",
|
| 303 |
-
"scripts": [
|
| 304 |
-
"Latn"
|
| 305 |
-
],
|
| 306 |
-
"class_name": "StanzaTokenizer",
|
| 307 |
-
"macrolanguage": false
|
| 308 |
-
}
|
| 309 |
-
},
|
| 310 |
"children": [
|
| 311 |
{
|
| 312 |
"name": "Afro-Seminole Creole",
|
| 313 |
"iso_1_code": null,
|
| 314 |
"iso_3_code": "afs",
|
| 315 |
-
"tokenizers": {},
|
| 316 |
"children": [],
|
|
|
|
| 317 |
"node_i": "3502",
|
| 318 |
-
"
|
| 319 |
-
"
|
| 320 |
},
|
| 321 |
{
|
| 322 |
"name": "Bahamas English Creole",
|
| 323 |
"iso_1_code": null,
|
| 324 |
"iso_3_code": "bah",
|
| 325 |
-
"tokenizers": {},
|
| 326 |
"children": [],
|
|
|
|
| 327 |
"node_i": "3503",
|
| 328 |
-
"
|
| 329 |
-
"
|
| 330 |
},
|
| 331 |
{
|
| 332 |
"name": "Sea Island English Creole",
|
| 333 |
"iso_1_code": null,
|
| 334 |
"iso_3_code": "gul",
|
|
|
|
| 335 |
"tokenizers": {
|
| 336 |
"Latn": {
|
| 337 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 338 |
"original_lang_name": "nigerian_pidgin",
|
| 339 |
"original_lang_code": "pcm",
|
| 340 |
-
"
|
| 341 |
-
|
| 342 |
-
],
|
| 343 |
-
"class_name": "StanzaTokenizer",
|
| 344 |
-
"macrolanguage": false
|
| 345 |
}
|
| 346 |
},
|
| 347 |
-
"children": [],
|
| 348 |
"node_i": "3504",
|
|
|
|
| 349 |
"scripts": [
|
| 350 |
"Latn"
|
| 351 |
-
]
|
| 352 |
-
"own_tokenizer": false
|
| 353 |
}
|
| 354 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
"node_i": "3501",
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
},
|
| 359 |
{
|
| 360 |
"name": "Southern",
|
| 361 |
"iso_1_code": null,
|
| 362 |
"iso_3_code": null,
|
| 363 |
-
"tokenizers": {},
|
| 364 |
"children": [
|
| 365 |
{
|
| 366 |
"name": "Leeward Caribbean English Creole",
|
| 367 |
"iso_1_code": null,
|
| 368 |
"iso_3_code": "aig",
|
| 369 |
-
"tokenizers": {},
|
| 370 |
"children": [],
|
|
|
|
| 371 |
"node_i": "3506",
|
| 372 |
-
"
|
| 373 |
-
"
|
| 374 |
},
|
| 375 |
{
|
| 376 |
"name": "Bajan",
|
| 377 |
"iso_1_code": null,
|
| 378 |
"iso_3_code": "bjs",
|
| 379 |
-
"tokenizers": {},
|
| 380 |
"children": [],
|
|
|
|
| 381 |
"node_i": "3507",
|
| 382 |
-
"
|
| 383 |
-
"
|
| 384 |
},
|
| 385 |
{
|
| 386 |
"name": "Grenadian English Creole",
|
| 387 |
"iso_1_code": null,
|
| 388 |
"iso_3_code": "gcl",
|
| 389 |
-
"tokenizers": {},
|
| 390 |
"children": [],
|
|
|
|
| 391 |
"node_i": "3508",
|
| 392 |
-
"
|
| 393 |
-
"
|
| 394 |
},
|
| 395 |
{
|
| 396 |
"name": "Guyanese English Creole",
|
| 397 |
"iso_1_code": null,
|
| 398 |
"iso_3_code": "gyn",
|
| 399 |
-
"tokenizers": {},
|
| 400 |
"children": [],
|
|
|
|
| 401 |
"node_i": "3509",
|
| 402 |
-
"
|
| 403 |
-
"
|
| 404 |
},
|
| 405 |
{
|
| 406 |
"name": "Vincentian English Creole",
|
| 407 |
"iso_1_code": null,
|
| 408 |
"iso_3_code": "svc",
|
| 409 |
-
"tokenizers": {},
|
| 410 |
"children": [],
|
|
|
|
| 411 |
"node_i": "3510",
|
| 412 |
-
"
|
| 413 |
-
"
|
| 414 |
},
|
| 415 |
{
|
| 416 |
"name": "Tobagonian English Creole",
|
| 417 |
"iso_1_code": null,
|
| 418 |
"iso_3_code": "tgh",
|
| 419 |
-
"tokenizers": {},
|
| 420 |
"children": [],
|
|
|
|
| 421 |
"node_i": "3511",
|
| 422 |
-
"
|
| 423 |
-
"
|
| 424 |
},
|
| 425 |
{
|
| 426 |
"name": "Trinidadian English Creole",
|
| 427 |
"iso_1_code": null,
|
| 428 |
"iso_3_code": "trf",
|
| 429 |
-
"tokenizers": {},
|
| 430 |
"children": [],
|
|
|
|
| 431 |
"node_i": "3512",
|
| 432 |
-
"
|
| 433 |
-
"
|
| 434 |
},
|
| 435 |
{
|
| 436 |
"name": "Virgin Islands English Creole",
|
| 437 |
"iso_1_code": null,
|
| 438 |
"iso_3_code": "vic",
|
| 439 |
-
"tokenizers": {},
|
| 440 |
"children": [],
|
|
|
|
| 441 |
"node_i": "3513",
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
}
|
| 445 |
],
|
|
|
|
| 446 |
"node_i": "3505",
|
| 447 |
-
"
|
| 448 |
-
"
|
| 449 |
}
|
| 450 |
],
|
| 451 |
-
"node_i": "3499",
|
| 452 |
-
"scripts": [],
|
| 453 |
-
"own_tokenizer": false
|
| 454 |
-
},
|
| 455 |
-
{
|
| 456 |
-
"name": "Krio",
|
| 457 |
-
"iso_1_code": null,
|
| 458 |
-
"iso_3_code": null,
|
| 459 |
"tokenizers": {
|
| 460 |
"Latn": {
|
| 461 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 462 |
"original_lang_name": "nigerian_pidgin",
|
| 463 |
"original_lang_code": "pcm",
|
| 464 |
-
"
|
| 465 |
-
|
| 466 |
-
],
|
| 467 |
-
"class_name": "StanzaTokenizer",
|
| 468 |
-
"macrolanguage": false
|
| 469 |
}
|
| 470 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
"children": [
|
| 472 |
{
|
| 473 |
"name": "Equatorial Guinean Pidgin",
|
| 474 |
"iso_1_code": null,
|
| 475 |
"iso_3_code": "fpe",
|
| 476 |
-
"tokenizers": {},
|
| 477 |
"children": [],
|
|
|
|
| 478 |
"node_i": "3515",
|
| 479 |
-
"
|
| 480 |
-
"
|
| 481 |
},
|
| 482 |
{
|
| 483 |
"name": "Ghanaian Pidgin English",
|
| 484 |
"iso_1_code": null,
|
| 485 |
"iso_3_code": "gpe",
|
| 486 |
-
"tokenizers": {},
|
| 487 |
"children": [],
|
|
|
|
| 488 |
"node_i": "3516",
|
| 489 |
-
"
|
| 490 |
-
"
|
| 491 |
},
|
| 492 |
{
|
| 493 |
"name": "Krio",
|
| 494 |
"iso_1_code": null,
|
| 495 |
"iso_3_code": "kri",
|
|
|
|
| 496 |
"tokenizers": {
|
| 497 |
"Latn": {
|
| 498 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 499 |
"original_lang_name": "nigerian_pidgin",
|
| 500 |
"original_lang_code": "pcm",
|
| 501 |
-
"
|
| 502 |
-
|
| 503 |
-
],
|
| 504 |
-
"class_name": "StanzaTokenizer",
|
| 505 |
-
"macrolanguage": false
|
| 506 |
}
|
| 507 |
},
|
| 508 |
-
"children": [],
|
| 509 |
"node_i": "3517",
|
|
|
|
| 510 |
"scripts": [
|
| 511 |
"Latn"
|
| 512 |
-
]
|
| 513 |
-
"own_tokenizer": false
|
| 514 |
},
|
| 515 |
{
|
| 516 |
"name": "Pidgin, Nigerian",
|
| 517 |
"iso_1_code": null,
|
| 518 |
"iso_3_code": "pcm",
|
|
|
|
| 519 |
"tokenizers": {
|
| 520 |
"Latn": {
|
| 521 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 522 |
"original_lang_name": "nigerian_pidgin",
|
| 523 |
"original_lang_code": "pcm",
|
| 524 |
-
"
|
| 525 |
-
|
| 526 |
-
],
|
| 527 |
-
"class_name": "StanzaTokenizer",
|
| 528 |
-
"macrolanguage": false
|
| 529 |
}
|
| 530 |
},
|
| 531 |
-
"children": [],
|
| 532 |
"node_i": "3518",
|
| 533 |
-
"
|
| 534 |
"Latn"
|
| 535 |
],
|
| 536 |
-
"
|
|
|
|
|
|
|
| 537 |
},
|
| 538 |
{
|
| 539 |
"name": "Pidgin, Cameroon",
|
| 540 |
"iso_1_code": null,
|
| 541 |
"iso_3_code": "wes",
|
|
|
|
| 542 |
"tokenizers": {
|
| 543 |
"Latn": {
|
| 544 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 545 |
"original_lang_name": "nigerian_pidgin",
|
| 546 |
"original_lang_code": "pcm",
|
| 547 |
-
"
|
| 548 |
-
|
| 549 |
-
],
|
| 550 |
-
"class_name": "StanzaTokenizer",
|
| 551 |
-
"macrolanguage": false
|
| 552 |
}
|
| 553 |
},
|
| 554 |
-
"children": [],
|
| 555 |
"node_i": "3519",
|
|
|
|
| 556 |
"scripts": [
|
| 557 |
"Latn"
|
| 558 |
-
]
|
| 559 |
-
"own_tokenizer": false
|
| 560 |
}
|
| 561 |
],
|
| 562 |
-
"node_i": "3514",
|
| 563 |
-
"scripts": [],
|
| 564 |
-
"own_tokenizer": false
|
| 565 |
-
},
|
| 566 |
-
{
|
| 567 |
-
"name": "Suriname",
|
| 568 |
-
"iso_1_code": null,
|
| 569 |
-
"iso_3_code": null,
|
| 570 |
"tokenizers": {
|
| 571 |
"Latn": {
|
| 572 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 573 |
"original_lang_name": "nigerian_pidgin",
|
| 574 |
"original_lang_code": "pcm",
|
| 575 |
-
"
|
| 576 |
-
|
| 577 |
-
],
|
| 578 |
-
"class_name": "StanzaTokenizer",
|
| 579 |
-
"macrolanguage": false
|
| 580 |
}
|
| 581 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
"children": [
|
| 583 |
{
|
| 584 |
"name": "Sranan Tongo",
|
| 585 |
"iso_1_code": null,
|
| 586 |
"iso_3_code": "srn",
|
|
|
|
| 587 |
"tokenizers": {
|
| 588 |
"Latn": {
|
| 589 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 590 |
"original_lang_name": "nigerian_pidgin",
|
| 591 |
"original_lang_code": "pcm",
|
| 592 |
-
"
|
| 593 |
-
|
| 594 |
-
],
|
| 595 |
-
"class_name": "StanzaTokenizer",
|
| 596 |
-
"macrolanguage": false
|
| 597 |
}
|
| 598 |
},
|
| 599 |
-
"children": [],
|
| 600 |
"node_i": "3521",
|
|
|
|
| 601 |
"scripts": [
|
| 602 |
"Latn"
|
| 603 |
-
]
|
| 604 |
-
"own_tokenizer": false
|
| 605 |
},
|
| 606 |
{
|
| 607 |
"name": "Ndyuka",
|
| 608 |
"iso_1_code": null,
|
| 609 |
"iso_3_code": null,
|
| 610 |
-
"tokenizers": {
|
| 611 |
-
"Latn": {
|
| 612 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 613 |
-
"original_lang_name": "nigerian_pidgin",
|
| 614 |
-
"original_lang_code": "pcm",
|
| 615 |
-
"scripts": [
|
| 616 |
-
"Latn"
|
| 617 |
-
],
|
| 618 |
-
"class_name": "StanzaTokenizer",
|
| 619 |
-
"macrolanguage": false
|
| 620 |
-
}
|
| 621 |
-
},
|
| 622 |
"children": [
|
| 623 |
{
|
| 624 |
"name": "Aukan",
|
| 625 |
"iso_1_code": null,
|
| 626 |
"iso_3_code": "djk",
|
|
|
|
| 627 |
"tokenizers": {
|
| 628 |
"Latn": {
|
| 629 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 630 |
"original_lang_name": "nigerian_pidgin",
|
| 631 |
"original_lang_code": "pcm",
|
| 632 |
-
"
|
| 633 |
-
|
| 634 |
-
],
|
| 635 |
-
"class_name": "StanzaTokenizer",
|
| 636 |
-
"macrolanguage": false
|
| 637 |
}
|
| 638 |
},
|
| 639 |
-
"children": [],
|
| 640 |
"node_i": "3523",
|
|
|
|
| 641 |
"scripts": [
|
| 642 |
"Latn"
|
| 643 |
-
]
|
| 644 |
-
"own_tokenizer": false
|
| 645 |
},
|
| 646 |
{
|
| 647 |
"name": "Kwinti",
|
| 648 |
"iso_1_code": null,
|
| 649 |
"iso_3_code": "kww",
|
| 650 |
-
"tokenizers": {},
|
| 651 |
"children": [],
|
|
|
|
| 652 |
"node_i": "3524",
|
| 653 |
-
"
|
| 654 |
-
"
|
| 655 |
}
|
| 656 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
"node_i": "3522",
|
| 658 |
-
"
|
| 659 |
-
"
|
| 660 |
}
|
| 661 |
],
|
| 662 |
-
"node_i": "3520",
|
| 663 |
-
"scripts": [],
|
| 664 |
-
"own_tokenizer": false
|
| 665 |
-
},
|
| 666 |
-
{
|
| 667 |
-
"name": "Western",
|
| 668 |
-
"iso_1_code": null,
|
| 669 |
-
"iso_3_code": null,
|
| 670 |
"tokenizers": {
|
| 671 |
"Latn": {
|
| 672 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 673 |
"original_lang_name": "nigerian_pidgin",
|
| 674 |
"original_lang_code": "pcm",
|
| 675 |
-
"
|
| 676 |
-
|
| 677 |
-
],
|
| 678 |
-
"class_name": "StanzaTokenizer",
|
| 679 |
-
"macrolanguage": false
|
| 680 |
}
|
| 681 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
"children": [
|
| 683 |
{
|
| 684 |
"name": "Belize English Creole",
|
| 685 |
"iso_1_code": null,
|
| 686 |
"iso_3_code": "bzj",
|
|
|
|
| 687 |
"tokenizers": {
|
| 688 |
"Latn": {
|
| 689 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 690 |
"original_lang_name": "nigerian_pidgin",
|
| 691 |
"original_lang_code": "pcm",
|
| 692 |
-
"
|
| 693 |
-
|
| 694 |
-
],
|
| 695 |
-
"class_name": "StanzaTokenizer",
|
| 696 |
-
"macrolanguage": false
|
| 697 |
}
|
| 698 |
},
|
| 699 |
-
"children": [],
|
| 700 |
"node_i": "3526",
|
|
|
|
| 701 |
"scripts": [
|
| 702 |
"Latn"
|
| 703 |
-
]
|
| 704 |
-
"own_tokenizer": false
|
| 705 |
},
|
| 706 |
{
|
| 707 |
"name": "Nicaragua English Creole",
|
| 708 |
"iso_1_code": null,
|
| 709 |
"iso_3_code": "bzk",
|
| 710 |
-
"tokenizers": {},
|
| 711 |
"children": [],
|
|
|
|
| 712 |
"node_i": "3527",
|
| 713 |
-
"
|
| 714 |
-
"
|
| 715 |
},
|
| 716 |
{
|
| 717 |
"name": "Islander English Creole",
|
| 718 |
"iso_1_code": null,
|
| 719 |
"iso_3_code": "icr",
|
|
|
|
| 720 |
"tokenizers": {
|
| 721 |
"Latn": {
|
| 722 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 723 |
"original_lang_name": "nigerian_pidgin",
|
| 724 |
"original_lang_code": "pcm",
|
| 725 |
-
"
|
| 726 |
-
|
| 727 |
-
],
|
| 728 |
-
"class_name": "StanzaTokenizer",
|
| 729 |
-
"macrolanguage": false
|
| 730 |
}
|
| 731 |
},
|
| 732 |
-
"children": [],
|
| 733 |
"node_i": "3528",
|
|
|
|
| 734 |
"scripts": [
|
| 735 |
"Latn"
|
| 736 |
-
]
|
| 737 |
-
"own_tokenizer": false
|
| 738 |
},
|
| 739 |
{
|
| 740 |
"name": "Jamaican English Creole",
|
| 741 |
"iso_1_code": null,
|
| 742 |
"iso_3_code": "jam",
|
|
|
|
| 743 |
"tokenizers": {
|
| 744 |
"Latn": {
|
| 745 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 746 |
"original_lang_name": "nigerian_pidgin",
|
| 747 |
"original_lang_code": "pcm",
|
| 748 |
-
"
|
| 749 |
-
|
| 750 |
-
],
|
| 751 |
-
"class_name": "StanzaTokenizer",
|
| 752 |
-
"macrolanguage": false
|
| 753 |
}
|
| 754 |
},
|
| 755 |
-
"children": [],
|
| 756 |
"node_i": "3529",
|
|
|
|
| 757 |
"scripts": [
|
| 758 |
"Latn"
|
| 759 |
-
]
|
| 760 |
-
"own_tokenizer": false
|
| 761 |
}
|
| 762 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
"node_i": "3525",
|
| 764 |
-
"
|
| 765 |
-
"
|
| 766 |
}
|
| 767 |
],
|
| 768 |
-
"node_i": "3498",
|
| 769 |
-
"scripts": [],
|
| 770 |
-
"own_tokenizer": false
|
| 771 |
-
},
|
| 772 |
-
{
|
| 773 |
-
"name": "Pacific",
|
| 774 |
-
"iso_1_code": null,
|
| 775 |
-
"iso_3_code": null,
|
| 776 |
"tokenizers": {
|
| 777 |
"Latn": {
|
| 778 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 779 |
"original_lang_name": "nigerian_pidgin",
|
| 780 |
"original_lang_code": "pcm",
|
| 781 |
-
"
|
| 782 |
-
|
| 783 |
-
],
|
| 784 |
-
"class_name": "StanzaTokenizer",
|
| 785 |
-
"macrolanguage": false
|
| 786 |
}
|
| 787 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 788 |
"children": [
|
| 789 |
{
|
| 790 |
"name": "Bislama",
|
| 791 |
"iso_1_code": "bi",
|
| 792 |
"iso_3_code": "bis",
|
|
|
|
| 793 |
"tokenizers": {
|
| 794 |
"Latn": {
|
| 795 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 796 |
"original_lang_name": "nigerian_pidgin",
|
| 797 |
"original_lang_code": "pcm",
|
| 798 |
-
"
|
| 799 |
-
|
| 800 |
-
],
|
| 801 |
-
"class_name": "StanzaTokenizer",
|
| 802 |
-
"macrolanguage": false
|
| 803 |
}
|
| 804 |
},
|
| 805 |
-
"children": [],
|
| 806 |
"node_i": "3531",
|
|
|
|
| 807 |
"scripts": [
|
| 808 |
"Latn"
|
| 809 |
-
]
|
| 810 |
-
"own_tokenizer": false
|
| 811 |
},
|
| 812 |
{
|
| 813 |
"name": "Hawaii Pidgin",
|
| 814 |
"iso_1_code": null,
|
| 815 |
"iso_3_code": "hwc",
|
|
|
|
| 816 |
"tokenizers": {
|
| 817 |
"Latn": {
|
| 818 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 819 |
"original_lang_name": "nigerian_pidgin",
|
| 820 |
"original_lang_code": "pcm",
|
| 821 |
-
"
|
| 822 |
-
|
| 823 |
-
],
|
| 824 |
-
"class_name": "StanzaTokenizer",
|
| 825 |
-
"macrolanguage": false
|
| 826 |
}
|
| 827 |
},
|
| 828 |
-
"children": [],
|
| 829 |
"node_i": "3532",
|
|
|
|
| 830 |
"scripts": [
|
| 831 |
"Latn"
|
| 832 |
-
]
|
| 833 |
-
"own_tokenizer": false
|
| 834 |
},
|
| 835 |
{
|
| 836 |
"name": "Ngatik Men\u2019s Creole",
|
| 837 |
"iso_1_code": null,
|
| 838 |
"iso_3_code": "ngm",
|
| 839 |
-
"tokenizers": {},
|
| 840 |
"children": [],
|
|
|
|
| 841 |
"node_i": "3533",
|
| 842 |
-
"
|
| 843 |
-
"
|
| 844 |
},
|
| 845 |
{
|
| 846 |
"name": "Pitcairn-Norfolk",
|
| 847 |
"iso_1_code": null,
|
| 848 |
"iso_3_code": "pih",
|
| 849 |
-
"tokenizers": {},
|
| 850 |
"children": [],
|
|
|
|
| 851 |
"node_i": "3534",
|
| 852 |
-
"
|
| 853 |
-
"
|
| 854 |
},
|
| 855 |
{
|
| 856 |
"name": "Pijin",
|
| 857 |
"iso_1_code": null,
|
| 858 |
"iso_3_code": "pis",
|
|
|
|
| 859 |
"tokenizers": {
|
| 860 |
"Latn": {
|
| 861 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 862 |
"original_lang_name": "nigerian_pidgin",
|
| 863 |
"original_lang_code": "pcm",
|
| 864 |
-
"
|
| 865 |
-
|
| 866 |
-
],
|
| 867 |
-
"class_name": "StanzaTokenizer",
|
| 868 |
-
"macrolanguage": false
|
| 869 |
}
|
| 870 |
},
|
| 871 |
-
"children": [],
|
| 872 |
"node_i": "3535",
|
|
|
|
| 873 |
"scripts": [
|
| 874 |
"Latn"
|
| 875 |
-
]
|
| 876 |
-
"own_tokenizer": false
|
| 877 |
},
|
| 878 |
{
|
| 879 |
"name": "Kriol",
|
| 880 |
"iso_1_code": null,
|
| 881 |
"iso_3_code": "rop",
|
|
|
|
| 882 |
"tokenizers": {
|
| 883 |
"Latn": {
|
| 884 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 885 |
"original_lang_name": "nigerian_pidgin",
|
| 886 |
"original_lang_code": "pcm",
|
| 887 |
-
"
|
| 888 |
-
|
| 889 |
-
],
|
| 890 |
-
"class_name": "StanzaTokenizer",
|
| 891 |
-
"macrolanguage": false
|
| 892 |
}
|
| 893 |
},
|
| 894 |
-
"children": [],
|
| 895 |
"node_i": "3536",
|
|
|
|
| 896 |
"scripts": [
|
| 897 |
"Latn"
|
| 898 |
-
]
|
| 899 |
-
"own_tokenizer": false
|
| 900 |
},
|
| 901 |
{
|
| 902 |
"name": "Torres Strait Creole",
|
| 903 |
"iso_1_code": null,
|
| 904 |
"iso_3_code": "tcs",
|
|
|
|
| 905 |
"tokenizers": {
|
| 906 |
"Latn": {
|
| 907 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 908 |
"original_lang_name": "nigerian_pidgin",
|
| 909 |
"original_lang_code": "pcm",
|
| 910 |
-
"
|
| 911 |
-
|
| 912 |
-
],
|
| 913 |
-
"class_name": "StanzaTokenizer",
|
| 914 |
-
"macrolanguage": false
|
| 915 |
}
|
| 916 |
},
|
| 917 |
-
"children": [],
|
| 918 |
"node_i": "3537",
|
|
|
|
| 919 |
"scripts": [
|
| 920 |
"Latn"
|
| 921 |
-
]
|
| 922 |
-
"own_tokenizer": false
|
| 923 |
},
|
| 924 |
{
|
| 925 |
"name": "Tok Pisin",
|
| 926 |
"iso_1_code": null,
|
| 927 |
"iso_3_code": "tpi",
|
|
|
|
| 928 |
"tokenizers": {
|
| 929 |
"Latn": {
|
| 930 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 931 |
"original_lang_name": "nigerian_pidgin",
|
| 932 |
"original_lang_code": "pcm",
|
| 933 |
-
"
|
| 934 |
-
|
| 935 |
-
],
|
| 936 |
-
"class_name": "StanzaTokenizer",
|
| 937 |
-
"macrolanguage": false
|
| 938 |
}
|
| 939 |
},
|
| 940 |
-
"children": [],
|
| 941 |
"node_i": "3538",
|
|
|
|
| 942 |
"scripts": [
|
| 943 |
"Latn"
|
| 944 |
-
]
|
| 945 |
-
"own_tokenizer": false
|
| 946 |
}
|
| 947 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 948 |
"node_i": "3530",
|
| 949 |
-
"
|
| 950 |
-
"
|
| 951 |
}
|
| 952 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 953 |
"node_i": "3496",
|
| 954 |
-
"
|
| 955 |
-
"
|
| 956 |
},
|
| 957 |
{
|
| 958 |
"name": "French based",
|
| 959 |
"iso_1_code": null,
|
| 960 |
"iso_3_code": null,
|
| 961 |
-
"tokenizers": {
|
| 962 |
-
"Arab": {
|
| 963 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 964 |
-
"original_lang_name": "malay",
|
| 965 |
-
"original_lang_code": "msa",
|
| 966 |
-
"scripts": [
|
| 967 |
-
"Latn",
|
| 968 |
-
"Arab",
|
| 969 |
-
"Thai"
|
| 970 |
-
],
|
| 971 |
-
"class_name": "SpaCyTokenizer",
|
| 972 |
-
"macrolanguage": true
|
| 973 |
-
},
|
| 974 |
-
"Latn": {
|
| 975 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 976 |
-
"original_lang_name": "malay",
|
| 977 |
-
"original_lang_code": "msa",
|
| 978 |
-
"scripts": [
|
| 979 |
-
"Latn",
|
| 980 |
-
"Arab",
|
| 981 |
-
"Thai"
|
| 982 |
-
],
|
| 983 |
-
"class_name": "SpaCyTokenizer",
|
| 984 |
-
"macrolanguage": true
|
| 985 |
-
},
|
| 986 |
-
"Thai": {
|
| 987 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 988 |
-
"original_lang_name": "malay",
|
| 989 |
-
"original_lang_code": "msa",
|
| 990 |
-
"scripts": [
|
| 991 |
-
"Latn",
|
| 992 |
-
"Arab",
|
| 993 |
-
"Thai"
|
| 994 |
-
],
|
| 995 |
-
"class_name": "SpaCyTokenizer",
|
| 996 |
-
"macrolanguage": true
|
| 997 |
-
}
|
| 998 |
-
},
|
| 999 |
"children": [
|
| 1000 |
{
|
| 1001 |
"name": "Lesser Antillean French Creole",
|
| 1002 |
"iso_1_code": null,
|
| 1003 |
"iso_3_code": "acf",
|
|
|
|
| 1004 |
"tokenizers": {
|
| 1005 |
"Latn": {
|
| 1006 |
-
"full_object": "
|
| 1007 |
-
"original_lang_name": "
|
| 1008 |
-
"original_lang_code": "
|
| 1009 |
-
"
|
| 1010 |
-
|
| 1011 |
-
"Arab",
|
| 1012 |
-
"Thai"
|
| 1013 |
-
],
|
| 1014 |
-
"class_name": "SpaCyTokenizer",
|
| 1015 |
-
"macrolanguage": true
|
| 1016 |
}
|
| 1017 |
},
|
| 1018 |
-
"children": [],
|
| 1019 |
"node_i": "3540",
|
|
|
|
| 1020 |
"scripts": [
|
| 1021 |
"Latn"
|
| 1022 |
-
]
|
| 1023 |
-
"own_tokenizer": false
|
| 1024 |
},
|
| 1025 |
{
|
| 1026 |
"name": "Tayo",
|
| 1027 |
"iso_1_code": null,
|
| 1028 |
"iso_3_code": "cks",
|
| 1029 |
-
"tokenizers": {},
|
| 1030 |
"children": [],
|
|
|
|
| 1031 |
"node_i": "3541",
|
| 1032 |
-
"
|
| 1033 |
-
"
|
| 1034 |
},
|
| 1035 |
{
|
| 1036 |
"name": "Seychelles French Creole",
|
| 1037 |
"iso_1_code": null,
|
| 1038 |
"iso_3_code": "crs",
|
|
|
|
| 1039 |
"tokenizers": {
|
| 1040 |
"Latn": {
|
| 1041 |
-
"full_object": "
|
| 1042 |
-
"original_lang_name": "
|
| 1043 |
-
"original_lang_code": "
|
| 1044 |
-
"
|
| 1045 |
-
|
| 1046 |
-
"Arab",
|
| 1047 |
-
"Thai"
|
| 1048 |
-
],
|
| 1049 |
-
"class_name": "SpaCyTokenizer",
|
| 1050 |
-
"macrolanguage": true
|
| 1051 |
}
|
| 1052 |
},
|
| 1053 |
-
"children": [],
|
| 1054 |
"node_i": "3542",
|
|
|
|
| 1055 |
"scripts": [
|
| 1056 |
"Latn"
|
| 1057 |
-
]
|
| 1058 |
-
"own_tokenizer": false
|
| 1059 |
},
|
| 1060 |
{
|
| 1061 |
"name": "Guadeloupean French Creole",
|
| 1062 |
"iso_1_code": null,
|
| 1063 |
"iso_3_code": "gcf",
|
|
|
|
| 1064 |
"tokenizers": {
|
| 1065 |
"Latn": {
|
| 1066 |
-
"full_object": "
|
| 1067 |
-
"original_lang_name": "
|
| 1068 |
-
"original_lang_code": "
|
| 1069 |
-
"
|
| 1070 |
-
|
| 1071 |
-
"Arab",
|
| 1072 |
-
"Thai"
|
| 1073 |
-
],
|
| 1074 |
-
"class_name": "SpaCyTokenizer",
|
| 1075 |
-
"macrolanguage": true
|
| 1076 |
}
|
| 1077 |
},
|
| 1078 |
-
"children": [],
|
| 1079 |
"node_i": "3543",
|
|
|
|
| 1080 |
"scripts": [
|
| 1081 |
"Latn"
|
| 1082 |
-
]
|
| 1083 |
-
"own_tokenizer": false
|
| 1084 |
},
|
| 1085 |
{
|
| 1086 |
"name": "Guianese French Creole",
|
| 1087 |
"iso_1_code": null,
|
| 1088 |
"iso_3_code": "gcr",
|
|
|
|
| 1089 |
"tokenizers": {
|
| 1090 |
"Latn": {
|
| 1091 |
-
"full_object": "
|
| 1092 |
-
"original_lang_name": "
|
| 1093 |
-
"original_lang_code": "
|
| 1094 |
-
"
|
| 1095 |
-
|
| 1096 |
-
"Arab",
|
| 1097 |
-
"Thai"
|
| 1098 |
-
],
|
| 1099 |
-
"class_name": "SpaCyTokenizer",
|
| 1100 |
-
"macrolanguage": true
|
| 1101 |
}
|
| 1102 |
},
|
| 1103 |
-
"children": [],
|
| 1104 |
"node_i": "3544",
|
|
|
|
| 1105 |
"scripts": [
|
| 1106 |
"Latn"
|
| 1107 |
-
]
|
| 1108 |
-
"own_tokenizer": false
|
| 1109 |
},
|
| 1110 |
{
|
| 1111 |
"name": "Haitian Creole",
|
| 1112 |
"iso_1_code": "ht",
|
| 1113 |
"iso_3_code": "hat",
|
|
|
|
| 1114 |
"tokenizers": {
|
| 1115 |
"Latn": {
|
| 1116 |
-
"full_object": "
|
| 1117 |
-
"original_lang_name": "
|
| 1118 |
-
"original_lang_code": "
|
| 1119 |
-
"
|
| 1120 |
-
|
| 1121 |
-
"Arab",
|
| 1122 |
-
"Thai"
|
| 1123 |
-
],
|
| 1124 |
-
"class_name": "SpaCyTokenizer",
|
| 1125 |
-
"macrolanguage": true
|
| 1126 |
}
|
| 1127 |
},
|
| 1128 |
-
"children": [],
|
| 1129 |
"node_i": "3545",
|
|
|
|
| 1130 |
"scripts": [
|
| 1131 |
"Latn"
|
| 1132 |
-
]
|
| 1133 |
-
"own_tokenizer": false
|
| 1134 |
},
|
| 1135 |
{
|
| 1136 |
"name": "Karipuna French Creole",
|
| 1137 |
"iso_1_code": null,
|
| 1138 |
"iso_3_code": "kmv",
|
| 1139 |
-
"tokenizers": {},
|
| 1140 |
"children": [],
|
|
|
|
| 1141 |
"node_i": "3546",
|
| 1142 |
-
"
|
| 1143 |
-
"
|
| 1144 |
},
|
| 1145 |
{
|
| 1146 |
"name": "Louisiana Creole",
|
| 1147 |
"iso_1_code": null,
|
| 1148 |
"iso_3_code": "lou",
|
| 1149 |
-
"tokenizers": {},
|
| 1150 |
"children": [],
|
|
|
|
| 1151 |
"node_i": "3547",
|
| 1152 |
-
"
|
| 1153 |
-
"
|
| 1154 |
},
|
| 1155 |
{
|
| 1156 |
"name": "Morisyen",
|
| 1157 |
"iso_1_code": null,
|
| 1158 |
"iso_3_code": "mfe",
|
|
|
|
| 1159 |
"tokenizers": {
|
| 1160 |
"Latn": {
|
| 1161 |
-
"full_object": "
|
| 1162 |
-
"original_lang_name": "
|
| 1163 |
-
"original_lang_code": "
|
| 1164 |
-
"
|
| 1165 |
-
|
| 1166 |
-
"Arab",
|
| 1167 |
-
"Thai"
|
| 1168 |
-
],
|
| 1169 |
-
"class_name": "SpaCyTokenizer",
|
| 1170 |
-
"macrolanguage": true
|
| 1171 |
}
|
| 1172 |
},
|
| 1173 |
-
"children": [],
|
| 1174 |
"node_i": "3548",
|
|
|
|
| 1175 |
"scripts": [
|
| 1176 |
"Latn"
|
| 1177 |
-
]
|
| 1178 |
-
"own_tokenizer": false
|
| 1179 |
},
|
| 1180 |
{
|
| 1181 |
"name": "R\u00e9union French Creole",
|
| 1182 |
"iso_1_code": null,
|
| 1183 |
"iso_3_code": "rcf",
|
|
|
|
| 1184 |
"tokenizers": {
|
| 1185 |
"Latn": {
|
| 1186 |
-
"full_object": "
|
| 1187 |
-
"original_lang_name": "
|
| 1188 |
-
"original_lang_code": "
|
| 1189 |
-
"
|
| 1190 |
-
|
| 1191 |
-
"Arab",
|
| 1192 |
-
"Thai"
|
| 1193 |
-
],
|
| 1194 |
-
"class_name": "SpaCyTokenizer",
|
| 1195 |
-
"macrolanguage": true
|
| 1196 |
}
|
| 1197 |
},
|
| 1198 |
-
"children": [],
|
| 1199 |
"node_i": "3549",
|
|
|
|
| 1200 |
"scripts": [
|
| 1201 |
"Latn"
|
| 1202 |
-
]
|
| 1203 |
-
"own_tokenizer": false
|
| 1204 |
},
|
| 1205 |
{
|
| 1206 |
"name": "San Miguel French Creole",
|
| 1207 |
"iso_1_code": null,
|
| 1208 |
"iso_3_code": "scf",
|
| 1209 |
-
"tokenizers": {},
|
| 1210 |
"children": [],
|
|
|
|
| 1211 |
"node_i": "3550",
|
| 1212 |
-
"
|
| 1213 |
-
"
|
| 1214 |
}
|
| 1215 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
"node_i": "3539",
|
| 1217 |
-
"
|
| 1218 |
-
"
|
| 1219 |
},
|
| 1220 |
{
|
| 1221 |
"name": "German based",
|
| 1222 |
"iso_1_code": null,
|
| 1223 |
"iso_3_code": null,
|
| 1224 |
-
"tokenizers": {},
|
| 1225 |
"children": [
|
| 1226 |
{
|
| 1227 |
"name": "Unserdeutsch",
|
| 1228 |
"iso_1_code": null,
|
| 1229 |
"iso_3_code": "uln",
|
| 1230 |
-
"tokenizers": {},
|
| 1231 |
"children": [],
|
|
|
|
| 1232 |
"node_i": "3552",
|
| 1233 |
-
"
|
| 1234 |
-
"
|
| 1235 |
}
|
| 1236 |
],
|
|
|
|
| 1237 |
"node_i": "3551",
|
| 1238 |
-
"
|
| 1239 |
-
"
|
| 1240 |
},
|
| 1241 |
{
|
| 1242 |
"name": "Hindi based",
|
| 1243 |
"iso_1_code": null,
|
| 1244 |
"iso_3_code": null,
|
| 1245 |
-
"tokenizers": {},
|
| 1246 |
"children": [
|
| 1247 |
{
|
| 1248 |
"name": "Andaman Hindi Creole",
|
| 1249 |
"iso_1_code": null,
|
| 1250 |
"iso_3_code": "hca",
|
| 1251 |
-
"tokenizers": {},
|
| 1252 |
"children": [],
|
|
|
|
| 1253 |
"node_i": "3554",
|
| 1254 |
-
"
|
| 1255 |
-
"
|
| 1256 |
}
|
| 1257 |
],
|
|
|
|
| 1258 |
"node_i": "3553",
|
| 1259 |
-
"
|
| 1260 |
-
"
|
| 1261 |
},
|
| 1262 |
{
|
| 1263 |
"name": "Iberian based",
|
| 1264 |
"iso_1_code": null,
|
| 1265 |
"iso_3_code": null,
|
| 1266 |
-
"tokenizers": {
|
| 1267 |
-
"Arab": {
|
| 1268 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1269 |
-
"original_lang_name": "malay",
|
| 1270 |
-
"original_lang_code": "msa",
|
| 1271 |
-
"scripts": [
|
| 1272 |
-
"Latn",
|
| 1273 |
-
"Arab",
|
| 1274 |
-
"Thai"
|
| 1275 |
-
],
|
| 1276 |
-
"class_name": "SpaCyTokenizer",
|
| 1277 |
-
"macrolanguage": true
|
| 1278 |
-
},
|
| 1279 |
-
"Latn": {
|
| 1280 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1281 |
-
"original_lang_name": "malay",
|
| 1282 |
-
"original_lang_code": "msa",
|
| 1283 |
-
"scripts": [
|
| 1284 |
-
"Latn",
|
| 1285 |
-
"Arab",
|
| 1286 |
-
"Thai"
|
| 1287 |
-
],
|
| 1288 |
-
"class_name": "SpaCyTokenizer",
|
| 1289 |
-
"macrolanguage": true
|
| 1290 |
-
},
|
| 1291 |
-
"Thai": {
|
| 1292 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1293 |
-
"original_lang_name": "malay",
|
| 1294 |
-
"original_lang_code": "msa",
|
| 1295 |
-
"scripts": [
|
| 1296 |
-
"Latn",
|
| 1297 |
-
"Arab",
|
| 1298 |
-
"Thai"
|
| 1299 |
-
],
|
| 1300 |
-
"class_name": "SpaCyTokenizer",
|
| 1301 |
-
"macrolanguage": true
|
| 1302 |
-
}
|
| 1303 |
-
},
|
| 1304 |
"children": [
|
| 1305 |
{
|
| 1306 |
"name": "Papiamentu",
|
| 1307 |
"iso_1_code": null,
|
| 1308 |
"iso_3_code": "pap",
|
|
|
|
| 1309 |
"tokenizers": {
|
| 1310 |
"Latn": {
|
| 1311 |
-
"full_object": "
|
| 1312 |
-
"original_lang_name": "
|
| 1313 |
-
"original_lang_code": "
|
| 1314 |
-
"
|
| 1315 |
-
|
| 1316 |
-
"Arab",
|
| 1317 |
-
"Thai"
|
| 1318 |
-
],
|
| 1319 |
-
"class_name": "SpaCyTokenizer",
|
| 1320 |
-
"macrolanguage": true
|
| 1321 |
}
|
| 1322 |
},
|
| 1323 |
-
"children": [],
|
| 1324 |
"node_i": "3556",
|
|
|
|
| 1325 |
"scripts": [
|
| 1326 |
"Latn"
|
| 1327 |
-
]
|
| 1328 |
-
"own_tokenizer": false
|
| 1329 |
}
|
| 1330 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1331 |
"node_i": "3555",
|
| 1332 |
-
"
|
| 1333 |
-
"
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
"name": "Japanese-based",
|
| 1337 |
"iso_1_code": null,
|
| 1338 |
"iso_3_code": null,
|
| 1339 |
-
"tokenizers": {},
|
| 1340 |
"children": [
|
| 1341 |
{
|
| 1342 |
"name": "Yilan Creole",
|
| 1343 |
"iso_1_code": null,
|
| 1344 |
"iso_3_code": "ycr",
|
| 1345 |
-
"tokenizers": {},
|
| 1346 |
"children": [],
|
|
|
|
| 1347 |
"node_i": "3558",
|
| 1348 |
-
"
|
| 1349 |
-
"
|
| 1350 |
}
|
| 1351 |
],
|
|
|
|
| 1352 |
"node_i": "3557",
|
| 1353 |
-
"
|
| 1354 |
-
"
|
| 1355 |
},
|
| 1356 |
{
|
| 1357 |
"name": "Kongo based",
|
| 1358 |
"iso_1_code": null,
|
| 1359 |
"iso_3_code": null,
|
| 1360 |
-
"tokenizers": {
|
| 1361 |
-
"Arab": {
|
| 1362 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1363 |
-
"original_lang_name": "malay",
|
| 1364 |
-
"original_lang_code": "msa",
|
| 1365 |
-
"scripts": [
|
| 1366 |
-
"Latn",
|
| 1367 |
-
"Arab",
|
| 1368 |
-
"Thai"
|
| 1369 |
-
],
|
| 1370 |
-
"class_name": "SpaCyTokenizer",
|
| 1371 |
-
"macrolanguage": true
|
| 1372 |
-
},
|
| 1373 |
-
"Latn": {
|
| 1374 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1375 |
-
"original_lang_name": "malay",
|
| 1376 |
-
"original_lang_code": "msa",
|
| 1377 |
-
"scripts": [
|
| 1378 |
-
"Latn",
|
| 1379 |
-
"Arab",
|
| 1380 |
-
"Thai"
|
| 1381 |
-
],
|
| 1382 |
-
"class_name": "SpaCyTokenizer",
|
| 1383 |
-
"macrolanguage": true
|
| 1384 |
-
},
|
| 1385 |
-
"Thai": {
|
| 1386 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1387 |
-
"original_lang_name": "malay",
|
| 1388 |
-
"original_lang_code": "msa",
|
| 1389 |
-
"scripts": [
|
| 1390 |
-
"Latn",
|
| 1391 |
-
"Arab",
|
| 1392 |
-
"Thai"
|
| 1393 |
-
],
|
| 1394 |
-
"class_name": "SpaCyTokenizer",
|
| 1395 |
-
"macrolanguage": true
|
| 1396 |
-
}
|
| 1397 |
-
},
|
| 1398 |
"children": [
|
| 1399 |
{
|
| 1400 |
"name": "Kituba",
|
| 1401 |
"iso_1_code": null,
|
| 1402 |
"iso_3_code": "ktu",
|
|
|
|
| 1403 |
"tokenizers": {
|
| 1404 |
"Latn": {
|
| 1405 |
-
"full_object": "
|
| 1406 |
-
"original_lang_name": "
|
| 1407 |
-
"original_lang_code": "
|
| 1408 |
-
"
|
| 1409 |
-
|
| 1410 |
-
"Arab",
|
| 1411 |
-
"Thai"
|
| 1412 |
-
],
|
| 1413 |
-
"class_name": "SpaCyTokenizer",
|
| 1414 |
-
"macrolanguage": true
|
| 1415 |
}
|
| 1416 |
},
|
| 1417 |
-
"children": [],
|
| 1418 |
"node_i": "3560",
|
|
|
|
| 1419 |
"scripts": [
|
| 1420 |
"Latn"
|
| 1421 |
-
]
|
| 1422 |
-
"own_tokenizer": false
|
| 1423 |
},
|
| 1424 |
{
|
| 1425 |
"name": "Kituba",
|
| 1426 |
"iso_1_code": null,
|
| 1427 |
"iso_3_code": "mkw",
|
| 1428 |
-
"tokenizers": {},
|
| 1429 |
"children": [],
|
|
|
|
| 1430 |
"node_i": "3561",
|
| 1431 |
-
"
|
| 1432 |
-
"
|
| 1433 |
}
|
| 1434 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1435 |
"node_i": "3559",
|
| 1436 |
-
"
|
| 1437 |
-
"
|
| 1438 |
},
|
| 1439 |
{
|
| 1440 |
"name": "Malay based",
|
| 1441 |
"iso_1_code": null,
|
| 1442 |
"iso_3_code": null,
|
| 1443 |
-
"tokenizers": {
|
| 1444 |
-
"Latn": {
|
| 1445 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1446 |
-
"original_lang_name": "malay",
|
| 1447 |
-
"original_lang_code": "msa",
|
| 1448 |
-
"scripts": [
|
| 1449 |
-
"Latn",
|
| 1450 |
-
"Arab",
|
| 1451 |
-
"Thai"
|
| 1452 |
-
],
|
| 1453 |
-
"class_name": "SpaCyTokenizer",
|
| 1454 |
-
"macrolanguage": true
|
| 1455 |
-
},
|
| 1456 |
-
"Arab": {
|
| 1457 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1458 |
-
"original_lang_name": "malay",
|
| 1459 |
-
"original_lang_code": "msa",
|
| 1460 |
-
"scripts": [
|
| 1461 |
-
"Latn",
|
| 1462 |
-
"Arab",
|
| 1463 |
-
"Thai"
|
| 1464 |
-
],
|
| 1465 |
-
"class_name": "SpaCyTokenizer",
|
| 1466 |
-
"macrolanguage": true
|
| 1467 |
-
},
|
| 1468 |
-
"Thai": {
|
| 1469 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1470 |
-
"original_lang_name": "malay",
|
| 1471 |
-
"original_lang_code": "msa",
|
| 1472 |
-
"scripts": [
|
| 1473 |
-
"Latn",
|
| 1474 |
-
"Arab",
|
| 1475 |
-
"Thai"
|
| 1476 |
-
],
|
| 1477 |
-
"class_name": "SpaCyTokenizer",
|
| 1478 |
-
"macrolanguage": true
|
| 1479 |
-
}
|
| 1480 |
-
},
|
| 1481 |
"children": [
|
| 1482 |
{
|
| 1483 |
"name": "Malay, Ambonese",
|
| 1484 |
"iso_1_code": null,
|
| 1485 |
"iso_3_code": "abs",
|
|
|
|
| 1486 |
"tokenizers": {
|
| 1487 |
"Latn": {
|
| 1488 |
-
"full_object": "
|
| 1489 |
-
"original_lang_name": "
|
| 1490 |
-
"original_lang_code": "
|
| 1491 |
-
"
|
| 1492 |
-
|
| 1493 |
-
"Arab",
|
| 1494 |
-
"Thai"
|
| 1495 |
-
],
|
| 1496 |
-
"class_name": "SpaCyTokenizer",
|
| 1497 |
-
"macrolanguage": true
|
| 1498 |
}
|
| 1499 |
},
|
| 1500 |
-
"children": [],
|
| 1501 |
"node_i": "3563",
|
|
|
|
| 1502 |
"scripts": [
|
| 1503 |
"Latn"
|
| 1504 |
-
]
|
| 1505 |
-
"own_tokenizer": false
|
| 1506 |
},
|
| 1507 |
{
|
| 1508 |
"name": "Betawi",
|
| 1509 |
"iso_1_code": null,
|
| 1510 |
"iso_3_code": "bew",
|
|
|
|
| 1511 |
"tokenizers": {
|
| 1512 |
"Latn": {
|
| 1513 |
-
"full_object": "
|
| 1514 |
-
"original_lang_name": "
|
| 1515 |
-
"original_lang_code": "
|
| 1516 |
-
"
|
| 1517 |
-
|
| 1518 |
-
"Arab",
|
| 1519 |
-
"Thai"
|
| 1520 |
-
],
|
| 1521 |
-
"class_name": "SpaCyTokenizer",
|
| 1522 |
-
"macrolanguage": true
|
| 1523 |
}
|
| 1524 |
},
|
| 1525 |
-
"children": [],
|
| 1526 |
"node_i": "3564",
|
|
|
|
| 1527 |
"scripts": [
|
| 1528 |
"Latn"
|
| 1529 |
-
]
|
| 1530 |
-
"own_tokenizer": false
|
| 1531 |
},
|
| 1532 |
{
|
| 1533 |
"name": "Malay, Banda",
|
| 1534 |
"iso_1_code": null,
|
| 1535 |
"iso_3_code": "bpq",
|
| 1536 |
-
"tokenizers": {},
|
| 1537 |
"children": [],
|
|
|
|
| 1538 |
"node_i": "3565",
|
| 1539 |
-
"
|
| 1540 |
-
"
|
| 1541 |
},
|
| 1542 |
{
|
| 1543 |
"name": "Malaccan Malay Creole",
|
| 1544 |
"iso_1_code": null,
|
| 1545 |
"iso_3_code": "ccm",
|
| 1546 |
-
"tokenizers": {},
|
| 1547 |
"children": [],
|
|
|
|
| 1548 |
"node_i": "3566",
|
| 1549 |
-
"
|
| 1550 |
-
"
|
| 1551 |
},
|
| 1552 |
{
|
| 1553 |
"name": "Malay, Cocos Islands",
|
| 1554 |
"iso_1_code": "ms",
|
| 1555 |
"iso_3_code": "coa",
|
| 1556 |
-
"tokenizers": {
|
| 1557 |
-
"Latn": {
|
| 1558 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1559 |
-
"original_lang_name": "malay",
|
| 1560 |
-
"original_lang_code": "msa",
|
| 1561 |
-
"scripts": [
|
| 1562 |
-
"Latn",
|
| 1563 |
-
"Arab",
|
| 1564 |
-
"Thai"
|
| 1565 |
-
],
|
| 1566 |
-
"class_name": "SpaCyTokenizer",
|
| 1567 |
-
"macrolanguage": true
|
| 1568 |
-
},
|
| 1569 |
-
"Arab": {
|
| 1570 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1571 |
-
"original_lang_name": "malay",
|
| 1572 |
-
"original_lang_code": "msa",
|
| 1573 |
-
"scripts": [
|
| 1574 |
-
"Latn",
|
| 1575 |
-
"Arab",
|
| 1576 |
-
"Thai"
|
| 1577 |
-
],
|
| 1578 |
-
"class_name": "SpaCyTokenizer",
|
| 1579 |
-
"macrolanguage": true
|
| 1580 |
-
},
|
| 1581 |
-
"Thai": {
|
| 1582 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1583 |
-
"original_lang_name": "malay",
|
| 1584 |
-
"original_lang_code": "msa",
|
| 1585 |
-
"scripts": [
|
| 1586 |
-
"Latn",
|
| 1587 |
-
"Arab",
|
| 1588 |
-
"Thai"
|
| 1589 |
-
],
|
| 1590 |
-
"class_name": "SpaCyTokenizer",
|
| 1591 |
-
"macrolanguage": true
|
| 1592 |
-
}
|
| 1593 |
-
},
|
| 1594 |
"children": [],
|
|
|
|
| 1595 |
"node_i": "3567",
|
| 1596 |
-
"
|
| 1597 |
-
"
|
| 1598 |
},
|
| 1599 |
{
|
| 1600 |
"name": "Malay, Larantuka",
|
| 1601 |
"iso_1_code": null,
|
| 1602 |
"iso_3_code": "lrt",
|
| 1603 |
-
"tokenizers": {},
|
| 1604 |
"children": [],
|
|
|
|
| 1605 |
"node_i": "3568",
|
| 1606 |
-
"
|
| 1607 |
-
"
|
| 1608 |
},
|
| 1609 |
{
|
| 1610 |
"name": "Malay, North Moluccan",
|
| 1611 |
"iso_1_code": "ms",
|
| 1612 |
"iso_3_code": "max",
|
|
|
|
| 1613 |
"tokenizers": {
|
| 1614 |
"Latn": {
|
| 1615 |
-
"full_object": "
|
| 1616 |
-
"original_lang_name": "
|
| 1617 |
-
"original_lang_code": "
|
| 1618 |
-
"
|
| 1619 |
-
|
| 1620 |
-
"Arab",
|
| 1621 |
-
"Thai"
|
| 1622 |
-
],
|
| 1623 |
-
"class_name": "SpaCyTokenizer",
|
| 1624 |
-
"macrolanguage": true
|
| 1625 |
-
},
|
| 1626 |
-
"Arab": {
|
| 1627 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1628 |
-
"original_lang_name": "malay",
|
| 1629 |
-
"original_lang_code": "msa",
|
| 1630 |
-
"scripts": [
|
| 1631 |
-
"Latn",
|
| 1632 |
-
"Arab",
|
| 1633 |
-
"Thai"
|
| 1634 |
-
],
|
| 1635 |
-
"class_name": "SpaCyTokenizer",
|
| 1636 |
-
"macrolanguage": true
|
| 1637 |
-
},
|
| 1638 |
-
"Thai": {
|
| 1639 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1640 |
-
"original_lang_name": "malay",
|
| 1641 |
-
"original_lang_code": "msa",
|
| 1642 |
-
"scripts": [
|
| 1643 |
-
"Latn",
|
| 1644 |
-
"Arab",
|
| 1645 |
-
"Thai"
|
| 1646 |
-
],
|
| 1647 |
-
"class_name": "SpaCyTokenizer",
|
| 1648 |
-
"macrolanguage": true
|
| 1649 |
}
|
| 1650 |
},
|
| 1651 |
-
"children": [],
|
| 1652 |
"node_i": "3569",
|
|
|
|
| 1653 |
"scripts": [
|
| 1654 |
"Latn"
|
| 1655 |
-
]
|
| 1656 |
-
"own_tokenizer": true
|
| 1657 |
},
|
| 1658 |
{
|
| 1659 |
"name": "Malay, Baba",
|
| 1660 |
"iso_1_code": null,
|
| 1661 |
"iso_3_code": "mbf",
|
|
|
|
| 1662 |
"tokenizers": {
|
| 1663 |
"Latn": {
|
| 1664 |
-
"full_object": "
|
| 1665 |
-
"original_lang_name": "
|
| 1666 |
-
"original_lang_code": "
|
| 1667 |
-
"
|
| 1668 |
-
|
| 1669 |
-
"Arab",
|
| 1670 |
-
"Thai"
|
| 1671 |
-
],
|
| 1672 |
-
"class_name": "SpaCyTokenizer",
|
| 1673 |
-
"macrolanguage": true
|
| 1674 |
}
|
| 1675 |
},
|
| 1676 |
-
"children": [],
|
| 1677 |
"node_i": "3570",
|
|
|
|
| 1678 |
"scripts": [
|
| 1679 |
"Latn"
|
| 1680 |
-
]
|
| 1681 |
-
"own_tokenizer": false
|
| 1682 |
},
|
| 1683 |
{
|
| 1684 |
"name": "Malay, Balinese",
|
| 1685 |
"iso_1_code": null,
|
| 1686 |
"iso_3_code": "mhp",
|
| 1687 |
-
"tokenizers": {},
|
| 1688 |
"children": [],
|
|
|
|
| 1689 |
"node_i": "3571",
|
| 1690 |
-
"
|
| 1691 |
-
"
|
| 1692 |
},
|
| 1693 |
{
|
| 1694 |
"name": "Malay, Kupang",
|
| 1695 |
"iso_1_code": null,
|
| 1696 |
"iso_3_code": "mkn",
|
|
|
|
| 1697 |
"tokenizers": {
|
| 1698 |
"Latn": {
|
| 1699 |
-
"full_object": "
|
| 1700 |
-
"original_lang_name": "
|
| 1701 |
-
"original_lang_code": "
|
| 1702 |
-
"
|
| 1703 |
-
|
| 1704 |
-
"Arab",
|
| 1705 |
-
"Thai"
|
| 1706 |
-
],
|
| 1707 |
-
"class_name": "SpaCyTokenizer",
|
| 1708 |
-
"macrolanguage": true
|
| 1709 |
}
|
| 1710 |
},
|
| 1711 |
-
"children": [],
|
| 1712 |
"node_i": "3572",
|
|
|
|
| 1713 |
"scripts": [
|
| 1714 |
"Latn"
|
| 1715 |
-
]
|
| 1716 |
-
"own_tokenizer": false
|
| 1717 |
},
|
| 1718 |
{
|
| 1719 |
"name": "Indonesian, Peranakan",
|
| 1720 |
"iso_1_code": null,
|
| 1721 |
"iso_3_code": "pea",
|
| 1722 |
-
"tokenizers": {},
|
| 1723 |
"children": [],
|
|
|
|
| 1724 |
"node_i": "3573",
|
| 1725 |
-
"
|
| 1726 |
-
"
|
| 1727 |
},
|
| 1728 |
{
|
| 1729 |
"name": "Malay, Papuan",
|
| 1730 |
"iso_1_code": null,
|
| 1731 |
"iso_3_code": "pmy",
|
| 1732 |
-
"tokenizers": {},
|
| 1733 |
"children": [],
|
|
|
|
| 1734 |
"node_i": "3574",
|
| 1735 |
-
"
|
| 1736 |
-
"
|
| 1737 |
},
|
| 1738 |
{
|
| 1739 |
"name": "Sri Lankan Malay Creole",
|
| 1740 |
"iso_1_code": null,
|
| 1741 |
"iso_3_code": "sci",
|
| 1742 |
-
"tokenizers": {},
|
| 1743 |
"children": [],
|
|
|
|
| 1744 |
"node_i": "3575",
|
| 1745 |
-
"
|
| 1746 |
-
"
|
| 1747 |
},
|
| 1748 |
{
|
| 1749 |
"name": "Malay, Manado",
|
| 1750 |
"iso_1_code": "ms",
|
| 1751 |
"iso_3_code": "xmm",
|
|
|
|
| 1752 |
"tokenizers": {
|
| 1753 |
"Latn": {
|
| 1754 |
-
"full_object": "
|
| 1755 |
-
"original_lang_name": "
|
| 1756 |
-
"original_lang_code": "
|
| 1757 |
-
"
|
| 1758 |
-
|
| 1759 |
-
"Arab",
|
| 1760 |
-
"Thai"
|
| 1761 |
-
],
|
| 1762 |
-
"class_name": "SpaCyTokenizer",
|
| 1763 |
-
"macrolanguage": true
|
| 1764 |
-
},
|
| 1765 |
-
"Arab": {
|
| 1766 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1767 |
-
"original_lang_name": "malay",
|
| 1768 |
-
"original_lang_code": "msa",
|
| 1769 |
-
"scripts": [
|
| 1770 |
-
"Latn",
|
| 1771 |
-
"Arab",
|
| 1772 |
-
"Thai"
|
| 1773 |
-
],
|
| 1774 |
-
"class_name": "SpaCyTokenizer",
|
| 1775 |
-
"macrolanguage": true
|
| 1776 |
-
},
|
| 1777 |
-
"Thai": {
|
| 1778 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1779 |
-
"original_lang_name": "malay",
|
| 1780 |
-
"original_lang_code": "msa",
|
| 1781 |
-
"scripts": [
|
| 1782 |
-
"Latn",
|
| 1783 |
-
"Arab",
|
| 1784 |
-
"Thai"
|
| 1785 |
-
],
|
| 1786 |
-
"class_name": "SpaCyTokenizer",
|
| 1787 |
-
"macrolanguage": true
|
| 1788 |
}
|
| 1789 |
},
|
| 1790 |
-
"children": [],
|
| 1791 |
"node_i": "3576",
|
|
|
|
| 1792 |
"scripts": [
|
| 1793 |
"Latn"
|
| 1794 |
-
]
|
| 1795 |
-
"own_tokenizer": true
|
| 1796 |
}
|
| 1797 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1798 |
"node_i": "3562",
|
| 1799 |
-
"
|
| 1800 |
-
"
|
| 1801 |
},
|
| 1802 |
{
|
| 1803 |
"name": "Ngbandi based",
|
| 1804 |
"iso_1_code": null,
|
| 1805 |
"iso_3_code": null,
|
| 1806 |
-
"tokenizers": {
|
| 1807 |
-
"Arab": {
|
| 1808 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1809 |
-
"original_lang_name": "malay",
|
| 1810 |
-
"original_lang_code": "msa",
|
| 1811 |
-
"scripts": [
|
| 1812 |
-
"Latn",
|
| 1813 |
-
"Arab",
|
| 1814 |
-
"Thai"
|
| 1815 |
-
],
|
| 1816 |
-
"class_name": "SpaCyTokenizer",
|
| 1817 |
-
"macrolanguage": true
|
| 1818 |
-
},
|
| 1819 |
-
"Latn": {
|
| 1820 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1821 |
-
"original_lang_name": "malay",
|
| 1822 |
-
"original_lang_code": "msa",
|
| 1823 |
-
"scripts": [
|
| 1824 |
-
"Latn",
|
| 1825 |
-
"Arab",
|
| 1826 |
-
"Thai"
|
| 1827 |
-
],
|
| 1828 |
-
"class_name": "SpaCyTokenizer",
|
| 1829 |
-
"macrolanguage": true
|
| 1830 |
-
},
|
| 1831 |
-
"Thai": {
|
| 1832 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1833 |
-
"original_lang_name": "malay",
|
| 1834 |
-
"original_lang_code": "msa",
|
| 1835 |
-
"scripts": [
|
| 1836 |
-
"Latn",
|
| 1837 |
-
"Arab",
|
| 1838 |
-
"Thai"
|
| 1839 |
-
],
|
| 1840 |
-
"class_name": "SpaCyTokenizer",
|
| 1841 |
-
"macrolanguage": true
|
| 1842 |
-
}
|
| 1843 |
-
},
|
| 1844 |
"children": [
|
| 1845 |
{
|
| 1846 |
"name": "Sango",
|
| 1847 |
"iso_1_code": "sg",
|
| 1848 |
"iso_3_code": "sag",
|
|
|
|
| 1849 |
"tokenizers": {
|
| 1850 |
"Latn": {
|
| 1851 |
-
"full_object": "
|
| 1852 |
-
"original_lang_name": "
|
| 1853 |
-
"original_lang_code": "
|
| 1854 |
-
"
|
| 1855 |
-
|
| 1856 |
-
"Arab",
|
| 1857 |
-
"Thai"
|
| 1858 |
-
],
|
| 1859 |
-
"class_name": "SpaCyTokenizer",
|
| 1860 |
-
"macrolanguage": true
|
| 1861 |
}
|
| 1862 |
},
|
| 1863 |
-
"children": [],
|
| 1864 |
"node_i": "3578",
|
|
|
|
| 1865 |
"scripts": [
|
| 1866 |
"Latn"
|
| 1867 |
-
]
|
| 1868 |
-
"own_tokenizer": false
|
| 1869 |
},
|
| 1870 |
{
|
| 1871 |
"name": "Sango, Riverain",
|
| 1872 |
"iso_1_code": null,
|
| 1873 |
"iso_3_code": "snj",
|
| 1874 |
-
"tokenizers": {},
|
| 1875 |
"children": [],
|
|
|
|
| 1876 |
"node_i": "3579",
|
| 1877 |
-
"
|
| 1878 |
-
"
|
| 1879 |
}
|
| 1880 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1881 |
"node_i": "3577",
|
| 1882 |
-
"
|
| 1883 |
-
"
|
| 1884 |
},
|
| 1885 |
{
|
| 1886 |
"name": "Portuguese based",
|
| 1887 |
"iso_1_code": null,
|
| 1888 |
"iso_3_code": null,
|
| 1889 |
-
"tokenizers": {
|
| 1890 |
-
"Arab": {
|
| 1891 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1892 |
-
"original_lang_name": "malay",
|
| 1893 |
-
"original_lang_code": "msa",
|
| 1894 |
-
"scripts": [
|
| 1895 |
-
"Latn",
|
| 1896 |
-
"Arab",
|
| 1897 |
-
"Thai"
|
| 1898 |
-
],
|
| 1899 |
-
"class_name": "SpaCyTokenizer",
|
| 1900 |
-
"macrolanguage": true
|
| 1901 |
-
},
|
| 1902 |
-
"Latn": {
|
| 1903 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1904 |
-
"original_lang_name": "malay",
|
| 1905 |
-
"original_lang_code": "msa",
|
| 1906 |
-
"scripts": [
|
| 1907 |
-
"Latn",
|
| 1908 |
-
"Arab",
|
| 1909 |
-
"Thai"
|
| 1910 |
-
],
|
| 1911 |
-
"class_name": "SpaCyTokenizer",
|
| 1912 |
-
"macrolanguage": true
|
| 1913 |
-
},
|
| 1914 |
-
"Thai": {
|
| 1915 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 1916 |
-
"original_lang_name": "malay",
|
| 1917 |
-
"original_lang_code": "msa",
|
| 1918 |
-
"scripts": [
|
| 1919 |
-
"Latn",
|
| 1920 |
-
"Arab",
|
| 1921 |
-
"Thai"
|
| 1922 |
-
],
|
| 1923 |
-
"class_name": "SpaCyTokenizer",
|
| 1924 |
-
"macrolanguage": true
|
| 1925 |
-
}
|
| 1926 |
-
},
|
| 1927 |
"children": [
|
| 1928 |
{
|
| 1929 |
"name": "Angolar",
|
| 1930 |
"iso_1_code": null,
|
| 1931 |
"iso_3_code": "aoa",
|
| 1932 |
-
"tokenizers": {},
|
| 1933 |
"children": [],
|
|
|
|
| 1934 |
"node_i": "3581",
|
| 1935 |
-
"
|
| 1936 |
-
"
|
| 1937 |
},
|
| 1938 |
{
|
| 1939 |
"name": "Cafundo Creole",
|
| 1940 |
"iso_1_code": null,
|
| 1941 |
"iso_3_code": "ccd",
|
| 1942 |
-
"tokenizers": {},
|
| 1943 |
"children": [],
|
|
|
|
| 1944 |
"node_i": "3582",
|
| 1945 |
-
"
|
| 1946 |
-
"
|
| 1947 |
},
|
| 1948 |
{
|
| 1949 |
"name": "S\u00e3otomense",
|
| 1950 |
"iso_1_code": null,
|
| 1951 |
"iso_3_code": "cri",
|
|
|
|
| 1952 |
"tokenizers": {
|
| 1953 |
"Latn": {
|
| 1954 |
-
"full_object": "
|
| 1955 |
-
"original_lang_name": "
|
| 1956 |
-
"original_lang_code": "
|
| 1957 |
-
"
|
| 1958 |
-
|
| 1959 |
-
"Arab",
|
| 1960 |
-
"Thai"
|
| 1961 |
-
],
|
| 1962 |
-
"class_name": "SpaCyTokenizer",
|
| 1963 |
-
"macrolanguage": true
|
| 1964 |
}
|
| 1965 |
},
|
| 1966 |
-
"children": [],
|
| 1967 |
"node_i": "3583",
|
|
|
|
| 1968 |
"scripts": [
|
| 1969 |
"Latn"
|
| 1970 |
-
]
|
| 1971 |
-
"own_tokenizer": false
|
| 1972 |
},
|
| 1973 |
{
|
| 1974 |
"name": "Fa d\u2019Ambu",
|
| 1975 |
"iso_1_code": null,
|
| 1976 |
"iso_3_code": "fab",
|
| 1977 |
-
"tokenizers": {},
|
| 1978 |
"children": [],
|
|
|
|
| 1979 |
"node_i": "3584",
|
| 1980 |
-
"
|
| 1981 |
-
"
|
| 1982 |
},
|
| 1983 |
{
|
| 1984 |
"name": "Indo-Portuguese",
|
| 1985 |
"iso_1_code": null,
|
| 1986 |
"iso_3_code": "idb",
|
| 1987 |
-
"tokenizers": {},
|
| 1988 |
"children": [],
|
|
|
|
| 1989 |
"node_i": "3585",
|
| 1990 |
-
"
|
| 1991 |
-
"
|
| 1992 |
},
|
| 1993 |
{
|
| 1994 |
"name": "Kabuverdianu",
|
| 1995 |
"iso_1_code": null,
|
| 1996 |
"iso_3_code": "kea",
|
|
|
|
| 1997 |
"tokenizers": {
|
| 1998 |
"Latn": {
|
| 1999 |
-
"full_object": "
|
| 2000 |
-
"original_lang_name": "
|
| 2001 |
-
"original_lang_code": "
|
| 2002 |
-
"
|
| 2003 |
-
|
| 2004 |
-
"Arab",
|
| 2005 |
-
"Thai"
|
| 2006 |
-
],
|
| 2007 |
-
"class_name": "SpaCyTokenizer",
|
| 2008 |
-
"macrolanguage": true
|
| 2009 |
}
|
| 2010 |
},
|
| 2011 |
-
"children": [],
|
| 2012 |
"node_i": "3586",
|
|
|
|
| 2013 |
"scripts": [
|
| 2014 |
"Latn"
|
| 2015 |
-
]
|
| 2016 |
-
"own_tokenizer": false
|
| 2017 |
},
|
| 2018 |
{
|
| 2019 |
"name": "Malaccan Portuguese Creole",
|
| 2020 |
"iso_1_code": null,
|
| 2021 |
"iso_3_code": "mcm",
|
| 2022 |
-
"tokenizers": {},
|
| 2023 |
"children": [],
|
|
|
|
| 2024 |
"node_i": "3587",
|
| 2025 |
-
"
|
| 2026 |
-
"
|
| 2027 |
},
|
| 2028 |
{
|
| 2029 |
"name": "Macanese",
|
| 2030 |
"iso_1_code": null,
|
| 2031 |
"iso_3_code": "mzs",
|
| 2032 |
-
"tokenizers": {},
|
| 2033 |
"children": [],
|
|
|
|
| 2034 |
"node_i": "3588",
|
| 2035 |
-
"
|
| 2036 |
-
"
|
| 2037 |
},
|
| 2038 |
{
|
| 2039 |
"name": "Guinea-Bissau Creole",
|
| 2040 |
"iso_1_code": null,
|
| 2041 |
"iso_3_code": "pov",
|
|
|
|
| 2042 |
"tokenizers": {
|
| 2043 |
"Latn": {
|
| 2044 |
-
"full_object": "
|
| 2045 |
-
"original_lang_name": "
|
| 2046 |
-
"original_lang_code": "
|
| 2047 |
-
"
|
| 2048 |
-
|
| 2049 |
-
"Arab",
|
| 2050 |
-
"Thai"
|
| 2051 |
-
],
|
| 2052 |
-
"class_name": "SpaCyTokenizer",
|
| 2053 |
-
"macrolanguage": true
|
| 2054 |
}
|
| 2055 |
},
|
| 2056 |
-
"children": [],
|
| 2057 |
"node_i": "3589",
|
|
|
|
| 2058 |
"scripts": [
|
| 2059 |
"Latn"
|
| 2060 |
-
]
|
| 2061 |
-
"own_tokenizer": false
|
| 2062 |
},
|
| 2063 |
{
|
| 2064 |
"name": "Principense",
|
| 2065 |
"iso_1_code": null,
|
| 2066 |
"iso_3_code": "pre",
|
| 2067 |
-
"tokenizers": {},
|
| 2068 |
"children": [],
|
|
|
|
| 2069 |
"node_i": "3590",
|
| 2070 |
-
"
|
| 2071 |
-
"
|
| 2072 |
},
|
| 2073 |
{
|
| 2074 |
"name": "Ternate\u00f1o",
|
| 2075 |
"iso_1_code": null,
|
| 2076 |
"iso_3_code": "tmg",
|
| 2077 |
-
"tokenizers": {},
|
| 2078 |
"children": [],
|
|
|
|
| 2079 |
"node_i": "3591",
|
| 2080 |
-
"
|
| 2081 |
-
"
|
| 2082 |
},
|
| 2083 |
{
|
| 2084 |
"name": "Pidgin, Timor",
|
| 2085 |
"iso_1_code": null,
|
| 2086 |
"iso_3_code": "tvy",
|
| 2087 |
-
"tokenizers": {},
|
| 2088 |
"children": [],
|
|
|
|
| 2089 |
"node_i": "3592",
|
| 2090 |
-
"
|
| 2091 |
-
"
|
| 2092 |
},
|
| 2093 |
{
|
| 2094 |
"name": "Korlai Portuguese Creole",
|
| 2095 |
"iso_1_code": null,
|
| 2096 |
"iso_3_code": "vkp",
|
| 2097 |
-
"tokenizers": {},
|
| 2098 |
"children": [],
|
|
|
|
| 2099 |
"node_i": "3593",
|
| 2100 |
-
"
|
| 2101 |
-
"
|
| 2102 |
}
|
| 2103 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2104 |
"node_i": "3580",
|
| 2105 |
-
"
|
| 2106 |
-
"
|
| 2107 |
},
|
| 2108 |
{
|
| 2109 |
"name": "Spanish based",
|
| 2110 |
"iso_1_code": null,
|
| 2111 |
"iso_3_code": null,
|
| 2112 |
-
"tokenizers": {
|
| 2113 |
-
"Arab": {
|
| 2114 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2115 |
-
"original_lang_name": "malay",
|
| 2116 |
-
"original_lang_code": "msa",
|
| 2117 |
-
"scripts": [
|
| 2118 |
-
"Latn",
|
| 2119 |
-
"Arab",
|
| 2120 |
-
"Thai"
|
| 2121 |
-
],
|
| 2122 |
-
"class_name": "SpaCyTokenizer",
|
| 2123 |
-
"macrolanguage": true
|
| 2124 |
-
},
|
| 2125 |
-
"Latn": {
|
| 2126 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2127 |
-
"original_lang_name": "malay",
|
| 2128 |
-
"original_lang_code": "msa",
|
| 2129 |
-
"scripts": [
|
| 2130 |
-
"Latn",
|
| 2131 |
-
"Arab",
|
| 2132 |
-
"Thai"
|
| 2133 |
-
],
|
| 2134 |
-
"class_name": "SpaCyTokenizer",
|
| 2135 |
-
"macrolanguage": true
|
| 2136 |
-
},
|
| 2137 |
-
"Thai": {
|
| 2138 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2139 |
-
"original_lang_name": "malay",
|
| 2140 |
-
"original_lang_code": "msa",
|
| 2141 |
-
"scripts": [
|
| 2142 |
-
"Latn",
|
| 2143 |
-
"Arab",
|
| 2144 |
-
"Thai"
|
| 2145 |
-
],
|
| 2146 |
-
"class_name": "SpaCyTokenizer",
|
| 2147 |
-
"macrolanguage": true
|
| 2148 |
-
}
|
| 2149 |
-
},
|
| 2150 |
"children": [
|
| 2151 |
{
|
| 2152 |
"name": "Chavacano",
|
| 2153 |
"iso_1_code": null,
|
| 2154 |
"iso_3_code": "cbk",
|
|
|
|
| 2155 |
"tokenizers": {
|
| 2156 |
"Latn": {
|
| 2157 |
-
"full_object": "
|
| 2158 |
-
"original_lang_name": "
|
| 2159 |
-
"original_lang_code": "
|
| 2160 |
-
"
|
| 2161 |
-
|
| 2162 |
-
"Arab",
|
| 2163 |
-
"Thai"
|
| 2164 |
-
],
|
| 2165 |
-
"class_name": "SpaCyTokenizer",
|
| 2166 |
-
"macrolanguage": true
|
| 2167 |
}
|
| 2168 |
},
|
| 2169 |
-
"children": [],
|
| 2170 |
"node_i": "3595",
|
|
|
|
| 2171 |
"scripts": [
|
| 2172 |
"Latn"
|
| 2173 |
-
]
|
| 2174 |
-
"own_tokenizer": false
|
| 2175 |
},
|
| 2176 |
{
|
| 2177 |
"name": "Palenquero",
|
| 2178 |
"iso_1_code": null,
|
| 2179 |
"iso_3_code": "pln",
|
| 2180 |
-
"tokenizers": {},
|
| 2181 |
"children": [],
|
|
|
|
| 2182 |
"node_i": "3596",
|
| 2183 |
-
"
|
| 2184 |
-
"
|
| 2185 |
}
|
| 2186 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2187 |
"node_i": "3594",
|
| 2188 |
-
"
|
| 2189 |
-
"
|
| 2190 |
},
|
| 2191 |
{
|
| 2192 |
"name": "Swahili based",
|
| 2193 |
"iso_1_code": null,
|
| 2194 |
"iso_3_code": null,
|
| 2195 |
-
"tokenizers": {},
|
| 2196 |
"children": [
|
| 2197 |
{
|
| 2198 |
"name": "Cutchi-Swahili",
|
| 2199 |
"iso_1_code": null,
|
| 2200 |
"iso_3_code": "ccl",
|
| 2201 |
-
"tokenizers": {},
|
| 2202 |
"children": [],
|
|
|
|
| 2203 |
"node_i": "3598",
|
| 2204 |
-
"
|
| 2205 |
-
"
|
| 2206 |
}
|
| 2207 |
],
|
|
|
|
| 2208 |
"node_i": "3597",
|
| 2209 |
-
"
|
| 2210 |
-
"
|
| 2211 |
},
|
| 2212 |
{
|
| 2213 |
"name": "Tetun based",
|
| 2214 |
"iso_1_code": null,
|
| 2215 |
"iso_3_code": null,
|
| 2216 |
-
"tokenizers": {
|
| 2217 |
-
"Arab": {
|
| 2218 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2219 |
-
"original_lang_name": "malay",
|
| 2220 |
-
"original_lang_code": "msa",
|
| 2221 |
-
"scripts": [
|
| 2222 |
-
"Latn",
|
| 2223 |
-
"Arab",
|
| 2224 |
-
"Thai"
|
| 2225 |
-
],
|
| 2226 |
-
"class_name": "SpaCyTokenizer",
|
| 2227 |
-
"macrolanguage": true
|
| 2228 |
-
},
|
| 2229 |
-
"Latn": {
|
| 2230 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2231 |
-
"original_lang_name": "malay",
|
| 2232 |
-
"original_lang_code": "msa",
|
| 2233 |
-
"scripts": [
|
| 2234 |
-
"Latn",
|
| 2235 |
-
"Arab",
|
| 2236 |
-
"Thai"
|
| 2237 |
-
],
|
| 2238 |
-
"class_name": "SpaCyTokenizer",
|
| 2239 |
-
"macrolanguage": true
|
| 2240 |
-
},
|
| 2241 |
-
"Thai": {
|
| 2242 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
| 2243 |
-
"original_lang_name": "malay",
|
| 2244 |
-
"original_lang_code": "msa",
|
| 2245 |
-
"scripts": [
|
| 2246 |
-
"Latn",
|
| 2247 |
-
"Arab",
|
| 2248 |
-
"Thai"
|
| 2249 |
-
],
|
| 2250 |
-
"class_name": "SpaCyTokenizer",
|
| 2251 |
-
"macrolanguage": true
|
| 2252 |
-
}
|
| 2253 |
-
},
|
| 2254 |
"children": [
|
| 2255 |
{
|
| 2256 |
"name": "Tetun Dili",
|
| 2257 |
"iso_1_code": null,
|
| 2258 |
"iso_3_code": "tdt",
|
|
|
|
| 2259 |
"tokenizers": {
|
| 2260 |
"Latn": {
|
| 2261 |
-
"full_object": "
|
| 2262 |
-
"original_lang_name": "
|
| 2263 |
-
"original_lang_code": "
|
| 2264 |
-
"
|
| 2265 |
-
|
| 2266 |
-
"Arab",
|
| 2267 |
-
"Thai"
|
| 2268 |
-
],
|
| 2269 |
-
"class_name": "SpaCyTokenizer",
|
| 2270 |
-
"macrolanguage": true
|
| 2271 |
}
|
| 2272 |
},
|
| 2273 |
-
"children": [],
|
| 2274 |
"node_i": "3600",
|
|
|
|
| 2275 |
"scripts": [
|
| 2276 |
"Latn"
|
| 2277 |
-
]
|
| 2278 |
-
"own_tokenizer": false
|
| 2279 |
}
|
| 2280 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2281 |
"node_i": "3599",
|
| 2282 |
-
"
|
| 2283 |
-
"
|
| 2284 |
}
|
| 2285 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2286 |
"node_i": "3481",
|
| 2287 |
-
"
|
| 2288 |
-
"
|
| 2289 |
}
|
|
|
|
| 2 |
"name": "Creole",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Afrikaans based",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Flaaitaal",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "fly",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3483",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Oorlams",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "oor",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3484",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
+
"scripts": []
|
| 30 |
}
|
| 31 |
],
|
| 32 |
+
"tokenizers": {},
|
| 33 |
"node_i": "3482",
|
| 34 |
+
"native_tokenizers": [],
|
| 35 |
+
"scripts": []
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Arabic based",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"children": [
|
| 42 |
{
|
| 43 |
"name": "Nubi",
|
| 44 |
"iso_1_code": null,
|
| 45 |
"iso_3_code": "kcn",
|
|
|
|
| 46 |
"children": [],
|
| 47 |
+
"tokenizers": {},
|
| 48 |
"node_i": "3486",
|
| 49 |
+
"native_tokenizers": [],
|
| 50 |
+
"scripts": []
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"name": "Arabic, Juba",
|
| 54 |
"iso_1_code": "ar",
|
| 55 |
"iso_3_code": "pga",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"children": [],
|
| 57 |
+
"tokenizers": {},
|
| 58 |
"node_i": "3487",
|
| 59 |
+
"native_tokenizers": [],
|
| 60 |
+
"scripts": []
|
| 61 |
}
|
| 62 |
],
|
| 63 |
+
"tokenizers": {},
|
| 64 |
"node_i": "3485",
|
| 65 |
+
"native_tokenizers": [],
|
| 66 |
+
"scripts": []
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Assamese based",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": null,
|
|
|
|
| 72 |
"children": [
|
| 73 |
{
|
| 74 |
"name": "Nagamese",
|
| 75 |
"iso_1_code": null,
|
| 76 |
"iso_3_code": "nag",
|
|
|
|
| 77 |
"children": [],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3489",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
}
|
| 83 |
],
|
| 84 |
+
"tokenizers": {},
|
| 85 |
"node_i": "3488",
|
| 86 |
+
"native_tokenizers": [],
|
| 87 |
+
"scripts": []
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"name": "Dutch based",
|
| 91 |
"iso_1_code": null,
|
| 92 |
"iso_3_code": null,
|
|
|
|
| 93 |
"children": [
|
| 94 |
{
|
| 95 |
"name": "Berbice Dutch Creole",
|
| 96 |
"iso_1_code": null,
|
| 97 |
"iso_3_code": "brc",
|
|
|
|
| 98 |
"children": [],
|
| 99 |
+
"tokenizers": {},
|
| 100 |
"node_i": "3491",
|
| 101 |
+
"native_tokenizers": [],
|
| 102 |
+
"scripts": []
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"name": "Negerhollands",
|
| 106 |
"iso_1_code": null,
|
| 107 |
"iso_3_code": "dcr",
|
|
|
|
| 108 |
"children": [],
|
| 109 |
+
"tokenizers": {},
|
| 110 |
"node_i": "3492",
|
| 111 |
+
"native_tokenizers": [],
|
| 112 |
+
"scripts": []
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"name": "Javindo",
|
| 116 |
"iso_1_code": null,
|
| 117 |
"iso_3_code": "jvd",
|
|
|
|
| 118 |
"children": [],
|
| 119 |
+
"tokenizers": {},
|
| 120 |
"node_i": "3493",
|
| 121 |
+
"native_tokenizers": [],
|
| 122 |
+
"scripts": []
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"name": "Petjo",
|
| 126 |
"iso_1_code": null,
|
| 127 |
"iso_3_code": "pey",
|
|
|
|
| 128 |
"children": [],
|
| 129 |
+
"tokenizers": {},
|
| 130 |
"node_i": "3494",
|
| 131 |
+
"native_tokenizers": [],
|
| 132 |
+
"scripts": []
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"name": "Skepi Dutch Creole",
|
| 136 |
"iso_1_code": null,
|
| 137 |
"iso_3_code": "skw",
|
|
|
|
| 138 |
"children": [],
|
| 139 |
+
"tokenizers": {},
|
| 140 |
"node_i": "3495",
|
| 141 |
+
"native_tokenizers": [],
|
| 142 |
+
"scripts": []
|
| 143 |
}
|
| 144 |
],
|
| 145 |
+
"tokenizers": {},
|
| 146 |
"node_i": "3490",
|
| 147 |
+
"native_tokenizers": [],
|
| 148 |
+
"scripts": []
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"name": "English based",
|
| 152 |
"iso_1_code": null,
|
| 153 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
"children": [
|
| 155 |
{
|
| 156 |
"name": "Saramaccan",
|
| 157 |
"iso_1_code": null,
|
| 158 |
"iso_3_code": "srm",
|
| 159 |
+
"children": [],
|
| 160 |
"tokenizers": {
|
| 161 |
"Latn": {
|
| 162 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 163 |
"original_lang_name": "nigerian_pidgin",
|
| 164 |
"original_lang_code": "pcm",
|
| 165 |
+
"script": "Latn",
|
| 166 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 167 |
}
|
| 168 |
},
|
|
|
|
| 169 |
"node_i": "3497",
|
| 170 |
+
"native_tokenizers": [],
|
| 171 |
"scripts": [
|
| 172 |
"Latn"
|
| 173 |
+
]
|
|
|
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"name": "Atlantic",
|
| 177 |
"iso_1_code": null,
|
| 178 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
"children": [
|
| 180 |
{
|
| 181 |
"name": "Eastern",
|
| 182 |
"iso_1_code": null,
|
| 183 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
"children": [
|
| 185 |
{
|
| 186 |
"name": "Turks and Caicos English Creole",
|
| 187 |
"iso_1_code": null,
|
| 188 |
"iso_3_code": "tch",
|
|
|
|
| 189 |
"children": [],
|
| 190 |
+
"tokenizers": {},
|
| 191 |
"node_i": "3500",
|
| 192 |
+
"native_tokenizers": [],
|
| 193 |
+
"scripts": []
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"name": "Northern",
|
| 197 |
"iso_1_code": null,
|
| 198 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
"children": [
|
| 200 |
{
|
| 201 |
"name": "Afro-Seminole Creole",
|
| 202 |
"iso_1_code": null,
|
| 203 |
"iso_3_code": "afs",
|
|
|
|
| 204 |
"children": [],
|
| 205 |
+
"tokenizers": {},
|
| 206 |
"node_i": "3502",
|
| 207 |
+
"native_tokenizers": [],
|
| 208 |
+
"scripts": []
|
| 209 |
},
|
| 210 |
{
|
| 211 |
"name": "Bahamas English Creole",
|
| 212 |
"iso_1_code": null,
|
| 213 |
"iso_3_code": "bah",
|
|
|
|
| 214 |
"children": [],
|
| 215 |
+
"tokenizers": {},
|
| 216 |
"node_i": "3503",
|
| 217 |
+
"native_tokenizers": [],
|
| 218 |
+
"scripts": []
|
| 219 |
},
|
| 220 |
{
|
| 221 |
"name": "Sea Island English Creole",
|
| 222 |
"iso_1_code": null,
|
| 223 |
"iso_3_code": "gul",
|
| 224 |
+
"children": [],
|
| 225 |
"tokenizers": {
|
| 226 |
"Latn": {
|
| 227 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 228 |
"original_lang_name": "nigerian_pidgin",
|
| 229 |
"original_lang_code": "pcm",
|
| 230 |
+
"script": "Latn",
|
| 231 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 232 |
}
|
| 233 |
},
|
|
|
|
| 234 |
"node_i": "3504",
|
| 235 |
+
"native_tokenizers": [],
|
| 236 |
"scripts": [
|
| 237 |
"Latn"
|
| 238 |
+
]
|
|
|
|
| 239 |
}
|
| 240 |
],
|
| 241 |
+
"tokenizers": {
|
| 242 |
+
"Latn": {
|
| 243 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 244 |
+
"original_lang_name": "nigerian_pidgin",
|
| 245 |
+
"original_lang_code": "pcm",
|
| 246 |
+
"script": "Latn",
|
| 247 |
+
"class_name": "StanzaTokenizer"
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
"node_i": "3501",
|
| 251 |
+
"native_tokenizers": [],
|
| 252 |
+
"scripts": []
|
| 253 |
},
|
| 254 |
{
|
| 255 |
"name": "Southern",
|
| 256 |
"iso_1_code": null,
|
| 257 |
"iso_3_code": null,
|
|
|
|
| 258 |
"children": [
|
| 259 |
{
|
| 260 |
"name": "Leeward Caribbean English Creole",
|
| 261 |
"iso_1_code": null,
|
| 262 |
"iso_3_code": "aig",
|
|
|
|
| 263 |
"children": [],
|
| 264 |
+
"tokenizers": {},
|
| 265 |
"node_i": "3506",
|
| 266 |
+
"native_tokenizers": [],
|
| 267 |
+
"scripts": []
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"name": "Bajan",
|
| 271 |
"iso_1_code": null,
|
| 272 |
"iso_3_code": "bjs",
|
|
|
|
| 273 |
"children": [],
|
| 274 |
+
"tokenizers": {},
|
| 275 |
"node_i": "3507",
|
| 276 |
+
"native_tokenizers": [],
|
| 277 |
+
"scripts": []
|
| 278 |
},
|
| 279 |
{
|
| 280 |
"name": "Grenadian English Creole",
|
| 281 |
"iso_1_code": null,
|
| 282 |
"iso_3_code": "gcl",
|
|
|
|
| 283 |
"children": [],
|
| 284 |
+
"tokenizers": {},
|
| 285 |
"node_i": "3508",
|
| 286 |
+
"native_tokenizers": [],
|
| 287 |
+
"scripts": []
|
| 288 |
},
|
| 289 |
{
|
| 290 |
"name": "Guyanese English Creole",
|
| 291 |
"iso_1_code": null,
|
| 292 |
"iso_3_code": "gyn",
|
|
|
|
| 293 |
"children": [],
|
| 294 |
+
"tokenizers": {},
|
| 295 |
"node_i": "3509",
|
| 296 |
+
"native_tokenizers": [],
|
| 297 |
+
"scripts": []
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"name": "Vincentian English Creole",
|
| 301 |
"iso_1_code": null,
|
| 302 |
"iso_3_code": "svc",
|
|
|
|
| 303 |
"children": [],
|
| 304 |
+
"tokenizers": {},
|
| 305 |
"node_i": "3510",
|
| 306 |
+
"native_tokenizers": [],
|
| 307 |
+
"scripts": []
|
| 308 |
},
|
| 309 |
{
|
| 310 |
"name": "Tobagonian English Creole",
|
| 311 |
"iso_1_code": null,
|
| 312 |
"iso_3_code": "tgh",
|
|
|
|
| 313 |
"children": [],
|
| 314 |
+
"tokenizers": {},
|
| 315 |
"node_i": "3511",
|
| 316 |
+
"native_tokenizers": [],
|
| 317 |
+
"scripts": []
|
| 318 |
},
|
| 319 |
{
|
| 320 |
"name": "Trinidadian English Creole",
|
| 321 |
"iso_1_code": null,
|
| 322 |
"iso_3_code": "trf",
|
|
|
|
| 323 |
"children": [],
|
| 324 |
+
"tokenizers": {},
|
| 325 |
"node_i": "3512",
|
| 326 |
+
"native_tokenizers": [],
|
| 327 |
+
"scripts": []
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"name": "Virgin Islands English Creole",
|
| 331 |
"iso_1_code": null,
|
| 332 |
"iso_3_code": "vic",
|
|
|
|
| 333 |
"children": [],
|
| 334 |
+
"tokenizers": {},
|
| 335 |
"node_i": "3513",
|
| 336 |
+
"native_tokenizers": [],
|
| 337 |
+
"scripts": []
|
| 338 |
}
|
| 339 |
],
|
| 340 |
+
"tokenizers": {},
|
| 341 |
"node_i": "3505",
|
| 342 |
+
"native_tokenizers": [],
|
| 343 |
+
"scripts": []
|
| 344 |
}
|
| 345 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
"tokenizers": {
|
| 347 |
"Latn": {
|
| 348 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 349 |
"original_lang_name": "nigerian_pidgin",
|
| 350 |
"original_lang_code": "pcm",
|
| 351 |
+
"script": "Latn",
|
| 352 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 353 |
}
|
| 354 |
},
|
| 355 |
+
"node_i": "3499",
|
| 356 |
+
"native_tokenizers": [],
|
| 357 |
+
"scripts": []
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"name": "Krio",
|
| 361 |
+
"iso_1_code": null,
|
| 362 |
+
"iso_3_code": null,
|
| 363 |
"children": [
|
| 364 |
{
|
| 365 |
"name": "Equatorial Guinean Pidgin",
|
| 366 |
"iso_1_code": null,
|
| 367 |
"iso_3_code": "fpe",
|
|
|
|
| 368 |
"children": [],
|
| 369 |
+
"tokenizers": {},
|
| 370 |
"node_i": "3515",
|
| 371 |
+
"native_tokenizers": [],
|
| 372 |
+
"scripts": []
|
| 373 |
},
|
| 374 |
{
|
| 375 |
"name": "Ghanaian Pidgin English",
|
| 376 |
"iso_1_code": null,
|
| 377 |
"iso_3_code": "gpe",
|
|
|
|
| 378 |
"children": [],
|
| 379 |
+
"tokenizers": {},
|
| 380 |
"node_i": "3516",
|
| 381 |
+
"native_tokenizers": [],
|
| 382 |
+
"scripts": []
|
| 383 |
},
|
| 384 |
{
|
| 385 |
"name": "Krio",
|
| 386 |
"iso_1_code": null,
|
| 387 |
"iso_3_code": "kri",
|
| 388 |
+
"children": [],
|
| 389 |
"tokenizers": {
|
| 390 |
"Latn": {
|
| 391 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 392 |
"original_lang_name": "nigerian_pidgin",
|
| 393 |
"original_lang_code": "pcm",
|
| 394 |
+
"script": "Latn",
|
| 395 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 396 |
}
|
| 397 |
},
|
|
|
|
| 398 |
"node_i": "3517",
|
| 399 |
+
"native_tokenizers": [],
|
| 400 |
"scripts": [
|
| 401 |
"Latn"
|
| 402 |
+
]
|
|
|
|
| 403 |
},
|
| 404 |
{
|
| 405 |
"name": "Pidgin, Nigerian",
|
| 406 |
"iso_1_code": null,
|
| 407 |
"iso_3_code": "pcm",
|
| 408 |
+
"children": [],
|
| 409 |
"tokenizers": {
|
| 410 |
"Latn": {
|
| 411 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 412 |
"original_lang_name": "nigerian_pidgin",
|
| 413 |
"original_lang_code": "pcm",
|
| 414 |
+
"script": "Latn",
|
| 415 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 416 |
}
|
| 417 |
},
|
|
|
|
| 418 |
"node_i": "3518",
|
| 419 |
+
"native_tokenizers": [
|
| 420 |
"Latn"
|
| 421 |
],
|
| 422 |
+
"scripts": [
|
| 423 |
+
"Latn"
|
| 424 |
+
]
|
| 425 |
},
|
| 426 |
{
|
| 427 |
"name": "Pidgin, Cameroon",
|
| 428 |
"iso_1_code": null,
|
| 429 |
"iso_3_code": "wes",
|
| 430 |
+
"children": [],
|
| 431 |
"tokenizers": {
|
| 432 |
"Latn": {
|
| 433 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 434 |
"original_lang_name": "nigerian_pidgin",
|
| 435 |
"original_lang_code": "pcm",
|
| 436 |
+
"script": "Latn",
|
| 437 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 438 |
}
|
| 439 |
},
|
|
|
|
| 440 |
"node_i": "3519",
|
| 441 |
+
"native_tokenizers": [],
|
| 442 |
"scripts": [
|
| 443 |
"Latn"
|
| 444 |
+
]
|
|
|
|
| 445 |
}
|
| 446 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
"tokenizers": {
|
| 448 |
"Latn": {
|
| 449 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 450 |
"original_lang_name": "nigerian_pidgin",
|
| 451 |
"original_lang_code": "pcm",
|
| 452 |
+
"script": "Latn",
|
| 453 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 454 |
}
|
| 455 |
},
|
| 456 |
+
"node_i": "3514",
|
| 457 |
+
"native_tokenizers": [],
|
| 458 |
+
"scripts": []
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"name": "Suriname",
|
| 462 |
+
"iso_1_code": null,
|
| 463 |
+
"iso_3_code": null,
|
| 464 |
"children": [
|
| 465 |
{
|
| 466 |
"name": "Sranan Tongo",
|
| 467 |
"iso_1_code": null,
|
| 468 |
"iso_3_code": "srn",
|
| 469 |
+
"children": [],
|
| 470 |
"tokenizers": {
|
| 471 |
"Latn": {
|
| 472 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 473 |
"original_lang_name": "nigerian_pidgin",
|
| 474 |
"original_lang_code": "pcm",
|
| 475 |
+
"script": "Latn",
|
| 476 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 477 |
}
|
| 478 |
},
|
|
|
|
| 479 |
"node_i": "3521",
|
| 480 |
+
"native_tokenizers": [],
|
| 481 |
"scripts": [
|
| 482 |
"Latn"
|
| 483 |
+
]
|
|
|
|
| 484 |
},
|
| 485 |
{
|
| 486 |
"name": "Ndyuka",
|
| 487 |
"iso_1_code": null,
|
| 488 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
"children": [
|
| 490 |
{
|
| 491 |
"name": "Aukan",
|
| 492 |
"iso_1_code": null,
|
| 493 |
"iso_3_code": "djk",
|
| 494 |
+
"children": [],
|
| 495 |
"tokenizers": {
|
| 496 |
"Latn": {
|
| 497 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 498 |
"original_lang_name": "nigerian_pidgin",
|
| 499 |
"original_lang_code": "pcm",
|
| 500 |
+
"script": "Latn",
|
| 501 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 502 |
}
|
| 503 |
},
|
|
|
|
| 504 |
"node_i": "3523",
|
| 505 |
+
"native_tokenizers": [],
|
| 506 |
"scripts": [
|
| 507 |
"Latn"
|
| 508 |
+
]
|
|
|
|
| 509 |
},
|
| 510 |
{
|
| 511 |
"name": "Kwinti",
|
| 512 |
"iso_1_code": null,
|
| 513 |
"iso_3_code": "kww",
|
|
|
|
| 514 |
"children": [],
|
| 515 |
+
"tokenizers": {},
|
| 516 |
"node_i": "3524",
|
| 517 |
+
"native_tokenizers": [],
|
| 518 |
+
"scripts": []
|
| 519 |
}
|
| 520 |
],
|
| 521 |
+
"tokenizers": {
|
| 522 |
+
"Latn": {
|
| 523 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 524 |
+
"original_lang_name": "nigerian_pidgin",
|
| 525 |
+
"original_lang_code": "pcm",
|
| 526 |
+
"script": "Latn",
|
| 527 |
+
"class_name": "StanzaTokenizer"
|
| 528 |
+
}
|
| 529 |
+
},
|
| 530 |
"node_i": "3522",
|
| 531 |
+
"native_tokenizers": [],
|
| 532 |
+
"scripts": []
|
| 533 |
}
|
| 534 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
"tokenizers": {
|
| 536 |
"Latn": {
|
| 537 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 538 |
"original_lang_name": "nigerian_pidgin",
|
| 539 |
"original_lang_code": "pcm",
|
| 540 |
+
"script": "Latn",
|
| 541 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 542 |
}
|
| 543 |
},
|
| 544 |
+
"node_i": "3520",
|
| 545 |
+
"native_tokenizers": [],
|
| 546 |
+
"scripts": []
|
| 547 |
+
},
|
| 548 |
+
{
|
| 549 |
+
"name": "Western",
|
| 550 |
+
"iso_1_code": null,
|
| 551 |
+
"iso_3_code": null,
|
| 552 |
"children": [
|
| 553 |
{
|
| 554 |
"name": "Belize English Creole",
|
| 555 |
"iso_1_code": null,
|
| 556 |
"iso_3_code": "bzj",
|
| 557 |
+
"children": [],
|
| 558 |
"tokenizers": {
|
| 559 |
"Latn": {
|
| 560 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 561 |
"original_lang_name": "nigerian_pidgin",
|
| 562 |
"original_lang_code": "pcm",
|
| 563 |
+
"script": "Latn",
|
| 564 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 565 |
}
|
| 566 |
},
|
|
|
|
| 567 |
"node_i": "3526",
|
| 568 |
+
"native_tokenizers": [],
|
| 569 |
"scripts": [
|
| 570 |
"Latn"
|
| 571 |
+
]
|
|
|
|
| 572 |
},
|
| 573 |
{
|
| 574 |
"name": "Nicaragua English Creole",
|
| 575 |
"iso_1_code": null,
|
| 576 |
"iso_3_code": "bzk",
|
|
|
|
| 577 |
"children": [],
|
| 578 |
+
"tokenizers": {},
|
| 579 |
"node_i": "3527",
|
| 580 |
+
"native_tokenizers": [],
|
| 581 |
+
"scripts": []
|
| 582 |
},
|
| 583 |
{
|
| 584 |
"name": "Islander English Creole",
|
| 585 |
"iso_1_code": null,
|
| 586 |
"iso_3_code": "icr",
|
| 587 |
+
"children": [],
|
| 588 |
"tokenizers": {
|
| 589 |
"Latn": {
|
| 590 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 591 |
"original_lang_name": "nigerian_pidgin",
|
| 592 |
"original_lang_code": "pcm",
|
| 593 |
+
"script": "Latn",
|
| 594 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 595 |
}
|
| 596 |
},
|
|
|
|
| 597 |
"node_i": "3528",
|
| 598 |
+
"native_tokenizers": [],
|
| 599 |
"scripts": [
|
| 600 |
"Latn"
|
| 601 |
+
]
|
|
|
|
| 602 |
},
|
| 603 |
{
|
| 604 |
"name": "Jamaican English Creole",
|
| 605 |
"iso_1_code": null,
|
| 606 |
"iso_3_code": "jam",
|
| 607 |
+
"children": [],
|
| 608 |
"tokenizers": {
|
| 609 |
"Latn": {
|
| 610 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 611 |
"original_lang_name": "nigerian_pidgin",
|
| 612 |
"original_lang_code": "pcm",
|
| 613 |
+
"script": "Latn",
|
| 614 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 615 |
}
|
| 616 |
},
|
|
|
|
| 617 |
"node_i": "3529",
|
| 618 |
+
"native_tokenizers": [],
|
| 619 |
"scripts": [
|
| 620 |
"Latn"
|
| 621 |
+
]
|
|
|
|
| 622 |
}
|
| 623 |
],
|
| 624 |
+
"tokenizers": {
|
| 625 |
+
"Latn": {
|
| 626 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 627 |
+
"original_lang_name": "nigerian_pidgin",
|
| 628 |
+
"original_lang_code": "pcm",
|
| 629 |
+
"script": "Latn",
|
| 630 |
+
"class_name": "StanzaTokenizer"
|
| 631 |
+
}
|
| 632 |
+
},
|
| 633 |
"node_i": "3525",
|
| 634 |
+
"native_tokenizers": [],
|
| 635 |
+
"scripts": []
|
| 636 |
}
|
| 637 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
"tokenizers": {
|
| 639 |
"Latn": {
|
| 640 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 641 |
"original_lang_name": "nigerian_pidgin",
|
| 642 |
"original_lang_code": "pcm",
|
| 643 |
+
"script": "Latn",
|
| 644 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 645 |
}
|
| 646 |
},
|
| 647 |
+
"node_i": "3498",
|
| 648 |
+
"native_tokenizers": [],
|
| 649 |
+
"scripts": []
|
| 650 |
+
},
|
| 651 |
+
{
|
| 652 |
+
"name": "Pacific",
|
| 653 |
+
"iso_1_code": null,
|
| 654 |
+
"iso_3_code": null,
|
| 655 |
"children": [
|
| 656 |
{
|
| 657 |
"name": "Bislama",
|
| 658 |
"iso_1_code": "bi",
|
| 659 |
"iso_3_code": "bis",
|
| 660 |
+
"children": [],
|
| 661 |
"tokenizers": {
|
| 662 |
"Latn": {
|
| 663 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 664 |
"original_lang_name": "nigerian_pidgin",
|
| 665 |
"original_lang_code": "pcm",
|
| 666 |
+
"script": "Latn",
|
| 667 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 668 |
}
|
| 669 |
},
|
|
|
|
| 670 |
"node_i": "3531",
|
| 671 |
+
"native_tokenizers": [],
|
| 672 |
"scripts": [
|
| 673 |
"Latn"
|
| 674 |
+
]
|
|
|
|
| 675 |
},
|
| 676 |
{
|
| 677 |
"name": "Hawaii Pidgin",
|
| 678 |
"iso_1_code": null,
|
| 679 |
"iso_3_code": "hwc",
|
| 680 |
+
"children": [],
|
| 681 |
"tokenizers": {
|
| 682 |
"Latn": {
|
| 683 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 684 |
"original_lang_name": "nigerian_pidgin",
|
| 685 |
"original_lang_code": "pcm",
|
| 686 |
+
"script": "Latn",
|
| 687 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 688 |
}
|
| 689 |
},
|
|
|
|
| 690 |
"node_i": "3532",
|
| 691 |
+
"native_tokenizers": [],
|
| 692 |
"scripts": [
|
| 693 |
"Latn"
|
| 694 |
+
]
|
|
|
|
| 695 |
},
|
| 696 |
{
|
| 697 |
"name": "Ngatik Men\u2019s Creole",
|
| 698 |
"iso_1_code": null,
|
| 699 |
"iso_3_code": "ngm",
|
|
|
|
| 700 |
"children": [],
|
| 701 |
+
"tokenizers": {},
|
| 702 |
"node_i": "3533",
|
| 703 |
+
"native_tokenizers": [],
|
| 704 |
+
"scripts": []
|
| 705 |
},
|
| 706 |
{
|
| 707 |
"name": "Pitcairn-Norfolk",
|
| 708 |
"iso_1_code": null,
|
| 709 |
"iso_3_code": "pih",
|
|
|
|
| 710 |
"children": [],
|
| 711 |
+
"tokenizers": {},
|
| 712 |
"node_i": "3534",
|
| 713 |
+
"native_tokenizers": [],
|
| 714 |
+
"scripts": []
|
| 715 |
},
|
| 716 |
{
|
| 717 |
"name": "Pijin",
|
| 718 |
"iso_1_code": null,
|
| 719 |
"iso_3_code": "pis",
|
| 720 |
+
"children": [],
|
| 721 |
"tokenizers": {
|
| 722 |
"Latn": {
|
| 723 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 724 |
"original_lang_name": "nigerian_pidgin",
|
| 725 |
"original_lang_code": "pcm",
|
| 726 |
+
"script": "Latn",
|
| 727 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 728 |
}
|
| 729 |
},
|
|
|
|
| 730 |
"node_i": "3535",
|
| 731 |
+
"native_tokenizers": [],
|
| 732 |
"scripts": [
|
| 733 |
"Latn"
|
| 734 |
+
]
|
|
|
|
| 735 |
},
|
| 736 |
{
|
| 737 |
"name": "Kriol",
|
| 738 |
"iso_1_code": null,
|
| 739 |
"iso_3_code": "rop",
|
| 740 |
+
"children": [],
|
| 741 |
"tokenizers": {
|
| 742 |
"Latn": {
|
| 743 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 744 |
"original_lang_name": "nigerian_pidgin",
|
| 745 |
"original_lang_code": "pcm",
|
| 746 |
+
"script": "Latn",
|
| 747 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 748 |
}
|
| 749 |
},
|
|
|
|
| 750 |
"node_i": "3536",
|
| 751 |
+
"native_tokenizers": [],
|
| 752 |
"scripts": [
|
| 753 |
"Latn"
|
| 754 |
+
]
|
|
|
|
| 755 |
},
|
| 756 |
{
|
| 757 |
"name": "Torres Strait Creole",
|
| 758 |
"iso_1_code": null,
|
| 759 |
"iso_3_code": "tcs",
|
| 760 |
+
"children": [],
|
| 761 |
"tokenizers": {
|
| 762 |
"Latn": {
|
| 763 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 764 |
"original_lang_name": "nigerian_pidgin",
|
| 765 |
"original_lang_code": "pcm",
|
| 766 |
+
"script": "Latn",
|
| 767 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 768 |
}
|
| 769 |
},
|
|
|
|
| 770 |
"node_i": "3537",
|
| 771 |
+
"native_tokenizers": [],
|
| 772 |
"scripts": [
|
| 773 |
"Latn"
|
| 774 |
+
]
|
|
|
|
| 775 |
},
|
| 776 |
{
|
| 777 |
"name": "Tok Pisin",
|
| 778 |
"iso_1_code": null,
|
| 779 |
"iso_3_code": "tpi",
|
| 780 |
+
"children": [],
|
| 781 |
"tokenizers": {
|
| 782 |
"Latn": {
|
| 783 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 784 |
"original_lang_name": "nigerian_pidgin",
|
| 785 |
"original_lang_code": "pcm",
|
| 786 |
+
"script": "Latn",
|
| 787 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
| 788 |
}
|
| 789 |
},
|
|
|
|
| 790 |
"node_i": "3538",
|
| 791 |
+
"native_tokenizers": [],
|
| 792 |
"scripts": [
|
| 793 |
"Latn"
|
| 794 |
+
]
|
|
|
|
| 795 |
}
|
| 796 |
],
|
| 797 |
+
"tokenizers": {
|
| 798 |
+
"Latn": {
|
| 799 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 800 |
+
"original_lang_name": "nigerian_pidgin",
|
| 801 |
+
"original_lang_code": "pcm",
|
| 802 |
+
"script": "Latn",
|
| 803 |
+
"class_name": "StanzaTokenizer"
|
| 804 |
+
}
|
| 805 |
+
},
|
| 806 |
"node_i": "3530",
|
| 807 |
+
"native_tokenizers": [],
|
| 808 |
+
"scripts": []
|
| 809 |
}
|
| 810 |
],
|
| 811 |
+
"tokenizers": {
|
| 812 |
+
"Latn": {
|
| 813 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 814 |
+
"original_lang_name": "nigerian_pidgin",
|
| 815 |
+
"original_lang_code": "pcm",
|
| 816 |
+
"script": "Latn",
|
| 817 |
+
"class_name": "StanzaTokenizer"
|
| 818 |
+
}
|
| 819 |
+
},
|
| 820 |
"node_i": "3496",
|
| 821 |
+
"native_tokenizers": [],
|
| 822 |
+
"scripts": []
|
| 823 |
},
|
| 824 |
{
|
| 825 |
"name": "French based",
|
| 826 |
"iso_1_code": null,
|
| 827 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
"children": [
|
| 829 |
{
|
| 830 |
"name": "Lesser Antillean French Creole",
|
| 831 |
"iso_1_code": null,
|
| 832 |
"iso_3_code": "acf",
|
| 833 |
+
"children": [],
|
| 834 |
"tokenizers": {
|
| 835 |
"Latn": {
|
| 836 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 837 |
+
"original_lang_name": "nigerian_pidgin",
|
| 838 |
+
"original_lang_code": "pcm",
|
| 839 |
+
"script": "Latn",
|
| 840 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 841 |
}
|
| 842 |
},
|
|
|
|
| 843 |
"node_i": "3540",
|
| 844 |
+
"native_tokenizers": [],
|
| 845 |
"scripts": [
|
| 846 |
"Latn"
|
| 847 |
+
]
|
|
|
|
| 848 |
},
|
| 849 |
{
|
| 850 |
"name": "Tayo",
|
| 851 |
"iso_1_code": null,
|
| 852 |
"iso_3_code": "cks",
|
|
|
|
| 853 |
"children": [],
|
| 854 |
+
"tokenizers": {},
|
| 855 |
"node_i": "3541",
|
| 856 |
+
"native_tokenizers": [],
|
| 857 |
+
"scripts": []
|
| 858 |
},
|
| 859 |
{
|
| 860 |
"name": "Seychelles French Creole",
|
| 861 |
"iso_1_code": null,
|
| 862 |
"iso_3_code": "crs",
|
| 863 |
+
"children": [],
|
| 864 |
"tokenizers": {
|
| 865 |
"Latn": {
|
| 866 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 867 |
+
"original_lang_name": "nigerian_pidgin",
|
| 868 |
+
"original_lang_code": "pcm",
|
| 869 |
+
"script": "Latn",
|
| 870 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 871 |
}
|
| 872 |
},
|
|
|
|
| 873 |
"node_i": "3542",
|
| 874 |
+
"native_tokenizers": [],
|
| 875 |
"scripts": [
|
| 876 |
"Latn"
|
| 877 |
+
]
|
|
|
|
| 878 |
},
|
| 879 |
{
|
| 880 |
"name": "Guadeloupean French Creole",
|
| 881 |
"iso_1_code": null,
|
| 882 |
"iso_3_code": "gcf",
|
| 883 |
+
"children": [],
|
| 884 |
"tokenizers": {
|
| 885 |
"Latn": {
|
| 886 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 887 |
+
"original_lang_name": "nigerian_pidgin",
|
| 888 |
+
"original_lang_code": "pcm",
|
| 889 |
+
"script": "Latn",
|
| 890 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
}
|
| 892 |
},
|
|
|
|
| 893 |
"node_i": "3543",
|
| 894 |
+
"native_tokenizers": [],
|
| 895 |
"scripts": [
|
| 896 |
"Latn"
|
| 897 |
+
]
|
|
|
|
| 898 |
},
|
| 899 |
{
|
| 900 |
"name": "Guianese French Creole",
|
| 901 |
"iso_1_code": null,
|
| 902 |
"iso_3_code": "gcr",
|
| 903 |
+
"children": [],
|
| 904 |
"tokenizers": {
|
| 905 |
"Latn": {
|
| 906 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 907 |
+
"original_lang_name": "nigerian_pidgin",
|
| 908 |
+
"original_lang_code": "pcm",
|
| 909 |
+
"script": "Latn",
|
| 910 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 911 |
}
|
| 912 |
},
|
|
|
|
| 913 |
"node_i": "3544",
|
| 914 |
+
"native_tokenizers": [],
|
| 915 |
"scripts": [
|
| 916 |
"Latn"
|
| 917 |
+
]
|
|
|
|
| 918 |
},
|
| 919 |
{
|
| 920 |
"name": "Haitian Creole",
|
| 921 |
"iso_1_code": "ht",
|
| 922 |
"iso_3_code": "hat",
|
| 923 |
+
"children": [],
|
| 924 |
"tokenizers": {
|
| 925 |
"Latn": {
|
| 926 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 927 |
+
"original_lang_name": "nigerian_pidgin",
|
| 928 |
+
"original_lang_code": "pcm",
|
| 929 |
+
"script": "Latn",
|
| 930 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
}
|
| 932 |
},
|
|
|
|
| 933 |
"node_i": "3545",
|
| 934 |
+
"native_tokenizers": [],
|
| 935 |
"scripts": [
|
| 936 |
"Latn"
|
| 937 |
+
]
|
|
|
|
| 938 |
},
|
| 939 |
{
|
| 940 |
"name": "Karipuna French Creole",
|
| 941 |
"iso_1_code": null,
|
| 942 |
"iso_3_code": "kmv",
|
|
|
|
| 943 |
"children": [],
|
| 944 |
+
"tokenizers": {},
|
| 945 |
"node_i": "3546",
|
| 946 |
+
"native_tokenizers": [],
|
| 947 |
+
"scripts": []
|
| 948 |
},
|
| 949 |
{
|
| 950 |
"name": "Louisiana Creole",
|
| 951 |
"iso_1_code": null,
|
| 952 |
"iso_3_code": "lou",
|
|
|
|
| 953 |
"children": [],
|
| 954 |
+
"tokenizers": {},
|
| 955 |
"node_i": "3547",
|
| 956 |
+
"native_tokenizers": [],
|
| 957 |
+
"scripts": []
|
| 958 |
},
|
| 959 |
{
|
| 960 |
"name": "Morisyen",
|
| 961 |
"iso_1_code": null,
|
| 962 |
"iso_3_code": "mfe",
|
| 963 |
+
"children": [],
|
| 964 |
"tokenizers": {
|
| 965 |
"Latn": {
|
| 966 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 967 |
+
"original_lang_name": "nigerian_pidgin",
|
| 968 |
+
"original_lang_code": "pcm",
|
| 969 |
+
"script": "Latn",
|
| 970 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 971 |
}
|
| 972 |
},
|
|
|
|
| 973 |
"node_i": "3548",
|
| 974 |
+
"native_tokenizers": [],
|
| 975 |
"scripts": [
|
| 976 |
"Latn"
|
| 977 |
+
]
|
|
|
|
| 978 |
},
|
| 979 |
{
|
| 980 |
"name": "R\u00e9union French Creole",
|
| 981 |
"iso_1_code": null,
|
| 982 |
"iso_3_code": "rcf",
|
| 983 |
+
"children": [],
|
| 984 |
"tokenizers": {
|
| 985 |
"Latn": {
|
| 986 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 987 |
+
"original_lang_name": "nigerian_pidgin",
|
| 988 |
+
"original_lang_code": "pcm",
|
| 989 |
+
"script": "Latn",
|
| 990 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 991 |
}
|
| 992 |
},
|
|
|
|
| 993 |
"node_i": "3549",
|
| 994 |
+
"native_tokenizers": [],
|
| 995 |
"scripts": [
|
| 996 |
"Latn"
|
| 997 |
+
]
|
|
|
|
| 998 |
},
|
| 999 |
{
|
| 1000 |
"name": "San Miguel French Creole",
|
| 1001 |
"iso_1_code": null,
|
| 1002 |
"iso_3_code": "scf",
|
|
|
|
| 1003 |
"children": [],
|
| 1004 |
+
"tokenizers": {},
|
| 1005 |
"node_i": "3550",
|
| 1006 |
+
"native_tokenizers": [],
|
| 1007 |
+
"scripts": []
|
| 1008 |
}
|
| 1009 |
],
|
| 1010 |
+
"tokenizers": {
|
| 1011 |
+
"Latn": {
|
| 1012 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1013 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1014 |
+
"original_lang_code": "pcm",
|
| 1015 |
+
"script": "Latn",
|
| 1016 |
+
"class_name": "StanzaTokenizer"
|
| 1017 |
+
}
|
| 1018 |
+
},
|
| 1019 |
"node_i": "3539",
|
| 1020 |
+
"native_tokenizers": [],
|
| 1021 |
+
"scripts": []
|
| 1022 |
},
|
| 1023 |
{
|
| 1024 |
"name": "German based",
|
| 1025 |
"iso_1_code": null,
|
| 1026 |
"iso_3_code": null,
|
|
|
|
| 1027 |
"children": [
|
| 1028 |
{
|
| 1029 |
"name": "Unserdeutsch",
|
| 1030 |
"iso_1_code": null,
|
| 1031 |
"iso_3_code": "uln",
|
|
|
|
| 1032 |
"children": [],
|
| 1033 |
+
"tokenizers": {},
|
| 1034 |
"node_i": "3552",
|
| 1035 |
+
"native_tokenizers": [],
|
| 1036 |
+
"scripts": []
|
| 1037 |
}
|
| 1038 |
],
|
| 1039 |
+
"tokenizers": {},
|
| 1040 |
"node_i": "3551",
|
| 1041 |
+
"native_tokenizers": [],
|
| 1042 |
+
"scripts": []
|
| 1043 |
},
|
| 1044 |
{
|
| 1045 |
"name": "Hindi based",
|
| 1046 |
"iso_1_code": null,
|
| 1047 |
"iso_3_code": null,
|
|
|
|
| 1048 |
"children": [
|
| 1049 |
{
|
| 1050 |
"name": "Andaman Hindi Creole",
|
| 1051 |
"iso_1_code": null,
|
| 1052 |
"iso_3_code": "hca",
|
|
|
|
| 1053 |
"children": [],
|
| 1054 |
+
"tokenizers": {},
|
| 1055 |
"node_i": "3554",
|
| 1056 |
+
"native_tokenizers": [],
|
| 1057 |
+
"scripts": []
|
| 1058 |
}
|
| 1059 |
],
|
| 1060 |
+
"tokenizers": {},
|
| 1061 |
"node_i": "3553",
|
| 1062 |
+
"native_tokenizers": [],
|
| 1063 |
+
"scripts": []
|
| 1064 |
},
|
| 1065 |
{
|
| 1066 |
"name": "Iberian based",
|
| 1067 |
"iso_1_code": null,
|
| 1068 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1069 |
"children": [
|
| 1070 |
{
|
| 1071 |
"name": "Papiamentu",
|
| 1072 |
"iso_1_code": null,
|
| 1073 |
"iso_3_code": "pap",
|
| 1074 |
+
"children": [],
|
| 1075 |
"tokenizers": {
|
| 1076 |
"Latn": {
|
| 1077 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1078 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1079 |
+
"original_lang_code": "pcm",
|
| 1080 |
+
"script": "Latn",
|
| 1081 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1082 |
}
|
| 1083 |
},
|
|
|
|
| 1084 |
"node_i": "3556",
|
| 1085 |
+
"native_tokenizers": [],
|
| 1086 |
"scripts": [
|
| 1087 |
"Latn"
|
| 1088 |
+
]
|
|
|
|
| 1089 |
}
|
| 1090 |
],
|
| 1091 |
+
"tokenizers": {
|
| 1092 |
+
"Latn": {
|
| 1093 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1094 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1095 |
+
"original_lang_code": "pcm",
|
| 1096 |
+
"script": "Latn",
|
| 1097 |
+
"class_name": "StanzaTokenizer"
|
| 1098 |
+
}
|
| 1099 |
+
},
|
| 1100 |
"node_i": "3555",
|
| 1101 |
+
"native_tokenizers": [],
|
| 1102 |
+
"scripts": []
|
| 1103 |
},
|
| 1104 |
{
|
| 1105 |
"name": "Japanese-based",
|
| 1106 |
"iso_1_code": null,
|
| 1107 |
"iso_3_code": null,
|
|
|
|
| 1108 |
"children": [
|
| 1109 |
{
|
| 1110 |
"name": "Yilan Creole",
|
| 1111 |
"iso_1_code": null,
|
| 1112 |
"iso_3_code": "ycr",
|
|
|
|
| 1113 |
"children": [],
|
| 1114 |
+
"tokenizers": {},
|
| 1115 |
"node_i": "3558",
|
| 1116 |
+
"native_tokenizers": [],
|
| 1117 |
+
"scripts": []
|
| 1118 |
}
|
| 1119 |
],
|
| 1120 |
+
"tokenizers": {},
|
| 1121 |
"node_i": "3557",
|
| 1122 |
+
"native_tokenizers": [],
|
| 1123 |
+
"scripts": []
|
| 1124 |
},
|
| 1125 |
{
|
| 1126 |
"name": "Kongo based",
|
| 1127 |
"iso_1_code": null,
|
| 1128 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1129 |
"children": [
|
| 1130 |
{
|
| 1131 |
"name": "Kituba",
|
| 1132 |
"iso_1_code": null,
|
| 1133 |
"iso_3_code": "ktu",
|
| 1134 |
+
"children": [],
|
| 1135 |
"tokenizers": {
|
| 1136 |
"Latn": {
|
| 1137 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1138 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1139 |
+
"original_lang_code": "pcm",
|
| 1140 |
+
"script": "Latn",
|
| 1141 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
}
|
| 1143 |
},
|
|
|
|
| 1144 |
"node_i": "3560",
|
| 1145 |
+
"native_tokenizers": [],
|
| 1146 |
"scripts": [
|
| 1147 |
"Latn"
|
| 1148 |
+
]
|
|
|
|
| 1149 |
},
|
| 1150 |
{
|
| 1151 |
"name": "Kituba",
|
| 1152 |
"iso_1_code": null,
|
| 1153 |
"iso_3_code": "mkw",
|
|
|
|
| 1154 |
"children": [],
|
| 1155 |
+
"tokenizers": {},
|
| 1156 |
"node_i": "3561",
|
| 1157 |
+
"native_tokenizers": [],
|
| 1158 |
+
"scripts": []
|
| 1159 |
}
|
| 1160 |
],
|
| 1161 |
+
"tokenizers": {
|
| 1162 |
+
"Latn": {
|
| 1163 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1164 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1165 |
+
"original_lang_code": "pcm",
|
| 1166 |
+
"script": "Latn",
|
| 1167 |
+
"class_name": "StanzaTokenizer"
|
| 1168 |
+
}
|
| 1169 |
+
},
|
| 1170 |
"node_i": "3559",
|
| 1171 |
+
"native_tokenizers": [],
|
| 1172 |
+
"scripts": []
|
| 1173 |
},
|
| 1174 |
{
|
| 1175 |
"name": "Malay based",
|
| 1176 |
"iso_1_code": null,
|
| 1177 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1178 |
"children": [
|
| 1179 |
{
|
| 1180 |
"name": "Malay, Ambonese",
|
| 1181 |
"iso_1_code": null,
|
| 1182 |
"iso_3_code": "abs",
|
| 1183 |
+
"children": [],
|
| 1184 |
"tokenizers": {
|
| 1185 |
"Latn": {
|
| 1186 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1187 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1188 |
+
"original_lang_code": "pcm",
|
| 1189 |
+
"script": "Latn",
|
| 1190 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1191 |
}
|
| 1192 |
},
|
|
|
|
| 1193 |
"node_i": "3563",
|
| 1194 |
+
"native_tokenizers": [],
|
| 1195 |
"scripts": [
|
| 1196 |
"Latn"
|
| 1197 |
+
]
|
|
|
|
| 1198 |
},
|
| 1199 |
{
|
| 1200 |
"name": "Betawi",
|
| 1201 |
"iso_1_code": null,
|
| 1202 |
"iso_3_code": "bew",
|
| 1203 |
+
"children": [],
|
| 1204 |
"tokenizers": {
|
| 1205 |
"Latn": {
|
| 1206 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1207 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1208 |
+
"original_lang_code": "pcm",
|
| 1209 |
+
"script": "Latn",
|
| 1210 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
}
|
| 1212 |
},
|
|
|
|
| 1213 |
"node_i": "3564",
|
| 1214 |
+
"native_tokenizers": [],
|
| 1215 |
"scripts": [
|
| 1216 |
"Latn"
|
| 1217 |
+
]
|
|
|
|
| 1218 |
},
|
| 1219 |
{
|
| 1220 |
"name": "Malay, Banda",
|
| 1221 |
"iso_1_code": null,
|
| 1222 |
"iso_3_code": "bpq",
|
|
|
|
| 1223 |
"children": [],
|
| 1224 |
+
"tokenizers": {},
|
| 1225 |
"node_i": "3565",
|
| 1226 |
+
"native_tokenizers": [],
|
| 1227 |
+
"scripts": []
|
| 1228 |
},
|
| 1229 |
{
|
| 1230 |
"name": "Malaccan Malay Creole",
|
| 1231 |
"iso_1_code": null,
|
| 1232 |
"iso_3_code": "ccm",
|
|
|
|
| 1233 |
"children": [],
|
| 1234 |
+
"tokenizers": {},
|
| 1235 |
"node_i": "3566",
|
| 1236 |
+
"native_tokenizers": [],
|
| 1237 |
+
"scripts": []
|
| 1238 |
},
|
| 1239 |
{
|
| 1240 |
"name": "Malay, Cocos Islands",
|
| 1241 |
"iso_1_code": "ms",
|
| 1242 |
"iso_3_code": "coa",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1243 |
"children": [],
|
| 1244 |
+
"tokenizers": {},
|
| 1245 |
"node_i": "3567",
|
| 1246 |
+
"native_tokenizers": [],
|
| 1247 |
+
"scripts": []
|
| 1248 |
},
|
| 1249 |
{
|
| 1250 |
"name": "Malay, Larantuka",
|
| 1251 |
"iso_1_code": null,
|
| 1252 |
"iso_3_code": "lrt",
|
|
|
|
| 1253 |
"children": [],
|
| 1254 |
+
"tokenizers": {},
|
| 1255 |
"node_i": "3568",
|
| 1256 |
+
"native_tokenizers": [],
|
| 1257 |
+
"scripts": []
|
| 1258 |
},
|
| 1259 |
{
|
| 1260 |
"name": "Malay, North Moluccan",
|
| 1261 |
"iso_1_code": "ms",
|
| 1262 |
"iso_3_code": "max",
|
| 1263 |
+
"children": [],
|
| 1264 |
"tokenizers": {
|
| 1265 |
"Latn": {
|
| 1266 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1267 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1268 |
+
"original_lang_code": "pcm",
|
| 1269 |
+
"script": "Latn",
|
| 1270 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1271 |
}
|
| 1272 |
},
|
|
|
|
| 1273 |
"node_i": "3569",
|
| 1274 |
+
"native_tokenizers": [],
|
| 1275 |
"scripts": [
|
| 1276 |
"Latn"
|
| 1277 |
+
]
|
|
|
|
| 1278 |
},
|
| 1279 |
{
|
| 1280 |
"name": "Malay, Baba",
|
| 1281 |
"iso_1_code": null,
|
| 1282 |
"iso_3_code": "mbf",
|
| 1283 |
+
"children": [],
|
| 1284 |
"tokenizers": {
|
| 1285 |
"Latn": {
|
| 1286 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1287 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1288 |
+
"original_lang_code": "pcm",
|
| 1289 |
+
"script": "Latn",
|
| 1290 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1291 |
}
|
| 1292 |
},
|
|
|
|
| 1293 |
"node_i": "3570",
|
| 1294 |
+
"native_tokenizers": [],
|
| 1295 |
"scripts": [
|
| 1296 |
"Latn"
|
| 1297 |
+
]
|
|
|
|
| 1298 |
},
|
| 1299 |
{
|
| 1300 |
"name": "Malay, Balinese",
|
| 1301 |
"iso_1_code": null,
|
| 1302 |
"iso_3_code": "mhp",
|
|
|
|
| 1303 |
"children": [],
|
| 1304 |
+
"tokenizers": {},
|
| 1305 |
"node_i": "3571",
|
| 1306 |
+
"native_tokenizers": [],
|
| 1307 |
+
"scripts": []
|
| 1308 |
},
|
| 1309 |
{
|
| 1310 |
"name": "Malay, Kupang",
|
| 1311 |
"iso_1_code": null,
|
| 1312 |
"iso_3_code": "mkn",
|
| 1313 |
+
"children": [],
|
| 1314 |
"tokenizers": {
|
| 1315 |
"Latn": {
|
| 1316 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1317 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1318 |
+
"original_lang_code": "pcm",
|
| 1319 |
+
"script": "Latn",
|
| 1320 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1321 |
}
|
| 1322 |
},
|
|
|
|
| 1323 |
"node_i": "3572",
|
| 1324 |
+
"native_tokenizers": [],
|
| 1325 |
"scripts": [
|
| 1326 |
"Latn"
|
| 1327 |
+
]
|
|
|
|
| 1328 |
},
|
| 1329 |
{
|
| 1330 |
"name": "Indonesian, Peranakan",
|
| 1331 |
"iso_1_code": null,
|
| 1332 |
"iso_3_code": "pea",
|
|
|
|
| 1333 |
"children": [],
|
| 1334 |
+
"tokenizers": {},
|
| 1335 |
"node_i": "3573",
|
| 1336 |
+
"native_tokenizers": [],
|
| 1337 |
+
"scripts": []
|
| 1338 |
},
|
| 1339 |
{
|
| 1340 |
"name": "Malay, Papuan",
|
| 1341 |
"iso_1_code": null,
|
| 1342 |
"iso_3_code": "pmy",
|
|
|
|
| 1343 |
"children": [],
|
| 1344 |
+
"tokenizers": {},
|
| 1345 |
"node_i": "3574",
|
| 1346 |
+
"native_tokenizers": [],
|
| 1347 |
+
"scripts": []
|
| 1348 |
},
|
| 1349 |
{
|
| 1350 |
"name": "Sri Lankan Malay Creole",
|
| 1351 |
"iso_1_code": null,
|
| 1352 |
"iso_3_code": "sci",
|
|
|
|
| 1353 |
"children": [],
|
| 1354 |
+
"tokenizers": {},
|
| 1355 |
"node_i": "3575",
|
| 1356 |
+
"native_tokenizers": [],
|
| 1357 |
+
"scripts": []
|
| 1358 |
},
|
| 1359 |
{
|
| 1360 |
"name": "Malay, Manado",
|
| 1361 |
"iso_1_code": "ms",
|
| 1362 |
"iso_3_code": "xmm",
|
| 1363 |
+
"children": [],
|
| 1364 |
"tokenizers": {
|
| 1365 |
"Latn": {
|
| 1366 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1367 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1368 |
+
"original_lang_code": "pcm",
|
| 1369 |
+
"script": "Latn",
|
| 1370 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1371 |
}
|
| 1372 |
},
|
|
|
|
| 1373 |
"node_i": "3576",
|
| 1374 |
+
"native_tokenizers": [],
|
| 1375 |
"scripts": [
|
| 1376 |
"Latn"
|
| 1377 |
+
]
|
|
|
|
| 1378 |
}
|
| 1379 |
],
|
| 1380 |
+
"tokenizers": {
|
| 1381 |
+
"Latn": {
|
| 1382 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1383 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1384 |
+
"original_lang_code": "pcm",
|
| 1385 |
+
"script": "Latn",
|
| 1386 |
+
"class_name": "StanzaTokenizer"
|
| 1387 |
+
}
|
| 1388 |
+
},
|
| 1389 |
"node_i": "3562",
|
| 1390 |
+
"native_tokenizers": [],
|
| 1391 |
+
"scripts": []
|
| 1392 |
},
|
| 1393 |
{
|
| 1394 |
"name": "Ngbandi based",
|
| 1395 |
"iso_1_code": null,
|
| 1396 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1397 |
"children": [
|
| 1398 |
{
|
| 1399 |
"name": "Sango",
|
| 1400 |
"iso_1_code": "sg",
|
| 1401 |
"iso_3_code": "sag",
|
| 1402 |
+
"children": [],
|
| 1403 |
"tokenizers": {
|
| 1404 |
"Latn": {
|
| 1405 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1406 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1407 |
+
"original_lang_code": "pcm",
|
| 1408 |
+
"script": "Latn",
|
| 1409 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1410 |
}
|
| 1411 |
},
|
|
|
|
| 1412 |
"node_i": "3578",
|
| 1413 |
+
"native_tokenizers": [],
|
| 1414 |
"scripts": [
|
| 1415 |
"Latn"
|
| 1416 |
+
]
|
|
|
|
| 1417 |
},
|
| 1418 |
{
|
| 1419 |
"name": "Sango, Riverain",
|
| 1420 |
"iso_1_code": null,
|
| 1421 |
"iso_3_code": "snj",
|
|
|
|
| 1422 |
"children": [],
|
| 1423 |
+
"tokenizers": {},
|
| 1424 |
"node_i": "3579",
|
| 1425 |
+
"native_tokenizers": [],
|
| 1426 |
+
"scripts": []
|
| 1427 |
}
|
| 1428 |
],
|
| 1429 |
+
"tokenizers": {
|
| 1430 |
+
"Latn": {
|
| 1431 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1432 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1433 |
+
"original_lang_code": "pcm",
|
| 1434 |
+
"script": "Latn",
|
| 1435 |
+
"class_name": "StanzaTokenizer"
|
| 1436 |
+
}
|
| 1437 |
+
},
|
| 1438 |
"node_i": "3577",
|
| 1439 |
+
"native_tokenizers": [],
|
| 1440 |
+
"scripts": []
|
| 1441 |
},
|
| 1442 |
{
|
| 1443 |
"name": "Portuguese based",
|
| 1444 |
"iso_1_code": null,
|
| 1445 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1446 |
"children": [
|
| 1447 |
{
|
| 1448 |
"name": "Angolar",
|
| 1449 |
"iso_1_code": null,
|
| 1450 |
"iso_3_code": "aoa",
|
|
|
|
| 1451 |
"children": [],
|
| 1452 |
+
"tokenizers": {},
|
| 1453 |
"node_i": "3581",
|
| 1454 |
+
"native_tokenizers": [],
|
| 1455 |
+
"scripts": []
|
| 1456 |
},
|
| 1457 |
{
|
| 1458 |
"name": "Cafundo Creole",
|
| 1459 |
"iso_1_code": null,
|
| 1460 |
"iso_3_code": "ccd",
|
|
|
|
| 1461 |
"children": [],
|
| 1462 |
+
"tokenizers": {},
|
| 1463 |
"node_i": "3582",
|
| 1464 |
+
"native_tokenizers": [],
|
| 1465 |
+
"scripts": []
|
| 1466 |
},
|
| 1467 |
{
|
| 1468 |
"name": "S\u00e3otomense",
|
| 1469 |
"iso_1_code": null,
|
| 1470 |
"iso_3_code": "cri",
|
| 1471 |
+
"children": [],
|
| 1472 |
"tokenizers": {
|
| 1473 |
"Latn": {
|
| 1474 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1475 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1476 |
+
"original_lang_code": "pcm",
|
| 1477 |
+
"script": "Latn",
|
| 1478 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1479 |
}
|
| 1480 |
},
|
|
|
|
| 1481 |
"node_i": "3583",
|
| 1482 |
+
"native_tokenizers": [],
|
| 1483 |
"scripts": [
|
| 1484 |
"Latn"
|
| 1485 |
+
]
|
|
|
|
| 1486 |
},
|
| 1487 |
{
|
| 1488 |
"name": "Fa d\u2019Ambu",
|
| 1489 |
"iso_1_code": null,
|
| 1490 |
"iso_3_code": "fab",
|
|
|
|
| 1491 |
"children": [],
|
| 1492 |
+
"tokenizers": {},
|
| 1493 |
"node_i": "3584",
|
| 1494 |
+
"native_tokenizers": [],
|
| 1495 |
+
"scripts": []
|
| 1496 |
},
|
| 1497 |
{
|
| 1498 |
"name": "Indo-Portuguese",
|
| 1499 |
"iso_1_code": null,
|
| 1500 |
"iso_3_code": "idb",
|
|
|
|
| 1501 |
"children": [],
|
| 1502 |
+
"tokenizers": {},
|
| 1503 |
"node_i": "3585",
|
| 1504 |
+
"native_tokenizers": [],
|
| 1505 |
+
"scripts": []
|
| 1506 |
},
|
| 1507 |
{
|
| 1508 |
"name": "Kabuverdianu",
|
| 1509 |
"iso_1_code": null,
|
| 1510 |
"iso_3_code": "kea",
|
| 1511 |
+
"children": [],
|
| 1512 |
"tokenizers": {
|
| 1513 |
"Latn": {
|
| 1514 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1515 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1516 |
+
"original_lang_code": "pcm",
|
| 1517 |
+
"script": "Latn",
|
| 1518 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1519 |
}
|
| 1520 |
},
|
|
|
|
| 1521 |
"node_i": "3586",
|
| 1522 |
+
"native_tokenizers": [],
|
| 1523 |
"scripts": [
|
| 1524 |
"Latn"
|
| 1525 |
+
]
|
|
|
|
| 1526 |
},
|
| 1527 |
{
|
| 1528 |
"name": "Malaccan Portuguese Creole",
|
| 1529 |
"iso_1_code": null,
|
| 1530 |
"iso_3_code": "mcm",
|
|
|
|
| 1531 |
"children": [],
|
| 1532 |
+
"tokenizers": {},
|
| 1533 |
"node_i": "3587",
|
| 1534 |
+
"native_tokenizers": [],
|
| 1535 |
+
"scripts": []
|
| 1536 |
},
|
| 1537 |
{
|
| 1538 |
"name": "Macanese",
|
| 1539 |
"iso_1_code": null,
|
| 1540 |
"iso_3_code": "mzs",
|
|
|
|
| 1541 |
"children": [],
|
| 1542 |
+
"tokenizers": {},
|
| 1543 |
"node_i": "3588",
|
| 1544 |
+
"native_tokenizers": [],
|
| 1545 |
+
"scripts": []
|
| 1546 |
},
|
| 1547 |
{
|
| 1548 |
"name": "Guinea-Bissau Creole",
|
| 1549 |
"iso_1_code": null,
|
| 1550 |
"iso_3_code": "pov",
|
| 1551 |
+
"children": [],
|
| 1552 |
"tokenizers": {
|
| 1553 |
"Latn": {
|
| 1554 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1555 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1556 |
+
"original_lang_code": "pcm",
|
| 1557 |
+
"script": "Latn",
|
| 1558 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1559 |
}
|
| 1560 |
},
|
|
|
|
| 1561 |
"node_i": "3589",
|
| 1562 |
+
"native_tokenizers": [],
|
| 1563 |
"scripts": [
|
| 1564 |
"Latn"
|
| 1565 |
+
]
|
|
|
|
| 1566 |
},
|
| 1567 |
{
|
| 1568 |
"name": "Principense",
|
| 1569 |
"iso_1_code": null,
|
| 1570 |
"iso_3_code": "pre",
|
|
|
|
| 1571 |
"children": [],
|
| 1572 |
+
"tokenizers": {},
|
| 1573 |
"node_i": "3590",
|
| 1574 |
+
"native_tokenizers": [],
|
| 1575 |
+
"scripts": []
|
| 1576 |
},
|
| 1577 |
{
|
| 1578 |
"name": "Ternate\u00f1o",
|
| 1579 |
"iso_1_code": null,
|
| 1580 |
"iso_3_code": "tmg",
|
|
|
|
| 1581 |
"children": [],
|
| 1582 |
+
"tokenizers": {},
|
| 1583 |
"node_i": "3591",
|
| 1584 |
+
"native_tokenizers": [],
|
| 1585 |
+
"scripts": []
|
| 1586 |
},
|
| 1587 |
{
|
| 1588 |
"name": "Pidgin, Timor",
|
| 1589 |
"iso_1_code": null,
|
| 1590 |
"iso_3_code": "tvy",
|
|
|
|
| 1591 |
"children": [],
|
| 1592 |
+
"tokenizers": {},
|
| 1593 |
"node_i": "3592",
|
| 1594 |
+
"native_tokenizers": [],
|
| 1595 |
+
"scripts": []
|
| 1596 |
},
|
| 1597 |
{
|
| 1598 |
"name": "Korlai Portuguese Creole",
|
| 1599 |
"iso_1_code": null,
|
| 1600 |
"iso_3_code": "vkp",
|
|
|
|
| 1601 |
"children": [],
|
| 1602 |
+
"tokenizers": {},
|
| 1603 |
"node_i": "3593",
|
| 1604 |
+
"native_tokenizers": [],
|
| 1605 |
+
"scripts": []
|
| 1606 |
}
|
| 1607 |
],
|
| 1608 |
+
"tokenizers": {
|
| 1609 |
+
"Latn": {
|
| 1610 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1611 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1612 |
+
"original_lang_code": "pcm",
|
| 1613 |
+
"script": "Latn",
|
| 1614 |
+
"class_name": "StanzaTokenizer"
|
| 1615 |
+
}
|
| 1616 |
+
},
|
| 1617 |
"node_i": "3580",
|
| 1618 |
+
"native_tokenizers": [],
|
| 1619 |
+
"scripts": []
|
| 1620 |
},
|
| 1621 |
{
|
| 1622 |
"name": "Spanish based",
|
| 1623 |
"iso_1_code": null,
|
| 1624 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1625 |
"children": [
|
| 1626 |
{
|
| 1627 |
"name": "Chavacano",
|
| 1628 |
"iso_1_code": null,
|
| 1629 |
"iso_3_code": "cbk",
|
| 1630 |
+
"children": [],
|
| 1631 |
"tokenizers": {
|
| 1632 |
"Latn": {
|
| 1633 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1634 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1635 |
+
"original_lang_code": "pcm",
|
| 1636 |
+
"script": "Latn",
|
| 1637 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1638 |
}
|
| 1639 |
},
|
|
|
|
| 1640 |
"node_i": "3595",
|
| 1641 |
+
"native_tokenizers": [],
|
| 1642 |
"scripts": [
|
| 1643 |
"Latn"
|
| 1644 |
+
]
|
|
|
|
| 1645 |
},
|
| 1646 |
{
|
| 1647 |
"name": "Palenquero",
|
| 1648 |
"iso_1_code": null,
|
| 1649 |
"iso_3_code": "pln",
|
|
|
|
| 1650 |
"children": [],
|
| 1651 |
+
"tokenizers": {},
|
| 1652 |
"node_i": "3596",
|
| 1653 |
+
"native_tokenizers": [],
|
| 1654 |
+
"scripts": []
|
| 1655 |
}
|
| 1656 |
],
|
| 1657 |
+
"tokenizers": {
|
| 1658 |
+
"Latn": {
|
| 1659 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1660 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1661 |
+
"original_lang_code": "pcm",
|
| 1662 |
+
"script": "Latn",
|
| 1663 |
+
"class_name": "StanzaTokenizer"
|
| 1664 |
+
}
|
| 1665 |
+
},
|
| 1666 |
"node_i": "3594",
|
| 1667 |
+
"native_tokenizers": [],
|
| 1668 |
+
"scripts": []
|
| 1669 |
},
|
| 1670 |
{
|
| 1671 |
"name": "Swahili based",
|
| 1672 |
"iso_1_code": null,
|
| 1673 |
"iso_3_code": null,
|
|
|
|
| 1674 |
"children": [
|
| 1675 |
{
|
| 1676 |
"name": "Cutchi-Swahili",
|
| 1677 |
"iso_1_code": null,
|
| 1678 |
"iso_3_code": "ccl",
|
|
|
|
| 1679 |
"children": [],
|
| 1680 |
+
"tokenizers": {},
|
| 1681 |
"node_i": "3598",
|
| 1682 |
+
"native_tokenizers": [],
|
| 1683 |
+
"scripts": []
|
| 1684 |
}
|
| 1685 |
],
|
| 1686 |
+
"tokenizers": {},
|
| 1687 |
"node_i": "3597",
|
| 1688 |
+
"native_tokenizers": [],
|
| 1689 |
+
"scripts": []
|
| 1690 |
},
|
| 1691 |
{
|
| 1692 |
"name": "Tetun based",
|
| 1693 |
"iso_1_code": null,
|
| 1694 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1695 |
"children": [
|
| 1696 |
{
|
| 1697 |
"name": "Tetun Dili",
|
| 1698 |
"iso_1_code": null,
|
| 1699 |
"iso_3_code": "tdt",
|
| 1700 |
+
"children": [],
|
| 1701 |
"tokenizers": {
|
| 1702 |
"Latn": {
|
| 1703 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1704 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1705 |
+
"original_lang_code": "pcm",
|
| 1706 |
+
"script": "Latn",
|
| 1707 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1708 |
}
|
| 1709 |
},
|
|
|
|
| 1710 |
"node_i": "3600",
|
| 1711 |
+
"native_tokenizers": [],
|
| 1712 |
"scripts": [
|
| 1713 |
"Latn"
|
| 1714 |
+
]
|
|
|
|
| 1715 |
}
|
| 1716 |
],
|
| 1717 |
+
"tokenizers": {
|
| 1718 |
+
"Latn": {
|
| 1719 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1720 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1721 |
+
"original_lang_code": "pcm",
|
| 1722 |
+
"script": "Latn",
|
| 1723 |
+
"class_name": "StanzaTokenizer"
|
| 1724 |
+
}
|
| 1725 |
+
},
|
| 1726 |
"node_i": "3599",
|
| 1727 |
+
"native_tokenizers": [],
|
| 1728 |
+
"scripts": []
|
| 1729 |
}
|
| 1730 |
],
|
| 1731 |
+
"tokenizers": {
|
| 1732 |
+
"Latn": {
|
| 1733 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
| 1734 |
+
"original_lang_name": "nigerian_pidgin",
|
| 1735 |
+
"original_lang_code": "pcm",
|
| 1736 |
+
"script": "Latn",
|
| 1737 |
+
"class_name": "StanzaTokenizer"
|
| 1738 |
+
}
|
| 1739 |
+
},
|
| 1740 |
"node_i": "3481",
|
| 1741 |
+
"native_tokenizers": [],
|
| 1742 |
+
"scripts": []
|
| 1743 |
}
|
data/Dravidian.json
CHANGED
|
@@ -2,1765 +2,1436 @@
|
|
| 2 |
"name": "Dravidian",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {
|
| 6 |
-
"Telu": {
|
| 7 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 8 |
-
"original_lang_name": "telugu",
|
| 9 |
-
"original_lang_code": "tel",
|
| 10 |
-
"scripts": [
|
| 11 |
-
"Telu",
|
| 12 |
-
"Latn"
|
| 13 |
-
],
|
| 14 |
-
"class_name": "SpaCyTokenizer",
|
| 15 |
-
"macrolanguage": false
|
| 16 |
-
},
|
| 17 |
-
"Latn": {
|
| 18 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 19 |
-
"original_lang_name": "tamil",
|
| 20 |
-
"original_lang_code": "tam",
|
| 21 |
-
"scripts": [
|
| 22 |
-
"Latn",
|
| 23 |
-
"Taml"
|
| 24 |
-
],
|
| 25 |
-
"class_name": "SpaCyTokenizer",
|
| 26 |
-
"macrolanguage": false
|
| 27 |
-
},
|
| 28 |
-
"Knda": {
|
| 29 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 30 |
-
"original_lang_name": "kannada",
|
| 31 |
-
"original_lang_code": "kan",
|
| 32 |
-
"scripts": [
|
| 33 |
-
"Latn",
|
| 34 |
-
"Knda"
|
| 35 |
-
],
|
| 36 |
-
"class_name": "SpaCyTokenizer",
|
| 37 |
-
"macrolanguage": false
|
| 38 |
-
},
|
| 39 |
-
"Mlym": {
|
| 40 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 41 |
-
"original_lang_name": "malayalam",
|
| 42 |
-
"original_lang_code": "mal",
|
| 43 |
-
"scripts": [
|
| 44 |
-
"Latn",
|
| 45 |
-
"Mlym"
|
| 46 |
-
],
|
| 47 |
-
"class_name": "SpaCyTokenizer",
|
| 48 |
-
"macrolanguage": false
|
| 49 |
-
},
|
| 50 |
-
"Taml": {
|
| 51 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 52 |
-
"original_lang_name": "tamil",
|
| 53 |
-
"original_lang_code": "tam",
|
| 54 |
-
"scripts": [
|
| 55 |
-
"Latn",
|
| 56 |
-
"Taml"
|
| 57 |
-
],
|
| 58 |
-
"class_name": "SpaCyTokenizer",
|
| 59 |
-
"macrolanguage": false
|
| 60 |
-
}
|
| 61 |
-
},
|
| 62 |
"children": [
|
| 63 |
{
|
| 64 |
"name": "Central",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": null,
|
| 67 |
-
"tokenizers": {},
|
| 68 |
"children": [
|
| 69 |
{
|
| 70 |
"name": "Kolami-Naiki",
|
| 71 |
"iso_1_code": null,
|
| 72 |
"iso_3_code": null,
|
| 73 |
-
"tokenizers": {},
|
| 74 |
"children": [
|
| 75 |
{
|
| 76 |
"name": "Kolami, Northwestern",
|
| 77 |
"iso_1_code": null,
|
| 78 |
"iso_3_code": "kfb",
|
| 79 |
-
"tokenizers": {},
|
| 80 |
"children": [],
|
|
|
|
| 81 |
"node_i": "3604",
|
| 82 |
-
"
|
| 83 |
-
"
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"name": "Kolami, Southeastern",
|
| 87 |
"iso_1_code": null,
|
| 88 |
"iso_3_code": "nit",
|
| 89 |
-
"tokenizers": {},
|
| 90 |
"children": [],
|
|
|
|
| 91 |
"node_i": "3605",
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
}
|
| 95 |
],
|
|
|
|
| 96 |
"node_i": "3603",
|
| 97 |
-
"
|
| 98 |
-
"
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"name": "Parji-Gadaba",
|
| 102 |
"iso_1_code": null,
|
| 103 |
"iso_3_code": null,
|
| 104 |
-
"tokenizers": {},
|
| 105 |
"children": [
|
| 106 |
{
|
| 107 |
"name": "Gadaba, Mudhili",
|
| 108 |
"iso_1_code": null,
|
| 109 |
"iso_3_code": "gau",
|
| 110 |
-
"tokenizers": {},
|
| 111 |
"children": [],
|
|
|
|
| 112 |
"node_i": "3607",
|
| 113 |
-
"
|
| 114 |
-
"
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"name": "Gadaba, Pottangi Ollar",
|
| 118 |
"iso_1_code": null,
|
| 119 |
"iso_3_code": "gdb",
|
| 120 |
-
"tokenizers": {},
|
| 121 |
"children": [],
|
|
|
|
| 122 |
"node_i": "3608",
|
| 123 |
-
"
|
| 124 |
-
"
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"name": "Duruwa",
|
| 128 |
"iso_1_code": null,
|
| 129 |
"iso_3_code": "pci",
|
| 130 |
-
"tokenizers": {},
|
| 131 |
"children": [],
|
|
|
|
| 132 |
"node_i": "3609",
|
| 133 |
-
"
|
| 134 |
-
"
|
| 135 |
}
|
| 136 |
],
|
|
|
|
| 137 |
"node_i": "3606",
|
| 138 |
-
"
|
| 139 |
-
"
|
| 140 |
}
|
| 141 |
],
|
|
|
|
| 142 |
"node_i": "3602",
|
| 143 |
-
"
|
| 144 |
-
"
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"name": "Northern",
|
| 148 |
"iso_1_code": null,
|
| 149 |
"iso_3_code": null,
|
| 150 |
-
"tokenizers": {},
|
| 151 |
"children": [
|
| 152 |
{
|
| 153 |
"name": "Brahui",
|
| 154 |
"iso_1_code": null,
|
| 155 |
"iso_3_code": "brh",
|
| 156 |
-
"tokenizers": {},
|
| 157 |
"children": [],
|
|
|
|
| 158 |
"node_i": "3611",
|
|
|
|
| 159 |
"scripts": [
|
| 160 |
"Arab"
|
| 161 |
-
]
|
| 162 |
-
"own_tokenizer": false
|
| 163 |
},
|
| 164 |
{
|
| 165 |
"name": "Kumarbhag Paharia",
|
| 166 |
"iso_1_code": null,
|
| 167 |
"iso_3_code": "kmj",
|
| 168 |
-
"tokenizers": {},
|
| 169 |
"children": [],
|
|
|
|
| 170 |
"node_i": "3612",
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
},
|
| 174 |
{
|
| 175 |
"name": "Kurux",
|
| 176 |
"iso_1_code": null,
|
| 177 |
"iso_3_code": "kru",
|
| 178 |
-
"tokenizers": {},
|
| 179 |
"children": [],
|
|
|
|
| 180 |
"node_i": "3613",
|
|
|
|
| 181 |
"scripts": [
|
| 182 |
"Deva"
|
| 183 |
-
]
|
| 184 |
-
"own_tokenizer": false
|
| 185 |
},
|
| 186 |
{
|
| 187 |
"name": "Sauria Paharia",
|
| 188 |
"iso_1_code": null,
|
| 189 |
"iso_3_code": "mjt",
|
| 190 |
-
"tokenizers": {},
|
| 191 |
"children": [],
|
|
|
|
| 192 |
"node_i": "3614",
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
},
|
| 196 |
{
|
| 197 |
"name": "Kisan",
|
| 198 |
"iso_1_code": null,
|
| 199 |
"iso_3_code": "xis",
|
| 200 |
-
"tokenizers": {},
|
| 201 |
"children": [],
|
|
|
|
| 202 |
"node_i": "3615",
|
| 203 |
-
"
|
| 204 |
-
"
|
| 205 |
}
|
| 206 |
],
|
|
|
|
| 207 |
"node_i": "3610",
|
| 208 |
-
"
|
| 209 |
-
"
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"name": "South-Central",
|
| 213 |
"iso_1_code": null,
|
| 214 |
"iso_3_code": null,
|
| 215 |
-
"tokenizers": {
|
| 216 |
-
"Telu": {
|
| 217 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 218 |
-
"original_lang_name": "telugu",
|
| 219 |
-
"original_lang_code": "tel",
|
| 220 |
-
"scripts": [
|
| 221 |
-
"Telu",
|
| 222 |
-
"Latn"
|
| 223 |
-
],
|
| 224 |
-
"class_name": "SpaCyTokenizer",
|
| 225 |
-
"macrolanguage": false
|
| 226 |
-
},
|
| 227 |
-
"Latn": {
|
| 228 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 229 |
-
"original_lang_name": "telugu",
|
| 230 |
-
"original_lang_code": "tel",
|
| 231 |
-
"scripts": [
|
| 232 |
-
"Telu",
|
| 233 |
-
"Latn"
|
| 234 |
-
],
|
| 235 |
-
"class_name": "SpaCyTokenizer",
|
| 236 |
-
"macrolanguage": false
|
| 237 |
-
}
|
| 238 |
-
},
|
| 239 |
"children": [
|
| 240 |
{
|
| 241 |
"name": "Gondi-Kui",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": null,
|
| 244 |
-
"tokenizers": {
|
| 245 |
-
"Telu": {
|
| 246 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 247 |
-
"original_lang_name": "telugu",
|
| 248 |
-
"original_lang_code": "tel",
|
| 249 |
-
"scripts": [
|
| 250 |
-
"Telu",
|
| 251 |
-
"Latn"
|
| 252 |
-
],
|
| 253 |
-
"class_name": "SpaCyTokenizer",
|
| 254 |
-
"macrolanguage": false
|
| 255 |
-
},
|
| 256 |
-
"Latn": {
|
| 257 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 258 |
-
"original_lang_name": "telugu",
|
| 259 |
-
"original_lang_code": "tel",
|
| 260 |
-
"scripts": [
|
| 261 |
-
"Telu",
|
| 262 |
-
"Latn"
|
| 263 |
-
],
|
| 264 |
-
"class_name": "SpaCyTokenizer",
|
| 265 |
-
"macrolanguage": false
|
| 266 |
-
}
|
| 267 |
-
},
|
| 268 |
"children": [
|
| 269 |
{
|
| 270 |
"name": "Gondi",
|
| 271 |
"iso_1_code": null,
|
| 272 |
"iso_3_code": null,
|
| 273 |
-
"tokenizers": {
|
| 274 |
-
"Telu": {
|
| 275 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 276 |
-
"original_lang_name": "telugu",
|
| 277 |
-
"original_lang_code": "tel",
|
| 278 |
-
"scripts": [
|
| 279 |
-
"Telu",
|
| 280 |
-
"Latn"
|
| 281 |
-
],
|
| 282 |
-
"class_name": "SpaCyTokenizer",
|
| 283 |
-
"macrolanguage": false
|
| 284 |
-
},
|
| 285 |
-
"Latn": {
|
| 286 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 287 |
-
"original_lang_name": "telugu",
|
| 288 |
-
"original_lang_code": "tel",
|
| 289 |
-
"scripts": [
|
| 290 |
-
"Telu",
|
| 291 |
-
"Latn"
|
| 292 |
-
],
|
| 293 |
-
"class_name": "SpaCyTokenizer",
|
| 294 |
-
"macrolanguage": false
|
| 295 |
-
}
|
| 296 |
-
},
|
| 297 |
"children": [
|
| 298 |
{
|
| 299 |
"name": "Maria, Dandami",
|
| 300 |
"iso_1_code": null,
|
| 301 |
"iso_3_code": "daq",
|
| 302 |
-
"tokenizers": {},
|
| 303 |
"children": [],
|
|
|
|
| 304 |
"node_i": "3619",
|
| 305 |
-
"
|
| 306 |
-
"
|
| 307 |
},
|
| 308 |
{
|
| 309 |
"name": "Muria, Eastern",
|
| 310 |
"iso_1_code": null,
|
| 311 |
"iso_3_code": "emu",
|
| 312 |
-
"tokenizers": {},
|
| 313 |
"children": [],
|
|
|
|
| 314 |
"node_i": "3620",
|
| 315 |
-
"
|
| 316 |
-
"
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"name": "Gondi, Aheri",
|
| 320 |
"iso_1_code": null,
|
| 321 |
"iso_3_code": "esg",
|
| 322 |
-
"tokenizers": {},
|
| 323 |
"children": [],
|
|
|
|
| 324 |
"node_i": "3621",
|
| 325 |
-
"
|
| 326 |
-
"
|
| 327 |
},
|
| 328 |
{
|
| 329 |
"name": "Muria, Far Western",
|
| 330 |
"iso_1_code": null,
|
| 331 |
"iso_3_code": "fmu",
|
| 332 |
-
"tokenizers": {},
|
| 333 |
"children": [],
|
|
|
|
| 334 |
"node_i": "3622",
|
|
|
|
| 335 |
"scripts": [
|
| 336 |
"Deva"
|
| 337 |
-
]
|
| 338 |
-
"own_tokenizer": false
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"name": "Gondi, Northern",
|
| 342 |
"iso_1_code": null,
|
| 343 |
"iso_3_code": "gno",
|
| 344 |
-
"tokenizers": {},
|
| 345 |
"children": [],
|
|
|
|
| 346 |
"node_i": "3623",
|
| 347 |
-
"
|
| 348 |
-
"
|
| 349 |
},
|
| 350 |
{
|
| 351 |
"name": "Khirwar",
|
| 352 |
"iso_1_code": null,
|
| 353 |
"iso_3_code": "kwx",
|
| 354 |
-
"tokenizers": {},
|
| 355 |
"children": [],
|
|
|
|
| 356 |
"node_i": "3624",
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
},
|
| 360 |
{
|
| 361 |
"name": "Maria",
|
| 362 |
"iso_1_code": null,
|
| 363 |
"iso_3_code": "mrr",
|
| 364 |
-
"tokenizers": {},
|
| 365 |
"children": [],
|
|
|
|
| 366 |
"node_i": "3625",
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
},
|
| 370 |
{
|
| 371 |
"name": "Muria, Western",
|
| 372 |
"iso_1_code": null,
|
| 373 |
"iso_3_code": "mut",
|
| 374 |
-
"tokenizers": {},
|
| 375 |
"children": [],
|
|
|
|
| 376 |
"node_i": "3626",
|
| 377 |
-
"
|
| 378 |
-
"
|
| 379 |
},
|
| 380 |
{
|
| 381 |
"name": "Nagarchal",
|
| 382 |
"iso_1_code": null,
|
| 383 |
"iso_3_code": "nbg",
|
| 384 |
-
"tokenizers": {},
|
| 385 |
"children": [],
|
|
|
|
| 386 |
"node_i": "3627",
|
| 387 |
-
"
|
| 388 |
-
"
|
| 389 |
},
|
| 390 |
{
|
| 391 |
"name": "Pardhan",
|
| 392 |
"iso_1_code": null,
|
| 393 |
"iso_3_code": "pch",
|
| 394 |
-
"tokenizers": {},
|
| 395 |
"children": [],
|
|
|
|
| 396 |
"node_i": "3628",
|
| 397 |
-
"
|
| 398 |
-
"
|
| 399 |
},
|
| 400 |
{
|
| 401 |
"name": "Gondi, Adilabad",
|
| 402 |
"iso_1_code": null,
|
| 403 |
"iso_3_code": "wsg",
|
|
|
|
| 404 |
"tokenizers": {
|
| 405 |
"Telu": {
|
| 406 |
-
"full_object": "
|
| 407 |
"original_lang_name": "telugu",
|
| 408 |
"original_lang_code": "tel",
|
| 409 |
-
"
|
| 410 |
-
|
| 411 |
-
"Latn"
|
| 412 |
-
],
|
| 413 |
-
"class_name": "SpaCyTokenizer",
|
| 414 |
-
"macrolanguage": false
|
| 415 |
}
|
| 416 |
},
|
| 417 |
-
"children": [],
|
| 418 |
"node_i": "3629",
|
|
|
|
| 419 |
"scripts": [
|
| 420 |
"Telu"
|
| 421 |
-
]
|
| 422 |
-
"own_tokenizer": false
|
| 423 |
}
|
| 424 |
],
|
| 425 |
-
"node_i": "3618",
|
| 426 |
-
"scripts": [],
|
| 427 |
-
"own_tokenizer": false
|
| 428 |
-
},
|
| 429 |
-
{
|
| 430 |
-
"name": "Konda-Kui",
|
| 431 |
-
"iso_1_code": null,
|
| 432 |
-
"iso_3_code": null,
|
| 433 |
"tokenizers": {
|
| 434 |
"Telu": {
|
| 435 |
-
"full_object": "
|
| 436 |
-
"original_lang_name": "telugu",
|
| 437 |
-
"original_lang_code": "tel",
|
| 438 |
-
"scripts": [
|
| 439 |
-
"Telu",
|
| 440 |
-
"Latn"
|
| 441 |
-
],
|
| 442 |
-
"class_name": "SpaCyTokenizer",
|
| 443 |
-
"macrolanguage": false
|
| 444 |
-
},
|
| 445 |
-
"Latn": {
|
| 446 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 447 |
"original_lang_name": "telugu",
|
| 448 |
"original_lang_code": "tel",
|
| 449 |
-
"
|
| 450 |
-
|
| 451 |
-
"Latn"
|
| 452 |
-
],
|
| 453 |
-
"class_name": "SpaCyTokenizer",
|
| 454 |
-
"macrolanguage": false
|
| 455 |
}
|
| 456 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
"children": [
|
| 458 |
{
|
| 459 |
"name": "Konda",
|
| 460 |
"iso_1_code": null,
|
| 461 |
"iso_3_code": null,
|
| 462 |
-
"tokenizers": {},
|
| 463 |
"children": [
|
| 464 |
{
|
| 465 |
"name": "Konda-Dora",
|
| 466 |
"iso_1_code": null,
|
| 467 |
"iso_3_code": "kfc",
|
| 468 |
-
"tokenizers": {},
|
| 469 |
"children": [],
|
|
|
|
| 470 |
"node_i": "3632",
|
| 471 |
-
"
|
| 472 |
-
"
|
| 473 |
},
|
| 474 |
{
|
| 475 |
"name": "Mukha-Dora",
|
| 476 |
"iso_1_code": null,
|
| 477 |
"iso_3_code": "mmk",
|
| 478 |
-
"tokenizers": {},
|
| 479 |
"children": [],
|
|
|
|
| 480 |
"node_i": "3633",
|
| 481 |
-
"
|
| 482 |
-
"
|
| 483 |
}
|
| 484 |
],
|
|
|
|
| 485 |
"node_i": "3631",
|
| 486 |
-
"
|
| 487 |
-
"
|
| 488 |
},
|
| 489 |
{
|
| 490 |
"name": "Manda-Kui",
|
| 491 |
"iso_1_code": null,
|
| 492 |
"iso_3_code": null,
|
| 493 |
-
"tokenizers": {
|
| 494 |
-
"Telu": {
|
| 495 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 496 |
-
"original_lang_name": "telugu",
|
| 497 |
-
"original_lang_code": "tel",
|
| 498 |
-
"scripts": [
|
| 499 |
-
"Telu",
|
| 500 |
-
"Latn"
|
| 501 |
-
],
|
| 502 |
-
"class_name": "SpaCyTokenizer",
|
| 503 |
-
"macrolanguage": false
|
| 504 |
-
},
|
| 505 |
-
"Latn": {
|
| 506 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 507 |
-
"original_lang_name": "telugu",
|
| 508 |
-
"original_lang_code": "tel",
|
| 509 |
-
"scripts": [
|
| 510 |
-
"Telu",
|
| 511 |
-
"Latn"
|
| 512 |
-
],
|
| 513 |
-
"class_name": "SpaCyTokenizer",
|
| 514 |
-
"macrolanguage": false
|
| 515 |
-
}
|
| 516 |
-
},
|
| 517 |
"children": [
|
| 518 |
{
|
| 519 |
"name": "Kui-Kuvi",
|
| 520 |
"iso_1_code": null,
|
| 521 |
"iso_3_code": null,
|
| 522 |
-
"tokenizers": {
|
| 523 |
-
"Telu": {
|
| 524 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 525 |
-
"original_lang_name": "telugu",
|
| 526 |
-
"original_lang_code": "tel",
|
| 527 |
-
"scripts": [
|
| 528 |
-
"Telu",
|
| 529 |
-
"Latn"
|
| 530 |
-
],
|
| 531 |
-
"class_name": "SpaCyTokenizer",
|
| 532 |
-
"macrolanguage": false
|
| 533 |
-
},
|
| 534 |
-
"Latn": {
|
| 535 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 536 |
-
"original_lang_name": "telugu",
|
| 537 |
-
"original_lang_code": "tel",
|
| 538 |
-
"scripts": [
|
| 539 |
-
"Telu",
|
| 540 |
-
"Latn"
|
| 541 |
-
],
|
| 542 |
-
"class_name": "SpaCyTokenizer",
|
| 543 |
-
"macrolanguage": false
|
| 544 |
-
}
|
| 545 |
-
},
|
| 546 |
"children": [
|
| 547 |
{
|
| 548 |
"name": "Kui, Dawik",
|
| 549 |
"iso_1_code": null,
|
| 550 |
"iso_3_code": "dwk",
|
| 551 |
-
"tokenizers": {},
|
| 552 |
"children": [],
|
|
|
|
| 553 |
"node_i": "3636",
|
| 554 |
-
"
|
| 555 |
-
"
|
| 556 |
},
|
| 557 |
{
|
| 558 |
"name": "Koya",
|
| 559 |
"iso_1_code": null,
|
| 560 |
"iso_3_code": "kff",
|
|
|
|
| 561 |
"tokenizers": {
|
| 562 |
"Telu": {
|
| 563 |
-
"full_object": "
|
| 564 |
"original_lang_name": "telugu",
|
| 565 |
"original_lang_code": "tel",
|
| 566 |
-
"
|
| 567 |
-
|
| 568 |
-
"Latn"
|
| 569 |
-
],
|
| 570 |
-
"class_name": "SpaCyTokenizer",
|
| 571 |
-
"macrolanguage": false
|
| 572 |
}
|
| 573 |
},
|
| 574 |
-
"children": [],
|
| 575 |
"node_i": "3637",
|
|
|
|
| 576 |
"scripts": [
|
| 577 |
"Telu"
|
| 578 |
-
]
|
| 579 |
-
"own_tokenizer": false
|
| 580 |
},
|
| 581 |
{
|
| 582 |
"name": "Kuvi",
|
| 583 |
"iso_1_code": null,
|
| 584 |
"iso_3_code": "kxv",
|
| 585 |
-
"tokenizers": {},
|
| 586 |
"children": [],
|
|
|
|
| 587 |
"node_i": "3638",
|
| 588 |
-
"
|
| 589 |
-
"
|
| 590 |
},
|
| 591 |
{
|
| 592 |
"name": "Kui",
|
| 593 |
"iso_1_code": null,
|
| 594 |
"iso_3_code": "uki",
|
| 595 |
-
"tokenizers": {},
|
| 596 |
"children": [],
|
|
|
|
| 597 |
"node_i": "3639",
|
| 598 |
-
"
|
| 599 |
-
"
|
| 600 |
}
|
| 601 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
"node_i": "3635",
|
| 603 |
-
"
|
| 604 |
-
"
|
| 605 |
},
|
| 606 |
{
|
| 607 |
"name": "Manda-Pengo",
|
| 608 |
"iso_1_code": null,
|
| 609 |
"iso_3_code": null,
|
| 610 |
-
"tokenizers": {},
|
| 611 |
"children": [
|
| 612 |
{
|
| 613 |
"name": "Manda",
|
| 614 |
"iso_1_code": null,
|
| 615 |
"iso_3_code": "mha",
|
| 616 |
-
"tokenizers": {},
|
| 617 |
"children": [],
|
|
|
|
| 618 |
"node_i": "3641",
|
| 619 |
-
"
|
| 620 |
-
"
|
| 621 |
},
|
| 622 |
{
|
| 623 |
"name": "Pengo",
|
| 624 |
"iso_1_code": null,
|
| 625 |
"iso_3_code": "peg",
|
| 626 |
-
"tokenizers": {},
|
| 627 |
"children": [],
|
|
|
|
| 628 |
"node_i": "3642",
|
| 629 |
-
"
|
| 630 |
-
"
|
| 631 |
}
|
| 632 |
],
|
|
|
|
| 633 |
"node_i": "3640",
|
| 634 |
-
"
|
| 635 |
-
"
|
| 636 |
}
|
| 637 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
"node_i": "3634",
|
| 639 |
-
"
|
| 640 |
-
"
|
| 641 |
}
|
| 642 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
"node_i": "3630",
|
| 644 |
-
"
|
| 645 |
-
"
|
| 646 |
}
|
| 647 |
],
|
| 648 |
-
"node_i": "3617",
|
| 649 |
-
"scripts": [],
|
| 650 |
-
"own_tokenizer": false
|
| 651 |
-
},
|
| 652 |
-
{
|
| 653 |
-
"name": "Telugu",
|
| 654 |
-
"iso_1_code": null,
|
| 655 |
-
"iso_3_code": null,
|
| 656 |
"tokenizers": {
|
| 657 |
"Telu": {
|
| 658 |
-
"full_object": "
|
| 659 |
"original_lang_name": "telugu",
|
| 660 |
"original_lang_code": "tel",
|
| 661 |
-
"
|
| 662 |
-
|
| 663 |
-
"Latn"
|
| 664 |
-
],
|
| 665 |
-
"class_name": "SpaCyTokenizer",
|
| 666 |
-
"macrolanguage": false
|
| 667 |
-
},
|
| 668 |
-
"Latn": {
|
| 669 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 670 |
-
"original_lang_name": "telugu",
|
| 671 |
-
"original_lang_code": "tel",
|
| 672 |
-
"scripts": [
|
| 673 |
-
"Telu",
|
| 674 |
-
"Latn"
|
| 675 |
-
],
|
| 676 |
-
"class_name": "SpaCyTokenizer",
|
| 677 |
-
"macrolanguage": false
|
| 678 |
}
|
| 679 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
"children": [
|
| 681 |
{
|
| 682 |
"name": "Chenchu",
|
| 683 |
"iso_1_code": null,
|
| 684 |
"iso_3_code": "cde",
|
| 685 |
-
"tokenizers": {},
|
| 686 |
"children": [],
|
|
|
|
| 687 |
"node_i": "3644",
|
| 688 |
-
"
|
| 689 |
-
"
|
| 690 |
},
|
| 691 |
{
|
| 692 |
"name": "Manna-Dora",
|
| 693 |
"iso_1_code": null,
|
| 694 |
"iso_3_code": "mju",
|
| 695 |
-
"tokenizers": {},
|
| 696 |
"children": [],
|
|
|
|
| 697 |
"node_i": "3645",
|
| 698 |
-
"
|
| 699 |
-
"
|
| 700 |
},
|
| 701 |
{
|
| 702 |
"name": "Telugu",
|
| 703 |
"iso_1_code": "te",
|
| 704 |
"iso_3_code": "tel",
|
|
|
|
| 705 |
"tokenizers": {
|
| 706 |
"Telu": {
|
| 707 |
-
"full_object": "
|
| 708 |
-
"original_lang_name": "telugu",
|
| 709 |
-
"original_lang_code": "tel",
|
| 710 |
-
"scripts": [
|
| 711 |
-
"Telu",
|
| 712 |
-
"Latn"
|
| 713 |
-
],
|
| 714 |
-
"class_name": "SpaCyTokenizer",
|
| 715 |
-
"macrolanguage": false
|
| 716 |
-
},
|
| 717 |
-
"Latn": {
|
| 718 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
| 719 |
"original_lang_name": "telugu",
|
| 720 |
"original_lang_code": "tel",
|
| 721 |
-
"
|
| 722 |
-
|
| 723 |
-
"Latn"
|
| 724 |
-
],
|
| 725 |
-
"class_name": "SpaCyTokenizer",
|
| 726 |
-
"macrolanguage": false
|
| 727 |
}
|
| 728 |
},
|
| 729 |
-
"children": [],
|
| 730 |
"node_i": "3646",
|
|
|
|
|
|
|
|
|
|
| 731 |
"scripts": [
|
| 732 |
"Telu",
|
| 733 |
"Latn"
|
| 734 |
-
]
|
| 735 |
-
"own_tokenizer": true
|
| 736 |
},
|
| 737 |
{
|
| 738 |
"name": "Waddar",
|
| 739 |
"iso_1_code": null,
|
| 740 |
"iso_3_code": "wbq",
|
| 741 |
-
"tokenizers": {},
|
| 742 |
"children": [],
|
|
|
|
| 743 |
"node_i": "3647",
|
| 744 |
-
"
|
| 745 |
-
"
|
| 746 |
}
|
| 747 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 748 |
"node_i": "3643",
|
| 749 |
-
"
|
| 750 |
-
"
|
| 751 |
}
|
| 752 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
"node_i": "3616",
|
| 754 |
-
"
|
| 755 |
-
"
|
| 756 |
},
|
| 757 |
{
|
| 758 |
"name": "Southern",
|
| 759 |
"iso_1_code": null,
|
| 760 |
"iso_3_code": null,
|
| 761 |
-
"tokenizers": {
|
| 762 |
-
"Latn": {
|
| 763 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 764 |
-
"original_lang_name": "tamil",
|
| 765 |
-
"original_lang_code": "tam",
|
| 766 |
-
"scripts": [
|
| 767 |
-
"Latn",
|
| 768 |
-
"Taml"
|
| 769 |
-
],
|
| 770 |
-
"class_name": "SpaCyTokenizer",
|
| 771 |
-
"macrolanguage": false
|
| 772 |
-
},
|
| 773 |
-
"Knda": {
|
| 774 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 775 |
-
"original_lang_name": "kannada",
|
| 776 |
-
"original_lang_code": "kan",
|
| 777 |
-
"scripts": [
|
| 778 |
-
"Latn",
|
| 779 |
-
"Knda"
|
| 780 |
-
],
|
| 781 |
-
"class_name": "SpaCyTokenizer",
|
| 782 |
-
"macrolanguage": false
|
| 783 |
-
},
|
| 784 |
-
"Mlym": {
|
| 785 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 786 |
-
"original_lang_name": "malayalam",
|
| 787 |
-
"original_lang_code": "mal",
|
| 788 |
-
"scripts": [
|
| 789 |
-
"Latn",
|
| 790 |
-
"Mlym"
|
| 791 |
-
],
|
| 792 |
-
"class_name": "SpaCyTokenizer",
|
| 793 |
-
"macrolanguage": false
|
| 794 |
-
},
|
| 795 |
-
"Taml": {
|
| 796 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 797 |
-
"original_lang_name": "tamil",
|
| 798 |
-
"original_lang_code": "tam",
|
| 799 |
-
"scripts": [
|
| 800 |
-
"Latn",
|
| 801 |
-
"Taml"
|
| 802 |
-
],
|
| 803 |
-
"class_name": "SpaCyTokenizer",
|
| 804 |
-
"macrolanguage": false
|
| 805 |
-
}
|
| 806 |
-
},
|
| 807 |
"children": [
|
| 808 |
{
|
| 809 |
"name": "Kurichiya",
|
| 810 |
"iso_1_code": null,
|
| 811 |
"iso_3_code": "kfh",
|
| 812 |
-
"tokenizers": {},
|
| 813 |
"children": [],
|
|
|
|
| 814 |
"node_i": "3649",
|
| 815 |
-
"
|
| 816 |
-
"
|
| 817 |
},
|
| 818 |
{
|
| 819 |
"name": "Kurumba, Attapady",
|
| 820 |
"iso_1_code": null,
|
| 821 |
"iso_3_code": "pkr",
|
| 822 |
-
"tokenizers": {},
|
| 823 |
"children": [],
|
|
|
|
| 824 |
"node_i": "3650",
|
| 825 |
-
"
|
| 826 |
-
"
|
| 827 |
},
|
| 828 |
{
|
| 829 |
"name": "Pathiya",
|
| 830 |
"iso_1_code": null,
|
| 831 |
"iso_3_code": "pty",
|
| 832 |
-
"tokenizers": {},
|
| 833 |
"children": [],
|
|
|
|
| 834 |
"node_i": "3651",
|
| 835 |
-
"
|
| 836 |
-
"
|
| 837 |
},
|
| 838 |
{
|
| 839 |
"name": "Muduga",
|
| 840 |
"iso_1_code": null,
|
| 841 |
"iso_3_code": "udg",
|
| 842 |
-
"tokenizers": {},
|
| 843 |
"children": [],
|
|
|
|
| 844 |
"node_i": "3652",
|
| 845 |
-
"
|
| 846 |
-
"
|
| 847 |
},
|
| 848 |
{
|
| 849 |
"name": "Kumbaran",
|
| 850 |
"iso_1_code": null,
|
| 851 |
"iso_3_code": "wkb",
|
| 852 |
-
"tokenizers": {},
|
| 853 |
"children": [],
|
|
|
|
| 854 |
"node_i": "3653",
|
| 855 |
-
"
|
| 856 |
-
"
|
| 857 |
},
|
| 858 |
{
|
| 859 |
"name": "Kalanadi",
|
| 860 |
"iso_1_code": null,
|
| 861 |
"iso_3_code": "wkl",
|
| 862 |
-
"tokenizers": {},
|
| 863 |
"children": [],
|
|
|
|
| 864 |
"node_i": "3654",
|
| 865 |
-
"
|
| 866 |
-
"
|
| 867 |
},
|
| 868 |
{
|
| 869 |
"name": "Kunduvadi",
|
| 870 |
"iso_1_code": null,
|
| 871 |
"iso_3_code": "wku",
|
| 872 |
-
"tokenizers": {},
|
| 873 |
"children": [],
|
|
|
|
| 874 |
"node_i": "3655",
|
| 875 |
-
"
|
| 876 |
-
"
|
| 877 |
},
|
| 878 |
{
|
| 879 |
"name": "Tamil-Kannada",
|
| 880 |
"iso_1_code": null,
|
| 881 |
"iso_3_code": null,
|
| 882 |
-
"tokenizers": {
|
| 883 |
-
"Latn": {
|
| 884 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 885 |
-
"original_lang_name": "tamil",
|
| 886 |
-
"original_lang_code": "tam",
|
| 887 |
-
"scripts": [
|
| 888 |
-
"Latn",
|
| 889 |
-
"Taml"
|
| 890 |
-
],
|
| 891 |
-
"class_name": "SpaCyTokenizer",
|
| 892 |
-
"macrolanguage": false
|
| 893 |
-
},
|
| 894 |
-
"Knda": {
|
| 895 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 896 |
-
"original_lang_name": "kannada",
|
| 897 |
-
"original_lang_code": "kan",
|
| 898 |
-
"scripts": [
|
| 899 |
-
"Latn",
|
| 900 |
-
"Knda"
|
| 901 |
-
],
|
| 902 |
-
"class_name": "SpaCyTokenizer",
|
| 903 |
-
"macrolanguage": false
|
| 904 |
-
},
|
| 905 |
-
"Mlym": {
|
| 906 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 907 |
-
"original_lang_name": "malayalam",
|
| 908 |
-
"original_lang_code": "mal",
|
| 909 |
-
"scripts": [
|
| 910 |
-
"Latn",
|
| 911 |
-
"Mlym"
|
| 912 |
-
],
|
| 913 |
-
"class_name": "SpaCyTokenizer",
|
| 914 |
-
"macrolanguage": false
|
| 915 |
-
},
|
| 916 |
-
"Taml": {
|
| 917 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 918 |
-
"original_lang_name": "tamil",
|
| 919 |
-
"original_lang_code": "tam",
|
| 920 |
-
"scripts": [
|
| 921 |
-
"Latn",
|
| 922 |
-
"Taml"
|
| 923 |
-
],
|
| 924 |
-
"class_name": "SpaCyTokenizer",
|
| 925 |
-
"macrolanguage": false
|
| 926 |
-
}
|
| 927 |
-
},
|
| 928 |
"children": [
|
| 929 |
{
|
| 930 |
"name": "Kannada",
|
| 931 |
"iso_1_code": null,
|
| 932 |
"iso_3_code": null,
|
| 933 |
-
"tokenizers": {
|
| 934 |
-
"Latn": {
|
| 935 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 936 |
-
"original_lang_name": "kannada",
|
| 937 |
-
"original_lang_code": "kan",
|
| 938 |
-
"scripts": [
|
| 939 |
-
"Latn",
|
| 940 |
-
"Knda"
|
| 941 |
-
],
|
| 942 |
-
"class_name": "SpaCyTokenizer",
|
| 943 |
-
"macrolanguage": false
|
| 944 |
-
},
|
| 945 |
-
"Knda": {
|
| 946 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 947 |
-
"original_lang_name": "kannada",
|
| 948 |
-
"original_lang_code": "kan",
|
| 949 |
-
"scripts": [
|
| 950 |
-
"Latn",
|
| 951 |
-
"Knda"
|
| 952 |
-
],
|
| 953 |
-
"class_name": "SpaCyTokenizer",
|
| 954 |
-
"macrolanguage": false
|
| 955 |
-
}
|
| 956 |
-
},
|
| 957 |
"children": [
|
| 958 |
{
|
| 959 |
"name": "Badaga",
|
| 960 |
"iso_1_code": null,
|
| 961 |
"iso_3_code": "bfq",
|
| 962 |
-
"tokenizers": {},
|
| 963 |
"children": [],
|
|
|
|
| 964 |
"node_i": "3658",
|
| 965 |
-
"
|
| 966 |
-
"
|
| 967 |
},
|
| 968 |
{
|
| 969 |
"name": "Holiya",
|
| 970 |
"iso_1_code": null,
|
| 971 |
"iso_3_code": "hoy",
|
| 972 |
-
"tokenizers": {},
|
| 973 |
"children": [],
|
|
|
|
| 974 |
"node_i": "3659",
|
| 975 |
-
"
|
| 976 |
-
"
|
| 977 |
},
|
| 978 |
{
|
| 979 |
"name": "Kannada",
|
| 980 |
"iso_1_code": "kn",
|
| 981 |
"iso_3_code": "kan",
|
|
|
|
| 982 |
"tokenizers": {
|
| 983 |
-
"Latn": {
|
| 984 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
| 985 |
-
"original_lang_name": "kannada",
|
| 986 |
-
"original_lang_code": "kan",
|
| 987 |
-
"scripts": [
|
| 988 |
-
"Latn",
|
| 989 |
-
"Knda"
|
| 990 |
-
],
|
| 991 |
-
"class_name": "SpaCyTokenizer",
|
| 992 |
-
"macrolanguage": false
|
| 993 |
-
},
|
| 994 |
"Knda": {
|
| 995 |
-
"full_object": "
|
| 996 |
"original_lang_name": "kannada",
|
| 997 |
"original_lang_code": "kan",
|
| 998 |
-
"
|
| 999 |
-
|
| 1000 |
-
"Knda"
|
| 1001 |
-
],
|
| 1002 |
-
"class_name": "SpaCyTokenizer",
|
| 1003 |
-
"macrolanguage": false
|
| 1004 |
}
|
| 1005 |
},
|
| 1006 |
-
"children": [],
|
| 1007 |
"node_i": "3660",
|
|
|
|
|
|
|
|
|
|
| 1008 |
"scripts": [
|
| 1009 |
"Latn",
|
| 1010 |
"Knda"
|
| 1011 |
-
]
|
| 1012 |
-
"own_tokenizer": true
|
| 1013 |
},
|
| 1014 |
{
|
| 1015 |
"name": "Urali",
|
| 1016 |
"iso_1_code": null,
|
| 1017 |
"iso_3_code": "url",
|
| 1018 |
-
"tokenizers": {},
|
| 1019 |
"children": [],
|
|
|
|
| 1020 |
"node_i": "3661",
|
| 1021 |
-
"
|
| 1022 |
-
"
|
| 1023 |
}
|
| 1024 |
],
|
| 1025 |
-
"node_i": "3657",
|
| 1026 |
-
"scripts": [],
|
| 1027 |
-
"own_tokenizer": false
|
| 1028 |
-
},
|
| 1029 |
-
{
|
| 1030 |
-
"name": "Tamil-Kodagu",
|
| 1031 |
-
"iso_1_code": null,
|
| 1032 |
-
"iso_3_code": null,
|
| 1033 |
"tokenizers": {
|
| 1034 |
-
"
|
| 1035 |
-
"full_object": "
|
| 1036 |
-
"original_lang_name": "
|
| 1037 |
-
"original_lang_code": "
|
| 1038 |
-
"
|
| 1039 |
-
|
| 1040 |
-
"Taml"
|
| 1041 |
-
],
|
| 1042 |
-
"class_name": "SpaCyTokenizer",
|
| 1043 |
-
"macrolanguage": false
|
| 1044 |
-
},
|
| 1045 |
-
"Mlym": {
|
| 1046 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 1047 |
-
"original_lang_name": "malayalam",
|
| 1048 |
-
"original_lang_code": "mal",
|
| 1049 |
-
"scripts": [
|
| 1050 |
-
"Latn",
|
| 1051 |
-
"Mlym"
|
| 1052 |
-
],
|
| 1053 |
-
"class_name": "SpaCyTokenizer",
|
| 1054 |
-
"macrolanguage": false
|
| 1055 |
-
},
|
| 1056 |
-
"Taml": {
|
| 1057 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1058 |
-
"original_lang_name": "tamil",
|
| 1059 |
-
"original_lang_code": "tam",
|
| 1060 |
-
"scripts": [
|
| 1061 |
-
"Latn",
|
| 1062 |
-
"Taml"
|
| 1063 |
-
],
|
| 1064 |
-
"class_name": "SpaCyTokenizer",
|
| 1065 |
-
"macrolanguage": false
|
| 1066 |
}
|
| 1067 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1068 |
"children": [
|
| 1069 |
{
|
| 1070 |
"name": "Kodagu",
|
| 1071 |
"iso_1_code": null,
|
| 1072 |
"iso_3_code": null,
|
| 1073 |
-
"tokenizers": {},
|
| 1074 |
"children": [
|
| 1075 |
{
|
| 1076 |
"name": "Kodava",
|
| 1077 |
"iso_1_code": null,
|
| 1078 |
"iso_3_code": "kfa",
|
| 1079 |
-
"tokenizers": {},
|
| 1080 |
"children": [],
|
|
|
|
| 1081 |
"node_i": "3664",
|
| 1082 |
-
"
|
| 1083 |
-
"
|
| 1084 |
},
|
| 1085 |
{
|
| 1086 |
"name": "Kurumba, Kannada",
|
| 1087 |
"iso_1_code": null,
|
| 1088 |
"iso_3_code": "kfi",
|
| 1089 |
-
"tokenizers": {},
|
| 1090 |
"children": [],
|
|
|
|
| 1091 |
"node_i": "3665",
|
| 1092 |
-
"
|
| 1093 |
-
"
|
| 1094 |
},
|
| 1095 |
{
|
| 1096 |
"name": "Kurumba, Mullu",
|
| 1097 |
"iso_1_code": null,
|
| 1098 |
"iso_3_code": "kpb",
|
| 1099 |
-
"tokenizers": {},
|
| 1100 |
"children": [],
|
|
|
|
| 1101 |
"node_i": "3666",
|
| 1102 |
-
"
|
| 1103 |
-
"
|
| 1104 |
},
|
| 1105 |
{
|
| 1106 |
"name": "Kurumba, Alu",
|
| 1107 |
"iso_1_code": null,
|
| 1108 |
"iso_3_code": "xua",
|
| 1109 |
-
"tokenizers": {},
|
| 1110 |
"children": [],
|
|
|
|
| 1111 |
"node_i": "3667",
|
| 1112 |
-
"
|
| 1113 |
-
"
|
| 1114 |
},
|
| 1115 |
{
|
| 1116 |
"name": "Kurumba, Jennu",
|
| 1117 |
"iso_1_code": null,
|
| 1118 |
"iso_3_code": "xuj",
|
| 1119 |
-
"tokenizers": {},
|
| 1120 |
"children": [],
|
|
|
|
| 1121 |
"node_i": "3668",
|
| 1122 |
-
"
|
| 1123 |
-
"
|
| 1124 |
}
|
| 1125 |
],
|
|
|
|
| 1126 |
"node_i": "3663",
|
| 1127 |
-
"
|
| 1128 |
-
"
|
| 1129 |
},
|
| 1130 |
{
|
| 1131 |
"name": "Tamil-Malayalam",
|
| 1132 |
"iso_1_code": null,
|
| 1133 |
"iso_3_code": null,
|
| 1134 |
-
"tokenizers": {
|
| 1135 |
-
"Latn": {
|
| 1136 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1137 |
-
"original_lang_name": "tamil",
|
| 1138 |
-
"original_lang_code": "tam",
|
| 1139 |
-
"scripts": [
|
| 1140 |
-
"Latn",
|
| 1141 |
-
"Taml"
|
| 1142 |
-
],
|
| 1143 |
-
"class_name": "SpaCyTokenizer",
|
| 1144 |
-
"macrolanguage": false
|
| 1145 |
-
},
|
| 1146 |
-
"Mlym": {
|
| 1147 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 1148 |
-
"original_lang_name": "malayalam",
|
| 1149 |
-
"original_lang_code": "mal",
|
| 1150 |
-
"scripts": [
|
| 1151 |
-
"Latn",
|
| 1152 |
-
"Mlym"
|
| 1153 |
-
],
|
| 1154 |
-
"class_name": "SpaCyTokenizer",
|
| 1155 |
-
"macrolanguage": false
|
| 1156 |
-
},
|
| 1157 |
-
"Taml": {
|
| 1158 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1159 |
-
"original_lang_name": "tamil",
|
| 1160 |
-
"original_lang_code": "tam",
|
| 1161 |
-
"scripts": [
|
| 1162 |
-
"Latn",
|
| 1163 |
-
"Taml"
|
| 1164 |
-
],
|
| 1165 |
-
"class_name": "SpaCyTokenizer",
|
| 1166 |
-
"macrolanguage": false
|
| 1167 |
-
}
|
| 1168 |
-
},
|
| 1169 |
"children": [
|
| 1170 |
{
|
| 1171 |
"name": "Mannan",
|
| 1172 |
"iso_1_code": null,
|
| 1173 |
"iso_3_code": "mjv",
|
| 1174 |
-
"tokenizers": {},
|
| 1175 |
"children": [],
|
|
|
|
| 1176 |
"node_i": "3670",
|
| 1177 |
-
"
|
| 1178 |
-
"
|
| 1179 |
},
|
| 1180 |
{
|
| 1181 |
"name": "Malayalam",
|
| 1182 |
"iso_1_code": null,
|
| 1183 |
"iso_3_code": null,
|
| 1184 |
-
"tokenizers": {
|
| 1185 |
-
"Latn": {
|
| 1186 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 1187 |
-
"original_lang_name": "malayalam",
|
| 1188 |
-
"original_lang_code": "mal",
|
| 1189 |
-
"scripts": [
|
| 1190 |
-
"Latn",
|
| 1191 |
-
"Mlym"
|
| 1192 |
-
],
|
| 1193 |
-
"class_name": "SpaCyTokenizer",
|
| 1194 |
-
"macrolanguage": false
|
| 1195 |
-
},
|
| 1196 |
-
"Mlym": {
|
| 1197 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 1198 |
-
"original_lang_name": "malayalam",
|
| 1199 |
-
"original_lang_code": "mal",
|
| 1200 |
-
"scripts": [
|
| 1201 |
-
"Latn",
|
| 1202 |
-
"Mlym"
|
| 1203 |
-
],
|
| 1204 |
-
"class_name": "SpaCyTokenizer",
|
| 1205 |
-
"macrolanguage": false
|
| 1206 |
-
}
|
| 1207 |
-
},
|
| 1208 |
"children": [
|
| 1209 |
{
|
| 1210 |
"name": "Aranadan",
|
| 1211 |
"iso_1_code": null,
|
| 1212 |
"iso_3_code": "aaf",
|
| 1213 |
-
"tokenizers": {},
|
| 1214 |
"children": [],
|
|
|
|
| 1215 |
"node_i": "3672",
|
| 1216 |
-
"
|
| 1217 |
-
"
|
| 1218 |
},
|
| 1219 |
{
|
| 1220 |
"name": "Kadar",
|
| 1221 |
"iso_1_code": null,
|
| 1222 |
"iso_3_code": "kej",
|
| 1223 |
-
"tokenizers": {},
|
| 1224 |
"children": [],
|
|
|
|
| 1225 |
"node_i": "3673",
|
| 1226 |
-
"
|
| 1227 |
-
"
|
| 1228 |
},
|
| 1229 |
{
|
| 1230 |
"name": "Malayalam",
|
| 1231 |
"iso_1_code": "ml",
|
| 1232 |
"iso_3_code": "mal",
|
|
|
|
| 1233 |
"tokenizers": {
|
| 1234 |
-
"Latn": {
|
| 1235 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
| 1236 |
-
"original_lang_name": "malayalam",
|
| 1237 |
-
"original_lang_code": "mal",
|
| 1238 |
-
"scripts": [
|
| 1239 |
-
"Latn",
|
| 1240 |
-
"Mlym"
|
| 1241 |
-
],
|
| 1242 |
-
"class_name": "SpaCyTokenizer",
|
| 1243 |
-
"macrolanguage": false
|
| 1244 |
-
},
|
| 1245 |
"Mlym": {
|
| 1246 |
-
"full_object": "
|
| 1247 |
"original_lang_name": "malayalam",
|
| 1248 |
"original_lang_code": "mal",
|
| 1249 |
-
"
|
| 1250 |
-
|
| 1251 |
-
"Mlym"
|
| 1252 |
-
],
|
| 1253 |
-
"class_name": "SpaCyTokenizer",
|
| 1254 |
-
"macrolanguage": false
|
| 1255 |
}
|
| 1256 |
},
|
| 1257 |
-
"children": [],
|
| 1258 |
"node_i": "3674",
|
|
|
|
|
|
|
|
|
|
| 1259 |
"scripts": [
|
| 1260 |
"Latn",
|
| 1261 |
"Mlym"
|
| 1262 |
-
]
|
| 1263 |
-
"own_tokenizer": true
|
| 1264 |
},
|
| 1265 |
{
|
| 1266 |
"name": "Malapandaram",
|
| 1267 |
"iso_1_code": null,
|
| 1268 |
"iso_3_code": "mjp",
|
| 1269 |
-
"tokenizers": {},
|
| 1270 |
"children": [],
|
|
|
|
| 1271 |
"node_i": "3675",
|
| 1272 |
-
"
|
| 1273 |
-
"
|
| 1274 |
},
|
| 1275 |
{
|
| 1276 |
"name": "Malaryan",
|
| 1277 |
"iso_1_code": null,
|
| 1278 |
"iso_3_code": "mjq",
|
| 1279 |
-
"tokenizers": {},
|
| 1280 |
"children": [],
|
|
|
|
| 1281 |
"node_i": "3676",
|
| 1282 |
-
"
|
| 1283 |
-
"
|
| 1284 |
},
|
| 1285 |
{
|
| 1286 |
"name": "Malavedan",
|
| 1287 |
"iso_1_code": null,
|
| 1288 |
"iso_3_code": "mjr",
|
| 1289 |
-
"tokenizers": {},
|
| 1290 |
"children": [],
|
|
|
|
| 1291 |
"node_i": "3677",
|
| 1292 |
-
"
|
| 1293 |
-
"
|
| 1294 |
},
|
| 1295 |
{
|
| 1296 |
"name": "Paliyan",
|
| 1297 |
"iso_1_code": null,
|
| 1298 |
"iso_3_code": "pcf",
|
| 1299 |
-
"tokenizers": {},
|
| 1300 |
"children": [],
|
|
|
|
| 1301 |
"node_i": "3678",
|
| 1302 |
-
"
|
| 1303 |
-
"
|
| 1304 |
},
|
| 1305 |
{
|
| 1306 |
"name": "Paniya",
|
| 1307 |
"iso_1_code": null,
|
| 1308 |
"iso_3_code": "pcg",
|
| 1309 |
-
"tokenizers": {},
|
| 1310 |
"children": [],
|
|
|
|
| 1311 |
"node_i": "3679",
|
| 1312 |
-
"
|
| 1313 |
-
"
|
| 1314 |
},
|
| 1315 |
{
|
| 1316 |
"name": "Ravula",
|
| 1317 |
"iso_1_code": null,
|
| 1318 |
"iso_3_code": "yea",
|
| 1319 |
-
"tokenizers": {},
|
| 1320 |
"children": [],
|
|
|
|
| 1321 |
"node_i": "3680",
|
| 1322 |
-
"
|
| 1323 |
-
"
|
| 1324 |
}
|
| 1325 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1326 |
"node_i": "3671",
|
| 1327 |
-
"
|
| 1328 |
-
"
|
| 1329 |
},
|
| 1330 |
{
|
| 1331 |
"name": "Tamil",
|
| 1332 |
"iso_1_code": null,
|
| 1333 |
"iso_3_code": null,
|
| 1334 |
-
"tokenizers": {
|
| 1335 |
-
"Latn": {
|
| 1336 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1337 |
-
"original_lang_name": "tamil",
|
| 1338 |
-
"original_lang_code": "tam",
|
| 1339 |
-
"scripts": [
|
| 1340 |
-
"Latn",
|
| 1341 |
-
"Taml"
|
| 1342 |
-
],
|
| 1343 |
-
"class_name": "SpaCyTokenizer",
|
| 1344 |
-
"macrolanguage": false
|
| 1345 |
-
},
|
| 1346 |
-
"Taml": {
|
| 1347 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1348 |
-
"original_lang_name": "tamil",
|
| 1349 |
-
"original_lang_code": "tam",
|
| 1350 |
-
"scripts": [
|
| 1351 |
-
"Latn",
|
| 1352 |
-
"Taml"
|
| 1353 |
-
],
|
| 1354 |
-
"class_name": "SpaCyTokenizer",
|
| 1355 |
-
"macrolanguage": false
|
| 1356 |
-
}
|
| 1357 |
-
},
|
| 1358 |
"children": [
|
| 1359 |
{
|
| 1360 |
"name": "Eravallan",
|
| 1361 |
"iso_1_code": null,
|
| 1362 |
"iso_3_code": "era",
|
| 1363 |
-
"tokenizers": {},
|
| 1364 |
"children": [],
|
|
|
|
| 1365 |
"node_i": "3682",
|
| 1366 |
-
"
|
| 1367 |
-
"
|
| 1368 |
},
|
| 1369 |
{
|
| 1370 |
"name": "Irula",
|
| 1371 |
"iso_1_code": null,
|
| 1372 |
"iso_3_code": "iru",
|
| 1373 |
-
"tokenizers": {},
|
| 1374 |
"children": [],
|
|
|
|
| 1375 |
"node_i": "3683",
|
| 1376 |
-
"
|
| 1377 |
-
"
|
| 1378 |
},
|
| 1379 |
{
|
| 1380 |
"name": "Kaikadi",
|
| 1381 |
"iso_1_code": null,
|
| 1382 |
"iso_3_code": "kep",
|
| 1383 |
-
"tokenizers": {},
|
| 1384 |
"children": [],
|
|
|
|
| 1385 |
"node_i": "3684",
|
| 1386 |
-
"
|
| 1387 |
-
"
|
| 1388 |
},
|
| 1389 |
{
|
| 1390 |
"name": "Kanikkaran",
|
| 1391 |
"iso_1_code": null,
|
| 1392 |
"iso_3_code": "kev",
|
| 1393 |
-
"tokenizers": {},
|
| 1394 |
"children": [],
|
|
|
|
| 1395 |
"node_i": "3685",
|
| 1396 |
-
"
|
| 1397 |
-
"
|
| 1398 |
},
|
| 1399 |
{
|
| 1400 |
"name": "Muthuvan",
|
| 1401 |
"iso_1_code": null,
|
| 1402 |
"iso_3_code": "muv",
|
| 1403 |
-
"tokenizers": {},
|
| 1404 |
"children": [],
|
|
|
|
| 1405 |
"node_i": "3686",
|
| 1406 |
-
"
|
| 1407 |
-
"
|
| 1408 |
},
|
| 1409 |
{
|
| 1410 |
"name": "Sholaga",
|
| 1411 |
"iso_1_code": null,
|
| 1412 |
"iso_3_code": "sle",
|
| 1413 |
-
"tokenizers": {},
|
| 1414 |
"children": [],
|
|
|
|
| 1415 |
"node_i": "3687",
|
| 1416 |
-
"
|
| 1417 |
-
"
|
| 1418 |
},
|
| 1419 |
{
|
| 1420 |
"name": "Tamil",
|
| 1421 |
"iso_1_code": "ta",
|
| 1422 |
"iso_3_code": "tam",
|
|
|
|
| 1423 |
"tokenizers": {
|
| 1424 |
-
"Latn": {
|
| 1425 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
| 1426 |
-
"original_lang_name": "tamil",
|
| 1427 |
-
"original_lang_code": "tam",
|
| 1428 |
-
"scripts": [
|
| 1429 |
-
"Latn",
|
| 1430 |
-
"Taml"
|
| 1431 |
-
],
|
| 1432 |
-
"class_name": "SpaCyTokenizer",
|
| 1433 |
-
"macrolanguage": false
|
| 1434 |
-
},
|
| 1435 |
"Taml": {
|
| 1436 |
-
"full_object": "
|
| 1437 |
"original_lang_name": "tamil",
|
| 1438 |
"original_lang_code": "tam",
|
| 1439 |
-
"
|
| 1440 |
-
|
| 1441 |
-
"Taml"
|
| 1442 |
-
],
|
| 1443 |
-
"class_name": "SpaCyTokenizer",
|
| 1444 |
-
"macrolanguage": false
|
| 1445 |
}
|
| 1446 |
},
|
| 1447 |
-
"children": [],
|
| 1448 |
"node_i": "3688",
|
|
|
|
|
|
|
|
|
|
| 1449 |
"scripts": [
|
| 1450 |
"Taml",
|
| 1451 |
"Latn"
|
| 1452 |
-
]
|
| 1453 |
-
"own_tokenizer": true
|
| 1454 |
},
|
| 1455 |
{
|
| 1456 |
"name": "Kurumba, Betta",
|
| 1457 |
"iso_1_code": null,
|
| 1458 |
"iso_3_code": "xub",
|
| 1459 |
-
"tokenizers": {},
|
| 1460 |
"children": [],
|
|
|
|
| 1461 |
"node_i": "3689",
|
| 1462 |
-
"
|
| 1463 |
-
"
|
| 1464 |
},
|
| 1465 |
{
|
| 1466 |
"name": "Yerukula",
|
| 1467 |
"iso_1_code": null,
|
| 1468 |
"iso_3_code": "yeu",
|
| 1469 |
-
"tokenizers": {},
|
| 1470 |
"children": [],
|
|
|
|
| 1471 |
"node_i": "3690",
|
| 1472 |
-
"
|
| 1473 |
-
"
|
| 1474 |
}
|
| 1475 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1476 |
"node_i": "3681",
|
| 1477 |
-
"
|
| 1478 |
-
"
|
| 1479 |
}
|
| 1480 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1481 |
"node_i": "3669",
|
| 1482 |
-
"
|
| 1483 |
-
"
|
| 1484 |
},
|
| 1485 |
{
|
| 1486 |
"name": "Toda-Kota",
|
| 1487 |
"iso_1_code": null,
|
| 1488 |
"iso_3_code": null,
|
| 1489 |
-
"tokenizers": {},
|
| 1490 |
"children": [
|
| 1491 |
{
|
| 1492 |
"name": "Kota",
|
| 1493 |
"iso_1_code": null,
|
| 1494 |
"iso_3_code": "kfe",
|
| 1495 |
-
"tokenizers": {},
|
| 1496 |
"children": [],
|
|
|
|
| 1497 |
"node_i": "3692",
|
| 1498 |
-
"
|
| 1499 |
-
"
|
| 1500 |
},
|
| 1501 |
{
|
| 1502 |
"name": "Toda",
|
| 1503 |
"iso_1_code": null,
|
| 1504 |
"iso_3_code": "tcx",
|
| 1505 |
-
"tokenizers": {},
|
| 1506 |
"children": [],
|
|
|
|
| 1507 |
"node_i": "3693",
|
| 1508 |
-
"
|
| 1509 |
-
"
|
| 1510 |
}
|
| 1511 |
],
|
|
|
|
| 1512 |
"node_i": "3691",
|
| 1513 |
-
"
|
| 1514 |
-
"
|
| 1515 |
}
|
| 1516 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1517 |
"node_i": "3662",
|
| 1518 |
-
"
|
| 1519 |
-
"
|
| 1520 |
},
|
| 1521 |
{
|
| 1522 |
"name": "Unclassified",
|
| 1523 |
"iso_1_code": null,
|
| 1524 |
"iso_3_code": null,
|
| 1525 |
-
"tokenizers": {},
|
| 1526 |
"children": [
|
| 1527 |
{
|
| 1528 |
"name": "Chetti, Wayanad",
|
| 1529 |
"iso_1_code": null,
|
| 1530 |
"iso_3_code": "ctt",
|
| 1531 |
-
"tokenizers": {},
|
| 1532 |
"children": [],
|
|
|
|
| 1533 |
"node_i": "3695",
|
| 1534 |
-
"
|
| 1535 |
-
"
|
| 1536 |
}
|
| 1537 |
],
|
|
|
|
| 1538 |
"node_i": "3694",
|
| 1539 |
-
"
|
| 1540 |
-
"
|
| 1541 |
}
|
| 1542 |
],
|
| 1543 |
-
"node_i": "3656",
|
| 1544 |
-
"scripts": [],
|
| 1545 |
-
"own_tokenizer": false
|
| 1546 |
-
},
|
| 1547 |
-
{
|
| 1548 |
-
"name": "Tulu",
|
| 1549 |
-
"iso_1_code": null,
|
| 1550 |
-
"iso_3_code": null,
|
| 1551 |
"tokenizers": {
|
| 1552 |
"Knda": {
|
| 1553 |
-
"full_object": "
|
| 1554 |
"original_lang_name": "kannada",
|
| 1555 |
"original_lang_code": "kan",
|
| 1556 |
-
"
|
| 1557 |
-
|
| 1558 |
-
|
| 1559 |
-
|
| 1560 |
-
"
|
| 1561 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1562 |
}
|
| 1563 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1564 |
"children": [
|
| 1565 |
{
|
| 1566 |
"name": "Bellari",
|
| 1567 |
"iso_1_code": null,
|
| 1568 |
"iso_3_code": "brw",
|
| 1569 |
-
"tokenizers": {},
|
| 1570 |
"children": [],
|
|
|
|
| 1571 |
"node_i": "3697",
|
| 1572 |
-
"
|
| 1573 |
-
"
|
| 1574 |
},
|
| 1575 |
{
|
| 1576 |
"name": "Kudiya",
|
| 1577 |
"iso_1_code": null,
|
| 1578 |
"iso_3_code": "kfg",
|
| 1579 |
-
"tokenizers": {},
|
| 1580 |
"children": [],
|
|
|
|
| 1581 |
"node_i": "3698",
|
| 1582 |
-
"
|
| 1583 |
-
"
|
| 1584 |
},
|
| 1585 |
{
|
| 1586 |
"name": "Tulu",
|
| 1587 |
"iso_1_code": null,
|
| 1588 |
"iso_3_code": "tcy",
|
|
|
|
| 1589 |
"tokenizers": {
|
| 1590 |
"Knda": {
|
| 1591 |
-
"full_object": "
|
| 1592 |
"original_lang_name": "kannada",
|
| 1593 |
"original_lang_code": "kan",
|
| 1594 |
-
"
|
| 1595 |
-
|
| 1596 |
-
"Knda"
|
| 1597 |
-
],
|
| 1598 |
-
"class_name": "SpaCyTokenizer",
|
| 1599 |
-
"macrolanguage": false
|
| 1600 |
}
|
| 1601 |
},
|
| 1602 |
-
"children": [],
|
| 1603 |
"node_i": "3699",
|
|
|
|
| 1604 |
"scripts": [
|
| 1605 |
"Knda"
|
| 1606 |
-
]
|
| 1607 |
-
"own_tokenizer": false
|
| 1608 |
},
|
| 1609 |
{
|
| 1610 |
"name": "Koraga",
|
| 1611 |
"iso_1_code": null,
|
| 1612 |
"iso_3_code": null,
|
| 1613 |
-
"tokenizers": {},
|
| 1614 |
"children": [
|
| 1615 |
{
|
| 1616 |
"name": "Koraga, Korra",
|
| 1617 |
"iso_1_code": null,
|
| 1618 |
"iso_3_code": "kfd",
|
| 1619 |
-
"tokenizers": {},
|
| 1620 |
"children": [],
|
|
|
|
| 1621 |
"node_i": "3701",
|
| 1622 |
-
"
|
| 1623 |
-
"
|
| 1624 |
},
|
| 1625 |
{
|
| 1626 |
"name": "Koraga, Mudu",
|
| 1627 |
"iso_1_code": null,
|
| 1628 |
"iso_3_code": "vmd",
|
| 1629 |
-
"tokenizers": {},
|
| 1630 |
"children": [],
|
|
|
|
| 1631 |
"node_i": "3702",
|
| 1632 |
-
"
|
| 1633 |
-
"
|
| 1634 |
}
|
| 1635 |
],
|
|
|
|
| 1636 |
"node_i": "3700",
|
| 1637 |
-
"
|
| 1638 |
-
"
|
| 1639 |
}
|
| 1640 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1641 |
"node_i": "3696",
|
| 1642 |
-
"
|
| 1643 |
-
"
|
| 1644 |
},
|
| 1645 |
{
|
| 1646 |
"name": "Unclassified",
|
| 1647 |
"iso_1_code": null,
|
| 1648 |
"iso_3_code": null,
|
| 1649 |
-
"tokenizers": {},
|
| 1650 |
"children": [
|
| 1651 |
{
|
| 1652 |
"name": "Mala Malasar",
|
| 1653 |
"iso_1_code": null,
|
| 1654 |
"iso_3_code": "ima",
|
| 1655 |
-
"tokenizers": {},
|
| 1656 |
"children": [],
|
|
|
|
| 1657 |
"node_i": "3704",
|
| 1658 |
-
"
|
| 1659 |
-
"
|
| 1660 |
},
|
| 1661 |
{
|
| 1662 |
"name": "Thachanadan",
|
| 1663 |
"iso_1_code": null,
|
| 1664 |
"iso_3_code": "thn",
|
| 1665 |
-
"tokenizers": {},
|
| 1666 |
"children": [],
|
|
|
|
| 1667 |
"node_i": "3705",
|
| 1668 |
-
"
|
| 1669 |
-
"
|
| 1670 |
},
|
| 1671 |
{
|
| 1672 |
"name": "Ullatan",
|
| 1673 |
"iso_1_code": null,
|
| 1674 |
"iso_3_code": "ull",
|
| 1675 |
-
"tokenizers": {},
|
| 1676 |
"children": [],
|
|
|
|
| 1677 |
"node_i": "3706",
|
| 1678 |
-
"
|
| 1679 |
-
"
|
| 1680 |
},
|
| 1681 |
{
|
| 1682 |
"name": "Malasar",
|
| 1683 |
"iso_1_code": null,
|
| 1684 |
"iso_3_code": "ymr",
|
| 1685 |
-
"tokenizers": {},
|
| 1686 |
"children": [],
|
|
|
|
| 1687 |
"node_i": "3707",
|
| 1688 |
-
"
|
| 1689 |
-
"
|
| 1690 |
}
|
| 1691 |
],
|
|
|
|
| 1692 |
"node_i": "3703",
|
| 1693 |
-
"
|
| 1694 |
-
"
|
| 1695 |
}
|
| 1696 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1697 |
"node_i": "3648",
|
| 1698 |
-
"
|
| 1699 |
-
"
|
| 1700 |
},
|
| 1701 |
{
|
| 1702 |
"name": "Unclassified",
|
| 1703 |
"iso_1_code": null,
|
| 1704 |
"iso_3_code": null,
|
| 1705 |
-
"tokenizers": {},
|
| 1706 |
"children": [
|
| 1707 |
{
|
| 1708 |
"name": "Allar",
|
| 1709 |
"iso_1_code": null,
|
| 1710 |
"iso_3_code": "all",
|
| 1711 |
-
"tokenizers": {},
|
| 1712 |
"children": [],
|
|
|
|
| 1713 |
"node_i": "3709",
|
| 1714 |
-
"
|
| 1715 |
-
"
|
| 1716 |
},
|
| 1717 |
{
|
| 1718 |
"name": "Bharia",
|
| 1719 |
"iso_1_code": null,
|
| 1720 |
"iso_3_code": "bha",
|
| 1721 |
-
"tokenizers": {},
|
| 1722 |
"children": [],
|
|
|
|
| 1723 |
"node_i": "3710",
|
| 1724 |
-
"
|
| 1725 |
-
"
|
| 1726 |
},
|
| 1727 |
{
|
| 1728 |
"name": "Malankuravan",
|
| 1729 |
"iso_1_code": null,
|
| 1730 |
"iso_3_code": "mjo",
|
| 1731 |
-
"tokenizers": {},
|
| 1732 |
"children": [],
|
|
|
|
| 1733 |
"node_i": "3711",
|
| 1734 |
-
"
|
| 1735 |
-
"
|
| 1736 |
},
|
| 1737 |
{
|
| 1738 |
"name": "Pattapu",
|
| 1739 |
"iso_1_code": null,
|
| 1740 |
"iso_3_code": "ptq",
|
| 1741 |
-
"tokenizers": {},
|
| 1742 |
"children": [],
|
|
|
|
| 1743 |
"node_i": "3712",
|
| 1744 |
-
"
|
| 1745 |
-
"
|
| 1746 |
},
|
| 1747 |
{
|
| 1748 |
"name": "Vishavan",
|
| 1749 |
"iso_1_code": null,
|
| 1750 |
"iso_3_code": "vis",
|
| 1751 |
-
"tokenizers": {},
|
| 1752 |
"children": [],
|
|
|
|
| 1753 |
"node_i": "3713",
|
| 1754 |
-
"
|
| 1755 |
-
"
|
| 1756 |
}
|
| 1757 |
],
|
|
|
|
| 1758 |
"node_i": "3708",
|
| 1759 |
-
"
|
| 1760 |
-
"
|
| 1761 |
}
|
| 1762 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1763 |
"node_i": "3601",
|
| 1764 |
-
"
|
| 1765 |
-
"
|
| 1766 |
}
|
|
|
|
| 2 |
"name": "Dravidian",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Central",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Kolami-Naiki",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": null,
|
|
|
|
| 15 |
"children": [
|
| 16 |
{
|
| 17 |
"name": "Kolami, Northwestern",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "kfb",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3604",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Kolami, Southeastern",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "nit",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3605",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
}
|
| 36 |
],
|
| 37 |
+
"tokenizers": {},
|
| 38 |
"node_i": "3603",
|
| 39 |
+
"native_tokenizers": [],
|
| 40 |
+
"scripts": []
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"name": "Parji-Gadaba",
|
| 44 |
"iso_1_code": null,
|
| 45 |
"iso_3_code": null,
|
|
|
|
| 46 |
"children": [
|
| 47 |
{
|
| 48 |
"name": "Gadaba, Mudhili",
|
| 49 |
"iso_1_code": null,
|
| 50 |
"iso_3_code": "gau",
|
|
|
|
| 51 |
"children": [],
|
| 52 |
+
"tokenizers": {},
|
| 53 |
"node_i": "3607",
|
| 54 |
+
"native_tokenizers": [],
|
| 55 |
+
"scripts": []
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"name": "Gadaba, Pottangi Ollar",
|
| 59 |
"iso_1_code": null,
|
| 60 |
"iso_3_code": "gdb",
|
|
|
|
| 61 |
"children": [],
|
| 62 |
+
"tokenizers": {},
|
| 63 |
"node_i": "3608",
|
| 64 |
+
"native_tokenizers": [],
|
| 65 |
+
"scripts": []
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"name": "Duruwa",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "pci",
|
|
|
|
| 71 |
"children": [],
|
| 72 |
+
"tokenizers": {},
|
| 73 |
"node_i": "3609",
|
| 74 |
+
"native_tokenizers": [],
|
| 75 |
+
"scripts": []
|
| 76 |
}
|
| 77 |
],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3606",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
}
|
| 83 |
],
|
| 84 |
+
"tokenizers": {},
|
| 85 |
"node_i": "3602",
|
| 86 |
+
"native_tokenizers": [],
|
| 87 |
+
"scripts": []
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"name": "Northern",
|
| 91 |
"iso_1_code": null,
|
| 92 |
"iso_3_code": null,
|
|
|
|
| 93 |
"children": [
|
| 94 |
{
|
| 95 |
"name": "Brahui",
|
| 96 |
"iso_1_code": null,
|
| 97 |
"iso_3_code": "brh",
|
|
|
|
| 98 |
"children": [],
|
| 99 |
+
"tokenizers": {},
|
| 100 |
"node_i": "3611",
|
| 101 |
+
"native_tokenizers": [],
|
| 102 |
"scripts": [
|
| 103 |
"Arab"
|
| 104 |
+
]
|
|
|
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"name": "Kumarbhag Paharia",
|
| 108 |
"iso_1_code": null,
|
| 109 |
"iso_3_code": "kmj",
|
|
|
|
| 110 |
"children": [],
|
| 111 |
+
"tokenizers": {},
|
| 112 |
"node_i": "3612",
|
| 113 |
+
"native_tokenizers": [],
|
| 114 |
+
"scripts": []
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"name": "Kurux",
|
| 118 |
"iso_1_code": null,
|
| 119 |
"iso_3_code": "kru",
|
|
|
|
| 120 |
"children": [],
|
| 121 |
+
"tokenizers": {},
|
| 122 |
"node_i": "3613",
|
| 123 |
+
"native_tokenizers": [],
|
| 124 |
"scripts": [
|
| 125 |
"Deva"
|
| 126 |
+
]
|
|
|
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"name": "Sauria Paharia",
|
| 130 |
"iso_1_code": null,
|
| 131 |
"iso_3_code": "mjt",
|
|
|
|
| 132 |
"children": [],
|
| 133 |
+
"tokenizers": {},
|
| 134 |
"node_i": "3614",
|
| 135 |
+
"native_tokenizers": [],
|
| 136 |
+
"scripts": []
|
| 137 |
},
|
| 138 |
{
|
| 139 |
"name": "Kisan",
|
| 140 |
"iso_1_code": null,
|
| 141 |
"iso_3_code": "xis",
|
|
|
|
| 142 |
"children": [],
|
| 143 |
+
"tokenizers": {},
|
| 144 |
"node_i": "3615",
|
| 145 |
+
"native_tokenizers": [],
|
| 146 |
+
"scripts": []
|
| 147 |
}
|
| 148 |
],
|
| 149 |
+
"tokenizers": {},
|
| 150 |
"node_i": "3610",
|
| 151 |
+
"native_tokenizers": [],
|
| 152 |
+
"scripts": []
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"name": "South-Central",
|
| 156 |
"iso_1_code": null,
|
| 157 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
"children": [
|
| 159 |
{
|
| 160 |
"name": "Gondi-Kui",
|
| 161 |
"iso_1_code": null,
|
| 162 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
"children": [
|
| 164 |
{
|
| 165 |
"name": "Gondi",
|
| 166 |
"iso_1_code": null,
|
| 167 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
"children": [
|
| 169 |
{
|
| 170 |
"name": "Maria, Dandami",
|
| 171 |
"iso_1_code": null,
|
| 172 |
"iso_3_code": "daq",
|
|
|
|
| 173 |
"children": [],
|
| 174 |
+
"tokenizers": {},
|
| 175 |
"node_i": "3619",
|
| 176 |
+
"native_tokenizers": [],
|
| 177 |
+
"scripts": []
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"name": "Muria, Eastern",
|
| 181 |
"iso_1_code": null,
|
| 182 |
"iso_3_code": "emu",
|
|
|
|
| 183 |
"children": [],
|
| 184 |
+
"tokenizers": {},
|
| 185 |
"node_i": "3620",
|
| 186 |
+
"native_tokenizers": [],
|
| 187 |
+
"scripts": []
|
| 188 |
},
|
| 189 |
{
|
| 190 |
"name": "Gondi, Aheri",
|
| 191 |
"iso_1_code": null,
|
| 192 |
"iso_3_code": "esg",
|
|
|
|
| 193 |
"children": [],
|
| 194 |
+
"tokenizers": {},
|
| 195 |
"node_i": "3621",
|
| 196 |
+
"native_tokenizers": [],
|
| 197 |
+
"scripts": []
|
| 198 |
},
|
| 199 |
{
|
| 200 |
"name": "Muria, Far Western",
|
| 201 |
"iso_1_code": null,
|
| 202 |
"iso_3_code": "fmu",
|
|
|
|
| 203 |
"children": [],
|
| 204 |
+
"tokenizers": {},
|
| 205 |
"node_i": "3622",
|
| 206 |
+
"native_tokenizers": [],
|
| 207 |
"scripts": [
|
| 208 |
"Deva"
|
| 209 |
+
]
|
|
|
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"name": "Gondi, Northern",
|
| 213 |
"iso_1_code": null,
|
| 214 |
"iso_3_code": "gno",
|
|
|
|
| 215 |
"children": [],
|
| 216 |
+
"tokenizers": {},
|
| 217 |
"node_i": "3623",
|
| 218 |
+
"native_tokenizers": [],
|
| 219 |
+
"scripts": []
|
| 220 |
},
|
| 221 |
{
|
| 222 |
"name": "Khirwar",
|
| 223 |
"iso_1_code": null,
|
| 224 |
"iso_3_code": "kwx",
|
|
|
|
| 225 |
"children": [],
|
| 226 |
+
"tokenizers": {},
|
| 227 |
"node_i": "3624",
|
| 228 |
+
"native_tokenizers": [],
|
| 229 |
+
"scripts": []
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"name": "Maria",
|
| 233 |
"iso_1_code": null,
|
| 234 |
"iso_3_code": "mrr",
|
|
|
|
| 235 |
"children": [],
|
| 236 |
+
"tokenizers": {},
|
| 237 |
"node_i": "3625",
|
| 238 |
+
"native_tokenizers": [],
|
| 239 |
+
"scripts": []
|
| 240 |
},
|
| 241 |
{
|
| 242 |
"name": "Muria, Western",
|
| 243 |
"iso_1_code": null,
|
| 244 |
"iso_3_code": "mut",
|
|
|
|
| 245 |
"children": [],
|
| 246 |
+
"tokenizers": {},
|
| 247 |
"node_i": "3626",
|
| 248 |
+
"native_tokenizers": [],
|
| 249 |
+
"scripts": []
|
| 250 |
},
|
| 251 |
{
|
| 252 |
"name": "Nagarchal",
|
| 253 |
"iso_1_code": null,
|
| 254 |
"iso_3_code": "nbg",
|
|
|
|
| 255 |
"children": [],
|
| 256 |
+
"tokenizers": {},
|
| 257 |
"node_i": "3627",
|
| 258 |
+
"native_tokenizers": [],
|
| 259 |
+
"scripts": []
|
| 260 |
},
|
| 261 |
{
|
| 262 |
"name": "Pardhan",
|
| 263 |
"iso_1_code": null,
|
| 264 |
"iso_3_code": "pch",
|
|
|
|
| 265 |
"children": [],
|
| 266 |
+
"tokenizers": {},
|
| 267 |
"node_i": "3628",
|
| 268 |
+
"native_tokenizers": [],
|
| 269 |
+
"scripts": []
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"name": "Gondi, Adilabad",
|
| 273 |
"iso_1_code": null,
|
| 274 |
"iso_3_code": "wsg",
|
| 275 |
+
"children": [],
|
| 276 |
"tokenizers": {
|
| 277 |
"Telu": {
|
| 278 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 279 |
"original_lang_name": "telugu",
|
| 280 |
"original_lang_code": "tel",
|
| 281 |
+
"script": "Telu",
|
| 282 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
}
|
| 284 |
},
|
|
|
|
| 285 |
"node_i": "3629",
|
| 286 |
+
"native_tokenizers": [],
|
| 287 |
"scripts": [
|
| 288 |
"Telu"
|
| 289 |
+
]
|
|
|
|
| 290 |
}
|
| 291 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
"tokenizers": {
|
| 293 |
"Telu": {
|
| 294 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
"original_lang_name": "telugu",
|
| 296 |
"original_lang_code": "tel",
|
| 297 |
+
"script": "Telu",
|
| 298 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
}
|
| 300 |
},
|
| 301 |
+
"node_i": "3618",
|
| 302 |
+
"native_tokenizers": [],
|
| 303 |
+
"scripts": []
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"name": "Konda-Kui",
|
| 307 |
+
"iso_1_code": null,
|
| 308 |
+
"iso_3_code": null,
|
| 309 |
"children": [
|
| 310 |
{
|
| 311 |
"name": "Konda",
|
| 312 |
"iso_1_code": null,
|
| 313 |
"iso_3_code": null,
|
|
|
|
| 314 |
"children": [
|
| 315 |
{
|
| 316 |
"name": "Konda-Dora",
|
| 317 |
"iso_1_code": null,
|
| 318 |
"iso_3_code": "kfc",
|
|
|
|
| 319 |
"children": [],
|
| 320 |
+
"tokenizers": {},
|
| 321 |
"node_i": "3632",
|
| 322 |
+
"native_tokenizers": [],
|
| 323 |
+
"scripts": []
|
| 324 |
},
|
| 325 |
{
|
| 326 |
"name": "Mukha-Dora",
|
| 327 |
"iso_1_code": null,
|
| 328 |
"iso_3_code": "mmk",
|
|
|
|
| 329 |
"children": [],
|
| 330 |
+
"tokenizers": {},
|
| 331 |
"node_i": "3633",
|
| 332 |
+
"native_tokenizers": [],
|
| 333 |
+
"scripts": []
|
| 334 |
}
|
| 335 |
],
|
| 336 |
+
"tokenizers": {},
|
| 337 |
"node_i": "3631",
|
| 338 |
+
"native_tokenizers": [],
|
| 339 |
+
"scripts": []
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"name": "Manda-Kui",
|
| 343 |
"iso_1_code": null,
|
| 344 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
"children": [
|
| 346 |
{
|
| 347 |
"name": "Kui-Kuvi",
|
| 348 |
"iso_1_code": null,
|
| 349 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
"children": [
|
| 351 |
{
|
| 352 |
"name": "Kui, Dawik",
|
| 353 |
"iso_1_code": null,
|
| 354 |
"iso_3_code": "dwk",
|
|
|
|
| 355 |
"children": [],
|
| 356 |
+
"tokenizers": {},
|
| 357 |
"node_i": "3636",
|
| 358 |
+
"native_tokenizers": [],
|
| 359 |
+
"scripts": []
|
| 360 |
},
|
| 361 |
{
|
| 362 |
"name": "Koya",
|
| 363 |
"iso_1_code": null,
|
| 364 |
"iso_3_code": "kff",
|
| 365 |
+
"children": [],
|
| 366 |
"tokenizers": {
|
| 367 |
"Telu": {
|
| 368 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 369 |
"original_lang_name": "telugu",
|
| 370 |
"original_lang_code": "tel",
|
| 371 |
+
"script": "Telu",
|
| 372 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
}
|
| 374 |
},
|
|
|
|
| 375 |
"node_i": "3637",
|
| 376 |
+
"native_tokenizers": [],
|
| 377 |
"scripts": [
|
| 378 |
"Telu"
|
| 379 |
+
]
|
|
|
|
| 380 |
},
|
| 381 |
{
|
| 382 |
"name": "Kuvi",
|
| 383 |
"iso_1_code": null,
|
| 384 |
"iso_3_code": "kxv",
|
|
|
|
| 385 |
"children": [],
|
| 386 |
+
"tokenizers": {},
|
| 387 |
"node_i": "3638",
|
| 388 |
+
"native_tokenizers": [],
|
| 389 |
+
"scripts": []
|
| 390 |
},
|
| 391 |
{
|
| 392 |
"name": "Kui",
|
| 393 |
"iso_1_code": null,
|
| 394 |
"iso_3_code": "uki",
|
|
|
|
| 395 |
"children": [],
|
| 396 |
+
"tokenizers": {},
|
| 397 |
"node_i": "3639",
|
| 398 |
+
"native_tokenizers": [],
|
| 399 |
+
"scripts": []
|
| 400 |
}
|
| 401 |
],
|
| 402 |
+
"tokenizers": {
|
| 403 |
+
"Telu": {
|
| 404 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 405 |
+
"original_lang_name": "telugu",
|
| 406 |
+
"original_lang_code": "tel",
|
| 407 |
+
"script": "Telu",
|
| 408 |
+
"class_name": "IndicNLPTokenizer"
|
| 409 |
+
}
|
| 410 |
+
},
|
| 411 |
"node_i": "3635",
|
| 412 |
+
"native_tokenizers": [],
|
| 413 |
+
"scripts": []
|
| 414 |
},
|
| 415 |
{
|
| 416 |
"name": "Manda-Pengo",
|
| 417 |
"iso_1_code": null,
|
| 418 |
"iso_3_code": null,
|
|
|
|
| 419 |
"children": [
|
| 420 |
{
|
| 421 |
"name": "Manda",
|
| 422 |
"iso_1_code": null,
|
| 423 |
"iso_3_code": "mha",
|
|
|
|
| 424 |
"children": [],
|
| 425 |
+
"tokenizers": {},
|
| 426 |
"node_i": "3641",
|
| 427 |
+
"native_tokenizers": [],
|
| 428 |
+
"scripts": []
|
| 429 |
},
|
| 430 |
{
|
| 431 |
"name": "Pengo",
|
| 432 |
"iso_1_code": null,
|
| 433 |
"iso_3_code": "peg",
|
|
|
|
| 434 |
"children": [],
|
| 435 |
+
"tokenizers": {},
|
| 436 |
"node_i": "3642",
|
| 437 |
+
"native_tokenizers": [],
|
| 438 |
+
"scripts": []
|
| 439 |
}
|
| 440 |
],
|
| 441 |
+
"tokenizers": {},
|
| 442 |
"node_i": "3640",
|
| 443 |
+
"native_tokenizers": [],
|
| 444 |
+
"scripts": []
|
| 445 |
}
|
| 446 |
],
|
| 447 |
+
"tokenizers": {
|
| 448 |
+
"Telu": {
|
| 449 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 450 |
+
"original_lang_name": "telugu",
|
| 451 |
+
"original_lang_code": "tel",
|
| 452 |
+
"script": "Telu",
|
| 453 |
+
"class_name": "IndicNLPTokenizer"
|
| 454 |
+
}
|
| 455 |
+
},
|
| 456 |
"node_i": "3634",
|
| 457 |
+
"native_tokenizers": [],
|
| 458 |
+
"scripts": []
|
| 459 |
}
|
| 460 |
],
|
| 461 |
+
"tokenizers": {
|
| 462 |
+
"Telu": {
|
| 463 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 464 |
+
"original_lang_name": "telugu",
|
| 465 |
+
"original_lang_code": "tel",
|
| 466 |
+
"script": "Telu",
|
| 467 |
+
"class_name": "IndicNLPTokenizer"
|
| 468 |
+
}
|
| 469 |
+
},
|
| 470 |
"node_i": "3630",
|
| 471 |
+
"native_tokenizers": [],
|
| 472 |
+
"scripts": []
|
| 473 |
}
|
| 474 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
"tokenizers": {
|
| 476 |
"Telu": {
|
| 477 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 478 |
"original_lang_name": "telugu",
|
| 479 |
"original_lang_code": "tel",
|
| 480 |
+
"script": "Telu",
|
| 481 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
}
|
| 483 |
},
|
| 484 |
+
"node_i": "3617",
|
| 485 |
+
"native_tokenizers": [],
|
| 486 |
+
"scripts": []
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"name": "Telugu",
|
| 490 |
+
"iso_1_code": null,
|
| 491 |
+
"iso_3_code": null,
|
| 492 |
"children": [
|
| 493 |
{
|
| 494 |
"name": "Chenchu",
|
| 495 |
"iso_1_code": null,
|
| 496 |
"iso_3_code": "cde",
|
|
|
|
| 497 |
"children": [],
|
| 498 |
+
"tokenizers": {},
|
| 499 |
"node_i": "3644",
|
| 500 |
+
"native_tokenizers": [],
|
| 501 |
+
"scripts": []
|
| 502 |
},
|
| 503 |
{
|
| 504 |
"name": "Manna-Dora",
|
| 505 |
"iso_1_code": null,
|
| 506 |
"iso_3_code": "mju",
|
|
|
|
| 507 |
"children": [],
|
| 508 |
+
"tokenizers": {},
|
| 509 |
"node_i": "3645",
|
| 510 |
+
"native_tokenizers": [],
|
| 511 |
+
"scripts": []
|
| 512 |
},
|
| 513 |
{
|
| 514 |
"name": "Telugu",
|
| 515 |
"iso_1_code": "te",
|
| 516 |
"iso_3_code": "tel",
|
| 517 |
+
"children": [],
|
| 518 |
"tokenizers": {
|
| 519 |
"Telu": {
|
| 520 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
"original_lang_name": "telugu",
|
| 522 |
"original_lang_code": "tel",
|
| 523 |
+
"script": "Telu",
|
| 524 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
}
|
| 526 |
},
|
|
|
|
| 527 |
"node_i": "3646",
|
| 528 |
+
"native_tokenizers": [
|
| 529 |
+
"Telu"
|
| 530 |
+
],
|
| 531 |
"scripts": [
|
| 532 |
"Telu",
|
| 533 |
"Latn"
|
| 534 |
+
]
|
|
|
|
| 535 |
},
|
| 536 |
{
|
| 537 |
"name": "Waddar",
|
| 538 |
"iso_1_code": null,
|
| 539 |
"iso_3_code": "wbq",
|
|
|
|
| 540 |
"children": [],
|
| 541 |
+
"tokenizers": {},
|
| 542 |
"node_i": "3647",
|
| 543 |
+
"native_tokenizers": [],
|
| 544 |
+
"scripts": []
|
| 545 |
}
|
| 546 |
],
|
| 547 |
+
"tokenizers": {
|
| 548 |
+
"Telu": {
|
| 549 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 550 |
+
"original_lang_name": "telugu",
|
| 551 |
+
"original_lang_code": "tel",
|
| 552 |
+
"script": "Telu",
|
| 553 |
+
"class_name": "IndicNLPTokenizer"
|
| 554 |
+
}
|
| 555 |
+
},
|
| 556 |
"node_i": "3643",
|
| 557 |
+
"native_tokenizers": [],
|
| 558 |
+
"scripts": []
|
| 559 |
}
|
| 560 |
],
|
| 561 |
+
"tokenizers": {
|
| 562 |
+
"Telu": {
|
| 563 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 564 |
+
"original_lang_name": "telugu",
|
| 565 |
+
"original_lang_code": "tel",
|
| 566 |
+
"script": "Telu",
|
| 567 |
+
"class_name": "IndicNLPTokenizer"
|
| 568 |
+
}
|
| 569 |
+
},
|
| 570 |
"node_i": "3616",
|
| 571 |
+
"native_tokenizers": [],
|
| 572 |
+
"scripts": []
|
| 573 |
},
|
| 574 |
{
|
| 575 |
"name": "Southern",
|
| 576 |
"iso_1_code": null,
|
| 577 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
"children": [
|
| 579 |
{
|
| 580 |
"name": "Kurichiya",
|
| 581 |
"iso_1_code": null,
|
| 582 |
"iso_3_code": "kfh",
|
|
|
|
| 583 |
"children": [],
|
| 584 |
+
"tokenizers": {},
|
| 585 |
"node_i": "3649",
|
| 586 |
+
"native_tokenizers": [],
|
| 587 |
+
"scripts": []
|
| 588 |
},
|
| 589 |
{
|
| 590 |
"name": "Kurumba, Attapady",
|
| 591 |
"iso_1_code": null,
|
| 592 |
"iso_3_code": "pkr",
|
|
|
|
| 593 |
"children": [],
|
| 594 |
+
"tokenizers": {},
|
| 595 |
"node_i": "3650",
|
| 596 |
+
"native_tokenizers": [],
|
| 597 |
+
"scripts": []
|
| 598 |
},
|
| 599 |
{
|
| 600 |
"name": "Pathiya",
|
| 601 |
"iso_1_code": null,
|
| 602 |
"iso_3_code": "pty",
|
|
|
|
| 603 |
"children": [],
|
| 604 |
+
"tokenizers": {},
|
| 605 |
"node_i": "3651",
|
| 606 |
+
"native_tokenizers": [],
|
| 607 |
+
"scripts": []
|
| 608 |
},
|
| 609 |
{
|
| 610 |
"name": "Muduga",
|
| 611 |
"iso_1_code": null,
|
| 612 |
"iso_3_code": "udg",
|
|
|
|
| 613 |
"children": [],
|
| 614 |
+
"tokenizers": {},
|
| 615 |
"node_i": "3652",
|
| 616 |
+
"native_tokenizers": [],
|
| 617 |
+
"scripts": []
|
| 618 |
},
|
| 619 |
{
|
| 620 |
"name": "Kumbaran",
|
| 621 |
"iso_1_code": null,
|
| 622 |
"iso_3_code": "wkb",
|
|
|
|
| 623 |
"children": [],
|
| 624 |
+
"tokenizers": {},
|
| 625 |
"node_i": "3653",
|
| 626 |
+
"native_tokenizers": [],
|
| 627 |
+
"scripts": []
|
| 628 |
},
|
| 629 |
{
|
| 630 |
"name": "Kalanadi",
|
| 631 |
"iso_1_code": null,
|
| 632 |
"iso_3_code": "wkl",
|
|
|
|
| 633 |
"children": [],
|
| 634 |
+
"tokenizers": {},
|
| 635 |
"node_i": "3654",
|
| 636 |
+
"native_tokenizers": [],
|
| 637 |
+
"scripts": []
|
| 638 |
},
|
| 639 |
{
|
| 640 |
"name": "Kunduvadi",
|
| 641 |
"iso_1_code": null,
|
| 642 |
"iso_3_code": "wku",
|
|
|
|
| 643 |
"children": [],
|
| 644 |
+
"tokenizers": {},
|
| 645 |
"node_i": "3655",
|
| 646 |
+
"native_tokenizers": [],
|
| 647 |
+
"scripts": []
|
| 648 |
},
|
| 649 |
{
|
| 650 |
"name": "Tamil-Kannada",
|
| 651 |
"iso_1_code": null,
|
| 652 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
"children": [
|
| 654 |
{
|
| 655 |
"name": "Kannada",
|
| 656 |
"iso_1_code": null,
|
| 657 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
"children": [
|
| 659 |
{
|
| 660 |
"name": "Badaga",
|
| 661 |
"iso_1_code": null,
|
| 662 |
"iso_3_code": "bfq",
|
|
|
|
| 663 |
"children": [],
|
| 664 |
+
"tokenizers": {},
|
| 665 |
"node_i": "3658",
|
| 666 |
+
"native_tokenizers": [],
|
| 667 |
+
"scripts": []
|
| 668 |
},
|
| 669 |
{
|
| 670 |
"name": "Holiya",
|
| 671 |
"iso_1_code": null,
|
| 672 |
"iso_3_code": "hoy",
|
|
|
|
| 673 |
"children": [],
|
| 674 |
+
"tokenizers": {},
|
| 675 |
"node_i": "3659",
|
| 676 |
+
"native_tokenizers": [],
|
| 677 |
+
"scripts": []
|
| 678 |
},
|
| 679 |
{
|
| 680 |
"name": "Kannada",
|
| 681 |
"iso_1_code": "kn",
|
| 682 |
"iso_3_code": "kan",
|
| 683 |
+
"children": [],
|
| 684 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
"Knda": {
|
| 686 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 687 |
"original_lang_name": "kannada",
|
| 688 |
"original_lang_code": "kan",
|
| 689 |
+
"script": "Knda",
|
| 690 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
}
|
| 692 |
},
|
|
|
|
| 693 |
"node_i": "3660",
|
| 694 |
+
"native_tokenizers": [
|
| 695 |
+
"Knda"
|
| 696 |
+
],
|
| 697 |
"scripts": [
|
| 698 |
"Latn",
|
| 699 |
"Knda"
|
| 700 |
+
]
|
|
|
|
| 701 |
},
|
| 702 |
{
|
| 703 |
"name": "Urali",
|
| 704 |
"iso_1_code": null,
|
| 705 |
"iso_3_code": "url",
|
|
|
|
| 706 |
"children": [],
|
| 707 |
+
"tokenizers": {},
|
| 708 |
"node_i": "3661",
|
| 709 |
+
"native_tokenizers": [],
|
| 710 |
+
"scripts": []
|
| 711 |
}
|
| 712 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
"tokenizers": {
|
| 714 |
+
"Knda": {
|
| 715 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 716 |
+
"original_lang_name": "kannada",
|
| 717 |
+
"original_lang_code": "kan",
|
| 718 |
+
"script": "Knda",
|
| 719 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
}
|
| 721 |
},
|
| 722 |
+
"node_i": "3657",
|
| 723 |
+
"native_tokenizers": [],
|
| 724 |
+
"scripts": []
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"name": "Tamil-Kodagu",
|
| 728 |
+
"iso_1_code": null,
|
| 729 |
+
"iso_3_code": null,
|
| 730 |
"children": [
|
| 731 |
{
|
| 732 |
"name": "Kodagu",
|
| 733 |
"iso_1_code": null,
|
| 734 |
"iso_3_code": null,
|
|
|
|
| 735 |
"children": [
|
| 736 |
{
|
| 737 |
"name": "Kodava",
|
| 738 |
"iso_1_code": null,
|
| 739 |
"iso_3_code": "kfa",
|
|
|
|
| 740 |
"children": [],
|
| 741 |
+
"tokenizers": {},
|
| 742 |
"node_i": "3664",
|
| 743 |
+
"native_tokenizers": [],
|
| 744 |
+
"scripts": []
|
| 745 |
},
|
| 746 |
{
|
| 747 |
"name": "Kurumba, Kannada",
|
| 748 |
"iso_1_code": null,
|
| 749 |
"iso_3_code": "kfi",
|
|
|
|
| 750 |
"children": [],
|
| 751 |
+
"tokenizers": {},
|
| 752 |
"node_i": "3665",
|
| 753 |
+
"native_tokenizers": [],
|
| 754 |
+
"scripts": []
|
| 755 |
},
|
| 756 |
{
|
| 757 |
"name": "Kurumba, Mullu",
|
| 758 |
"iso_1_code": null,
|
| 759 |
"iso_3_code": "kpb",
|
|
|
|
| 760 |
"children": [],
|
| 761 |
+
"tokenizers": {},
|
| 762 |
"node_i": "3666",
|
| 763 |
+
"native_tokenizers": [],
|
| 764 |
+
"scripts": []
|
| 765 |
},
|
| 766 |
{
|
| 767 |
"name": "Kurumba, Alu",
|
| 768 |
"iso_1_code": null,
|
| 769 |
"iso_3_code": "xua",
|
|
|
|
| 770 |
"children": [],
|
| 771 |
+
"tokenizers": {},
|
| 772 |
"node_i": "3667",
|
| 773 |
+
"native_tokenizers": [],
|
| 774 |
+
"scripts": []
|
| 775 |
},
|
| 776 |
{
|
| 777 |
"name": "Kurumba, Jennu",
|
| 778 |
"iso_1_code": null,
|
| 779 |
"iso_3_code": "xuj",
|
|
|
|
| 780 |
"children": [],
|
| 781 |
+
"tokenizers": {},
|
| 782 |
"node_i": "3668",
|
| 783 |
+
"native_tokenizers": [],
|
| 784 |
+
"scripts": []
|
| 785 |
}
|
| 786 |
],
|
| 787 |
+
"tokenizers": {},
|
| 788 |
"node_i": "3663",
|
| 789 |
+
"native_tokenizers": [],
|
| 790 |
+
"scripts": []
|
| 791 |
},
|
| 792 |
{
|
| 793 |
"name": "Tamil-Malayalam",
|
| 794 |
"iso_1_code": null,
|
| 795 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
"children": [
|
| 797 |
{
|
| 798 |
"name": "Mannan",
|
| 799 |
"iso_1_code": null,
|
| 800 |
"iso_3_code": "mjv",
|
|
|
|
| 801 |
"children": [],
|
| 802 |
+
"tokenizers": {},
|
| 803 |
"node_i": "3670",
|
| 804 |
+
"native_tokenizers": [],
|
| 805 |
+
"scripts": []
|
| 806 |
},
|
| 807 |
{
|
| 808 |
"name": "Malayalam",
|
| 809 |
"iso_1_code": null,
|
| 810 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
"children": [
|
| 812 |
{
|
| 813 |
"name": "Aranadan",
|
| 814 |
"iso_1_code": null,
|
| 815 |
"iso_3_code": "aaf",
|
|
|
|
| 816 |
"children": [],
|
| 817 |
+
"tokenizers": {},
|
| 818 |
"node_i": "3672",
|
| 819 |
+
"native_tokenizers": [],
|
| 820 |
+
"scripts": []
|
| 821 |
},
|
| 822 |
{
|
| 823 |
"name": "Kadar",
|
| 824 |
"iso_1_code": null,
|
| 825 |
"iso_3_code": "kej",
|
|
|
|
| 826 |
"children": [],
|
| 827 |
+
"tokenizers": {},
|
| 828 |
"node_i": "3673",
|
| 829 |
+
"native_tokenizers": [],
|
| 830 |
+
"scripts": []
|
| 831 |
},
|
| 832 |
{
|
| 833 |
"name": "Malayalam",
|
| 834 |
"iso_1_code": "ml",
|
| 835 |
"iso_3_code": "mal",
|
| 836 |
+
"children": [],
|
| 837 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 838 |
"Mlym": {
|
| 839 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 840 |
"original_lang_name": "malayalam",
|
| 841 |
"original_lang_code": "mal",
|
| 842 |
+
"script": "Mlym",
|
| 843 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 844 |
}
|
| 845 |
},
|
|
|
|
| 846 |
"node_i": "3674",
|
| 847 |
+
"native_tokenizers": [
|
| 848 |
+
"Mlym"
|
| 849 |
+
],
|
| 850 |
"scripts": [
|
| 851 |
"Latn",
|
| 852 |
"Mlym"
|
| 853 |
+
]
|
|
|
|
| 854 |
},
|
| 855 |
{
|
| 856 |
"name": "Malapandaram",
|
| 857 |
"iso_1_code": null,
|
| 858 |
"iso_3_code": "mjp",
|
|
|
|
| 859 |
"children": [],
|
| 860 |
+
"tokenizers": {},
|
| 861 |
"node_i": "3675",
|
| 862 |
+
"native_tokenizers": [],
|
| 863 |
+
"scripts": []
|
| 864 |
},
|
| 865 |
{
|
| 866 |
"name": "Malaryan",
|
| 867 |
"iso_1_code": null,
|
| 868 |
"iso_3_code": "mjq",
|
|
|
|
| 869 |
"children": [],
|
| 870 |
+
"tokenizers": {},
|
| 871 |
"node_i": "3676",
|
| 872 |
+
"native_tokenizers": [],
|
| 873 |
+
"scripts": []
|
| 874 |
},
|
| 875 |
{
|
| 876 |
"name": "Malavedan",
|
| 877 |
"iso_1_code": null,
|
| 878 |
"iso_3_code": "mjr",
|
|
|
|
| 879 |
"children": [],
|
| 880 |
+
"tokenizers": {},
|
| 881 |
"node_i": "3677",
|
| 882 |
+
"native_tokenizers": [],
|
| 883 |
+
"scripts": []
|
| 884 |
},
|
| 885 |
{
|
| 886 |
"name": "Paliyan",
|
| 887 |
"iso_1_code": null,
|
| 888 |
"iso_3_code": "pcf",
|
|
|
|
| 889 |
"children": [],
|
| 890 |
+
"tokenizers": {},
|
| 891 |
"node_i": "3678",
|
| 892 |
+
"native_tokenizers": [],
|
| 893 |
+
"scripts": []
|
| 894 |
},
|
| 895 |
{
|
| 896 |
"name": "Paniya",
|
| 897 |
"iso_1_code": null,
|
| 898 |
"iso_3_code": "pcg",
|
|
|
|
| 899 |
"children": [],
|
| 900 |
+
"tokenizers": {},
|
| 901 |
"node_i": "3679",
|
| 902 |
+
"native_tokenizers": [],
|
| 903 |
+
"scripts": []
|
| 904 |
},
|
| 905 |
{
|
| 906 |
"name": "Ravula",
|
| 907 |
"iso_1_code": null,
|
| 908 |
"iso_3_code": "yea",
|
|
|
|
| 909 |
"children": [],
|
| 910 |
+
"tokenizers": {},
|
| 911 |
"node_i": "3680",
|
| 912 |
+
"native_tokenizers": [],
|
| 913 |
+
"scripts": []
|
| 914 |
}
|
| 915 |
],
|
| 916 |
+
"tokenizers": {
|
| 917 |
+
"Mlym": {
|
| 918 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 919 |
+
"original_lang_name": "malayalam",
|
| 920 |
+
"original_lang_code": "mal",
|
| 921 |
+
"script": "Mlym",
|
| 922 |
+
"class_name": "IndicNLPTokenizer"
|
| 923 |
+
}
|
| 924 |
+
},
|
| 925 |
"node_i": "3671",
|
| 926 |
+
"native_tokenizers": [],
|
| 927 |
+
"scripts": []
|
| 928 |
},
|
| 929 |
{
|
| 930 |
"name": "Tamil",
|
| 931 |
"iso_1_code": null,
|
| 932 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
"children": [
|
| 934 |
{
|
| 935 |
"name": "Eravallan",
|
| 936 |
"iso_1_code": null,
|
| 937 |
"iso_3_code": "era",
|
|
|
|
| 938 |
"children": [],
|
| 939 |
+
"tokenizers": {},
|
| 940 |
"node_i": "3682",
|
| 941 |
+
"native_tokenizers": [],
|
| 942 |
+
"scripts": []
|
| 943 |
},
|
| 944 |
{
|
| 945 |
"name": "Irula",
|
| 946 |
"iso_1_code": null,
|
| 947 |
"iso_3_code": "iru",
|
|
|
|
| 948 |
"children": [],
|
| 949 |
+
"tokenizers": {},
|
| 950 |
"node_i": "3683",
|
| 951 |
+
"native_tokenizers": [],
|
| 952 |
+
"scripts": []
|
| 953 |
},
|
| 954 |
{
|
| 955 |
"name": "Kaikadi",
|
| 956 |
"iso_1_code": null,
|
| 957 |
"iso_3_code": "kep",
|
|
|
|
| 958 |
"children": [],
|
| 959 |
+
"tokenizers": {},
|
| 960 |
"node_i": "3684",
|
| 961 |
+
"native_tokenizers": [],
|
| 962 |
+
"scripts": []
|
| 963 |
},
|
| 964 |
{
|
| 965 |
"name": "Kanikkaran",
|
| 966 |
"iso_1_code": null,
|
| 967 |
"iso_3_code": "kev",
|
|
|
|
| 968 |
"children": [],
|
| 969 |
+
"tokenizers": {},
|
| 970 |
"node_i": "3685",
|
| 971 |
+
"native_tokenizers": [],
|
| 972 |
+
"scripts": []
|
| 973 |
},
|
| 974 |
{
|
| 975 |
"name": "Muthuvan",
|
| 976 |
"iso_1_code": null,
|
| 977 |
"iso_3_code": "muv",
|
|
|
|
| 978 |
"children": [],
|
| 979 |
+
"tokenizers": {},
|
| 980 |
"node_i": "3686",
|
| 981 |
+
"native_tokenizers": [],
|
| 982 |
+
"scripts": []
|
| 983 |
},
|
| 984 |
{
|
| 985 |
"name": "Sholaga",
|
| 986 |
"iso_1_code": null,
|
| 987 |
"iso_3_code": "sle",
|
|
|
|
| 988 |
"children": [],
|
| 989 |
+
"tokenizers": {},
|
| 990 |
"node_i": "3687",
|
| 991 |
+
"native_tokenizers": [],
|
| 992 |
+
"scripts": []
|
| 993 |
},
|
| 994 |
{
|
| 995 |
"name": "Tamil",
|
| 996 |
"iso_1_code": "ta",
|
| 997 |
"iso_3_code": "tam",
|
| 998 |
+
"children": [],
|
| 999 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
"Taml": {
|
| 1001 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1002 |
"original_lang_name": "tamil",
|
| 1003 |
"original_lang_code": "tam",
|
| 1004 |
+
"script": "Taml",
|
| 1005 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1006 |
}
|
| 1007 |
},
|
|
|
|
| 1008 |
"node_i": "3688",
|
| 1009 |
+
"native_tokenizers": [
|
| 1010 |
+
"Taml"
|
| 1011 |
+
],
|
| 1012 |
"scripts": [
|
| 1013 |
"Taml",
|
| 1014 |
"Latn"
|
| 1015 |
+
]
|
|
|
|
| 1016 |
},
|
| 1017 |
{
|
| 1018 |
"name": "Kurumba, Betta",
|
| 1019 |
"iso_1_code": null,
|
| 1020 |
"iso_3_code": "xub",
|
|
|
|
| 1021 |
"children": [],
|
| 1022 |
+
"tokenizers": {},
|
| 1023 |
"node_i": "3689",
|
| 1024 |
+
"native_tokenizers": [],
|
| 1025 |
+
"scripts": []
|
| 1026 |
},
|
| 1027 |
{
|
| 1028 |
"name": "Yerukula",
|
| 1029 |
"iso_1_code": null,
|
| 1030 |
"iso_3_code": "yeu",
|
|
|
|
| 1031 |
"children": [],
|
| 1032 |
+
"tokenizers": {},
|
| 1033 |
"node_i": "3690",
|
| 1034 |
+
"native_tokenizers": [],
|
| 1035 |
+
"scripts": []
|
| 1036 |
}
|
| 1037 |
],
|
| 1038 |
+
"tokenizers": {
|
| 1039 |
+
"Taml": {
|
| 1040 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1041 |
+
"original_lang_name": "tamil",
|
| 1042 |
+
"original_lang_code": "tam",
|
| 1043 |
+
"script": "Taml",
|
| 1044 |
+
"class_name": "IndicNLPTokenizer"
|
| 1045 |
+
}
|
| 1046 |
+
},
|
| 1047 |
"node_i": "3681",
|
| 1048 |
+
"native_tokenizers": [],
|
| 1049 |
+
"scripts": []
|
| 1050 |
}
|
| 1051 |
],
|
| 1052 |
+
"tokenizers": {
|
| 1053 |
+
"Mlym": {
|
| 1054 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 1055 |
+
"original_lang_name": "malayalam",
|
| 1056 |
+
"original_lang_code": "mal",
|
| 1057 |
+
"script": "Mlym",
|
| 1058 |
+
"class_name": "IndicNLPTokenizer"
|
| 1059 |
+
},
|
| 1060 |
+
"Taml": {
|
| 1061 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1062 |
+
"original_lang_name": "tamil",
|
| 1063 |
+
"original_lang_code": "tam",
|
| 1064 |
+
"script": "Taml",
|
| 1065 |
+
"class_name": "IndicNLPTokenizer"
|
| 1066 |
+
}
|
| 1067 |
+
},
|
| 1068 |
"node_i": "3669",
|
| 1069 |
+
"native_tokenizers": [],
|
| 1070 |
+
"scripts": []
|
| 1071 |
},
|
| 1072 |
{
|
| 1073 |
"name": "Toda-Kota",
|
| 1074 |
"iso_1_code": null,
|
| 1075 |
"iso_3_code": null,
|
|
|
|
| 1076 |
"children": [
|
| 1077 |
{
|
| 1078 |
"name": "Kota",
|
| 1079 |
"iso_1_code": null,
|
| 1080 |
"iso_3_code": "kfe",
|
|
|
|
| 1081 |
"children": [],
|
| 1082 |
+
"tokenizers": {},
|
| 1083 |
"node_i": "3692",
|
| 1084 |
+
"native_tokenizers": [],
|
| 1085 |
+
"scripts": []
|
| 1086 |
},
|
| 1087 |
{
|
| 1088 |
"name": "Toda",
|
| 1089 |
"iso_1_code": null,
|
| 1090 |
"iso_3_code": "tcx",
|
|
|
|
| 1091 |
"children": [],
|
| 1092 |
+
"tokenizers": {},
|
| 1093 |
"node_i": "3693",
|
| 1094 |
+
"native_tokenizers": [],
|
| 1095 |
+
"scripts": []
|
| 1096 |
}
|
| 1097 |
],
|
| 1098 |
+
"tokenizers": {},
|
| 1099 |
"node_i": "3691",
|
| 1100 |
+
"native_tokenizers": [],
|
| 1101 |
+
"scripts": []
|
| 1102 |
}
|
| 1103 |
],
|
| 1104 |
+
"tokenizers": {
|
| 1105 |
+
"Mlym": {
|
| 1106 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 1107 |
+
"original_lang_name": "malayalam",
|
| 1108 |
+
"original_lang_code": "mal",
|
| 1109 |
+
"script": "Mlym",
|
| 1110 |
+
"class_name": "IndicNLPTokenizer"
|
| 1111 |
+
},
|
| 1112 |
+
"Taml": {
|
| 1113 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1114 |
+
"original_lang_name": "tamil",
|
| 1115 |
+
"original_lang_code": "tam",
|
| 1116 |
+
"script": "Taml",
|
| 1117 |
+
"class_name": "IndicNLPTokenizer"
|
| 1118 |
+
}
|
| 1119 |
+
},
|
| 1120 |
"node_i": "3662",
|
| 1121 |
+
"native_tokenizers": [],
|
| 1122 |
+
"scripts": []
|
| 1123 |
},
|
| 1124 |
{
|
| 1125 |
"name": "Unclassified",
|
| 1126 |
"iso_1_code": null,
|
| 1127 |
"iso_3_code": null,
|
|
|
|
| 1128 |
"children": [
|
| 1129 |
{
|
| 1130 |
"name": "Chetti, Wayanad",
|
| 1131 |
"iso_1_code": null,
|
| 1132 |
"iso_3_code": "ctt",
|
|
|
|
| 1133 |
"children": [],
|
| 1134 |
+
"tokenizers": {},
|
| 1135 |
"node_i": "3695",
|
| 1136 |
+
"native_tokenizers": [],
|
| 1137 |
+
"scripts": []
|
| 1138 |
}
|
| 1139 |
],
|
| 1140 |
+
"tokenizers": {},
|
| 1141 |
"node_i": "3694",
|
| 1142 |
+
"native_tokenizers": [],
|
| 1143 |
+
"scripts": []
|
| 1144 |
}
|
| 1145 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1146 |
"tokenizers": {
|
| 1147 |
"Knda": {
|
| 1148 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 1149 |
"original_lang_name": "kannada",
|
| 1150 |
"original_lang_code": "kan",
|
| 1151 |
+
"script": "Knda",
|
| 1152 |
+
"class_name": "IndicNLPTokenizer"
|
| 1153 |
+
},
|
| 1154 |
+
"Mlym": {
|
| 1155 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 1156 |
+
"original_lang_name": "malayalam",
|
| 1157 |
+
"original_lang_code": "mal",
|
| 1158 |
+
"script": "Mlym",
|
| 1159 |
+
"class_name": "IndicNLPTokenizer"
|
| 1160 |
+
},
|
| 1161 |
+
"Taml": {
|
| 1162 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1163 |
+
"original_lang_name": "tamil",
|
| 1164 |
+
"original_lang_code": "tam",
|
| 1165 |
+
"script": "Taml",
|
| 1166 |
+
"class_name": "IndicNLPTokenizer"
|
| 1167 |
}
|
| 1168 |
},
|
| 1169 |
+
"node_i": "3656",
|
| 1170 |
+
"native_tokenizers": [],
|
| 1171 |
+
"scripts": []
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"name": "Tulu",
|
| 1175 |
+
"iso_1_code": null,
|
| 1176 |
+
"iso_3_code": null,
|
| 1177 |
"children": [
|
| 1178 |
{
|
| 1179 |
"name": "Bellari",
|
| 1180 |
"iso_1_code": null,
|
| 1181 |
"iso_3_code": "brw",
|
|
|
|
| 1182 |
"children": [],
|
| 1183 |
+
"tokenizers": {},
|
| 1184 |
"node_i": "3697",
|
| 1185 |
+
"native_tokenizers": [],
|
| 1186 |
+
"scripts": []
|
| 1187 |
},
|
| 1188 |
{
|
| 1189 |
"name": "Kudiya",
|
| 1190 |
"iso_1_code": null,
|
| 1191 |
"iso_3_code": "kfg",
|
|
|
|
| 1192 |
"children": [],
|
| 1193 |
+
"tokenizers": {},
|
| 1194 |
"node_i": "3698",
|
| 1195 |
+
"native_tokenizers": [],
|
| 1196 |
+
"scripts": []
|
| 1197 |
},
|
| 1198 |
{
|
| 1199 |
"name": "Tulu",
|
| 1200 |
"iso_1_code": null,
|
| 1201 |
"iso_3_code": "tcy",
|
| 1202 |
+
"children": [],
|
| 1203 |
"tokenizers": {
|
| 1204 |
"Knda": {
|
| 1205 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 1206 |
"original_lang_name": "kannada",
|
| 1207 |
"original_lang_code": "kan",
|
| 1208 |
+
"script": "Knda",
|
| 1209 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1210 |
}
|
| 1211 |
},
|
|
|
|
| 1212 |
"node_i": "3699",
|
| 1213 |
+
"native_tokenizers": [],
|
| 1214 |
"scripts": [
|
| 1215 |
"Knda"
|
| 1216 |
+
]
|
|
|
|
| 1217 |
},
|
| 1218 |
{
|
| 1219 |
"name": "Koraga",
|
| 1220 |
"iso_1_code": null,
|
| 1221 |
"iso_3_code": null,
|
|
|
|
| 1222 |
"children": [
|
| 1223 |
{
|
| 1224 |
"name": "Koraga, Korra",
|
| 1225 |
"iso_1_code": null,
|
| 1226 |
"iso_3_code": "kfd",
|
|
|
|
| 1227 |
"children": [],
|
| 1228 |
+
"tokenizers": {},
|
| 1229 |
"node_i": "3701",
|
| 1230 |
+
"native_tokenizers": [],
|
| 1231 |
+
"scripts": []
|
| 1232 |
},
|
| 1233 |
{
|
| 1234 |
"name": "Koraga, Mudu",
|
| 1235 |
"iso_1_code": null,
|
| 1236 |
"iso_3_code": "vmd",
|
|
|
|
| 1237 |
"children": [],
|
| 1238 |
+
"tokenizers": {},
|
| 1239 |
"node_i": "3702",
|
| 1240 |
+
"native_tokenizers": [],
|
| 1241 |
+
"scripts": []
|
| 1242 |
}
|
| 1243 |
],
|
| 1244 |
+
"tokenizers": {},
|
| 1245 |
"node_i": "3700",
|
| 1246 |
+
"native_tokenizers": [],
|
| 1247 |
+
"scripts": []
|
| 1248 |
}
|
| 1249 |
],
|
| 1250 |
+
"tokenizers": {
|
| 1251 |
+
"Knda": {
|
| 1252 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 1253 |
+
"original_lang_name": "kannada",
|
| 1254 |
+
"original_lang_code": "kan",
|
| 1255 |
+
"script": "Knda",
|
| 1256 |
+
"class_name": "IndicNLPTokenizer"
|
| 1257 |
+
}
|
| 1258 |
+
},
|
| 1259 |
"node_i": "3696",
|
| 1260 |
+
"native_tokenizers": [],
|
| 1261 |
+
"scripts": []
|
| 1262 |
},
|
| 1263 |
{
|
| 1264 |
"name": "Unclassified",
|
| 1265 |
"iso_1_code": null,
|
| 1266 |
"iso_3_code": null,
|
|
|
|
| 1267 |
"children": [
|
| 1268 |
{
|
| 1269 |
"name": "Mala Malasar",
|
| 1270 |
"iso_1_code": null,
|
| 1271 |
"iso_3_code": "ima",
|
|
|
|
| 1272 |
"children": [],
|
| 1273 |
+
"tokenizers": {},
|
| 1274 |
"node_i": "3704",
|
| 1275 |
+
"native_tokenizers": [],
|
| 1276 |
+
"scripts": []
|
| 1277 |
},
|
| 1278 |
{
|
| 1279 |
"name": "Thachanadan",
|
| 1280 |
"iso_1_code": null,
|
| 1281 |
"iso_3_code": "thn",
|
|
|
|
| 1282 |
"children": [],
|
| 1283 |
+
"tokenizers": {},
|
| 1284 |
"node_i": "3705",
|
| 1285 |
+
"native_tokenizers": [],
|
| 1286 |
+
"scripts": []
|
| 1287 |
},
|
| 1288 |
{
|
| 1289 |
"name": "Ullatan",
|
| 1290 |
"iso_1_code": null,
|
| 1291 |
"iso_3_code": "ull",
|
|
|
|
| 1292 |
"children": [],
|
| 1293 |
+
"tokenizers": {},
|
| 1294 |
"node_i": "3706",
|
| 1295 |
+
"native_tokenizers": [],
|
| 1296 |
+
"scripts": []
|
| 1297 |
},
|
| 1298 |
{
|
| 1299 |
"name": "Malasar",
|
| 1300 |
"iso_1_code": null,
|
| 1301 |
"iso_3_code": "ymr",
|
|
|
|
| 1302 |
"children": [],
|
| 1303 |
+
"tokenizers": {},
|
| 1304 |
"node_i": "3707",
|
| 1305 |
+
"native_tokenizers": [],
|
| 1306 |
+
"scripts": []
|
| 1307 |
}
|
| 1308 |
],
|
| 1309 |
+
"tokenizers": {},
|
| 1310 |
"node_i": "3703",
|
| 1311 |
+
"native_tokenizers": [],
|
| 1312 |
+
"scripts": []
|
| 1313 |
}
|
| 1314 |
],
|
| 1315 |
+
"tokenizers": {
|
| 1316 |
+
"Knda": {
|
| 1317 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 1318 |
+
"original_lang_name": "kannada",
|
| 1319 |
+
"original_lang_code": "kan",
|
| 1320 |
+
"script": "Knda",
|
| 1321 |
+
"class_name": "IndicNLPTokenizer"
|
| 1322 |
+
},
|
| 1323 |
+
"Mlym": {
|
| 1324 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 1325 |
+
"original_lang_name": "malayalam",
|
| 1326 |
+
"original_lang_code": "mal",
|
| 1327 |
+
"script": "Mlym",
|
| 1328 |
+
"class_name": "IndicNLPTokenizer"
|
| 1329 |
+
},
|
| 1330 |
+
"Taml": {
|
| 1331 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1332 |
+
"original_lang_name": "tamil",
|
| 1333 |
+
"original_lang_code": "tam",
|
| 1334 |
+
"script": "Taml",
|
| 1335 |
+
"class_name": "IndicNLPTokenizer"
|
| 1336 |
+
}
|
| 1337 |
+
},
|
| 1338 |
"node_i": "3648",
|
| 1339 |
+
"native_tokenizers": [],
|
| 1340 |
+
"scripts": []
|
| 1341 |
},
|
| 1342 |
{
|
| 1343 |
"name": "Unclassified",
|
| 1344 |
"iso_1_code": null,
|
| 1345 |
"iso_3_code": null,
|
|
|
|
| 1346 |
"children": [
|
| 1347 |
{
|
| 1348 |
"name": "Allar",
|
| 1349 |
"iso_1_code": null,
|
| 1350 |
"iso_3_code": "all",
|
|
|
|
| 1351 |
"children": [],
|
| 1352 |
+
"tokenizers": {},
|
| 1353 |
"node_i": "3709",
|
| 1354 |
+
"native_tokenizers": [],
|
| 1355 |
+
"scripts": []
|
| 1356 |
},
|
| 1357 |
{
|
| 1358 |
"name": "Bharia",
|
| 1359 |
"iso_1_code": null,
|
| 1360 |
"iso_3_code": "bha",
|
|
|
|
| 1361 |
"children": [],
|
| 1362 |
+
"tokenizers": {},
|
| 1363 |
"node_i": "3710",
|
| 1364 |
+
"native_tokenizers": [],
|
| 1365 |
+
"scripts": []
|
| 1366 |
},
|
| 1367 |
{
|
| 1368 |
"name": "Malankuravan",
|
| 1369 |
"iso_1_code": null,
|
| 1370 |
"iso_3_code": "mjo",
|
|
|
|
| 1371 |
"children": [],
|
| 1372 |
+
"tokenizers": {},
|
| 1373 |
"node_i": "3711",
|
| 1374 |
+
"native_tokenizers": [],
|
| 1375 |
+
"scripts": []
|
| 1376 |
},
|
| 1377 |
{
|
| 1378 |
"name": "Pattapu",
|
| 1379 |
"iso_1_code": null,
|
| 1380 |
"iso_3_code": "ptq",
|
|
|
|
| 1381 |
"children": [],
|
| 1382 |
+
"tokenizers": {},
|
| 1383 |
"node_i": "3712",
|
| 1384 |
+
"native_tokenizers": [],
|
| 1385 |
+
"scripts": []
|
| 1386 |
},
|
| 1387 |
{
|
| 1388 |
"name": "Vishavan",
|
| 1389 |
"iso_1_code": null,
|
| 1390 |
"iso_3_code": "vis",
|
|
|
|
| 1391 |
"children": [],
|
| 1392 |
+
"tokenizers": {},
|
| 1393 |
"node_i": "3713",
|
| 1394 |
+
"native_tokenizers": [],
|
| 1395 |
+
"scripts": []
|
| 1396 |
}
|
| 1397 |
],
|
| 1398 |
+
"tokenizers": {},
|
| 1399 |
"node_i": "3708",
|
| 1400 |
+
"native_tokenizers": [],
|
| 1401 |
+
"scripts": []
|
| 1402 |
}
|
| 1403 |
],
|
| 1404 |
+
"tokenizers": {
|
| 1405 |
+
"Telu": {
|
| 1406 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
| 1407 |
+
"original_lang_name": "telugu",
|
| 1408 |
+
"original_lang_code": "tel",
|
| 1409 |
+
"script": "Telu",
|
| 1410 |
+
"class_name": "IndicNLPTokenizer"
|
| 1411 |
+
},
|
| 1412 |
+
"Knda": {
|
| 1413 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
| 1414 |
+
"original_lang_name": "kannada",
|
| 1415 |
+
"original_lang_code": "kan",
|
| 1416 |
+
"script": "Knda",
|
| 1417 |
+
"class_name": "IndicNLPTokenizer"
|
| 1418 |
+
},
|
| 1419 |
+
"Mlym": {
|
| 1420 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
| 1421 |
+
"original_lang_name": "malayalam",
|
| 1422 |
+
"original_lang_code": "mal",
|
| 1423 |
+
"script": "Mlym",
|
| 1424 |
+
"class_name": "IndicNLPTokenizer"
|
| 1425 |
+
},
|
| 1426 |
+
"Taml": {
|
| 1427 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
| 1428 |
+
"original_lang_name": "tamil",
|
| 1429 |
+
"original_lang_code": "tam",
|
| 1430 |
+
"script": "Taml",
|
| 1431 |
+
"class_name": "IndicNLPTokenizer"
|
| 1432 |
+
}
|
| 1433 |
+
},
|
| 1434 |
"node_i": "3601",
|
| 1435 |
+
"native_tokenizers": [],
|
| 1436 |
+
"scripts": []
|
| 1437 |
}
|
data/East Bird’s Head-Sentani.json
CHANGED
|
@@ -2,173 +2,173 @@
|
|
| 2 |
"name": "East Bird\u2019s Head-Sentani",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Burmeso",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Burmeso",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "bzu",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3716",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
}
|
| 23 |
],
|
|
|
|
| 24 |
"node_i": "3715",
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"name": "East Bird\u2019s Head",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": null,
|
| 32 |
-
"tokenizers": {},
|
| 33 |
"children": [
|
| 34 |
{
|
| 35 |
"name": "Mantion",
|
| 36 |
"iso_1_code": null,
|
| 37 |
"iso_3_code": null,
|
| 38 |
-
"tokenizers": {},
|
| 39 |
"children": [
|
| 40 |
{
|
| 41 |
"name": "Sougb",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": "mnx",
|
| 44 |
-
"tokenizers": {},
|
| 45 |
"children": [],
|
|
|
|
| 46 |
"node_i": "3719",
|
|
|
|
| 47 |
"scripts": [
|
| 48 |
"Latn"
|
| 49 |
-
]
|
| 50 |
-
"own_tokenizer": false
|
| 51 |
}
|
| 52 |
],
|
|
|
|
| 53 |
"node_i": "3718",
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"name": "Meax",
|
| 59 |
"iso_1_code": null,
|
| 60 |
"iso_3_code": null,
|
| 61 |
-
"tokenizers": {},
|
| 62 |
"children": [
|
| 63 |
{
|
| 64 |
"name": "Meyah",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": "mej",
|
| 67 |
-
"tokenizers": {},
|
| 68 |
"children": [],
|
|
|
|
| 69 |
"node_i": "3721",
|
|
|
|
| 70 |
"scripts": [
|
| 71 |
"Latn"
|
| 72 |
-
]
|
| 73 |
-
"own_tokenizer": false
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"name": "Moskona",
|
| 77 |
"iso_1_code": null,
|
| 78 |
"iso_3_code": "mtj",
|
| 79 |
-
"tokenizers": {},
|
| 80 |
"children": [],
|
|
|
|
| 81 |
"node_i": "3722",
|
|
|
|
| 82 |
"scripts": [
|
| 83 |
"Latn"
|
| 84 |
-
]
|
| 85 |
-
"own_tokenizer": false
|
| 86 |
}
|
| 87 |
],
|
|
|
|
| 88 |
"node_i": "3720",
|
| 89 |
-
"
|
| 90 |
-
"
|
| 91 |
}
|
| 92 |
],
|
|
|
|
| 93 |
"node_i": "3717",
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"name": "Sentani",
|
| 99 |
"iso_1_code": null,
|
| 100 |
"iso_3_code": null,
|
| 101 |
-
"tokenizers": {},
|
| 102 |
"children": [
|
| 103 |
{
|
| 104 |
"name": "Demta",
|
| 105 |
"iso_1_code": null,
|
| 106 |
"iso_3_code": null,
|
| 107 |
-
"tokenizers": {},
|
| 108 |
"children": [
|
| 109 |
{
|
| 110 |
"name": "Sowari",
|
| 111 |
"iso_1_code": null,
|
| 112 |
"iso_3_code": "dmy",
|
| 113 |
-
"tokenizers": {},
|
| 114 |
"children": [],
|
|
|
|
| 115 |
"node_i": "3725",
|
| 116 |
-
"
|
| 117 |
-
"
|
| 118 |
}
|
| 119 |
],
|
|
|
|
| 120 |
"node_i": "3724",
|
| 121 |
-
"
|
| 122 |
-
"
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"name": "Sentani Proper",
|
| 126 |
"iso_1_code": null,
|
| 127 |
"iso_3_code": null,
|
| 128 |
-
"tokenizers": {},
|
| 129 |
"children": [
|
| 130 |
{
|
| 131 |
"name": "Nafri",
|
| 132 |
"iso_1_code": null,
|
| 133 |
"iso_3_code": "nxx",
|
| 134 |
-
"tokenizers": {},
|
| 135 |
"children": [],
|
|
|
|
| 136 |
"node_i": "3727",
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"name": "Sentani",
|
| 142 |
"iso_1_code": null,
|
| 143 |
"iso_3_code": "set",
|
| 144 |
-
"tokenizers": {},
|
| 145 |
"children": [],
|
|
|
|
| 146 |
"node_i": "3728",
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"name": "Tabla",
|
| 152 |
"iso_1_code": null,
|
| 153 |
"iso_3_code": "tnm",
|
| 154 |
-
"tokenizers": {},
|
| 155 |
"children": [],
|
|
|
|
| 156 |
"node_i": "3729",
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
}
|
| 160 |
],
|
|
|
|
| 161 |
"node_i": "3726",
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
}
|
| 165 |
],
|
|
|
|
| 166 |
"node_i": "3723",
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
}
|
| 170 |
],
|
|
|
|
| 171 |
"node_i": "3714",
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
}
|
|
|
|
| 2 |
"name": "East Bird\u2019s Head-Sentani",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Burmeso",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Burmeso",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "bzu",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3716",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
}
|
| 21 |
],
|
| 22 |
+
"tokenizers": {},
|
| 23 |
"node_i": "3715",
|
| 24 |
+
"native_tokenizers": [],
|
| 25 |
+
"scripts": []
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "East Bird\u2019s Head",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": null,
|
|
|
|
| 31 |
"children": [
|
| 32 |
{
|
| 33 |
"name": "Mantion",
|
| 34 |
"iso_1_code": null,
|
| 35 |
"iso_3_code": null,
|
|
|
|
| 36 |
"children": [
|
| 37 |
{
|
| 38 |
"name": "Sougb",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": "mnx",
|
|
|
|
| 41 |
"children": [],
|
| 42 |
+
"tokenizers": {},
|
| 43 |
"node_i": "3719",
|
| 44 |
+
"native_tokenizers": [],
|
| 45 |
"scripts": [
|
| 46 |
"Latn"
|
| 47 |
+
]
|
|
|
|
| 48 |
}
|
| 49 |
],
|
| 50 |
+
"tokenizers": {},
|
| 51 |
"node_i": "3718",
|
| 52 |
+
"native_tokenizers": [],
|
| 53 |
+
"scripts": []
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"name": "Meax",
|
| 57 |
"iso_1_code": null,
|
| 58 |
"iso_3_code": null,
|
|
|
|
| 59 |
"children": [
|
| 60 |
{
|
| 61 |
"name": "Meyah",
|
| 62 |
"iso_1_code": null,
|
| 63 |
"iso_3_code": "mej",
|
|
|
|
| 64 |
"children": [],
|
| 65 |
+
"tokenizers": {},
|
| 66 |
"node_i": "3721",
|
| 67 |
+
"native_tokenizers": [],
|
| 68 |
"scripts": [
|
| 69 |
"Latn"
|
| 70 |
+
]
|
|
|
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"name": "Moskona",
|
| 74 |
"iso_1_code": null,
|
| 75 |
"iso_3_code": "mtj",
|
|
|
|
| 76 |
"children": [],
|
| 77 |
+
"tokenizers": {},
|
| 78 |
"node_i": "3722",
|
| 79 |
+
"native_tokenizers": [],
|
| 80 |
"scripts": [
|
| 81 |
"Latn"
|
| 82 |
+
]
|
|
|
|
| 83 |
}
|
| 84 |
],
|
| 85 |
+
"tokenizers": {},
|
| 86 |
"node_i": "3720",
|
| 87 |
+
"native_tokenizers": [],
|
| 88 |
+
"scripts": []
|
| 89 |
}
|
| 90 |
],
|
| 91 |
+
"tokenizers": {},
|
| 92 |
"node_i": "3717",
|
| 93 |
+
"native_tokenizers": [],
|
| 94 |
+
"scripts": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"name": "Sentani",
|
| 98 |
"iso_1_code": null,
|
| 99 |
"iso_3_code": null,
|
|
|
|
| 100 |
"children": [
|
| 101 |
{
|
| 102 |
"name": "Demta",
|
| 103 |
"iso_1_code": null,
|
| 104 |
"iso_3_code": null,
|
|
|
|
| 105 |
"children": [
|
| 106 |
{
|
| 107 |
"name": "Sowari",
|
| 108 |
"iso_1_code": null,
|
| 109 |
"iso_3_code": "dmy",
|
|
|
|
| 110 |
"children": [],
|
| 111 |
+
"tokenizers": {},
|
| 112 |
"node_i": "3725",
|
| 113 |
+
"native_tokenizers": [],
|
| 114 |
+
"scripts": []
|
| 115 |
}
|
| 116 |
],
|
| 117 |
+
"tokenizers": {},
|
| 118 |
"node_i": "3724",
|
| 119 |
+
"native_tokenizers": [],
|
| 120 |
+
"scripts": []
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"name": "Sentani Proper",
|
| 124 |
"iso_1_code": null,
|
| 125 |
"iso_3_code": null,
|
|
|
|
| 126 |
"children": [
|
| 127 |
{
|
| 128 |
"name": "Nafri",
|
| 129 |
"iso_1_code": null,
|
| 130 |
"iso_3_code": "nxx",
|
|
|
|
| 131 |
"children": [],
|
| 132 |
+
"tokenizers": {},
|
| 133 |
"node_i": "3727",
|
| 134 |
+
"native_tokenizers": [],
|
| 135 |
+
"scripts": []
|
| 136 |
},
|
| 137 |
{
|
| 138 |
"name": "Sentani",
|
| 139 |
"iso_1_code": null,
|
| 140 |
"iso_3_code": "set",
|
|
|
|
| 141 |
"children": [],
|
| 142 |
+
"tokenizers": {},
|
| 143 |
"node_i": "3728",
|
| 144 |
+
"native_tokenizers": [],
|
| 145 |
+
"scripts": []
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"name": "Tabla",
|
| 149 |
"iso_1_code": null,
|
| 150 |
"iso_3_code": "tnm",
|
|
|
|
| 151 |
"children": [],
|
| 152 |
+
"tokenizers": {},
|
| 153 |
"node_i": "3729",
|
| 154 |
+
"native_tokenizers": [],
|
| 155 |
+
"scripts": []
|
| 156 |
}
|
| 157 |
],
|
| 158 |
+
"tokenizers": {},
|
| 159 |
"node_i": "3726",
|
| 160 |
+
"native_tokenizers": [],
|
| 161 |
+
"scripts": []
|
| 162 |
}
|
| 163 |
],
|
| 164 |
+
"tokenizers": {},
|
| 165 |
"node_i": "3723",
|
| 166 |
+
"native_tokenizers": [],
|
| 167 |
+
"scripts": []
|
| 168 |
}
|
| 169 |
],
|
| 170 |
+
"tokenizers": {},
|
| 171 |
"node_i": "3714",
|
| 172 |
+
"native_tokenizers": [],
|
| 173 |
+
"scripts": []
|
| 174 |
}
|
data/East Geelvink Bay.json
CHANGED
|
@@ -2,143 +2,143 @@
|
|
| 2 |
"name": "East Geelvink Bay",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Anasi",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "bpo",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3731",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Barapasi",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "brp",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3732",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "Burate",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": "bti",
|
| 31 |
-
"tokenizers": {},
|
| 32 |
"children": [],
|
|
|
|
| 33 |
"node_i": "3733",
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"name": "Kehu",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": "khh",
|
| 41 |
-
"tokenizers": {},
|
| 42 |
"children": [],
|
|
|
|
| 43 |
"node_i": "3734",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"name": "Kofei",
|
| 49 |
"iso_1_code": null,
|
| 50 |
"iso_3_code": "kpi",
|
| 51 |
-
"tokenizers": {},
|
| 52 |
"children": [],
|
|
|
|
| 53 |
"node_i": "3735",
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"name": "Nisa",
|
| 59 |
"iso_1_code": null,
|
| 60 |
"iso_3_code": "njs",
|
| 61 |
-
"tokenizers": {},
|
| 62 |
"children": [],
|
|
|
|
| 63 |
"node_i": "3736",
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"name": "Sauri",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "srt",
|
| 71 |
-
"tokenizers": {},
|
| 72 |
"children": [],
|
|
|
|
| 73 |
"node_i": "3737",
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"name": "Tefaro",
|
| 79 |
"iso_1_code": null,
|
| 80 |
"iso_3_code": "tfo",
|
| 81 |
-
"tokenizers": {},
|
| 82 |
"children": [],
|
|
|
|
| 83 |
"node_i": "3738",
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"name": "Tunggare",
|
| 89 |
"iso_1_code": null,
|
| 90 |
"iso_3_code": "trt",
|
| 91 |
-
"tokenizers": {},
|
| 92 |
"children": [],
|
|
|
|
| 93 |
"node_i": "3739",
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"name": "Woria",
|
| 99 |
"iso_1_code": null,
|
| 100 |
"iso_3_code": "wor",
|
| 101 |
-
"tokenizers": {},
|
| 102 |
"children": [],
|
|
|
|
| 103 |
"node_i": "3740",
|
| 104 |
-
"
|
| 105 |
-
"
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"name": "Bauzi",
|
| 109 |
"iso_1_code": null,
|
| 110 |
"iso_3_code": null,
|
| 111 |
-
"tokenizers": {},
|
| 112 |
"children": [
|
| 113 |
{
|
| 114 |
"name": "Bauzi",
|
| 115 |
"iso_1_code": null,
|
| 116 |
"iso_3_code": "bvz",
|
| 117 |
-
"tokenizers": {},
|
| 118 |
"children": [],
|
|
|
|
| 119 |
"node_i": "3742",
|
|
|
|
| 120 |
"scripts": [
|
| 121 |
"Latn"
|
| 122 |
-
]
|
| 123 |
-
"own_tokenizer": false
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"name": "Demisa",
|
| 127 |
"iso_1_code": null,
|
| 128 |
"iso_3_code": "dei",
|
| 129 |
-
"tokenizers": {},
|
| 130 |
"children": [],
|
|
|
|
| 131 |
"node_i": "3743",
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
}
|
| 135 |
],
|
|
|
|
| 136 |
"node_i": "3741",
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
}
|
| 140 |
],
|
|
|
|
| 141 |
"node_i": "3730",
|
| 142 |
-
"
|
| 143 |
-
"
|
| 144 |
}
|
|
|
|
| 2 |
"name": "East Geelvink Bay",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Anasi",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "bpo",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3731",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Barapasi",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "brp",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3732",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Burate",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "bti",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3733",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Kehu",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "khh",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3734",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Kofei",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "kpi",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3735",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"name": "Nisa",
|
| 58 |
"iso_1_code": null,
|
| 59 |
"iso_3_code": "njs",
|
|
|
|
| 60 |
"children": [],
|
| 61 |
+
"tokenizers": {},
|
| 62 |
"node_i": "3736",
|
| 63 |
+
"native_tokenizers": [],
|
| 64 |
+
"scripts": []
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"name": "Sauri",
|
| 68 |
"iso_1_code": null,
|
| 69 |
"iso_3_code": "srt",
|
|
|
|
| 70 |
"children": [],
|
| 71 |
+
"tokenizers": {},
|
| 72 |
"node_i": "3737",
|
| 73 |
+
"native_tokenizers": [],
|
| 74 |
+
"scripts": []
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"name": "Tefaro",
|
| 78 |
"iso_1_code": null,
|
| 79 |
"iso_3_code": "tfo",
|
|
|
|
| 80 |
"children": [],
|
| 81 |
+
"tokenizers": {},
|
| 82 |
"node_i": "3738",
|
| 83 |
+
"native_tokenizers": [],
|
| 84 |
+
"scripts": []
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "Tunggare",
|
| 88 |
"iso_1_code": null,
|
| 89 |
"iso_3_code": "trt",
|
|
|
|
| 90 |
"children": [],
|
| 91 |
+
"tokenizers": {},
|
| 92 |
"node_i": "3739",
|
| 93 |
+
"native_tokenizers": [],
|
| 94 |
+
"scripts": []
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"name": "Woria",
|
| 98 |
"iso_1_code": null,
|
| 99 |
"iso_3_code": "wor",
|
|
|
|
| 100 |
"children": [],
|
| 101 |
+
"tokenizers": {},
|
| 102 |
"node_i": "3740",
|
| 103 |
+
"native_tokenizers": [],
|
| 104 |
+
"scripts": []
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"name": "Bauzi",
|
| 108 |
"iso_1_code": null,
|
| 109 |
"iso_3_code": null,
|
|
|
|
| 110 |
"children": [
|
| 111 |
{
|
| 112 |
"name": "Bauzi",
|
| 113 |
"iso_1_code": null,
|
| 114 |
"iso_3_code": "bvz",
|
|
|
|
| 115 |
"children": [],
|
| 116 |
+
"tokenizers": {},
|
| 117 |
"node_i": "3742",
|
| 118 |
+
"native_tokenizers": [],
|
| 119 |
"scripts": [
|
| 120 |
"Latn"
|
| 121 |
+
]
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"name": "Demisa",
|
| 125 |
"iso_1_code": null,
|
| 126 |
"iso_3_code": "dei",
|
|
|
|
| 127 |
"children": [],
|
| 128 |
+
"tokenizers": {},
|
| 129 |
"node_i": "3743",
|
| 130 |
+
"native_tokenizers": [],
|
| 131 |
+
"scripts": []
|
| 132 |
}
|
| 133 |
],
|
| 134 |
+
"tokenizers": {},
|
| 135 |
"node_i": "3741",
|
| 136 |
+
"native_tokenizers": [],
|
| 137 |
+
"scripts": []
|
| 138 |
}
|
| 139 |
],
|
| 140 |
+
"tokenizers": {},
|
| 141 |
"node_i": "3730",
|
| 142 |
+
"native_tokenizers": [],
|
| 143 |
+
"scripts": []
|
| 144 |
}
|
data/East New Britain.json
CHANGED
|
@@ -2,104 +2,104 @@
|
|
| 2 |
"name": "East New Britain",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Baining",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Qaqet",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "byx",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3746",
|
|
|
|
| 20 |
"scripts": [
|
| 21 |
"Latn"
|
| 22 |
-
]
|
| 23 |
-
"own_tokenizer": false
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"name": "Kairak",
|
| 27 |
"iso_1_code": null,
|
| 28 |
"iso_3_code": "ckr",
|
| 29 |
-
"tokenizers": {},
|
| 30 |
"children": [],
|
|
|
|
| 31 |
"node_i": "3747",
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"name": "Mali",
|
| 37 |
"iso_1_code": null,
|
| 38 |
"iso_3_code": "gcc",
|
| 39 |
-
"tokenizers": {},
|
| 40 |
"children": [],
|
|
|
|
| 41 |
"node_i": "3748",
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"name": "Simbali",
|
| 47 |
"iso_1_code": null,
|
| 48 |
"iso_3_code": "smg",
|
| 49 |
-
"tokenizers": {},
|
| 50 |
"children": [],
|
|
|
|
| 51 |
"node_i": "3749",
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"name": "Ura",
|
| 57 |
"iso_1_code": null,
|
| 58 |
"iso_3_code": "uro",
|
| 59 |
-
"tokenizers": {},
|
| 60 |
"children": [],
|
|
|
|
| 61 |
"node_i": "3750",
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
},
|
| 65 |
{
|
| 66 |
"name": "Makolkol",
|
| 67 |
"iso_1_code": null,
|
| 68 |
"iso_3_code": "zmh",
|
| 69 |
-
"tokenizers": {},
|
| 70 |
"children": [],
|
|
|
|
| 71 |
"node_i": "3751",
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
}
|
| 75 |
],
|
|
|
|
| 76 |
"node_i": "3745",
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "Taulil",
|
| 82 |
"iso_1_code": null,
|
| 83 |
"iso_3_code": null,
|
| 84 |
-
"tokenizers": {},
|
| 85 |
"children": [
|
| 86 |
{
|
| 87 |
"name": "Tulil",
|
| 88 |
"iso_1_code": null,
|
| 89 |
"iso_3_code": "tuh",
|
| 90 |
-
"tokenizers": {},
|
| 91 |
"children": [],
|
|
|
|
| 92 |
"node_i": "3753",
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
}
|
| 96 |
],
|
|
|
|
| 97 |
"node_i": "3752",
|
| 98 |
-
"
|
| 99 |
-
"
|
| 100 |
}
|
| 101 |
],
|
|
|
|
| 102 |
"node_i": "3744",
|
| 103 |
-
"
|
| 104 |
-
"
|
| 105 |
}
|
|
|
|
| 2 |
"name": "East New Britain",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Baining",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Qaqet",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "byx",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3746",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
"scripts": [
|
| 20 |
"Latn"
|
| 21 |
+
]
|
|
|
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Kairak",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "ckr",
|
|
|
|
| 27 |
"children": [],
|
| 28 |
+
"tokenizers": {},
|
| 29 |
"node_i": "3747",
|
| 30 |
+
"native_tokenizers": [],
|
| 31 |
+
"scripts": []
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"name": "Mali",
|
| 35 |
"iso_1_code": null,
|
| 36 |
"iso_3_code": "gcc",
|
|
|
|
| 37 |
"children": [],
|
| 38 |
+
"tokenizers": {},
|
| 39 |
"node_i": "3748",
|
| 40 |
+
"native_tokenizers": [],
|
| 41 |
+
"scripts": []
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"name": "Simbali",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "smg",
|
|
|
|
| 47 |
"children": [],
|
| 48 |
+
"tokenizers": {},
|
| 49 |
"node_i": "3749",
|
| 50 |
+
"native_tokenizers": [],
|
| 51 |
+
"scripts": []
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Ura",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "uro",
|
|
|
|
| 57 |
"children": [],
|
| 58 |
+
"tokenizers": {},
|
| 59 |
"node_i": "3750",
|
| 60 |
+
"native_tokenizers": [],
|
| 61 |
+
"scripts": []
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"name": "Makolkol",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": "zmh",
|
|
|
|
| 67 |
"children": [],
|
| 68 |
+
"tokenizers": {},
|
| 69 |
"node_i": "3751",
|
| 70 |
+
"native_tokenizers": [],
|
| 71 |
+
"scripts": []
|
| 72 |
}
|
| 73 |
],
|
| 74 |
+
"tokenizers": {},
|
| 75 |
"node_i": "3745",
|
| 76 |
+
"native_tokenizers": [],
|
| 77 |
+
"scripts": []
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"name": "Taulil",
|
| 81 |
"iso_1_code": null,
|
| 82 |
"iso_3_code": null,
|
|
|
|
| 83 |
"children": [
|
| 84 |
{
|
| 85 |
"name": "Tulil",
|
| 86 |
"iso_1_code": null,
|
| 87 |
"iso_3_code": "tuh",
|
|
|
|
| 88 |
"children": [],
|
| 89 |
+
"tokenizers": {},
|
| 90 |
"node_i": "3753",
|
| 91 |
+
"native_tokenizers": [],
|
| 92 |
+
"scripts": []
|
| 93 |
}
|
| 94 |
],
|
| 95 |
+
"tokenizers": {},
|
| 96 |
"node_i": "3752",
|
| 97 |
+
"native_tokenizers": [],
|
| 98 |
+
"scripts": []
|
| 99 |
}
|
| 100 |
],
|
| 101 |
+
"tokenizers": {},
|
| 102 |
"node_i": "3744",
|
| 103 |
+
"native_tokenizers": [],
|
| 104 |
+
"scripts": []
|
| 105 |
}
|
data/Eastern Trans-Fly.json
CHANGED
|
@@ -2,54 +2,54 @@
|
|
| 2 |
"name": "Eastern Trans-Fly",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Bine",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "bon",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3755",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Wipi",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "gdr",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3756",
|
|
|
|
| 26 |
"scripts": [
|
| 27 |
"Latn"
|
| 28 |
-
]
|
| 29 |
-
"own_tokenizer": false
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Gizrra",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": "tof",
|
| 35 |
-
"tokenizers": {},
|
| 36 |
"children": [],
|
|
|
|
| 37 |
"node_i": "3757",
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "Meriam Mir",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": "ulk",
|
| 45 |
-
"tokenizers": {},
|
| 46 |
"children": [],
|
|
|
|
| 47 |
"node_i": "3758",
|
| 48 |
-
"
|
| 49 |
-
"
|
| 50 |
}
|
| 51 |
],
|
|
|
|
| 52 |
"node_i": "3754",
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
}
|
|
|
|
| 2 |
"name": "Eastern Trans-Fly",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Bine",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "bon",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3755",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Wipi",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "gdr",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3756",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
"scripts": [
|
| 27 |
"Latn"
|
| 28 |
+
]
|
|
|
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"name": "Gizrra",
|
| 32 |
"iso_1_code": null,
|
| 33 |
"iso_3_code": "tof",
|
|
|
|
| 34 |
"children": [],
|
| 35 |
+
"tokenizers": {},
|
| 36 |
"node_i": "3757",
|
| 37 |
+
"native_tokenizers": [],
|
| 38 |
+
"scripts": []
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"name": "Meriam Mir",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": "ulk",
|
|
|
|
| 44 |
"children": [],
|
| 45 |
+
"tokenizers": {},
|
| 46 |
"node_i": "3758",
|
| 47 |
+
"native_tokenizers": [],
|
| 48 |
+
"scripts": []
|
| 49 |
}
|
| 50 |
],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3754",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
}
|
data/Eskimo-Aleut.json
CHANGED
|
@@ -2,189 +2,189 @@
|
|
| 2 |
"name": "Eskimo-Aleut",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Aleut",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Aleut",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "ale",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3761",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
}
|
| 23 |
],
|
|
|
|
| 24 |
"node_i": "3760",
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"name": "Eskimo",
|
| 30 |
"iso_1_code": null,
|
| 31 |
"iso_3_code": null,
|
| 32 |
-
"tokenizers": {},
|
| 33 |
"children": [
|
| 34 |
{
|
| 35 |
"name": "Inuit-Inupiaq",
|
| 36 |
"iso_1_code": null,
|
| 37 |
"iso_3_code": null,
|
| 38 |
-
"tokenizers": {},
|
| 39 |
"children": [
|
| 40 |
{
|
| 41 |
"name": "Inupiatun, North Alaskan",
|
| 42 |
"iso_1_code": "ik",
|
| 43 |
"iso_3_code": "esi",
|
| 44 |
-
"tokenizers": {},
|
| 45 |
"children": [],
|
|
|
|
| 46 |
"node_i": "3764",
|
|
|
|
| 47 |
"scripts": [
|
| 48 |
"Latn"
|
| 49 |
-
]
|
| 50 |
-
"own_tokenizer": false
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"name": "Inupiatun, Northwest Alaska",
|
| 54 |
"iso_1_code": "ik",
|
| 55 |
"iso_3_code": "esk",
|
| 56 |
-
"tokenizers": {},
|
| 57 |
"children": [],
|
|
|
|
| 58 |
"node_i": "3765",
|
|
|
|
| 59 |
"scripts": [
|
| 60 |
"Latn"
|
| 61 |
-
]
|
| 62 |
-
"own_tokenizer": false
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"name": "Inuktitut, Eastern Canadian",
|
| 66 |
"iso_1_code": "iu",
|
| 67 |
"iso_3_code": "ike",
|
| 68 |
-
"tokenizers": {},
|
| 69 |
"children": [],
|
|
|
|
| 70 |
"node_i": "3766",
|
|
|
|
| 71 |
"scripts": [
|
| 72 |
"Cans"
|
| 73 |
-
]
|
| 74 |
-
"own_tokenizer": false
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"name": "Inuinnaqtun",
|
| 78 |
"iso_1_code": "iu",
|
| 79 |
"iso_3_code": "ikt",
|
| 80 |
-
"tokenizers": {},
|
| 81 |
"children": [],
|
|
|
|
| 82 |
"node_i": "3767",
|
|
|
|
| 83 |
"scripts": [
|
| 84 |
"Latn"
|
| 85 |
-
]
|
| 86 |
-
"own_tokenizer": false
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"name": "Greenlandic",
|
| 90 |
"iso_1_code": "kl",
|
| 91 |
"iso_3_code": "kal",
|
| 92 |
-
"tokenizers": {},
|
| 93 |
"children": [],
|
|
|
|
| 94 |
"node_i": "3768",
|
|
|
|
| 95 |
"scripts": [
|
| 96 |
"Latn"
|
| 97 |
-
]
|
| 98 |
-
"own_tokenizer": false
|
| 99 |
}
|
| 100 |
],
|
|
|
|
| 101 |
"node_i": "3763",
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"name": "Yupik",
|
| 107 |
"iso_1_code": null,
|
| 108 |
"iso_3_code": null,
|
| 109 |
-
"tokenizers": {},
|
| 110 |
"children": [
|
| 111 |
{
|
| 112 |
"name": "Yupik, Saint Lawrence Island",
|
| 113 |
"iso_1_code": null,
|
| 114 |
"iso_3_code": "ess",
|
| 115 |
-
"tokenizers": {},
|
| 116 |
"children": [],
|
|
|
|
| 117 |
"node_i": "3770",
|
|
|
|
| 118 |
"scripts": [
|
| 119 |
"Latn"
|
| 120 |
-
]
|
| 121 |
-
"own_tokenizer": false
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"name": "Yupik, Naukan",
|
| 125 |
"iso_1_code": null,
|
| 126 |
"iso_3_code": "ynk",
|
| 127 |
-
"tokenizers": {},
|
| 128 |
"children": [],
|
|
|
|
| 129 |
"node_i": "3771",
|
| 130 |
-
"
|
| 131 |
-
"
|
| 132 |
},
|
| 133 |
{
|
| 134 |
"name": "Yupik, Sirenik",
|
| 135 |
"iso_1_code": null,
|
| 136 |
"iso_3_code": "ysr",
|
| 137 |
-
"tokenizers": {},
|
| 138 |
"children": [],
|
|
|
|
| 139 |
"node_i": "3772",
|
| 140 |
-
"
|
| 141 |
-
"
|
| 142 |
},
|
| 143 |
{
|
| 144 |
"name": "Alaskan Yupik",
|
| 145 |
"iso_1_code": null,
|
| 146 |
"iso_3_code": null,
|
| 147 |
-
"tokenizers": {},
|
| 148 |
"children": [
|
| 149 |
{
|
| 150 |
"name": "Yupik, Pacific Gulf",
|
| 151 |
"iso_1_code": null,
|
| 152 |
"iso_3_code": "ems",
|
| 153 |
-
"tokenizers": {},
|
| 154 |
"children": [],
|
|
|
|
| 155 |
"node_i": "3774",
|
| 156 |
-
"
|
| 157 |
-
"
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"name": "Yupik, Central",
|
| 161 |
"iso_1_code": null,
|
| 162 |
"iso_3_code": "esu",
|
| 163 |
-
"tokenizers": {},
|
| 164 |
"children": [],
|
|
|
|
| 165 |
"node_i": "3775",
|
|
|
|
| 166 |
"scripts": [
|
| 167 |
"Latn"
|
| 168 |
-
]
|
| 169 |
-
"own_tokenizer": false
|
| 170 |
}
|
| 171 |
],
|
|
|
|
| 172 |
"node_i": "3773",
|
| 173 |
-
"
|
| 174 |
-
"
|
| 175 |
}
|
| 176 |
],
|
|
|
|
| 177 |
"node_i": "3769",
|
| 178 |
-
"
|
| 179 |
-
"
|
| 180 |
}
|
| 181 |
],
|
|
|
|
| 182 |
"node_i": "3762",
|
| 183 |
-
"
|
| 184 |
-
"
|
| 185 |
}
|
| 186 |
],
|
|
|
|
| 187 |
"node_i": "3759",
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
}
|
|
|
|
| 2 |
"name": "Eskimo-Aleut",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Aleut",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Aleut",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "ale",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3761",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
}
|
| 21 |
],
|
| 22 |
+
"tokenizers": {},
|
| 23 |
"node_i": "3760",
|
| 24 |
+
"native_tokenizers": [],
|
| 25 |
+
"scripts": []
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"name": "Eskimo",
|
| 29 |
"iso_1_code": null,
|
| 30 |
"iso_3_code": null,
|
|
|
|
| 31 |
"children": [
|
| 32 |
{
|
| 33 |
"name": "Inuit-Inupiaq",
|
| 34 |
"iso_1_code": null,
|
| 35 |
"iso_3_code": null,
|
|
|
|
| 36 |
"children": [
|
| 37 |
{
|
| 38 |
"name": "Inupiatun, North Alaskan",
|
| 39 |
"iso_1_code": "ik",
|
| 40 |
"iso_3_code": "esi",
|
|
|
|
| 41 |
"children": [],
|
| 42 |
+
"tokenizers": {},
|
| 43 |
"node_i": "3764",
|
| 44 |
+
"native_tokenizers": [],
|
| 45 |
"scripts": [
|
| 46 |
"Latn"
|
| 47 |
+
]
|
|
|
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Inupiatun, Northwest Alaska",
|
| 51 |
"iso_1_code": "ik",
|
| 52 |
"iso_3_code": "esk",
|
|
|
|
| 53 |
"children": [],
|
| 54 |
+
"tokenizers": {},
|
| 55 |
"node_i": "3765",
|
| 56 |
+
"native_tokenizers": [],
|
| 57 |
"scripts": [
|
| 58 |
"Latn"
|
| 59 |
+
]
|
|
|
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"name": "Inuktitut, Eastern Canadian",
|
| 63 |
"iso_1_code": "iu",
|
| 64 |
"iso_3_code": "ike",
|
|
|
|
| 65 |
"children": [],
|
| 66 |
+
"tokenizers": {},
|
| 67 |
"node_i": "3766",
|
| 68 |
+
"native_tokenizers": [],
|
| 69 |
"scripts": [
|
| 70 |
"Cans"
|
| 71 |
+
]
|
|
|
|
| 72 |
},
|
| 73 |
{
|
| 74 |
"name": "Inuinnaqtun",
|
| 75 |
"iso_1_code": "iu",
|
| 76 |
"iso_3_code": "ikt",
|
|
|
|
| 77 |
"children": [],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3767",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
"scripts": [
|
| 82 |
"Latn"
|
| 83 |
+
]
|
|
|
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"name": "Greenlandic",
|
| 87 |
"iso_1_code": "kl",
|
| 88 |
"iso_3_code": "kal",
|
|
|
|
| 89 |
"children": [],
|
| 90 |
+
"tokenizers": {},
|
| 91 |
"node_i": "3768",
|
| 92 |
+
"native_tokenizers": [],
|
| 93 |
"scripts": [
|
| 94 |
"Latn"
|
| 95 |
+
]
|
|
|
|
| 96 |
}
|
| 97 |
],
|
| 98 |
+
"tokenizers": {},
|
| 99 |
"node_i": "3763",
|
| 100 |
+
"native_tokenizers": [],
|
| 101 |
+
"scripts": []
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"name": "Yupik",
|
| 105 |
"iso_1_code": null,
|
| 106 |
"iso_3_code": null,
|
|
|
|
| 107 |
"children": [
|
| 108 |
{
|
| 109 |
"name": "Yupik, Saint Lawrence Island",
|
| 110 |
"iso_1_code": null,
|
| 111 |
"iso_3_code": "ess",
|
|
|
|
| 112 |
"children": [],
|
| 113 |
+
"tokenizers": {},
|
| 114 |
"node_i": "3770",
|
| 115 |
+
"native_tokenizers": [],
|
| 116 |
"scripts": [
|
| 117 |
"Latn"
|
| 118 |
+
]
|
|
|
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"name": "Yupik, Naukan",
|
| 122 |
"iso_1_code": null,
|
| 123 |
"iso_3_code": "ynk",
|
|
|
|
| 124 |
"children": [],
|
| 125 |
+
"tokenizers": {},
|
| 126 |
"node_i": "3771",
|
| 127 |
+
"native_tokenizers": [],
|
| 128 |
+
"scripts": []
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"name": "Yupik, Sirenik",
|
| 132 |
"iso_1_code": null,
|
| 133 |
"iso_3_code": "ysr",
|
|
|
|
| 134 |
"children": [],
|
| 135 |
+
"tokenizers": {},
|
| 136 |
"node_i": "3772",
|
| 137 |
+
"native_tokenizers": [],
|
| 138 |
+
"scripts": []
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"name": "Alaskan Yupik",
|
| 142 |
"iso_1_code": null,
|
| 143 |
"iso_3_code": null,
|
|
|
|
| 144 |
"children": [
|
| 145 |
{
|
| 146 |
"name": "Yupik, Pacific Gulf",
|
| 147 |
"iso_1_code": null,
|
| 148 |
"iso_3_code": "ems",
|
|
|
|
| 149 |
"children": [],
|
| 150 |
+
"tokenizers": {},
|
| 151 |
"node_i": "3774",
|
| 152 |
+
"native_tokenizers": [],
|
| 153 |
+
"scripts": []
|
| 154 |
},
|
| 155 |
{
|
| 156 |
"name": "Yupik, Central",
|
| 157 |
"iso_1_code": null,
|
| 158 |
"iso_3_code": "esu",
|
|
|
|
| 159 |
"children": [],
|
| 160 |
+
"tokenizers": {},
|
| 161 |
"node_i": "3775",
|
| 162 |
+
"native_tokenizers": [],
|
| 163 |
"scripts": [
|
| 164 |
"Latn"
|
| 165 |
+
]
|
|
|
|
| 166 |
}
|
| 167 |
],
|
| 168 |
+
"tokenizers": {},
|
| 169 |
"node_i": "3773",
|
| 170 |
+
"native_tokenizers": [],
|
| 171 |
+
"scripts": []
|
| 172 |
}
|
| 173 |
],
|
| 174 |
+
"tokenizers": {},
|
| 175 |
"node_i": "3769",
|
| 176 |
+
"native_tokenizers": [],
|
| 177 |
+
"scripts": []
|
| 178 |
}
|
| 179 |
],
|
| 180 |
+
"tokenizers": {},
|
| 181 |
"node_i": "3762",
|
| 182 |
+
"native_tokenizers": [],
|
| 183 |
+
"scripts": []
|
| 184 |
}
|
| 185 |
],
|
| 186 |
+
"tokenizers": {},
|
| 187 |
"node_i": "3759",
|
| 188 |
+
"native_tokenizers": [],
|
| 189 |
+
"scripts": []
|
| 190 |
}
|
data/Eyak-Athabaskan.json
CHANGED
|
@@ -2,648 +2,648 @@
|
|
| 2 |
"name": "Eyak-Athabaskan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Eyak",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "eya",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3777",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Athabaskan",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": null,
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [
|
| 23 |
{
|
| 24 |
"name": "Apachean",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": null,
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [
|
| 29 |
{
|
| 30 |
"name": "Navajo",
|
| 31 |
"iso_1_code": "nv",
|
| 32 |
"iso_3_code": "nav",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "3780",
|
|
|
|
| 36 |
"scripts": [
|
| 37 |
"Latn"
|
| 38 |
-
]
|
| 39 |
-
"own_tokenizer": false
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"name": "Apache",
|
| 43 |
"iso_1_code": null,
|
| 44 |
"iso_3_code": null,
|
| 45 |
-
"tokenizers": {},
|
| 46 |
"children": [
|
| 47 |
{
|
| 48 |
"name": "Apache, Jicarilla",
|
| 49 |
"iso_1_code": null,
|
| 50 |
"iso_3_code": "apj",
|
| 51 |
-
"tokenizers": {},
|
| 52 |
"children": [],
|
|
|
|
| 53 |
"node_i": "3782",
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"name": "Apache, Kiowa",
|
| 59 |
"iso_1_code": null,
|
| 60 |
"iso_3_code": "apk",
|
| 61 |
-
"tokenizers": {},
|
| 62 |
"children": [],
|
|
|
|
| 63 |
"node_i": "3783",
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"name": "Apache, Lipan",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "apl",
|
| 71 |
-
"tokenizers": {},
|
| 72 |
"children": [],
|
|
|
|
| 73 |
"node_i": "3784",
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"name": "Apache, Mescalero-Chiricahua",
|
| 79 |
"iso_1_code": null,
|
| 80 |
"iso_3_code": "apm",
|
| 81 |
-
"tokenizers": {},
|
| 82 |
"children": [],
|
|
|
|
| 83 |
"node_i": "3785",
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"name": "Apache, Western",
|
| 89 |
"iso_1_code": null,
|
| 90 |
"iso_3_code": "apw",
|
| 91 |
-
"tokenizers": {},
|
| 92 |
"children": [],
|
|
|
|
| 93 |
"node_i": "3786",
|
|
|
|
| 94 |
"scripts": [
|
| 95 |
"Latn"
|
| 96 |
-
]
|
| 97 |
-
"own_tokenizer": false
|
| 98 |
}
|
| 99 |
],
|
|
|
|
| 100 |
"node_i": "3781",
|
| 101 |
-
"
|
| 102 |
-
"
|
| 103 |
}
|
| 104 |
],
|
|
|
|
| 105 |
"node_i": "3779",
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"name": "Northern Athabaskan",
|
| 111 |
"iso_1_code": null,
|
| 112 |
"iso_3_code": null,
|
| 113 |
-
"tokenizers": {},
|
| 114 |
"children": [
|
| 115 |
{
|
| 116 |
"name": "Ahtena",
|
| 117 |
"iso_1_code": null,
|
| 118 |
"iso_3_code": "aht",
|
| 119 |
-
"tokenizers": {},
|
| 120 |
"children": [],
|
|
|
|
| 121 |
"node_i": "3788",
|
| 122 |
-
"
|
| 123 |
-
"
|
| 124 |
},
|
| 125 |
{
|
| 126 |
"name": "Babine",
|
| 127 |
"iso_1_code": null,
|
| 128 |
"iso_3_code": "bcr",
|
| 129 |
-
"tokenizers": {},
|
| 130 |
"children": [],
|
|
|
|
| 131 |
"node_i": "3789",
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
},
|
| 135 |
{
|
| 136 |
"name": "Beaver",
|
| 137 |
"iso_1_code": null,
|
| 138 |
"iso_3_code": "bea",
|
| 139 |
-
"tokenizers": {},
|
| 140 |
"children": [],
|
|
|
|
| 141 |
"node_i": "3790",
|
|
|
|
| 142 |
"scripts": [
|
| 143 |
"Latn"
|
| 144 |
-
]
|
| 145 |
-
"own_tokenizer": false
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"name": "Dene",
|
| 149 |
"iso_1_code": null,
|
| 150 |
"iso_3_code": "chp",
|
| 151 |
-
"tokenizers": {},
|
| 152 |
"children": [],
|
|
|
|
| 153 |
"node_i": "3791",
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"name": "Chilcotin",
|
| 159 |
"iso_1_code": null,
|
| 160 |
"iso_3_code": "clc",
|
| 161 |
-
"tokenizers": {},
|
| 162 |
"children": [],
|
|
|
|
| 163 |
"node_i": "3792",
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"name": "Tlicho",
|
| 169 |
"iso_1_code": null,
|
| 170 |
"iso_3_code": "dgr",
|
| 171 |
-
"tokenizers": {},
|
| 172 |
"children": [],
|
|
|
|
| 173 |
"node_i": "3793",
|
|
|
|
| 174 |
"scripts": [
|
| 175 |
"Latn"
|
| 176 |
-
]
|
| 177 |
-
"own_tokenizer": false
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"name": "Gwich\u2019in",
|
| 181 |
"iso_1_code": null,
|
| 182 |
"iso_3_code": "gwi",
|
| 183 |
-
"tokenizers": {},
|
| 184 |
"children": [],
|
|
|
|
| 185 |
"node_i": "3794",
|
|
|
|
| 186 |
"scripts": [
|
| 187 |
"Latn"
|
| 188 |
-
]
|
| 189 |
-
"own_tokenizer": false
|
| 190 |
},
|
| 191 |
{
|
| 192 |
"name": "Han",
|
| 193 |
"iso_1_code": null,
|
| 194 |
"iso_3_code": "haa",
|
| 195 |
-
"tokenizers": {},
|
| 196 |
"children": [],
|
|
|
|
| 197 |
"node_i": "3795",
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"name": "Holikachuk",
|
| 203 |
"iso_1_code": null,
|
| 204 |
"iso_3_code": "hoi",
|
| 205 |
-
"tokenizers": {},
|
| 206 |
"children": [],
|
|
|
|
| 207 |
"node_i": "3796",
|
| 208 |
-
"
|
| 209 |
-
"
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"name": "Deg Xinag",
|
| 213 |
"iso_1_code": null,
|
| 214 |
"iso_3_code": "ing",
|
| 215 |
-
"tokenizers": {},
|
| 216 |
"children": [],
|
|
|
|
| 217 |
"node_i": "3797",
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
},
|
| 221 |
{
|
| 222 |
"name": "Koyukon",
|
| 223 |
"iso_1_code": null,
|
| 224 |
"iso_3_code": "koy",
|
| 225 |
-
"tokenizers": {},
|
| 226 |
"children": [],
|
|
|
|
| 227 |
"node_i": "3798",
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
},
|
| 231 |
{
|
| 232 |
"name": "Kuskokwim, Upper",
|
| 233 |
"iso_1_code": null,
|
| 234 |
"iso_3_code": "kuu",
|
| 235 |
-
"tokenizers": {},
|
| 236 |
"children": [],
|
|
|
|
| 237 |
"node_i": "3799",
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
},
|
| 241 |
{
|
| 242 |
"name": "Sekani",
|
| 243 |
"iso_1_code": null,
|
| 244 |
"iso_3_code": "sek",
|
| 245 |
-
"tokenizers": {},
|
| 246 |
"children": [],
|
|
|
|
| 247 |
"node_i": "3800",
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
},
|
| 251 |
{
|
| 252 |
"name": "Sarsi",
|
| 253 |
"iso_1_code": null,
|
| 254 |
"iso_3_code": "srs",
|
| 255 |
-
"tokenizers": {},
|
| 256 |
"children": [],
|
|
|
|
| 257 |
"node_i": "3801",
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
},
|
| 261 |
{
|
| 262 |
"name": "Tanana, Lower",
|
| 263 |
"iso_1_code": null,
|
| 264 |
"iso_3_code": "taa",
|
| 265 |
-
"tokenizers": {},
|
| 266 |
"children": [],
|
|
|
|
| 267 |
"node_i": "3802",
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"name": "Tanana, Upper",
|
| 273 |
"iso_1_code": null,
|
| 274 |
"iso_3_code": "tau",
|
| 275 |
-
"tokenizers": {},
|
| 276 |
"children": [],
|
|
|
|
| 277 |
"node_i": "3803",
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"name": "Tanacross",
|
| 283 |
"iso_1_code": null,
|
| 284 |
"iso_3_code": "tcb",
|
| 285 |
-
"tokenizers": {},
|
| 286 |
"children": [],
|
|
|
|
| 287 |
"node_i": "3804",
|
| 288 |
-
"
|
| 289 |
-
"
|
| 290 |
},
|
| 291 |
{
|
| 292 |
"name": "Tanaina",
|
| 293 |
"iso_1_code": null,
|
| 294 |
"iso_3_code": "tfn",
|
| 295 |
-
"tokenizers": {},
|
| 296 |
"children": [],
|
|
|
|
| 297 |
"node_i": "3805",
|
| 298 |
-
"
|
| 299 |
-
"
|
| 300 |
},
|
| 301 |
{
|
| 302 |
"name": "Tsetsaut",
|
| 303 |
"iso_1_code": null,
|
| 304 |
"iso_3_code": "txc",
|
| 305 |
-
"tokenizers": {},
|
| 306 |
"children": [],
|
|
|
|
| 307 |
"node_i": "3806",
|
| 308 |
-
"
|
| 309 |
-
"
|
| 310 |
},
|
| 311 |
{
|
| 312 |
"name": "Carrier",
|
| 313 |
"iso_1_code": null,
|
| 314 |
"iso_3_code": null,
|
| 315 |
-
"tokenizers": {},
|
| 316 |
"children": [
|
| 317 |
{
|
| 318 |
"name": "Carrier, Southern",
|
| 319 |
"iso_1_code": null,
|
| 320 |
"iso_3_code": "caf",
|
| 321 |
-
"tokenizers": {},
|
| 322 |
"children": [],
|
|
|
|
| 323 |
"node_i": "3808",
|
|
|
|
| 324 |
"scripts": [
|
| 325 |
"Latn"
|
| 326 |
-
]
|
| 327 |
-
"own_tokenizer": false
|
| 328 |
},
|
| 329 |
{
|
| 330 |
"name": "Carrier",
|
| 331 |
"iso_1_code": null,
|
| 332 |
"iso_3_code": "crx",
|
| 333 |
-
"tokenizers": {},
|
| 334 |
"children": [],
|
|
|
|
| 335 |
"node_i": "3809",
|
|
|
|
| 336 |
"scripts": [
|
| 337 |
"Latn"
|
| 338 |
-
]
|
| 339 |
-
"own_tokenizer": false
|
| 340 |
}
|
| 341 |
],
|
|
|
|
| 342 |
"node_i": "3807",
|
| 343 |
-
"
|
| 344 |
-
"
|
| 345 |
},
|
| 346 |
{
|
| 347 |
"name": "Slavey-Hare",
|
| 348 |
"iso_1_code": null,
|
| 349 |
"iso_3_code": null,
|
| 350 |
-
"tokenizers": {},
|
| 351 |
"children": [
|
| 352 |
{
|
| 353 |
"name": "Slavey, North",
|
| 354 |
"iso_1_code": null,
|
| 355 |
"iso_3_code": "scs",
|
| 356 |
-
"tokenizers": {},
|
| 357 |
"children": [],
|
|
|
|
| 358 |
"node_i": "3811",
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
},
|
| 362 |
{
|
| 363 |
"name": "Slavey, South",
|
| 364 |
"iso_1_code": null,
|
| 365 |
"iso_3_code": "xsl",
|
| 366 |
-
"tokenizers": {},
|
| 367 |
"children": [],
|
|
|
|
| 368 |
"node_i": "3812",
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
}
|
| 372 |
],
|
|
|
|
| 373 |
"node_i": "3810",
|
| 374 |
-
"
|
| 375 |
-
"
|
| 376 |
},
|
| 377 |
{
|
| 378 |
"name": "Tahltan",
|
| 379 |
"iso_1_code": null,
|
| 380 |
"iso_3_code": null,
|
| 381 |
-
"tokenizers": {},
|
| 382 |
"children": [
|
| 383 |
{
|
| 384 |
"name": "Kaska",
|
| 385 |
"iso_1_code": null,
|
| 386 |
"iso_3_code": "kkz",
|
| 387 |
-
"tokenizers": {},
|
| 388 |
"children": [],
|
|
|
|
| 389 |
"node_i": "3814",
|
| 390 |
-
"
|
| 391 |
-
"
|
| 392 |
},
|
| 393 |
{
|
| 394 |
"name": "Tagish",
|
| 395 |
"iso_1_code": null,
|
| 396 |
"iso_3_code": "tgx",
|
| 397 |
-
"tokenizers": {},
|
| 398 |
"children": [],
|
|
|
|
| 399 |
"node_i": "3815",
|
| 400 |
-
"
|
| 401 |
-
"
|
| 402 |
},
|
| 403 |
{
|
| 404 |
"name": "Tahltan",
|
| 405 |
"iso_1_code": null,
|
| 406 |
"iso_3_code": "tht",
|
| 407 |
-
"tokenizers": {},
|
| 408 |
"children": [],
|
|
|
|
| 409 |
"node_i": "3816",
|
| 410 |
-
"
|
| 411 |
-
"
|
| 412 |
}
|
| 413 |
],
|
|
|
|
| 414 |
"node_i": "3813",
|
| 415 |
-
"
|
| 416 |
-
"
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"name": "Tuchone",
|
| 420 |
"iso_1_code": null,
|
| 421 |
"iso_3_code": null,
|
| 422 |
-
"tokenizers": {},
|
| 423 |
"children": [
|
| 424 |
{
|
| 425 |
"name": "Tutchone, Southern",
|
| 426 |
"iso_1_code": null,
|
| 427 |
"iso_3_code": "tce",
|
| 428 |
-
"tokenizers": {},
|
| 429 |
"children": [],
|
|
|
|
| 430 |
"node_i": "3818",
|
| 431 |
-
"
|
| 432 |
-
"
|
| 433 |
},
|
| 434 |
{
|
| 435 |
"name": "Tutchone, Northern",
|
| 436 |
"iso_1_code": null,
|
| 437 |
"iso_3_code": "ttm",
|
| 438 |
-
"tokenizers": {},
|
| 439 |
"children": [],
|
|
|
|
| 440 |
"node_i": "3819",
|
| 441 |
-
"
|
| 442 |
-
"
|
| 443 |
}
|
| 444 |
],
|
|
|
|
| 445 |
"node_i": "3817",
|
| 446 |
-
"
|
| 447 |
-
"
|
| 448 |
}
|
| 449 |
],
|
|
|
|
| 450 |
"node_i": "3787",
|
| 451 |
-
"
|
| 452 |
-
"
|
| 453 |
},
|
| 454 |
{
|
| 455 |
"name": "Pacific Coast Athabaskan",
|
| 456 |
"iso_1_code": null,
|
| 457 |
"iso_3_code": null,
|
| 458 |
-
"tokenizers": {},
|
| 459 |
"children": [
|
| 460 |
{
|
| 461 |
"name": "Kwalhioqua-Tlatskanai",
|
| 462 |
"iso_1_code": null,
|
| 463 |
"iso_3_code": "qwt",
|
| 464 |
-
"tokenizers": {},
|
| 465 |
"children": [],
|
|
|
|
| 466 |
"node_i": "3821",
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
},
|
| 470 |
{
|
| 471 |
"name": "California Athabaskan",
|
| 472 |
"iso_1_code": null,
|
| 473 |
"iso_3_code": null,
|
| 474 |
-
"tokenizers": {},
|
| 475 |
"children": [
|
| 476 |
{
|
| 477 |
"name": "Hupa",
|
| 478 |
"iso_1_code": null,
|
| 479 |
"iso_3_code": "hup",
|
| 480 |
-
"tokenizers": {},
|
| 481 |
"children": [],
|
|
|
|
| 482 |
"node_i": "3823",
|
| 483 |
-
"
|
| 484 |
-
"
|
| 485 |
},
|
| 486 |
{
|
| 487 |
"name": "Kato",
|
| 488 |
"iso_1_code": null,
|
| 489 |
"iso_3_code": "ktw",
|
| 490 |
-
"tokenizers": {},
|
| 491 |
"children": [],
|
|
|
|
| 492 |
"node_i": "3824",
|
| 493 |
-
"
|
| 494 |
-
"
|
| 495 |
},
|
| 496 |
{
|
| 497 |
"name": "Mattole",
|
| 498 |
"iso_1_code": null,
|
| 499 |
"iso_3_code": "mvb",
|
| 500 |
-
"tokenizers": {},
|
| 501 |
"children": [],
|
|
|
|
| 502 |
"node_i": "3825",
|
| 503 |
-
"
|
| 504 |
-
"
|
| 505 |
},
|
| 506 |
{
|
| 507 |
"name": "Wailaki",
|
| 508 |
"iso_1_code": null,
|
| 509 |
"iso_3_code": "wlk",
|
| 510 |
-
"tokenizers": {},
|
| 511 |
"children": [],
|
|
|
|
| 512 |
"node_i": "3826",
|
| 513 |
-
"
|
| 514 |
-
"
|
| 515 |
}
|
| 516 |
],
|
|
|
|
| 517 |
"node_i": "3822",
|
| 518 |
-
"
|
| 519 |
-
"
|
| 520 |
},
|
| 521 |
{
|
| 522 |
"name": "Oregon Athabaskan",
|
| 523 |
"iso_1_code": null,
|
| 524 |
"iso_3_code": null,
|
| 525 |
-
"tokenizers": {},
|
| 526 |
"children": [
|
| 527 |
{
|
| 528 |
"name": "Galice",
|
| 529 |
"iso_1_code": null,
|
| 530 |
"iso_3_code": "gce",
|
| 531 |
-
"tokenizers": {},
|
| 532 |
"children": [],
|
|
|
|
| 533 |
"node_i": "3828",
|
| 534 |
-
"
|
| 535 |
-
"
|
| 536 |
},
|
| 537 |
{
|
| 538 |
"name": "Upper Umpqua",
|
| 539 |
"iso_1_code": null,
|
| 540 |
"iso_3_code": "xup",
|
| 541 |
-
"tokenizers": {},
|
| 542 |
"children": [],
|
|
|
|
| 543 |
"node_i": "3829",
|
| 544 |
-
"
|
| 545 |
-
"
|
| 546 |
},
|
| 547 |
{
|
| 548 |
"name": "Tolowa-Chetco",
|
| 549 |
"iso_1_code": null,
|
| 550 |
"iso_3_code": null,
|
| 551 |
-
"tokenizers": {},
|
| 552 |
"children": [
|
| 553 |
{
|
| 554 |
"name": "Chetco",
|
| 555 |
"iso_1_code": null,
|
| 556 |
"iso_3_code": "ctc",
|
| 557 |
-
"tokenizers": {},
|
| 558 |
"children": [],
|
|
|
|
| 559 |
"node_i": "3831",
|
| 560 |
-
"
|
| 561 |
-
"
|
| 562 |
},
|
| 563 |
{
|
| 564 |
"name": "Tolowa",
|
| 565 |
"iso_1_code": null,
|
| 566 |
"iso_3_code": "tol",
|
| 567 |
-
"tokenizers": {},
|
| 568 |
"children": [],
|
|
|
|
| 569 |
"node_i": "3832",
|
| 570 |
-
"
|
| 571 |
-
"
|
| 572 |
}
|
| 573 |
],
|
|
|
|
| 574 |
"node_i": "3830",
|
| 575 |
-
"
|
| 576 |
-
"
|
| 577 |
},
|
| 578 |
{
|
| 579 |
"name": "Tututni-Chasta Costa-Coquille",
|
| 580 |
"iso_1_code": null,
|
| 581 |
"iso_3_code": null,
|
| 582 |
-
"tokenizers": {},
|
| 583 |
"children": [
|
| 584 |
{
|
| 585 |
"name": "Coquille",
|
| 586 |
"iso_1_code": null,
|
| 587 |
"iso_3_code": "coq",
|
| 588 |
-
"tokenizers": {},
|
| 589 |
"children": [],
|
|
|
|
| 590 |
"node_i": "3834",
|
| 591 |
-
"
|
| 592 |
-
"
|
| 593 |
},
|
| 594 |
{
|
| 595 |
"name": "Tututni",
|
| 596 |
"iso_1_code": null,
|
| 597 |
"iso_3_code": "tuu",
|
| 598 |
-
"tokenizers": {},
|
| 599 |
"children": [],
|
|
|
|
| 600 |
"node_i": "3835",
|
| 601 |
-
"
|
| 602 |
-
"
|
| 603 |
}
|
| 604 |
],
|
|
|
|
| 605 |
"node_i": "3833",
|
| 606 |
-
"
|
| 607 |
-
"
|
| 608 |
}
|
| 609 |
],
|
|
|
|
| 610 |
"node_i": "3827",
|
| 611 |
-
"
|
| 612 |
-
"
|
| 613 |
}
|
| 614 |
],
|
|
|
|
| 615 |
"node_i": "3820",
|
| 616 |
-
"
|
| 617 |
-
"
|
| 618 |
}
|
| 619 |
],
|
|
|
|
| 620 |
"node_i": "3778",
|
| 621 |
-
"
|
| 622 |
-
"
|
| 623 |
},
|
| 624 |
{
|
| 625 |
"name": "Tlingit",
|
| 626 |
"iso_1_code": null,
|
| 627 |
"iso_3_code": null,
|
| 628 |
-
"tokenizers": {},
|
| 629 |
"children": [
|
| 630 |
{
|
| 631 |
"name": "Tlingit",
|
| 632 |
"iso_1_code": null,
|
| 633 |
"iso_3_code": "tli",
|
| 634 |
-
"tokenizers": {},
|
| 635 |
"children": [],
|
|
|
|
| 636 |
"node_i": "3837",
|
| 637 |
-
"
|
| 638 |
-
"
|
| 639 |
}
|
| 640 |
],
|
|
|
|
| 641 |
"node_i": "3836",
|
| 642 |
-
"
|
| 643 |
-
"
|
| 644 |
}
|
| 645 |
],
|
|
|
|
| 646 |
"node_i": "3776",
|
| 647 |
-
"
|
| 648 |
-
"
|
| 649 |
}
|
|
|
|
| 2 |
"name": "Eyak-Athabaskan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Eyak",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "eya",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3777",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Athabaskan",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": null,
|
|
|
|
| 20 |
"children": [
|
| 21 |
{
|
| 22 |
"name": "Apachean",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": null,
|
|
|
|
| 25 |
"children": [
|
| 26 |
{
|
| 27 |
"name": "Navajo",
|
| 28 |
"iso_1_code": "nv",
|
| 29 |
"iso_3_code": "nav",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3780",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
"scripts": [
|
| 35 |
"Latn"
|
| 36 |
+
]
|
|
|
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"name": "Apache",
|
| 40 |
"iso_1_code": null,
|
| 41 |
"iso_3_code": null,
|
|
|
|
| 42 |
"children": [
|
| 43 |
{
|
| 44 |
"name": "Apache, Jicarilla",
|
| 45 |
"iso_1_code": null,
|
| 46 |
"iso_3_code": "apj",
|
|
|
|
| 47 |
"children": [],
|
| 48 |
+
"tokenizers": {},
|
| 49 |
"node_i": "3782",
|
| 50 |
+
"native_tokenizers": [],
|
| 51 |
+
"scripts": []
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"name": "Apache, Kiowa",
|
| 55 |
"iso_1_code": null,
|
| 56 |
"iso_3_code": "apk",
|
|
|
|
| 57 |
"children": [],
|
| 58 |
+
"tokenizers": {},
|
| 59 |
"node_i": "3783",
|
| 60 |
+
"native_tokenizers": [],
|
| 61 |
+
"scripts": []
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"name": "Apache, Lipan",
|
| 65 |
"iso_1_code": null,
|
| 66 |
"iso_3_code": "apl",
|
|
|
|
| 67 |
"children": [],
|
| 68 |
+
"tokenizers": {},
|
| 69 |
"node_i": "3784",
|
| 70 |
+
"native_tokenizers": [],
|
| 71 |
+
"scripts": []
|
| 72 |
},
|
| 73 |
{
|
| 74 |
"name": "Apache, Mescalero-Chiricahua",
|
| 75 |
"iso_1_code": null,
|
| 76 |
"iso_3_code": "apm",
|
|
|
|
| 77 |
"children": [],
|
| 78 |
+
"tokenizers": {},
|
| 79 |
"node_i": "3785",
|
| 80 |
+
"native_tokenizers": [],
|
| 81 |
+
"scripts": []
|
| 82 |
},
|
| 83 |
{
|
| 84 |
"name": "Apache, Western",
|
| 85 |
"iso_1_code": null,
|
| 86 |
"iso_3_code": "apw",
|
|
|
|
| 87 |
"children": [],
|
| 88 |
+
"tokenizers": {},
|
| 89 |
"node_i": "3786",
|
| 90 |
+
"native_tokenizers": [],
|
| 91 |
"scripts": [
|
| 92 |
"Latn"
|
| 93 |
+
]
|
|
|
|
| 94 |
}
|
| 95 |
],
|
| 96 |
+
"tokenizers": {},
|
| 97 |
"node_i": "3781",
|
| 98 |
+
"native_tokenizers": [],
|
| 99 |
+
"scripts": []
|
| 100 |
}
|
| 101 |
],
|
| 102 |
+
"tokenizers": {},
|
| 103 |
"node_i": "3779",
|
| 104 |
+
"native_tokenizers": [],
|
| 105 |
+
"scripts": []
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"name": "Northern Athabaskan",
|
| 109 |
"iso_1_code": null,
|
| 110 |
"iso_3_code": null,
|
|
|
|
| 111 |
"children": [
|
| 112 |
{
|
| 113 |
"name": "Ahtena",
|
| 114 |
"iso_1_code": null,
|
| 115 |
"iso_3_code": "aht",
|
|
|
|
| 116 |
"children": [],
|
| 117 |
+
"tokenizers": {},
|
| 118 |
"node_i": "3788",
|
| 119 |
+
"native_tokenizers": [],
|
| 120 |
+
"scripts": []
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"name": "Babine",
|
| 124 |
"iso_1_code": null,
|
| 125 |
"iso_3_code": "bcr",
|
|
|
|
| 126 |
"children": [],
|
| 127 |
+
"tokenizers": {},
|
| 128 |
"node_i": "3789",
|
| 129 |
+
"native_tokenizers": [],
|
| 130 |
+
"scripts": []
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"name": "Beaver",
|
| 134 |
"iso_1_code": null,
|
| 135 |
"iso_3_code": "bea",
|
|
|
|
| 136 |
"children": [],
|
| 137 |
+
"tokenizers": {},
|
| 138 |
"node_i": "3790",
|
| 139 |
+
"native_tokenizers": [],
|
| 140 |
"scripts": [
|
| 141 |
"Latn"
|
| 142 |
+
]
|
|
|
|
| 143 |
},
|
| 144 |
{
|
| 145 |
"name": "Dene",
|
| 146 |
"iso_1_code": null,
|
| 147 |
"iso_3_code": "chp",
|
|
|
|
| 148 |
"children": [],
|
| 149 |
+
"tokenizers": {},
|
| 150 |
"node_i": "3791",
|
| 151 |
+
"native_tokenizers": [],
|
| 152 |
+
"scripts": []
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"name": "Chilcotin",
|
| 156 |
"iso_1_code": null,
|
| 157 |
"iso_3_code": "clc",
|
|
|
|
| 158 |
"children": [],
|
| 159 |
+
"tokenizers": {},
|
| 160 |
"node_i": "3792",
|
| 161 |
+
"native_tokenizers": [],
|
| 162 |
+
"scripts": []
|
| 163 |
},
|
| 164 |
{
|
| 165 |
"name": "Tlicho",
|
| 166 |
"iso_1_code": null,
|
| 167 |
"iso_3_code": "dgr",
|
|
|
|
| 168 |
"children": [],
|
| 169 |
+
"tokenizers": {},
|
| 170 |
"node_i": "3793",
|
| 171 |
+
"native_tokenizers": [],
|
| 172 |
"scripts": [
|
| 173 |
"Latn"
|
| 174 |
+
]
|
|
|
|
| 175 |
},
|
| 176 |
{
|
| 177 |
"name": "Gwich\u2019in",
|
| 178 |
"iso_1_code": null,
|
| 179 |
"iso_3_code": "gwi",
|
|
|
|
| 180 |
"children": [],
|
| 181 |
+
"tokenizers": {},
|
| 182 |
"node_i": "3794",
|
| 183 |
+
"native_tokenizers": [],
|
| 184 |
"scripts": [
|
| 185 |
"Latn"
|
| 186 |
+
]
|
|
|
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"name": "Han",
|
| 190 |
"iso_1_code": null,
|
| 191 |
"iso_3_code": "haa",
|
|
|
|
| 192 |
"children": [],
|
| 193 |
+
"tokenizers": {},
|
| 194 |
"node_i": "3795",
|
| 195 |
+
"native_tokenizers": [],
|
| 196 |
+
"scripts": []
|
| 197 |
},
|
| 198 |
{
|
| 199 |
"name": "Holikachuk",
|
| 200 |
"iso_1_code": null,
|
| 201 |
"iso_3_code": "hoi",
|
|
|
|
| 202 |
"children": [],
|
| 203 |
+
"tokenizers": {},
|
| 204 |
"node_i": "3796",
|
| 205 |
+
"native_tokenizers": [],
|
| 206 |
+
"scripts": []
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"name": "Deg Xinag",
|
| 210 |
"iso_1_code": null,
|
| 211 |
"iso_3_code": "ing",
|
|
|
|
| 212 |
"children": [],
|
| 213 |
+
"tokenizers": {},
|
| 214 |
"node_i": "3797",
|
| 215 |
+
"native_tokenizers": [],
|
| 216 |
+
"scripts": []
|
| 217 |
},
|
| 218 |
{
|
| 219 |
"name": "Koyukon",
|
| 220 |
"iso_1_code": null,
|
| 221 |
"iso_3_code": "koy",
|
|
|
|
| 222 |
"children": [],
|
| 223 |
+
"tokenizers": {},
|
| 224 |
"node_i": "3798",
|
| 225 |
+
"native_tokenizers": [],
|
| 226 |
+
"scripts": []
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"name": "Kuskokwim, Upper",
|
| 230 |
"iso_1_code": null,
|
| 231 |
"iso_3_code": "kuu",
|
|
|
|
| 232 |
"children": [],
|
| 233 |
+
"tokenizers": {},
|
| 234 |
"node_i": "3799",
|
| 235 |
+
"native_tokenizers": [],
|
| 236 |
+
"scripts": []
|
| 237 |
},
|
| 238 |
{
|
| 239 |
"name": "Sekani",
|
| 240 |
"iso_1_code": null,
|
| 241 |
"iso_3_code": "sek",
|
|
|
|
| 242 |
"children": [],
|
| 243 |
+
"tokenizers": {},
|
| 244 |
"node_i": "3800",
|
| 245 |
+
"native_tokenizers": [],
|
| 246 |
+
"scripts": []
|
| 247 |
},
|
| 248 |
{
|
| 249 |
"name": "Sarsi",
|
| 250 |
"iso_1_code": null,
|
| 251 |
"iso_3_code": "srs",
|
|
|
|
| 252 |
"children": [],
|
| 253 |
+
"tokenizers": {},
|
| 254 |
"node_i": "3801",
|
| 255 |
+
"native_tokenizers": [],
|
| 256 |
+
"scripts": []
|
| 257 |
},
|
| 258 |
{
|
| 259 |
"name": "Tanana, Lower",
|
| 260 |
"iso_1_code": null,
|
| 261 |
"iso_3_code": "taa",
|
|
|
|
| 262 |
"children": [],
|
| 263 |
+
"tokenizers": {},
|
| 264 |
"node_i": "3802",
|
| 265 |
+
"native_tokenizers": [],
|
| 266 |
+
"scripts": []
|
| 267 |
},
|
| 268 |
{
|
| 269 |
"name": "Tanana, Upper",
|
| 270 |
"iso_1_code": null,
|
| 271 |
"iso_3_code": "tau",
|
|
|
|
| 272 |
"children": [],
|
| 273 |
+
"tokenizers": {},
|
| 274 |
"node_i": "3803",
|
| 275 |
+
"native_tokenizers": [],
|
| 276 |
+
"scripts": []
|
| 277 |
},
|
| 278 |
{
|
| 279 |
"name": "Tanacross",
|
| 280 |
"iso_1_code": null,
|
| 281 |
"iso_3_code": "tcb",
|
|
|
|
| 282 |
"children": [],
|
| 283 |
+
"tokenizers": {},
|
| 284 |
"node_i": "3804",
|
| 285 |
+
"native_tokenizers": [],
|
| 286 |
+
"scripts": []
|
| 287 |
},
|
| 288 |
{
|
| 289 |
"name": "Tanaina",
|
| 290 |
"iso_1_code": null,
|
| 291 |
"iso_3_code": "tfn",
|
|
|
|
| 292 |
"children": [],
|
| 293 |
+
"tokenizers": {},
|
| 294 |
"node_i": "3805",
|
| 295 |
+
"native_tokenizers": [],
|
| 296 |
+
"scripts": []
|
| 297 |
},
|
| 298 |
{
|
| 299 |
"name": "Tsetsaut",
|
| 300 |
"iso_1_code": null,
|
| 301 |
"iso_3_code": "txc",
|
|
|
|
| 302 |
"children": [],
|
| 303 |
+
"tokenizers": {},
|
| 304 |
"node_i": "3806",
|
| 305 |
+
"native_tokenizers": [],
|
| 306 |
+
"scripts": []
|
| 307 |
},
|
| 308 |
{
|
| 309 |
"name": "Carrier",
|
| 310 |
"iso_1_code": null,
|
| 311 |
"iso_3_code": null,
|
|
|
|
| 312 |
"children": [
|
| 313 |
{
|
| 314 |
"name": "Carrier, Southern",
|
| 315 |
"iso_1_code": null,
|
| 316 |
"iso_3_code": "caf",
|
|
|
|
| 317 |
"children": [],
|
| 318 |
+
"tokenizers": {},
|
| 319 |
"node_i": "3808",
|
| 320 |
+
"native_tokenizers": [],
|
| 321 |
"scripts": [
|
| 322 |
"Latn"
|
| 323 |
+
]
|
|
|
|
| 324 |
},
|
| 325 |
{
|
| 326 |
"name": "Carrier",
|
| 327 |
"iso_1_code": null,
|
| 328 |
"iso_3_code": "crx",
|
|
|
|
| 329 |
"children": [],
|
| 330 |
+
"tokenizers": {},
|
| 331 |
"node_i": "3809",
|
| 332 |
+
"native_tokenizers": [],
|
| 333 |
"scripts": [
|
| 334 |
"Latn"
|
| 335 |
+
]
|
|
|
|
| 336 |
}
|
| 337 |
],
|
| 338 |
+
"tokenizers": {},
|
| 339 |
"node_i": "3807",
|
| 340 |
+
"native_tokenizers": [],
|
| 341 |
+
"scripts": []
|
| 342 |
},
|
| 343 |
{
|
| 344 |
"name": "Slavey-Hare",
|
| 345 |
"iso_1_code": null,
|
| 346 |
"iso_3_code": null,
|
|
|
|
| 347 |
"children": [
|
| 348 |
{
|
| 349 |
"name": "Slavey, North",
|
| 350 |
"iso_1_code": null,
|
| 351 |
"iso_3_code": "scs",
|
|
|
|
| 352 |
"children": [],
|
| 353 |
+
"tokenizers": {},
|
| 354 |
"node_i": "3811",
|
| 355 |
+
"native_tokenizers": [],
|
| 356 |
+
"scripts": []
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"name": "Slavey, South",
|
| 360 |
"iso_1_code": null,
|
| 361 |
"iso_3_code": "xsl",
|
|
|
|
| 362 |
"children": [],
|
| 363 |
+
"tokenizers": {},
|
| 364 |
"node_i": "3812",
|
| 365 |
+
"native_tokenizers": [],
|
| 366 |
+
"scripts": []
|
| 367 |
}
|
| 368 |
],
|
| 369 |
+
"tokenizers": {},
|
| 370 |
"node_i": "3810",
|
| 371 |
+
"native_tokenizers": [],
|
| 372 |
+
"scripts": []
|
| 373 |
},
|
| 374 |
{
|
| 375 |
"name": "Tahltan",
|
| 376 |
"iso_1_code": null,
|
| 377 |
"iso_3_code": null,
|
|
|
|
| 378 |
"children": [
|
| 379 |
{
|
| 380 |
"name": "Kaska",
|
| 381 |
"iso_1_code": null,
|
| 382 |
"iso_3_code": "kkz",
|
|
|
|
| 383 |
"children": [],
|
| 384 |
+
"tokenizers": {},
|
| 385 |
"node_i": "3814",
|
| 386 |
+
"native_tokenizers": [],
|
| 387 |
+
"scripts": []
|
| 388 |
},
|
| 389 |
{
|
| 390 |
"name": "Tagish",
|
| 391 |
"iso_1_code": null,
|
| 392 |
"iso_3_code": "tgx",
|
|
|
|
| 393 |
"children": [],
|
| 394 |
+
"tokenizers": {},
|
| 395 |
"node_i": "3815",
|
| 396 |
+
"native_tokenizers": [],
|
| 397 |
+
"scripts": []
|
| 398 |
},
|
| 399 |
{
|
| 400 |
"name": "Tahltan",
|
| 401 |
"iso_1_code": null,
|
| 402 |
"iso_3_code": "tht",
|
|
|
|
| 403 |
"children": [],
|
| 404 |
+
"tokenizers": {},
|
| 405 |
"node_i": "3816",
|
| 406 |
+
"native_tokenizers": [],
|
| 407 |
+
"scripts": []
|
| 408 |
}
|
| 409 |
],
|
| 410 |
+
"tokenizers": {},
|
| 411 |
"node_i": "3813",
|
| 412 |
+
"native_tokenizers": [],
|
| 413 |
+
"scripts": []
|
| 414 |
},
|
| 415 |
{
|
| 416 |
"name": "Tuchone",
|
| 417 |
"iso_1_code": null,
|
| 418 |
"iso_3_code": null,
|
|
|
|
| 419 |
"children": [
|
| 420 |
{
|
| 421 |
"name": "Tutchone, Southern",
|
| 422 |
"iso_1_code": null,
|
| 423 |
"iso_3_code": "tce",
|
|
|
|
| 424 |
"children": [],
|
| 425 |
+
"tokenizers": {},
|
| 426 |
"node_i": "3818",
|
| 427 |
+
"native_tokenizers": [],
|
| 428 |
+
"scripts": []
|
| 429 |
},
|
| 430 |
{
|
| 431 |
"name": "Tutchone, Northern",
|
| 432 |
"iso_1_code": null,
|
| 433 |
"iso_3_code": "ttm",
|
|
|
|
| 434 |
"children": [],
|
| 435 |
+
"tokenizers": {},
|
| 436 |
"node_i": "3819",
|
| 437 |
+
"native_tokenizers": [],
|
| 438 |
+
"scripts": []
|
| 439 |
}
|
| 440 |
],
|
| 441 |
+
"tokenizers": {},
|
| 442 |
"node_i": "3817",
|
| 443 |
+
"native_tokenizers": [],
|
| 444 |
+
"scripts": []
|
| 445 |
}
|
| 446 |
],
|
| 447 |
+
"tokenizers": {},
|
| 448 |
"node_i": "3787",
|
| 449 |
+
"native_tokenizers": [],
|
| 450 |
+
"scripts": []
|
| 451 |
},
|
| 452 |
{
|
| 453 |
"name": "Pacific Coast Athabaskan",
|
| 454 |
"iso_1_code": null,
|
| 455 |
"iso_3_code": null,
|
|
|
|
| 456 |
"children": [
|
| 457 |
{
|
| 458 |
"name": "Kwalhioqua-Tlatskanai",
|
| 459 |
"iso_1_code": null,
|
| 460 |
"iso_3_code": "qwt",
|
|
|
|
| 461 |
"children": [],
|
| 462 |
+
"tokenizers": {},
|
| 463 |
"node_i": "3821",
|
| 464 |
+
"native_tokenizers": [],
|
| 465 |
+
"scripts": []
|
| 466 |
},
|
| 467 |
{
|
| 468 |
"name": "California Athabaskan",
|
| 469 |
"iso_1_code": null,
|
| 470 |
"iso_3_code": null,
|
|
|
|
| 471 |
"children": [
|
| 472 |
{
|
| 473 |
"name": "Hupa",
|
| 474 |
"iso_1_code": null,
|
| 475 |
"iso_3_code": "hup",
|
|
|
|
| 476 |
"children": [],
|
| 477 |
+
"tokenizers": {},
|
| 478 |
"node_i": "3823",
|
| 479 |
+
"native_tokenizers": [],
|
| 480 |
+
"scripts": []
|
| 481 |
},
|
| 482 |
{
|
| 483 |
"name": "Kato",
|
| 484 |
"iso_1_code": null,
|
| 485 |
"iso_3_code": "ktw",
|
|
|
|
| 486 |
"children": [],
|
| 487 |
+
"tokenizers": {},
|
| 488 |
"node_i": "3824",
|
| 489 |
+
"native_tokenizers": [],
|
| 490 |
+
"scripts": []
|
| 491 |
},
|
| 492 |
{
|
| 493 |
"name": "Mattole",
|
| 494 |
"iso_1_code": null,
|
| 495 |
"iso_3_code": "mvb",
|
|
|
|
| 496 |
"children": [],
|
| 497 |
+
"tokenizers": {},
|
| 498 |
"node_i": "3825",
|
| 499 |
+
"native_tokenizers": [],
|
| 500 |
+
"scripts": []
|
| 501 |
},
|
| 502 |
{
|
| 503 |
"name": "Wailaki",
|
| 504 |
"iso_1_code": null,
|
| 505 |
"iso_3_code": "wlk",
|
|
|
|
| 506 |
"children": [],
|
| 507 |
+
"tokenizers": {},
|
| 508 |
"node_i": "3826",
|
| 509 |
+
"native_tokenizers": [],
|
| 510 |
+
"scripts": []
|
| 511 |
}
|
| 512 |
],
|
| 513 |
+
"tokenizers": {},
|
| 514 |
"node_i": "3822",
|
| 515 |
+
"native_tokenizers": [],
|
| 516 |
+
"scripts": []
|
| 517 |
},
|
| 518 |
{
|
| 519 |
"name": "Oregon Athabaskan",
|
| 520 |
"iso_1_code": null,
|
| 521 |
"iso_3_code": null,
|
|
|
|
| 522 |
"children": [
|
| 523 |
{
|
| 524 |
"name": "Galice",
|
| 525 |
"iso_1_code": null,
|
| 526 |
"iso_3_code": "gce",
|
|
|
|
| 527 |
"children": [],
|
| 528 |
+
"tokenizers": {},
|
| 529 |
"node_i": "3828",
|
| 530 |
+
"native_tokenizers": [],
|
| 531 |
+
"scripts": []
|
| 532 |
},
|
| 533 |
{
|
| 534 |
"name": "Upper Umpqua",
|
| 535 |
"iso_1_code": null,
|
| 536 |
"iso_3_code": "xup",
|
|
|
|
| 537 |
"children": [],
|
| 538 |
+
"tokenizers": {},
|
| 539 |
"node_i": "3829",
|
| 540 |
+
"native_tokenizers": [],
|
| 541 |
+
"scripts": []
|
| 542 |
},
|
| 543 |
{
|
| 544 |
"name": "Tolowa-Chetco",
|
| 545 |
"iso_1_code": null,
|
| 546 |
"iso_3_code": null,
|
|
|
|
| 547 |
"children": [
|
| 548 |
{
|
| 549 |
"name": "Chetco",
|
| 550 |
"iso_1_code": null,
|
| 551 |
"iso_3_code": "ctc",
|
|
|
|
| 552 |
"children": [],
|
| 553 |
+
"tokenizers": {},
|
| 554 |
"node_i": "3831",
|
| 555 |
+
"native_tokenizers": [],
|
| 556 |
+
"scripts": []
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"name": "Tolowa",
|
| 560 |
"iso_1_code": null,
|
| 561 |
"iso_3_code": "tol",
|
|
|
|
| 562 |
"children": [],
|
| 563 |
+
"tokenizers": {},
|
| 564 |
"node_i": "3832",
|
| 565 |
+
"native_tokenizers": [],
|
| 566 |
+
"scripts": []
|
| 567 |
}
|
| 568 |
],
|
| 569 |
+
"tokenizers": {},
|
| 570 |
"node_i": "3830",
|
| 571 |
+
"native_tokenizers": [],
|
| 572 |
+
"scripts": []
|
| 573 |
},
|
| 574 |
{
|
| 575 |
"name": "Tututni-Chasta Costa-Coquille",
|
| 576 |
"iso_1_code": null,
|
| 577 |
"iso_3_code": null,
|
|
|
|
| 578 |
"children": [
|
| 579 |
{
|
| 580 |
"name": "Coquille",
|
| 581 |
"iso_1_code": null,
|
| 582 |
"iso_3_code": "coq",
|
|
|
|
| 583 |
"children": [],
|
| 584 |
+
"tokenizers": {},
|
| 585 |
"node_i": "3834",
|
| 586 |
+
"native_tokenizers": [],
|
| 587 |
+
"scripts": []
|
| 588 |
},
|
| 589 |
{
|
| 590 |
"name": "Tututni",
|
| 591 |
"iso_1_code": null,
|
| 592 |
"iso_3_code": "tuu",
|
|
|
|
| 593 |
"children": [],
|
| 594 |
+
"tokenizers": {},
|
| 595 |
"node_i": "3835",
|
| 596 |
+
"native_tokenizers": [],
|
| 597 |
+
"scripts": []
|
| 598 |
}
|
| 599 |
],
|
| 600 |
+
"tokenizers": {},
|
| 601 |
"node_i": "3833",
|
| 602 |
+
"native_tokenizers": [],
|
| 603 |
+
"scripts": []
|
| 604 |
}
|
| 605 |
],
|
| 606 |
+
"tokenizers": {},
|
| 607 |
"node_i": "3827",
|
| 608 |
+
"native_tokenizers": [],
|
| 609 |
+
"scripts": []
|
| 610 |
}
|
| 611 |
],
|
| 612 |
+
"tokenizers": {},
|
| 613 |
"node_i": "3820",
|
| 614 |
+
"native_tokenizers": [],
|
| 615 |
+
"scripts": []
|
| 616 |
}
|
| 617 |
],
|
| 618 |
+
"tokenizers": {},
|
| 619 |
"node_i": "3778",
|
| 620 |
+
"native_tokenizers": [],
|
| 621 |
+
"scripts": []
|
| 622 |
},
|
| 623 |
{
|
| 624 |
"name": "Tlingit",
|
| 625 |
"iso_1_code": null,
|
| 626 |
"iso_3_code": null,
|
|
|
|
| 627 |
"children": [
|
| 628 |
{
|
| 629 |
"name": "Tlingit",
|
| 630 |
"iso_1_code": null,
|
| 631 |
"iso_3_code": "tli",
|
|
|
|
| 632 |
"children": [],
|
| 633 |
+
"tokenizers": {},
|
| 634 |
"node_i": "3837",
|
| 635 |
+
"native_tokenizers": [],
|
| 636 |
+
"scripts": []
|
| 637 |
}
|
| 638 |
],
|
| 639 |
+
"tokenizers": {},
|
| 640 |
"node_i": "3836",
|
| 641 |
+
"native_tokenizers": [],
|
| 642 |
+
"scripts": []
|
| 643 |
}
|
| 644 |
],
|
| 645 |
+
"tokenizers": {},
|
| 646 |
"node_i": "3776",
|
| 647 |
+
"native_tokenizers": [],
|
| 648 |
+
"scripts": []
|
| 649 |
}
|
data/Fas.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Fas",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Baibai",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "bbf",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3839",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Momu",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "fqs",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3840",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3838",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Fas",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Baibai",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "bbf",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3839",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Momu",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "fqs",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3840",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3838",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Guajiboan.json
CHANGED
|
@@ -2,77 +2,77 @@
|
|
| 2 |
"name": "Guajiboan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Cuiba",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "cui",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3842",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Guayabero",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "guo",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3843",
|
|
|
|
| 26 |
"scripts": [
|
| 27 |
"Latn"
|
| 28 |
-
]
|
| 29 |
-
"own_tokenizer": false
|
| 30 |
},
|
| 31 |
{
|
| 32 |
"name": "Guajibo",
|
| 33 |
"iso_1_code": null,
|
| 34 |
"iso_3_code": null,
|
| 35 |
-
"tokenizers": {},
|
| 36 |
"children": [
|
| 37 |
{
|
| 38 |
"name": "Playero",
|
| 39 |
"iso_1_code": null,
|
| 40 |
"iso_3_code": "gob",
|
| 41 |
-
"tokenizers": {},
|
| 42 |
"children": [],
|
|
|
|
| 43 |
"node_i": "3845",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"name": "Guahibo",
|
| 49 |
"iso_1_code": null,
|
| 50 |
"iso_3_code": "guh",
|
| 51 |
-
"tokenizers": {},
|
| 52 |
"children": [],
|
|
|
|
| 53 |
"node_i": "3846",
|
|
|
|
| 54 |
"scripts": [
|
| 55 |
"Latn"
|
| 56 |
-
]
|
| 57 |
-
"own_tokenizer": false
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"name": "Macagu\u00e1n",
|
| 61 |
"iso_1_code": null,
|
| 62 |
"iso_3_code": "mbn",
|
| 63 |
-
"tokenizers": {},
|
| 64 |
"children": [],
|
|
|
|
| 65 |
"node_i": "3847",
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
}
|
| 69 |
],
|
|
|
|
| 70 |
"node_i": "3844",
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
}
|
| 74 |
],
|
|
|
|
| 75 |
"node_i": "3841",
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
}
|
|
|
|
| 2 |
"name": "Guajiboan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Cuiba",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "cui",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3842",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Guayabero",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "guo",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3843",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
"scripts": [
|
| 27 |
"Latn"
|
| 28 |
+
]
|
|
|
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"name": "Guajibo",
|
| 32 |
"iso_1_code": null,
|
| 33 |
"iso_3_code": null,
|
|
|
|
| 34 |
"children": [
|
| 35 |
{
|
| 36 |
"name": "Playero",
|
| 37 |
"iso_1_code": null,
|
| 38 |
"iso_3_code": "gob",
|
|
|
|
| 39 |
"children": [],
|
| 40 |
+
"tokenizers": {},
|
| 41 |
"node_i": "3845",
|
| 42 |
+
"native_tokenizers": [],
|
| 43 |
+
"scripts": []
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"name": "Guahibo",
|
| 47 |
"iso_1_code": null,
|
| 48 |
"iso_3_code": "guh",
|
|
|
|
| 49 |
"children": [],
|
| 50 |
+
"tokenizers": {},
|
| 51 |
"node_i": "3846",
|
| 52 |
+
"native_tokenizers": [],
|
| 53 |
"scripts": [
|
| 54 |
"Latn"
|
| 55 |
+
]
|
|
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"name": "Macagu\u00e1n",
|
| 59 |
"iso_1_code": null,
|
| 60 |
"iso_3_code": "mbn",
|
|
|
|
| 61 |
"children": [],
|
| 62 |
+
"tokenizers": {},
|
| 63 |
"node_i": "3847",
|
| 64 |
+
"native_tokenizers": [],
|
| 65 |
+
"scripts": []
|
| 66 |
}
|
| 67 |
],
|
| 68 |
+
"tokenizers": {},
|
| 69 |
"node_i": "3844",
|
| 70 |
+
"native_tokenizers": [],
|
| 71 |
+
"scripts": []
|
| 72 |
}
|
| 73 |
],
|
| 74 |
+
"tokenizers": {},
|
| 75 |
"node_i": "3841",
|
| 76 |
+
"native_tokenizers": [],
|
| 77 |
+
"scripts": []
|
| 78 |
}
|
data/Guaykuruan.json
CHANGED
|
@@ -2,90 +2,90 @@
|
|
| 2 |
"name": "Guaykuruan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Guaykur\u00fa",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Abipon",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": "axb",
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [],
|
|
|
|
| 19 |
"node_i": "3850",
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"name": "Kadiw\u00e9u",
|
| 25 |
"iso_1_code": null,
|
| 26 |
"iso_3_code": "kbc",
|
| 27 |
-
"tokenizers": {},
|
| 28 |
"children": [],
|
|
|
|
| 29 |
"node_i": "3851",
|
|
|
|
| 30 |
"scripts": [
|
| 31 |
"Latn"
|
| 32 |
-
]
|
| 33 |
-
"own_tokenizer": false
|
| 34 |
}
|
| 35 |
],
|
|
|
|
| 36 |
"node_i": "3849",
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"name": "Southern",
|
| 42 |
"iso_1_code": null,
|
| 43 |
"iso_3_code": null,
|
| 44 |
-
"tokenizers": {},
|
| 45 |
"children": [
|
| 46 |
{
|
| 47 |
"name": "Mocov\u00ed",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "moc",
|
| 50 |
-
"tokenizers": {},
|
| 51 |
"children": [],
|
|
|
|
| 52 |
"node_i": "3853",
|
|
|
|
| 53 |
"scripts": [
|
| 54 |
"Latn"
|
| 55 |
-
]
|
| 56 |
-
"own_tokenizer": false
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"name": "Pilag\u00e1",
|
| 60 |
"iso_1_code": null,
|
| 61 |
"iso_3_code": "plg",
|
| 62 |
-
"tokenizers": {},
|
| 63 |
"children": [],
|
|
|
|
| 64 |
"node_i": "3854",
|
|
|
|
| 65 |
"scripts": [
|
| 66 |
"Latn"
|
| 67 |
-
]
|
| 68 |
-
"own_tokenizer": false
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"name": "Toba",
|
| 72 |
"iso_1_code": null,
|
| 73 |
"iso_3_code": "tob",
|
| 74 |
-
"tokenizers": {},
|
| 75 |
"children": [],
|
|
|
|
| 76 |
"node_i": "3855",
|
|
|
|
| 77 |
"scripts": [
|
| 78 |
"Latn"
|
| 79 |
-
]
|
| 80 |
-
"own_tokenizer": false
|
| 81 |
}
|
| 82 |
],
|
|
|
|
| 83 |
"node_i": "3852",
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
}
|
| 87 |
],
|
|
|
|
| 88 |
"node_i": "3848",
|
| 89 |
-
"
|
| 90 |
-
"
|
| 91 |
}
|
|
|
|
| 2 |
"name": "Guaykuruan",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Guaykur\u00fa",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Abipon",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": "axb",
|
|
|
|
| 15 |
"children": [],
|
| 16 |
+
"tokenizers": {},
|
| 17 |
"node_i": "3850",
|
| 18 |
+
"native_tokenizers": [],
|
| 19 |
+
"scripts": []
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"name": "Kadiw\u00e9u",
|
| 23 |
"iso_1_code": null,
|
| 24 |
"iso_3_code": "kbc",
|
|
|
|
| 25 |
"children": [],
|
| 26 |
+
"tokenizers": {},
|
| 27 |
"node_i": "3851",
|
| 28 |
+
"native_tokenizers": [],
|
| 29 |
"scripts": [
|
| 30 |
"Latn"
|
| 31 |
+
]
|
|
|
|
| 32 |
}
|
| 33 |
],
|
| 34 |
+
"tokenizers": {},
|
| 35 |
"node_i": "3849",
|
| 36 |
+
"native_tokenizers": [],
|
| 37 |
+
"scripts": []
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Southern",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": null,
|
|
|
|
| 43 |
"children": [
|
| 44 |
{
|
| 45 |
"name": "Mocov\u00ed",
|
| 46 |
"iso_1_code": null,
|
| 47 |
"iso_3_code": "moc",
|
|
|
|
| 48 |
"children": [],
|
| 49 |
+
"tokenizers": {},
|
| 50 |
"node_i": "3853",
|
| 51 |
+
"native_tokenizers": [],
|
| 52 |
"scripts": [
|
| 53 |
"Latn"
|
| 54 |
+
]
|
|
|
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"name": "Pilag\u00e1",
|
| 58 |
"iso_1_code": null,
|
| 59 |
"iso_3_code": "plg",
|
|
|
|
| 60 |
"children": [],
|
| 61 |
+
"tokenizers": {},
|
| 62 |
"node_i": "3854",
|
| 63 |
+
"native_tokenizers": [],
|
| 64 |
"scripts": [
|
| 65 |
"Latn"
|
| 66 |
+
]
|
|
|
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"name": "Toba",
|
| 70 |
"iso_1_code": null,
|
| 71 |
"iso_3_code": "tob",
|
|
|
|
| 72 |
"children": [],
|
| 73 |
+
"tokenizers": {},
|
| 74 |
"node_i": "3855",
|
| 75 |
+
"native_tokenizers": [],
|
| 76 |
"scripts": [
|
| 77 |
"Latn"
|
| 78 |
+
]
|
|
|
|
| 79 |
}
|
| 80 |
],
|
| 81 |
+
"tokenizers": {},
|
| 82 |
"node_i": "3852",
|
| 83 |
+
"native_tokenizers": [],
|
| 84 |
+
"scripts": []
|
| 85 |
}
|
| 86 |
],
|
| 87 |
+
"tokenizers": {},
|
| 88 |
"node_i": "3848",
|
| 89 |
+
"native_tokenizers": [],
|
| 90 |
+
"scripts": []
|
| 91 |
}
|
data/Gum.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"name": "Gum",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [],
|
|
|
|
| 7 |
"node_i": "3856",
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
}
|
|
|
|
| 2 |
"name": "Gum",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [],
|
| 6 |
+
"tokenizers": {},
|
| 7 |
"node_i": "3856",
|
| 8 |
+
"native_tokenizers": [],
|
| 9 |
+
"scripts": []
|
| 10 |
}
|
data/Haida.json
CHANGED
|
@@ -2,30 +2,30 @@
|
|
| 2 |
"name": "Haida",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Haida, Southern",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "hax",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3858",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"name": "Haida, Northern",
|
| 19 |
"iso_1_code": null,
|
| 20 |
"iso_3_code": "hdn",
|
| 21 |
-
"tokenizers": {},
|
| 22 |
"children": [],
|
|
|
|
| 23 |
"node_i": "3859",
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
}
|
| 27 |
],
|
|
|
|
| 28 |
"node_i": "3857",
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
}
|
|
|
|
| 2 |
"name": "Haida",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Haida, Southern",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "hax",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3858",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
+
"scripts": []
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"name": "Haida, Northern",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "hdn",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3859",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
}
|
| 26 |
],
|
| 27 |
+
"tokenizers": {},
|
| 28 |
"node_i": "3857",
|
| 29 |
+
"native_tokenizers": [],
|
| 30 |
+
"scripts": []
|
| 31 |
}
|
data/Harákmbut.json
CHANGED
|
@@ -2,32 +2,32 @@
|
|
| 2 |
"name": "Har\u00e1kmbut",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Amarakaeri",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": "amr",
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [],
|
|
|
|
| 13 |
"node_i": "3861",
|
|
|
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
-
]
|
| 17 |
-
"own_tokenizer": false
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"name": "Huachipaeri",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "hug",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3862",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
}
|
| 29 |
],
|
|
|
|
| 30 |
"node_i": "3860",
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
}
|
|
|
|
| 2 |
"name": "Har\u00e1kmbut",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Amarakaeri",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": "amr",
|
|
|
|
| 10 |
"children": [],
|
| 11 |
+
"tokenizers": {},
|
| 12 |
"node_i": "3861",
|
| 13 |
+
"native_tokenizers": [],
|
| 14 |
"scripts": [
|
| 15 |
"Latn"
|
| 16 |
+
]
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"name": "Huachipaeri",
|
| 20 |
"iso_1_code": null,
|
| 21 |
"iso_3_code": "hug",
|
|
|
|
| 22 |
"children": [],
|
| 23 |
+
"tokenizers": {},
|
| 24 |
"node_i": "3862",
|
| 25 |
+
"native_tokenizers": [],
|
| 26 |
+
"scripts": []
|
| 27 |
}
|
| 28 |
],
|
| 29 |
+
"tokenizers": {},
|
| 30 |
"node_i": "3860",
|
| 31 |
+
"native_tokenizers": [],
|
| 32 |
+
"scripts": []
|
| 33 |
}
|
data/Hmong-Mien.json
CHANGED
|
@@ -2,527 +2,527 @@
|
|
| 2 |
"name": "Hmong-Mien",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
| 5 |
-
"tokenizers": {},
|
| 6 |
"children": [
|
| 7 |
{
|
| 8 |
"name": "Hmongic",
|
| 9 |
"iso_1_code": null,
|
| 10 |
"iso_3_code": null,
|
| 11 |
-
"tokenizers": {},
|
| 12 |
"children": [
|
| 13 |
{
|
| 14 |
"name": "Bunu",
|
| 15 |
"iso_1_code": null,
|
| 16 |
"iso_3_code": null,
|
| 17 |
-
"tokenizers": {},
|
| 18 |
"children": [
|
| 19 |
{
|
| 20 |
"name": "Bunu, Younuo",
|
| 21 |
"iso_1_code": null,
|
| 22 |
"iso_3_code": "buh",
|
| 23 |
-
"tokenizers": {},
|
| 24 |
"children": [],
|
|
|
|
| 25 |
"node_i": "3866",
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"name": "Bunu, Wunai",
|
| 31 |
"iso_1_code": null,
|
| 32 |
"iso_3_code": "bwn",
|
| 33 |
-
"tokenizers": {},
|
| 34 |
"children": [],
|
|
|
|
| 35 |
"node_i": "3867",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"name": "Bunu, Bu-Nao",
|
| 41 |
"iso_1_code": null,
|
| 42 |
"iso_3_code": "bwx",
|
| 43 |
-
"tokenizers": {},
|
| 44 |
"children": [],
|
|
|
|
| 45 |
"node_i": "3868",
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"name": "Bunu, Jiongnai",
|
| 51 |
"iso_1_code": null,
|
| 52 |
"iso_3_code": "pnu",
|
| 53 |
-
"tokenizers": {},
|
| 54 |
"children": [],
|
|
|
|
| 55 |
"node_i": "3869",
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
}
|
| 59 |
],
|
|
|
|
| 60 |
"node_i": "3865",
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"name": "Chuanqiandian",
|
| 66 |
"iso_1_code": null,
|
| 67 |
"iso_3_code": null,
|
| 68 |
-
"tokenizers": {},
|
| 69 |
"children": [
|
| 70 |
{
|
| 71 |
"name": "Miao, Chuanqiandian Cluster",
|
| 72 |
"iso_1_code": null,
|
| 73 |
"iso_3_code": "cqd",
|
| 74 |
-
"tokenizers": {},
|
| 75 |
"children": [],
|
|
|
|
| 76 |
"node_i": "3871",
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
},
|
| 80 |
{
|
| 81 |
"name": "Miao, Southern Mashan",
|
| 82 |
"iso_1_code": null,
|
| 83 |
"iso_3_code": "hma",
|
| 84 |
-
"tokenizers": {},
|
| 85 |
"children": [],
|
|
|
|
| 86 |
"node_i": "3872",
|
| 87 |
-
"
|
| 88 |
-
"
|
| 89 |
},
|
| 90 |
{
|
| 91 |
"name": "Miao, Central Huishui",
|
| 92 |
"iso_1_code": null,
|
| 93 |
"iso_3_code": "hmc",
|
| 94 |
-
"tokenizers": {},
|
| 95 |
"children": [],
|
|
|
|
| 96 |
"node_i": "3873",
|
| 97 |
-
"
|
| 98 |
-
"
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"name": "Miao, Large Flowery",
|
| 102 |
"iso_1_code": null,
|
| 103 |
"iso_3_code": "hmd",
|
| 104 |
-
"tokenizers": {},
|
| 105 |
"children": [],
|
|
|
|
| 106 |
"node_i": "3874",
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"name": "Miao, Eastern Huishui",
|
| 112 |
"iso_1_code": null,
|
| 113 |
"iso_3_code": "hme",
|
| 114 |
-
"tokenizers": {},
|
| 115 |
"children": [],
|
|
|
|
| 116 |
"node_i": "3875",
|
| 117 |
-
"
|
| 118 |
-
"
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"name": "Hmong Don",
|
| 122 |
"iso_1_code": null,
|
| 123 |
"iso_3_code": "hmf",
|
| 124 |
-
"tokenizers": {},
|
| 125 |
"children": [],
|
|
|
|
| 126 |
"node_i": "3876",
|
| 127 |
-
"
|
| 128 |
-
"
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"name": "Miao, Southwestern Guiyang",
|
| 132 |
"iso_1_code": null,
|
| 133 |
"iso_3_code": "hmg",
|
| 134 |
-
"tokenizers": {},
|
| 135 |
"children": [],
|
|
|
|
| 136 |
"node_i": "3877",
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"name": "Miao, Southwestern Huishui",
|
| 142 |
"iso_1_code": null,
|
| 143 |
"iso_3_code": "hmh",
|
| 144 |
-
"tokenizers": {},
|
| 145 |
"children": [],
|
|
|
|
| 146 |
"node_i": "3878",
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"name": "Miao, Northern Huishui",
|
| 152 |
"iso_1_code": null,
|
| 153 |
"iso_3_code": "hmi",
|
| 154 |
-
"tokenizers": {},
|
| 155 |
"children": [],
|
|
|
|
| 156 |
"node_i": "3879",
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
},
|
| 160 |
{
|
| 161 |
"name": "Ge",
|
| 162 |
"iso_1_code": null,
|
| 163 |
"iso_3_code": "hmj",
|
| 164 |
-
"tokenizers": {},
|
| 165 |
"children": [],
|
|
|
|
| 166 |
"node_i": "3880",
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
},
|
| 170 |
{
|
| 171 |
"name": "Miao, Luopohe",
|
| 172 |
"iso_1_code": null,
|
| 173 |
"iso_3_code": "hml",
|
| 174 |
-
"tokenizers": {},
|
| 175 |
"children": [],
|
|
|
|
| 176 |
"node_i": "3881",
|
| 177 |
-
"
|
| 178 |
-
"
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"name": "Miao, Central Mashan",
|
| 182 |
"iso_1_code": null,
|
| 183 |
"iso_3_code": "hmm",
|
| 184 |
-
"tokenizers": {},
|
| 185 |
"children": [],
|
|
|
|
| 186 |
"node_i": "3882",
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
},
|
| 190 |
{
|
| 191 |
"name": "Miao, Northern Mashan",
|
| 192 |
"iso_1_code": null,
|
| 193 |
"iso_3_code": "hmp",
|
| 194 |
-
"tokenizers": {},
|
| 195 |
"children": [],
|
|
|
|
| 196 |
"node_i": "3883",
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
},
|
| 200 |
{
|
| 201 |
"name": "Hmong D\u00f4",
|
| 202 |
"iso_1_code": null,
|
| 203 |
"iso_3_code": "hmv",
|
| 204 |
-
"tokenizers": {},
|
| 205 |
"children": [],
|
|
|
|
| 206 |
"node_i": "3884",
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
},
|
| 210 |
{
|
| 211 |
"name": "Miao, Western Mashan",
|
| 212 |
"iso_1_code": null,
|
| 213 |
"iso_3_code": "hmw",
|
| 214 |
-
"tokenizers": {},
|
| 215 |
"children": [],
|
|
|
|
| 216 |
"node_i": "3885",
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
},
|
| 220 |
{
|
| 221 |
"name": "Miao, Southern Guiyang",
|
| 222 |
"iso_1_code": null,
|
| 223 |
"iso_3_code": "hmy",
|
| 224 |
-
"tokenizers": {},
|
| 225 |
"children": [],
|
|
|
|
| 226 |
"node_i": "3886",
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
},
|
| 230 |
{
|
| 231 |
"name": "Sinicized Miao",
|
| 232 |
"iso_1_code": null,
|
| 233 |
"iso_3_code": "hmz",
|
| 234 |
-
"tokenizers": {},
|
| 235 |
"children": [],
|
|
|
|
| 236 |
"node_i": "3887",
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
},
|
| 240 |
{
|
| 241 |
"name": "Hmong Njua",
|
| 242 |
"iso_1_code": null,
|
| 243 |
"iso_3_code": "hnj",
|
| 244 |
-
"tokenizers": {},
|
| 245 |
"children": [],
|
|
|
|
| 246 |
"node_i": "3888",
|
|
|
|
| 247 |
"scripts": [
|
| 248 |
"Latn"
|
| 249 |
-
]
|
| 250 |
-
"own_tokenizer": false
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"name": "Miao, Horned",
|
| 254 |
"iso_1_code": null,
|
| 255 |
"iso_3_code": "hrm",
|
| 256 |
-
"tokenizers": {},
|
| 257 |
"children": [],
|
|
|
|
| 258 |
"node_i": "3889",
|
| 259 |
-
"
|
| 260 |
-
"
|
| 261 |
},
|
| 262 |
{
|
| 263 |
"name": "Miao, Northern Guiyang",
|
| 264 |
"iso_1_code": null,
|
| 265 |
"iso_3_code": "huj",
|
| 266 |
-
"tokenizers": {},
|
| 267 |
"children": [],
|
|
|
|
| 268 |
"node_i": "3890",
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
},
|
| 272 |
{
|
| 273 |
"name": "Hmong Daw",
|
| 274 |
"iso_1_code": null,
|
| 275 |
"iso_3_code": "mww",
|
| 276 |
-
"tokenizers": {},
|
| 277 |
"children": [],
|
|
|
|
| 278 |
"node_i": "3891",
|
|
|
|
| 279 |
"scripts": [
|
| 280 |
"Latn"
|
| 281 |
-
]
|
| 282 |
-
"own_tokenizer": false
|
| 283 |
},
|
| 284 |
{
|
| 285 |
"name": "Miao, Small Flowery",
|
| 286 |
"iso_1_code": null,
|
| 287 |
"iso_3_code": "sfm",
|
| 288 |
-
"tokenizers": {},
|
| 289 |
"children": [],
|
|
|
|
| 290 |
"node_i": "3892",
|
| 291 |
-
"
|
| 292 |
-
"
|
| 293 |
}
|
| 294 |
],
|
|
|
|
| 295 |
"node_i": "3870",
|
| 296 |
-
"
|
| 297 |
-
"
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"name": "Pa-hng",
|
| 301 |
"iso_1_code": null,
|
| 302 |
"iso_3_code": null,
|
| 303 |
-
"tokenizers": {},
|
| 304 |
"children": [
|
| 305 |
{
|
| 306 |
"name": "Pa-Hng",
|
| 307 |
"iso_1_code": null,
|
| 308 |
"iso_3_code": "pha",
|
| 309 |
-
"tokenizers": {},
|
| 310 |
"children": [],
|
|
|
|
| 311 |
"node_i": "3894",
|
| 312 |
-
"
|
| 313 |
-
"
|
| 314 |
}
|
| 315 |
],
|
|
|
|
| 316 |
"node_i": "3893",
|
| 317 |
-
"
|
| 318 |
-
"
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"name": "Qiandong",
|
| 322 |
"iso_1_code": null,
|
| 323 |
"iso_3_code": null,
|
| 324 |
-
"tokenizers": {},
|
| 325 |
"children": [
|
| 326 |
{
|
| 327 |
"name": "Miao, Northern Qiandong",
|
| 328 |
"iso_1_code": null,
|
| 329 |
"iso_3_code": "hea",
|
| 330 |
-
"tokenizers": {},
|
| 331 |
"children": [],
|
|
|
|
| 332 |
"node_i": "3896",
|
| 333 |
-
"
|
| 334 |
-
"
|
| 335 |
},
|
| 336 |
{
|
| 337 |
"name": "Miao, Eastern Qiandong",
|
| 338 |
"iso_1_code": null,
|
| 339 |
"iso_3_code": "hmq",
|
| 340 |
-
"tokenizers": {},
|
| 341 |
"children": [],
|
|
|
|
| 342 |
"node_i": "3897",
|
| 343 |
-
"
|
| 344 |
-
"
|
| 345 |
},
|
| 346 |
{
|
| 347 |
"name": "Miao, Southern Qiandong",
|
| 348 |
"iso_1_code": null,
|
| 349 |
"iso_3_code": "hms",
|
| 350 |
-
"tokenizers": {},
|
| 351 |
"children": [],
|
|
|
|
| 352 |
"node_i": "3898",
|
| 353 |
-
"
|
| 354 |
-
"
|
| 355 |
},
|
| 356 |
{
|
| 357 |
"name": "N\u00e1-Meo",
|
| 358 |
"iso_1_code": null,
|
| 359 |
"iso_3_code": "neo",
|
| 360 |
-
"tokenizers": {},
|
| 361 |
"children": [],
|
|
|
|
| 362 |
"node_i": "3899",
|
| 363 |
-
"
|
| 364 |
-
"
|
| 365 |
}
|
| 366 |
],
|
|
|
|
| 367 |
"node_i": "3895",
|
| 368 |
-
"
|
| 369 |
-
"
|
| 370 |
},
|
| 371 |
{
|
| 372 |
"name": "Xiangxi",
|
| 373 |
"iso_1_code": null,
|
| 374 |
"iso_3_code": null,
|
| 375 |
-
"tokenizers": {},
|
| 376 |
"children": [
|
| 377 |
{
|
| 378 |
"name": "Miao, Western Xiangxi",
|
| 379 |
"iso_1_code": null,
|
| 380 |
"iso_3_code": "mmr",
|
| 381 |
-
"tokenizers": {},
|
| 382 |
"children": [],
|
|
|
|
| 383 |
"node_i": "3901",
|
| 384 |
-
"
|
| 385 |
-
"
|
| 386 |
},
|
| 387 |
{
|
| 388 |
"name": "Miao, Eastern Xiangxi",
|
| 389 |
"iso_1_code": null,
|
| 390 |
"iso_3_code": "muq",
|
| 391 |
-
"tokenizers": {},
|
| 392 |
"children": [],
|
|
|
|
| 393 |
"node_i": "3902",
|
| 394 |
-
"
|
| 395 |
-
"
|
| 396 |
}
|
| 397 |
],
|
|
|
|
| 398 |
"node_i": "3900",
|
| 399 |
-
"
|
| 400 |
-
"
|
| 401 |
}
|
| 402 |
],
|
|
|
|
| 403 |
"node_i": "3864",
|
| 404 |
-
"
|
| 405 |
-
"
|
| 406 |
},
|
| 407 |
{
|
| 408 |
"name": "Ho Nte",
|
| 409 |
"iso_1_code": null,
|
| 410 |
"iso_3_code": null,
|
| 411 |
-
"tokenizers": {},
|
| 412 |
"children": [
|
| 413 |
{
|
| 414 |
"name": "She",
|
| 415 |
"iso_1_code": null,
|
| 416 |
"iso_3_code": "shx",
|
| 417 |
-
"tokenizers": {},
|
| 418 |
"children": [],
|
|
|
|
| 419 |
"node_i": "3904",
|
| 420 |
-
"
|
| 421 |
-
"
|
| 422 |
}
|
| 423 |
],
|
|
|
|
| 424 |
"node_i": "3903",
|
| 425 |
-
"
|
| 426 |
-
"
|
| 427 |
},
|
| 428 |
{
|
| 429 |
"name": "Mienic",
|
| 430 |
"iso_1_code": null,
|
| 431 |
"iso_3_code": null,
|
| 432 |
-
"tokenizers": {},
|
| 433 |
"children": [
|
| 434 |
{
|
| 435 |
"name": "Biao-Jiao",
|
| 436 |
"iso_1_code": null,
|
| 437 |
"iso_3_code": null,
|
| 438 |
-
"tokenizers": {},
|
| 439 |
"children": [
|
| 440 |
{
|
| 441 |
"name": "Biao-Jiao Mien",
|
| 442 |
"iso_1_code": null,
|
| 443 |
"iso_3_code": "bje",
|
| 444 |
-
"tokenizers": {},
|
| 445 |
"children": [],
|
|
|
|
| 446 |
"node_i": "3907",
|
| 447 |
-
"
|
| 448 |
-
"
|
| 449 |
}
|
| 450 |
],
|
|
|
|
| 451 |
"node_i": "3906",
|
| 452 |
-
"
|
| 453 |
-
"
|
| 454 |
},
|
| 455 |
{
|
| 456 |
"name": "Mian-Jin",
|
| 457 |
"iso_1_code": null,
|
| 458 |
"iso_3_code": null,
|
| 459 |
-
"tokenizers": {},
|
| 460 |
"children": [
|
| 461 |
{
|
| 462 |
"name": "Biao Mon",
|
| 463 |
"iso_1_code": null,
|
| 464 |
"iso_3_code": "bmt",
|
| 465 |
-
"tokenizers": {},
|
| 466 |
"children": [],
|
|
|
|
| 467 |
"node_i": "3909",
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
},
|
| 471 |
{
|
| 472 |
"name": "Iu Mien",
|
| 473 |
"iso_1_code": null,
|
| 474 |
"iso_3_code": "ium",
|
| 475 |
-
"tokenizers": {},
|
| 476 |
"children": [],
|
|
|
|
| 477 |
"node_i": "3910",
|
|
|
|
| 478 |
"scripts": [
|
| 479 |
"Latn"
|
| 480 |
-
]
|
| 481 |
-
"own_tokenizer": false
|
| 482 |
},
|
| 483 |
{
|
| 484 |
"name": "Kim Mun",
|
| 485 |
"iso_1_code": null,
|
| 486 |
"iso_3_code": "mji",
|
| 487 |
-
"tokenizers": {},
|
| 488 |
"children": [],
|
|
|
|
| 489 |
"node_i": "3911",
|
| 490 |
-
"
|
| 491 |
-
"
|
| 492 |
}
|
| 493 |
],
|
|
|
|
| 494 |
"node_i": "3908",
|
| 495 |
-
"
|
| 496 |
-
"
|
| 497 |
},
|
| 498 |
{
|
| 499 |
"name": "Zaomin",
|
| 500 |
"iso_1_code": null,
|
| 501 |
"iso_3_code": null,
|
| 502 |
-
"tokenizers": {},
|
| 503 |
"children": [
|
| 504 |
{
|
| 505 |
"name": "Dzao Min",
|
| 506 |
"iso_1_code": null,
|
| 507 |
"iso_3_code": "bpn",
|
| 508 |
-
"tokenizers": {},
|
| 509 |
"children": [],
|
|
|
|
| 510 |
"node_i": "3913",
|
| 511 |
-
"
|
| 512 |
-
"
|
| 513 |
}
|
| 514 |
],
|
|
|
|
| 515 |
"node_i": "3912",
|
| 516 |
-
"
|
| 517 |
-
"
|
| 518 |
}
|
| 519 |
],
|
|
|
|
| 520 |
"node_i": "3905",
|
| 521 |
-
"
|
| 522 |
-
"
|
| 523 |
}
|
| 524 |
],
|
|
|
|
| 525 |
"node_i": "3863",
|
| 526 |
-
"
|
| 527 |
-
"
|
| 528 |
}
|
|
|
|
| 2 |
"name": "Hmong-Mien",
|
| 3 |
"iso_1_code": null,
|
| 4 |
"iso_3_code": null,
|
|
|
|
| 5 |
"children": [
|
| 6 |
{
|
| 7 |
"name": "Hmongic",
|
| 8 |
"iso_1_code": null,
|
| 9 |
"iso_3_code": null,
|
|
|
|
| 10 |
"children": [
|
| 11 |
{
|
| 12 |
"name": "Bunu",
|
| 13 |
"iso_1_code": null,
|
| 14 |
"iso_3_code": null,
|
|
|
|
| 15 |
"children": [
|
| 16 |
{
|
| 17 |
"name": "Bunu, Younuo",
|
| 18 |
"iso_1_code": null,
|
| 19 |
"iso_3_code": "buh",
|
|
|
|
| 20 |
"children": [],
|
| 21 |
+
"tokenizers": {},
|
| 22 |
"node_i": "3866",
|
| 23 |
+
"native_tokenizers": [],
|
| 24 |
+
"scripts": []
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"name": "Bunu, Wunai",
|
| 28 |
"iso_1_code": null,
|
| 29 |
"iso_3_code": "bwn",
|
|
|
|
| 30 |
"children": [],
|
| 31 |
+
"tokenizers": {},
|
| 32 |
"node_i": "3867",
|
| 33 |
+
"native_tokenizers": [],
|
| 34 |
+
"scripts": []
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"name": "Bunu, Bu-Nao",
|
| 38 |
"iso_1_code": null,
|
| 39 |
"iso_3_code": "bwx",
|
|
|
|
| 40 |
"children": [],
|
| 41 |
+
"tokenizers": {},
|
| 42 |
"node_i": "3868",
|
| 43 |
+
"native_tokenizers": [],
|
| 44 |
+
"scripts": []
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"name": "Bunu, Jiongnai",
|
| 48 |
"iso_1_code": null,
|
| 49 |
"iso_3_code": "pnu",
|
|
|
|
| 50 |
"children": [],
|
| 51 |
+
"tokenizers": {},
|
| 52 |
"node_i": "3869",
|
| 53 |
+
"native_tokenizers": [],
|
| 54 |
+
"scripts": []
|
| 55 |
}
|
| 56 |
],
|
| 57 |
+
"tokenizers": {},
|
| 58 |
"node_i": "3865",
|
| 59 |
+
"native_tokenizers": [],
|
| 60 |
+
"scripts": []
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"name": "Chuanqiandian",
|
| 64 |
"iso_1_code": null,
|
| 65 |
"iso_3_code": null,
|
|
|
|
| 66 |
"children": [
|
| 67 |
{
|
| 68 |
"name": "Miao, Chuanqiandian Cluster",
|
| 69 |
"iso_1_code": null,
|
| 70 |
"iso_3_code": "cqd",
|
|
|
|
| 71 |
"children": [],
|
| 72 |
+
"tokenizers": {},
|
| 73 |
"node_i": "3871",
|
| 74 |
+
"native_tokenizers": [],
|
| 75 |
+
"scripts": []
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"name": "Miao, Southern Mashan",
|
| 79 |
"iso_1_code": null,
|
| 80 |
"iso_3_code": "hma",
|
|
|
|
| 81 |
"children": [],
|
| 82 |
+
"tokenizers": {},
|
| 83 |
"node_i": "3872",
|
| 84 |
+
"native_tokenizers": [],
|
| 85 |
+
"scripts": []
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"name": "Miao, Central Huishui",
|
| 89 |
"iso_1_code": null,
|
| 90 |
"iso_3_code": "hmc",
|
|
|
|
| 91 |
"children": [],
|
| 92 |
+
"tokenizers": {},
|
| 93 |
"node_i": "3873",
|
| 94 |
+
"native_tokenizers": [],
|
| 95 |
+
"scripts": []
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"name": "Miao, Large Flowery",
|
| 99 |
"iso_1_code": null,
|
| 100 |
"iso_3_code": "hmd",
|
|
|
|
| 101 |
"children": [],
|
| 102 |
+
"tokenizers": {},
|
| 103 |
"node_i": "3874",
|
| 104 |
+
"native_tokenizers": [],
|
| 105 |
+
"scripts": []
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"name": "Miao, Eastern Huishui",
|
| 109 |
"iso_1_code": null,
|
| 110 |
"iso_3_code": "hme",
|
|
|
|
| 111 |
"children": [],
|
| 112 |
+
"tokenizers": {},
|
| 113 |
"node_i": "3875",
|
| 114 |
+
"native_tokenizers": [],
|
| 115 |
+
"scripts": []
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"name": "Hmong Don",
|
| 119 |
"iso_1_code": null,
|
| 120 |
"iso_3_code": "hmf",
|
|
|
|
| 121 |
"children": [],
|
| 122 |
+
"tokenizers": {},
|
| 123 |
"node_i": "3876",
|
| 124 |
+
"native_tokenizers": [],
|
| 125 |
+
"scripts": []
|
| 126 |
},
|
| 127 |
{
|
| 128 |
"name": "Miao, Southwestern Guiyang",
|
| 129 |
"iso_1_code": null,
|
| 130 |
"iso_3_code": "hmg",
|
|
|
|
| 131 |
"children": [],
|
| 132 |
+
"tokenizers": {},
|
| 133 |
"node_i": "3877",
|
| 134 |
+
"native_tokenizers": [],
|
| 135 |
+
"scripts": []
|
| 136 |
},
|
| 137 |
{
|
| 138 |
"name": "Miao, Southwestern Huishui",
|
| 139 |
"iso_1_code": null,
|
| 140 |
"iso_3_code": "hmh",
|
|
|
|
| 141 |
"children": [],
|
| 142 |
+
"tokenizers": {},
|
| 143 |
"node_i": "3878",
|
| 144 |
+
"native_tokenizers": [],
|
| 145 |
+
"scripts": []
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"name": "Miao, Northern Huishui",
|
| 149 |
"iso_1_code": null,
|
| 150 |
"iso_3_code": "hmi",
|
|
|
|
| 151 |
"children": [],
|
| 152 |
+
"tokenizers": {},
|
| 153 |
"node_i": "3879",
|
| 154 |
+
"native_tokenizers": [],
|
| 155 |
+
"scripts": []
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"name": "Ge",
|
| 159 |
"iso_1_code": null,
|
| 160 |
"iso_3_code": "hmj",
|
|
|
|
| 161 |
"children": [],
|
| 162 |
+
"tokenizers": {},
|
| 163 |
"node_i": "3880",
|
| 164 |
+
"native_tokenizers": [],
|
| 165 |
+
"scripts": []
|
| 166 |
},
|
| 167 |
{
|
| 168 |
"name": "Miao, Luopohe",
|
| 169 |
"iso_1_code": null,
|
| 170 |
"iso_3_code": "hml",
|
|
|
|
| 171 |
"children": [],
|
| 172 |
+
"tokenizers": {},
|
| 173 |
"node_i": "3881",
|
| 174 |
+
"native_tokenizers": [],
|
| 175 |
+
"scripts": []
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"name": "Miao, Central Mashan",
|
| 179 |
"iso_1_code": null,
|
| 180 |
"iso_3_code": "hmm",
|
|
|
|
| 181 |
"children": [],
|
| 182 |
+
"tokenizers": {},
|
| 183 |
"node_i": "3882",
|
| 184 |
+
"native_tokenizers": [],
|
| 185 |
+
"scripts": []
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"name": "Miao, Northern Mashan",
|
| 189 |
"iso_1_code": null,
|
| 190 |
"iso_3_code": "hmp",
|
|
|
|
| 191 |
"children": [],
|
| 192 |
+
"tokenizers": {},
|
| 193 |
"node_i": "3883",
|
| 194 |
+
"native_tokenizers": [],
|
| 195 |
+
"scripts": []
|
| 196 |
},
|
| 197 |
{
|
| 198 |
"name": "Hmong D\u00f4",
|
| 199 |
"iso_1_code": null,
|
| 200 |
"iso_3_code": "hmv",
|
|
|
|
| 201 |
"children": [],
|
| 202 |
+
"tokenizers": {},
|
| 203 |
"node_i": "3884",
|
| 204 |
+
"native_tokenizers": [],
|
| 205 |
+
"scripts": []
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"name": "Miao, Western Mashan",
|
| 209 |
"iso_1_code": null,
|
| 210 |
"iso_3_code": "hmw",
|
|
|
|
| 211 |
"children": [],
|
| 212 |
+
"tokenizers": {},
|
| 213 |
"node_i": "3885",
|
| 214 |
+
"native_tokenizers": [],
|
| 215 |
+
"scripts": []
|
| 216 |
},
|
| 217 |
{
|
| 218 |
"name": "Miao, Southern Guiyang",
|
| 219 |
"iso_1_code": null,
|
| 220 |
"iso_3_code": "hmy",
|
|
|
|
| 221 |
"children": [],
|
| 222 |
+
"tokenizers": {},
|
| 223 |
"node_i": "3886",
|
| 224 |
+
"native_tokenizers": [],
|
| 225 |
+
"scripts": []
|
| 226 |
},
|
| 227 |
{
|
| 228 |
"name": "Sinicized Miao",
|
| 229 |
"iso_1_code": null,
|
| 230 |
"iso_3_code": "hmz",
|
|
|
|
| 231 |
"children": [],
|
| 232 |
+
"tokenizers": {},
|
| 233 |
"node_i": "3887",
|
| 234 |
+
"native_tokenizers": [],
|
| 235 |
+
"scripts": []
|
| 236 |
},
|
| 237 |
{
|
| 238 |
"name": "Hmong Njua",
|
| 239 |
"iso_1_code": null,
|
| 240 |
"iso_3_code": "hnj",
|
|
|
|
| 241 |
"children": [],
|
| 242 |
+
"tokenizers": {},
|
| 243 |
"node_i": "3888",
|
| 244 |
+
"native_tokenizers": [],
|
| 245 |
"scripts": [
|
| 246 |
"Latn"
|
| 247 |
+
]
|
|
|
|
| 248 |
},
|
| 249 |
{
|
| 250 |
"name": "Miao, Horned",
|
| 251 |
"iso_1_code": null,
|
| 252 |
"iso_3_code": "hrm",
|
|
|
|
| 253 |
"children": [],
|
| 254 |
+
"tokenizers": {},
|
| 255 |
"node_i": "3889",
|
| 256 |
+
"native_tokenizers": [],
|
| 257 |
+
"scripts": []
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"name": "Miao, Northern Guiyang",
|
| 261 |
"iso_1_code": null,
|
| 262 |
"iso_3_code": "huj",
|
|
|
|
| 263 |
"children": [],
|
| 264 |
+
"tokenizers": {},
|
| 265 |
"node_i": "3890",
|
| 266 |
+
"native_tokenizers": [],
|
| 267 |
+
"scripts": []
|
| 268 |
},
|
| 269 |
{
|
| 270 |
"name": "Hmong Daw",
|
| 271 |
"iso_1_code": null,
|
| 272 |
"iso_3_code": "mww",
|
|
|
|
| 273 |
"children": [],
|
| 274 |
+
"tokenizers": {},
|
| 275 |
"node_i": "3891",
|
| 276 |
+
"native_tokenizers": [],
|
| 277 |
"scripts": [
|
| 278 |
"Latn"
|
| 279 |
+
]
|
|
|
|
| 280 |
},
|
| 281 |
{
|
| 282 |
"name": "Miao, Small Flowery",
|
| 283 |
"iso_1_code": null,
|
| 284 |
"iso_3_code": "sfm",
|
|
|
|
| 285 |
"children": [],
|
| 286 |
+
"tokenizers": {},
|
| 287 |
"node_i": "3892",
|
| 288 |
+
"native_tokenizers": [],
|
| 289 |
+
"scripts": []
|
| 290 |
}
|
| 291 |
],
|
| 292 |
+
"tokenizers": {},
|
| 293 |
"node_i": "3870",
|
| 294 |
+
"native_tokenizers": [],
|
| 295 |
+
"scripts": []
|
| 296 |
},
|
| 297 |
{
|
| 298 |
"name": "Pa-hng",
|
| 299 |
"iso_1_code": null,
|
| 300 |
"iso_3_code": null,
|
|
|
|
| 301 |
"children": [
|
| 302 |
{
|
| 303 |
"name": "Pa-Hng",
|
| 304 |
"iso_1_code": null,
|
| 305 |
"iso_3_code": "pha",
|
|
|
|
| 306 |
"children": [],
|
| 307 |
+
"tokenizers": {},
|
| 308 |
"node_i": "3894",
|
| 309 |
+
"native_tokenizers": [],
|
| 310 |
+
"scripts": []
|
| 311 |
}
|
| 312 |
],
|
| 313 |
+
"tokenizers": {},
|
| 314 |
"node_i": "3893",
|
| 315 |
+
"native_tokenizers": [],
|
| 316 |
+
"scripts": []
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"name": "Qiandong",
|
| 320 |
"iso_1_code": null,
|
| 321 |
"iso_3_code": null,
|
|
|
|
| 322 |
"children": [
|
| 323 |
{
|
| 324 |
"name": "Miao, Northern Qiandong",
|
| 325 |
"iso_1_code": null,
|
| 326 |
"iso_3_code": "hea",
|
|
|
|
| 327 |
"children": [],
|
| 328 |
+
"tokenizers": {},
|
| 329 |
"node_i": "3896",
|
| 330 |
+
"native_tokenizers": [],
|
| 331 |
+
"scripts": []
|
| 332 |
},
|
| 333 |
{
|
| 334 |
"name": "Miao, Eastern Qiandong",
|
| 335 |
"iso_1_code": null,
|
| 336 |
"iso_3_code": "hmq",
|
|
|
|
| 337 |
"children": [],
|
| 338 |
+
"tokenizers": {},
|
| 339 |
"node_i": "3897",
|
| 340 |
+
"native_tokenizers": [],
|
| 341 |
+
"scripts": []
|
| 342 |
},
|
| 343 |
{
|
| 344 |
"name": "Miao, Southern Qiandong",
|
| 345 |
"iso_1_code": null,
|
| 346 |
"iso_3_code": "hms",
|
|
|
|
| 347 |
"children": [],
|
| 348 |
+
"tokenizers": {},
|
| 349 |
"node_i": "3898",
|
| 350 |
+
"native_tokenizers": [],
|
| 351 |
+
"scripts": []
|
| 352 |
},
|
| 353 |
{
|
| 354 |
"name": "N\u00e1-Meo",
|
| 355 |
"iso_1_code": null,
|
| 356 |
"iso_3_code": "neo",
|
|
|
|
| 357 |
"children": [],
|
| 358 |
+
"tokenizers": {},
|
| 359 |
"node_i": "3899",
|
| 360 |
+
"native_tokenizers": [],
|
| 361 |
+
"scripts": []
|
| 362 |
}
|
| 363 |
],
|
| 364 |
+
"tokenizers": {},
|
| 365 |
"node_i": "3895",
|
| 366 |
+
"native_tokenizers": [],
|
| 367 |
+
"scripts": []
|
| 368 |
},
|
| 369 |
{
|
| 370 |
"name": "Xiangxi",
|
| 371 |
"iso_1_code": null,
|
| 372 |
"iso_3_code": null,
|
|
|
|
| 373 |
"children": [
|
| 374 |
{
|
| 375 |
"name": "Miao, Western Xiangxi",
|
| 376 |
"iso_1_code": null,
|
| 377 |
"iso_3_code": "mmr",
|
|
|
|
| 378 |
"children": [],
|
| 379 |
+
"tokenizers": {},
|
| 380 |
"node_i": "3901",
|
| 381 |
+
"native_tokenizers": [],
|
| 382 |
+
"scripts": []
|
| 383 |
},
|
| 384 |
{
|
| 385 |
"name": "Miao, Eastern Xiangxi",
|
| 386 |
"iso_1_code": null,
|
| 387 |
"iso_3_code": "muq",
|
|
|
|
| 388 |
"children": [],
|
| 389 |
+
"tokenizers": {},
|
| 390 |
"node_i": "3902",
|
| 391 |
+
"native_tokenizers": [],
|
| 392 |
+
"scripts": []
|
| 393 |
}
|
| 394 |
],
|
| 395 |
+
"tokenizers": {},
|
| 396 |
"node_i": "3900",
|
| 397 |
+
"native_tokenizers": [],
|
| 398 |
+
"scripts": []
|
| 399 |
}
|
| 400 |
],
|
| 401 |
+
"tokenizers": {},
|
| 402 |
"node_i": "3864",
|
| 403 |
+
"native_tokenizers": [],
|
| 404 |
+
"scripts": []
|
| 405 |
},
|
| 406 |
{
|
| 407 |
"name": "Ho Nte",
|
| 408 |
"iso_1_code": null,
|
| 409 |
"iso_3_code": null,
|
|
|
|
| 410 |
"children": [
|
| 411 |
{
|
| 412 |
"name": "She",
|
| 413 |
"iso_1_code": null,
|
| 414 |
"iso_3_code": "shx",
|
|
|
|
| 415 |
"children": [],
|
| 416 |
+
"tokenizers": {},
|
| 417 |
"node_i": "3904",
|
| 418 |
+
"native_tokenizers": [],
|
| 419 |
+
"scripts": []
|
| 420 |
}
|
| 421 |
],
|
| 422 |
+
"tokenizers": {},
|
| 423 |
"node_i": "3903",
|
| 424 |
+
"native_tokenizers": [],
|
| 425 |
+
"scripts": []
|
| 426 |
},
|
| 427 |
{
|
| 428 |
"name": "Mienic",
|
| 429 |
"iso_1_code": null,
|
| 430 |
"iso_3_code": null,
|
|
|
|
| 431 |
"children": [
|
| 432 |
{
|
| 433 |
"name": "Biao-Jiao",
|
| 434 |
"iso_1_code": null,
|
| 435 |
"iso_3_code": null,
|
|
|
|
| 436 |
"children": [
|
| 437 |
{
|
| 438 |
"name": "Biao-Jiao Mien",
|
| 439 |
"iso_1_code": null,
|
| 440 |
"iso_3_code": "bje",
|
|
|
|
| 441 |
"children": [],
|
| 442 |
+
"tokenizers": {},
|
| 443 |
"node_i": "3907",
|
| 444 |
+
"native_tokenizers": [],
|
| 445 |
+
"scripts": []
|
| 446 |
}
|
| 447 |
],
|
| 448 |
+
"tokenizers": {},
|
| 449 |
"node_i": "3906",
|
| 450 |
+
"native_tokenizers": [],
|
| 451 |
+
"scripts": []
|
| 452 |
},
|
| 453 |
{
|
| 454 |
"name": "Mian-Jin",
|
| 455 |
"iso_1_code": null,
|
| 456 |
"iso_3_code": null,
|
|
|
|
| 457 |
"children": [
|
| 458 |
{
|
| 459 |
"name": "Biao Mon",
|
| 460 |
"iso_1_code": null,
|
| 461 |
"iso_3_code": "bmt",
|
|
|
|
| 462 |
"children": [],
|
| 463 |
+
"tokenizers": {},
|
| 464 |
"node_i": "3909",
|
| 465 |
+
"native_tokenizers": [],
|
| 466 |
+
"scripts": []
|
| 467 |
},
|
| 468 |
{
|
| 469 |
"name": "Iu Mien",
|
| 470 |
"iso_1_code": null,
|
| 471 |
"iso_3_code": "ium",
|
|
|
|
| 472 |
"children": [],
|
| 473 |
+
"tokenizers": {},
|
| 474 |
"node_i": "3910",
|
| 475 |
+
"native_tokenizers": [],
|
| 476 |
"scripts": [
|
| 477 |
"Latn"
|
| 478 |
+
]
|
|
|
|
| 479 |
},
|
| 480 |
{
|
| 481 |
"name": "Kim Mun",
|
| 482 |
"iso_1_code": null,
|
| 483 |
"iso_3_code": "mji",
|
|
|
|
| 484 |
"children": [],
|
| 485 |
+
"tokenizers": {},
|
| 486 |
"node_i": "3911",
|
| 487 |
+
"native_tokenizers": [],
|
| 488 |
+
"scripts": []
|
| 489 |
}
|
| 490 |
],
|
| 491 |
+
"tokenizers": {},
|
| 492 |
"node_i": "3908",
|
| 493 |
+
"native_tokenizers": [],
|
| 494 |
+
"scripts": []
|
| 495 |
},
|
| 496 |
{
|
| 497 |
"name": "Zaomin",
|
| 498 |
"iso_1_code": null,
|
| 499 |
"iso_3_code": null,
|
|
|
|
| 500 |
"children": [
|
| 501 |
{
|
| 502 |
"name": "Dzao Min",
|
| 503 |
"iso_1_code": null,
|
| 504 |
"iso_3_code": "bpn",
|
|
|
|
| 505 |
"children": [],
|
| 506 |
+
"tokenizers": {},
|
| 507 |
"node_i": "3913",
|
| 508 |
+
"native_tokenizers": [],
|
| 509 |
+
"scripts": []
|
| 510 |
}
|
| 511 |
],
|
| 512 |
+
"tokenizers": {},
|
| 513 |
"node_i": "3912",
|
| 514 |
+
"native_tokenizers": [],
|
| 515 |
+
"scripts": []
|
| 516 |
}
|
| 517 |
],
|
| 518 |
+
"tokenizers": {},
|
| 519 |
"node_i": "3905",
|
| 520 |
+
"native_tokenizers": [],
|
| 521 |
+
"scripts": []
|
| 522 |
}
|
| 523 |
],
|
| 524 |
+
"tokenizers": {},
|
| 525 |
"node_i": "3863",
|
| 526 |
+
"native_tokenizers": [],
|
| 527 |
+
"scripts": []
|
| 528 |
}
|