profplate committed on
Commit
739b228
·
verified ·
1 Parent(s): be09fae

Create ipa_data.py

Browse files
Files changed (1) hide show
  1. ipa_data.py +776 -0
ipa_data.py ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ IPA Data Module — Shared phoneme data for all IPA Hugging Face Spaces.
3
+
4
+ Contains:
5
+ - Full IPA consonant inventory with articulatory features
6
+ - Full IPA vowel inventory with height/backness/rounding
7
+ - Spanish phoneme annotations and example words
8
+ - Chart layout constants (place/manner ordering, vowel trapezoid coordinates)
9
+ """
10
+
11
+ # =============================================================================
12
+ # CONSONANT CHART DATA
13
+ # =============================================================================
14
+
15
+ # Column order for the consonant chart (place of articulation)
16
CONSONANT_PLACES = [
    # Listed in chart column order.
    "bilabial", "labiodental", "dental", "alveolar", "postalveolar",
    "retroflex", "palatal", "velar", "uvular", "pharyngeal", "glottal",
]
29
+
30
+ # Row order for the consonant chart (manner of articulation)
31
CONSONANT_MANNERS = [
    # Listed in chart row order.
    "plosive", "nasal", "trill", "tap/flap",
    "fricative", "lateral fricative",
    "approximant", "lateral approximant",
]
41
+
42
+ # Full IPA consonant data
43
+ # Each entry: symbol -> {place, manner, voicing, name, spanish, spanish_example, espeak_code, languages}
44
+ CONSONANTS = {
45
+ # === PLOSIVES ===
46
+ "p": {
47
+ "place": "bilabial", "manner": "plosive", "voicing": "voiceless",
48
+ "name": "voiceless bilabial plosive",
49
+ "spanish": True, "spanish_example": "padre",
50
+ "espeak_code": "p",
51
+ "languages": ["English", "Spanish", "French", "German", "Mandarin"],
52
+ },
53
+ "b": {
54
+ "place": "bilabial", "manner": "plosive", "voicing": "voiced",
55
+ "name": "voiced bilabial plosive",
56
+ "spanish": True, "spanish_example": "boca",
57
+ "espeak_code": "b",
58
+ "languages": ["English", "Spanish", "French", "Arabic"],
59
+ },
60
+ "t": {
61
+ "place": "alveolar", "manner": "plosive", "voicing": "voiceless",
62
+ "name": "voiceless alveolar plosive",
63
+ "spanish": True, "spanish_example": "taza",
64
+ "espeak_code": "t",
65
+ "languages": ["English", "Spanish", "French", "German"],
66
+ },
67
+ "d": {
68
+ "place": "alveolar", "manner": "plosive", "voicing": "voiced",
69
+ "name": "voiced alveolar plosive",
70
+ "spanish": True, "spanish_example": "dedo",
71
+ "espeak_code": "d",
72
+ "languages": ["English", "Spanish", "French", "Italian"],
73
+ },
74
+ "t\u0288": { # ʈ
75
+ "place": "retroflex", "manner": "plosive", "voicing": "voiceless",
76
+ "name": "voiceless retroflex plosive",
77
+ "spanish": False, "spanish_example": None,
78
+ "espeak_code": "t.",
79
+ "languages": ["Hindi", "Swedish", "Norwegian"],
80
+ },
81
+ "\u0256": { # ɖ
82
+ "place": "retroflex", "manner": "plosive", "voicing": "voiced",
83
+ "name": "voiced retroflex plosive",
84
+ "spanish": False, "spanish_example": None,
85
+ "espeak_code": "d.",
86
+ "languages": ["Hindi", "Tamil", "Swedish"],
87
+ },
88
+ "c": {
89
+ "place": "palatal", "manner": "plosive", "voicing": "voiceless",
90
+ "name": "voiceless palatal plosive",
91
+ "spanish": False, "spanish_example": None,
92
+ "espeak_code": "c",
93
+ "languages": ["Hungarian", "Turkish", "Greek"],
94
+ },
95
+ "\u025f": { # ɟ
96
+ "place": "palatal", "manner": "plosive", "voicing": "voiced",
97
+ "name": "voiced palatal plosive",
98
+ "spanish": False, "spanish_example": None,
99
+ "espeak_code": "J",
100
+ "languages": ["Hungarian", "Turkish", "Latvian"],
101
+ },
102
+ "k": {
103
+ "place": "velar", "manner": "plosive", "voicing": "voiceless",
104
+ "name": "voiceless velar plosive",
105
+ "spanish": True, "spanish_example": "casa",
106
+ "espeak_code": "k",
107
+ "languages": ["English", "Spanish", "French", "German", "Mandarin"],
108
+ },
109
+ "\u0261": { # ɡ
110
+ "place": "velar", "manner": "plosive", "voicing": "voiced",
111
+ "name": "voiced velar plosive",
112
+ "spanish": True, "spanish_example": "gato",
113
+ "espeak_code": "g",
114
+ "languages": ["English", "Spanish", "French", "German"],
115
+ },
116
+ "q": {
117
+ "place": "uvular", "manner": "plosive", "voicing": "voiceless",
118
+ "name": "voiceless uvular plosive",
119
+ "spanish": False, "spanish_example": None,
120
+ "espeak_code": "q",
121
+ "languages": ["Arabic", "Quechua", "Inuktitut"],
122
+ },
123
+ "\u0262": { # ɢ
124
+ "place": "uvular", "manner": "plosive", "voicing": "voiced",
125
+ "name": "voiced uvular plosive",
126
+ "spanish": False, "spanish_example": None,
127
+ "espeak_code": None,
128
+ "languages": ["Somali", "Inuktitut"],
129
+ },
130
+ "\u0294": { # ʔ
131
+ "place": "glottal", "manner": "plosive", "voicing": "voiceless",
132
+ "name": "glottal stop",
133
+ "spanish": False, "spanish_example": None,
134
+ "espeak_code": "?",
135
+ "languages": ["English (uh-oh)", "Arabic", "Hawaiian", "German"],
136
+ },
137
+
138
+ # === NASALS ===
139
+ "m": {
140
+ "place": "bilabial", "manner": "nasal", "voicing": "voiced",
141
+ "name": "voiced bilabial nasal",
142
+ "spanish": True, "spanish_example": "madre",
143
+ "espeak_code": "m",
144
+ "languages": ["English", "Spanish", "French", "German", "Mandarin"],
145
+ },
146
+ "\u0271": { # ɱ
147
+ "place": "labiodental", "manner": "nasal", "voicing": "voiced",
148
+ "name": "voiced labiodental nasal",
149
+ "spanish": False, "spanish_example": None,
150
+ "espeak_code": "M",
151
+ "languages": ["English (comfort)", "Italian", "Dutch"],
152
+ },
153
+ "n": {
154
+ "place": "alveolar", "manner": "nasal", "voicing": "voiced",
155
+ "name": "voiced alveolar nasal",
156
+ "spanish": True, "spanish_example": "noche",
157
+ "espeak_code": "n",
158
+ "languages": ["English", "Spanish", "French", "German", "Mandarin"],
159
+ },
160
+ "\u0273": { # ɳ
161
+ "place": "retroflex", "manner": "nasal", "voicing": "voiced",
162
+ "name": "voiced retroflex nasal",
163
+ "spanish": False, "spanish_example": None,
164
+ "espeak_code": "n.",
165
+ "languages": ["Hindi", "Swedish", "Norwegian"],
166
+ },
167
+ "\u0272": { # ɲ
168
+ "place": "palatal", "manner": "nasal", "voicing": "voiced",
169
+ "name": "voiced palatal nasal",
170
+ "spanish": True, "spanish_example": "niño (ñ)",
171
+ "espeak_code": "n^",
172
+ "languages": ["Spanish", "French (cognac)", "Italian", "Portuguese"],
173
+ },
174
+ "\u014b": { # ŋ
175
+ "place": "velar", "manner": "nasal", "voicing": "voiced",
176
+ "name": "voiced velar nasal",
177
+ "spanish": True, "spanish_example": "tengo (before g/k)",
178
+ "espeak_code": "N",
179
+ "languages": ["English (sing)", "Spanish", "German", "Mandarin"],
180
+ },
181
+ "\u0274": { # ɴ
182
+ "place": "uvular", "manner": "nasal", "voicing": "voiced",
183
+ "name": "voiced uvular nasal",
184
+ "spanish": False, "spanish_example": None,
185
+ "espeak_code": None,
186
+ "languages": ["Japanese", "Inuktitut"],
187
+ },
188
+
189
+ # === TRILLS ===
190
+ "\u0299": { # ʙ
191
+ "place": "bilabial", "manner": "trill", "voicing": "voiced",
192
+ "name": "voiced bilabial trill",
193
+ "spanish": False, "spanish_example": None,
194
+ "espeak_code": None,
195
+ "languages": ["Nias", "Titan"],
196
+ },
197
+ "r": {
198
+ "place": "alveolar", "manner": "trill", "voicing": "voiced",
199
+ "name": "voiced alveolar trill",
200
+ "spanish": True, "spanish_example": "perro",
201
+ "espeak_code": "r",
202
+ "languages": ["Spanish", "Italian", "Russian", "Arabic"],
203
+ },
204
+ "\u0280": { # ʀ
205
+ "place": "uvular", "manner": "trill", "voicing": "voiced",
206
+ "name": "voiced uvular trill",
207
+ "spanish": False, "spanish_example": None,
208
+ "espeak_code": "R",
209
+ "languages": ["French", "German", "Dutch"],
210
+ },
211
+
212
+ # === TAPS / FLAPS ===
213
+ "\u2c71": { # ⱱ
214
+ "place": "labiodental", "manner": "tap/flap", "voicing": "voiced",
215
+ "name": "voiced labiodental flap",
216
+ "spanish": False, "spanish_example": None,
217
+ "espeak_code": None,
218
+ "languages": ["Mono", "some Central African languages"],
219
+ },
220
+ "\u027e": { # ɾ
221
+ "place": "alveolar", "manner": "tap/flap", "voicing": "voiced",
222
+ "name": "voiced alveolar tap",
223
+ "spanish": True, "spanish_example": "pero",
224
+ "espeak_code": "*",
225
+ "languages": ["Spanish", "Japanese", "Korean", "Portuguese"],
226
+ },
227
+ "\u027d": { # ɽ
228
+ "place": "retroflex", "manner": "tap/flap", "voicing": "voiced",
229
+ "name": "voiced retroflex flap",
230
+ "spanish": False, "spanish_example": None,
231
+ "espeak_code": "*.",
232
+ "languages": ["Hindi", "Urdu", "Hausa"],
233
+ },
234
+
235
+ # === FRICATIVES ===
236
+ "\u0278": { # ɸ
237
+ "place": "bilabial", "manner": "fricative", "voicing": "voiceless",
238
+ "name": "voiceless bilabial fricative",
239
+ "spanish": False, "spanish_example": None,
240
+ "espeak_code": "F",
241
+ "languages": ["Japanese (fu)", "Ewe"],
242
+ },
243
+ "\u03b2": { # β
244
+ "place": "bilabial", "manner": "fricative", "voicing": "voiced",
245
+ "name": "voiced bilabial fricative",
246
+ "spanish": True, "spanish_example": "lobo (allophone of /b/)",
247
+ "espeak_code": "B",
248
+ "languages": ["Spanish (allophone)", "Ewe", "Japanese"],
249
+ },
250
+ "f": {
251
+ "place": "labiodental", "manner": "fricative", "voicing": "voiceless",
252
+ "name": "voiceless labiodental fricative",
253
+ "spanish": True, "spanish_example": "fuego",
254
+ "espeak_code": "f",
255
+ "languages": ["English", "Spanish", "French", "German"],
256
+ },
257
+ "v": {
258
+ "place": "labiodental", "manner": "fricative", "voicing": "voiced",
259
+ "name": "voiced labiodental fricative",
260
+ "spanish": False, "spanish_example": None,
261
+ "espeak_code": "v",
262
+ "languages": ["English", "French", "German", "Russian"],
263
+ },
264
+ "\u03b8": { # θ
265
+ "place": "dental", "manner": "fricative", "voicing": "voiceless",
266
+ "name": "voiceless dental fricative",
267
+ "spanish": True, "spanish_example": "zapato (Castilian)",
268
+ "espeak_code": "T",
269
+ "languages": ["English (think)", "Spanish (Castilian)", "Greek"],
270
+ },
271
+ "\u00f0": { # ð
272
+ "place": "dental", "manner": "fricative", "voicing": "voiced",
273
+ "name": "voiced dental fricative",
274
+ "spanish": True, "spanish_example": "dedo (allophone of /d/)",
275
+ "espeak_code": "D",
276
+ "languages": ["English (the)", "Spanish (allophone)", "Icelandic"],
277
+ },
278
+ "s": {
279
+ "place": "alveolar", "manner": "fricative", "voicing": "voiceless",
280
+ "name": "voiceless alveolar fricative",
281
+ "spanish": True, "spanish_example": "sol",
282
+ "espeak_code": "s",
283
+ "languages": ["English", "Spanish", "French", "German", "Mandarin"],
284
+ },
285
+ "z": {
286
+ "place": "alveolar", "manner": "fricative", "voicing": "voiced",
287
+ "name": "voiced alveolar fricative",
288
+ "spanish": False, "spanish_example": None,
289
+ "espeak_code": "z",
290
+ "languages": ["English", "French", "Italian", "Russian"],
291
+ },
292
+ "\u0283": { # ʃ
293
+ "place": "postalveolar", "manner": "fricative", "voicing": "voiceless",
294
+ "name": "voiceless postalveolar fricative",
295
+ "spanish": False, "spanish_example": None,
296
+ "espeak_code": "S",
297
+ "languages": ["English (ship)", "French (chat)", "German (schön)"],
298
+ },
299
+ "\u0292": { # ʒ
300
+ "place": "postalveolar", "manner": "fricative", "voicing": "voiced",
301
+ "name": "voiced postalveolar fricative",
302
+ "spanish": False, "spanish_example": None,
303
+ "espeak_code": "Z",
304
+ "languages": ["English (measure)", "French (je)", "Portuguese"],
305
+ },
306
+ "\u0282": { # ʂ
307
+ "place": "retroflex", "manner": "fricative", "voicing": "voiceless",
308
+ "name": "voiceless retroflex fricative",
309
+ "spanish": False, "spanish_example": None,
310
+ "espeak_code": "s.",
311
+ "languages": ["Mandarin (sh)", "Polish", "Sanskrit"],
312
+ },
313
+ "\u0290": { # ʐ
314
+ "place": "retroflex", "manner": "fricative", "voicing": "voiced",
315
+ "name": "voiced retroflex fricative",
316
+ "spanish": False, "spanish_example": None,
317
+ "espeak_code": "z.",
318
+ "languages": ["Mandarin (r-)", "Polish", "Russian"],
319
+ },
320
+ "\u00e7": { # ç
321
+ "place": "palatal", "manner": "fricative", "voicing": "voiceless",
322
+ "name": "voiceless palatal fricative",
323
+ "spanish": False, "spanish_example": None,
324
+ "espeak_code": "C",
325
+ "languages": ["German (ich)", "Greek", "Japanese (hi)"],
326
+ },
327
+ "\u029d": { # ʝ
328
+ "place": "palatal", "manner": "fricative", "voicing": "voiced",
329
+ "name": "voiced palatal fricative",
330
+ "spanish": True, "spanish_example": "mayo (some dialects)",
331
+ "espeak_code": "j",
332
+ "languages": ["Spanish (some dialects)", "Greek", "Irish"],
333
+ },
334
+ "x": {
335
+ "place": "velar", "manner": "fricative", "voicing": "voiceless",
336
+ "name": "voiceless velar fricative",
337
+ "spanish": True, "spanish_example": "jota",
338
+ "espeak_code": "x",
339
+ "languages": ["Spanish", "German (Bach)", "Russian", "Arabic"],
340
+ },
341
+ "\u0263": { # ɣ
342
+ "place": "velar", "manner": "fricative", "voicing": "voiced",
343
+ "name": "voiced velar fricative",
344
+ "spanish": True, "spanish_example": "lago (allophone of /g/)",
345
+ "espeak_code": "Q",
346
+ "languages": ["Spanish (allophone)", "Greek", "Irish"],
347
+ },
348
+ "\u03c7": { # χ
349
+ "place": "uvular", "manner": "fricative", "voicing": "voiceless",
350
+ "name": "voiceless uvular fricative",
351
+ "spanish": False, "spanish_example": None,
352
+ "espeak_code": "X",
353
+ "languages": ["German (some dialects)", "Hebrew", "Welsh"],
354
+ },
355
+ "\u0281": { # ʁ
356
+ "place": "uvular", "manner": "fricative", "voicing": "voiced",
357
+ "name": "voiced uvular fricative",
358
+ "spanish": False, "spanish_example": None,
359
+ "espeak_code": "g\"",
360
+ "languages": ["French (r)", "German (r)", "Dutch"],
361
+ },
362
+ "\u0127": { # ħ
363
+ "place": "pharyngeal", "manner": "fricative", "voicing": "voiceless",
364
+ "name": "voiceless pharyngeal fricative",
365
+ "spanish": False, "spanish_example": None,
366
+ "espeak_code": "H",
367
+ "languages": ["Arabic (ح)", "Hebrew", "Somali"],
368
+ },
369
+ "\u0295": { # ʕ
370
+ "place": "pharyngeal", "manner": "fricative", "voicing": "voiced",
371
+ "name": "voiced pharyngeal fricative",
372
+ "spanish": False, "spanish_example": None,
373
+ "espeak_code": "Q\"",
374
+ "languages": ["Arabic (ع)", "Hebrew", "Somali"],
375
+ },
376
+ "h": {
377
+ "place": "glottal", "manner": "fricative", "voicing": "voiceless",
378
+ "name": "voiceless glottal fricative",
379
+ "spanish": True, "spanish_example": "huevo (some dialects)",
380
+ "espeak_code": "h",
381
+ "languages": ["English", "German", "Japanese", "Arabic"],
382
+ },
383
+ "\u0266": { # ɦ
384
+ "place": "glottal", "manner": "fricative", "voicing": "voiced",
385
+ "name": "voiced glottal fricative",
386
+ "spanish": False, "spanish_example": None,
387
+ "espeak_code": "h\\",
388
+ "languages": ["Dutch", "Czech", "Ukrainian"],
389
+ },
390
+
391
+ # === LATERAL FRICATIVES ===
392
+ "\u026c": { # ɬ
393
+ "place": "alveolar", "manner": "lateral fricative", "voicing": "voiceless",
394
+ "name": "voiceless alveolar lateral fricative",
395
+ "spanish": False, "spanish_example": None,
396
+ "espeak_code": "l#",
397
+ "languages": ["Welsh (ll)", "Zulu", "Navajo"],
398
+ },
399
+ "\u026e": { # ɮ
400
+ "place": "alveolar", "manner": "lateral fricative", "voicing": "voiced",
401
+ "name": "voiced alveolar lateral fricative",
402
+ "spanish": False, "spanish_example": None,
403
+ "espeak_code": None,
404
+ "languages": ["Mongolian", "Zulu"],
405
+ },
406
+
407
+ # === APPROXIMANTS ===
408
+ "\u028b": { # ʋ
409
+ "place": "labiodental", "manner": "approximant", "voicing": "voiced",
410
+ "name": "voiced labiodental approximant",
411
+ "spanish": False, "spanish_example": None,
412
+ "espeak_code": "v#",
413
+ "languages": ["Dutch", "Finnish", "Hindi"],
414
+ },
415
+ "\u0279": { # ɹ
416
+ "place": "alveolar", "manner": "approximant", "voicing": "voiced",
417
+ "name": "voiced alveolar approximant",
418
+ "spanish": False, "spanish_example": None,
419
+ "espeak_code": "r\\",
420
+ "languages": ["English (red)", "some Dutch dialects"],
421
+ },
422
+ "\u027b": { # ɻ
423
+ "place": "retroflex", "manner": "approximant", "voicing": "voiced",
424
+ "name": "voiced retroflex approximant",
425
+ "spanish": False, "spanish_example": None,
426
+ "espeak_code": "r\\.",
427
+ "languages": ["Tamil", "American English (r)"],
428
+ },
429
+ "j": {
430
+ "place": "palatal", "manner": "approximant", "voicing": "voiced",
431
+ "name": "voiced palatal approximant",
432
+ "spanish": True, "spanish_example": "yo (some dialects)",
433
+ "espeak_code": "j\\",
434
+ "languages": ["English (yes)", "Spanish", "French", "German"],
435
+ },
436
+ "w": {
437
+ "place": "velar", "manner": "approximant", "voicing": "voiced",
438
+ "name": "voiced labial-velar approximant",
439
+ "spanish": True, "spanish_example": "huevo",
440
+ "espeak_code": "w",
441
+ "languages": ["English", "Spanish", "French", "Mandarin"],
442
+ },
443
+
444
+ # === LATERAL APPROXIMANTS ===
445
+ "l": {
446
+ "place": "alveolar", "manner": "lateral approximant", "voicing": "voiced",
447
+ "name": "voiced alveolar lateral approximant",
448
+ "spanish": True, "spanish_example": "luna",
449
+ "espeak_code": "l",
450
+ "languages": ["English", "Spanish", "French", "German"],
451
+ },
452
+ "\u026d": { # ɭ
453
+ "place": "retroflex", "manner": "lateral approximant", "voicing": "voiced",
454
+ "name": "voiced retroflex lateral approximant",
455
+ "spanish": False, "spanish_example": None,
456
+ "espeak_code": "l.",
457
+ "languages": ["Tamil", "Hindi", "Norwegian"],
458
+ },
459
+ "\u028e": { # ʎ
460
+ "place": "palatal", "manner": "lateral approximant", "voicing": "voiced",
461
+ "name": "voiced palatal lateral approximant",
462
+ "spanish": True, "spanish_example": "calle (traditional)",
463
+ "espeak_code": "l^",
464
+ "languages": ["Spanish (traditional)", "Italian", "Portuguese", "Catalan"],
465
+ },
466
+ "\u029f": { # ʟ
467
+ "place": "velar", "manner": "lateral approximant", "voicing": "voiced",
468
+ "name": "voiced velar lateral approximant",
469
+ "spanish": False, "spanish_example": None,
470
+ "espeak_code": "L",
471
+ "languages": ["Mid-Waghi", "some PNG languages"],
472
+ },
473
+ }
474
+
475
+
476
+ # =============================================================================
477
+ # VOWEL CHART DATA
478
+ # =============================================================================
479
+
480
+ # Vowel height levels (top to bottom)
481
VOWEL_HEIGHTS = [
    "close",
    "near-close",
    "close-mid",
    "mid",
    "open-mid",
    "near-open",
    "open",
]

# Vowel backness levels, ordered left to right on the chart
VOWEL_BACKNESSES = [
    "front",
    "central",
    "back",
]
485
+
486
+ # Full IPA vowel data
487
VOWELS = {
    # Maps an IPA vowel symbol to a dict with the keys:
    #   height, backness, rounding - position of the symbol on the vowel chart
    #   name                       - descriptive name of the vowel
    #   spanish                    - True when the entry is annotated as a Spanish vowel
    #   spanish_example            - example word (str), or None when spanish is False
    #   espeak_code                - ASCII code string; None on some entries
    #                                (presumably no code is available - confirm)
    #   languages                  - list of example languages (free-text labels)

    # === CLOSE VOWELS ===
    "i": {
        "height": "close", "backness": "front", "rounding": "unrounded",
        "name": "close front unrounded vowel",
        "spanish": True, "spanish_example": "sí",
        "espeak_code": "i",
        "languages": ["English (see)", "Spanish", "French", "German"],
    },
    "y": {
        "height": "close", "backness": "front", "rounding": "rounded",
        "name": "close front rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "y",
        "languages": ["French (tu)", "German (über)", "Finnish", "Mandarin (ü)"],
    },
    "\u0268": {  # ɨ
        "height": "close", "backness": "central", "rounding": "unrounded",
        "name": "close central unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "i\"",
        "languages": ["Russian (ы)", "Polish", "Romanian"],
    },
    "\u0289": {  # ʉ
        "height": "close", "backness": "central", "rounding": "rounded",
        "name": "close central rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "u\"",
        "languages": ["Swedish", "Norwegian", "some English dialects"],
    },
    "\u026f": {  # ɯ
        "height": "close", "backness": "back", "rounding": "unrounded",
        "name": "close back unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "u-",
        "languages": ["Turkish", "Japanese", "Korean"],
    },
    "u": {
        "height": "close", "backness": "back", "rounding": "rounded",
        "name": "close back rounded vowel",
        "spanish": True, "spanish_example": "tú",
        "espeak_code": "u",
        "languages": ["English (boot)", "Spanish", "French", "German"],
    },

    # === NEAR-CLOSE VOWELS ===
    "\u026a": {  # ɪ
        "height": "near-close", "backness": "front", "rounding": "unrounded",
        "name": "near-close front unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "I",
        "languages": ["English (bit)", "German"],
    },
    "\u028f": {  # ʏ
        "height": "near-close", "backness": "front", "rounding": "rounded",
        "name": "near-close front rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "Y",
        "languages": ["German (hübsch)", "Swedish"],
    },
    "\u028a": {  # ʊ
        "height": "near-close", "backness": "back", "rounding": "rounded",
        "name": "near-close back rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "U",
        "languages": ["English (book)", "German"],
    },

    # === CLOSE-MID VOWELS ===
    "e": {
        "height": "close-mid", "backness": "front", "rounding": "unrounded",
        "name": "close-mid front unrounded vowel",
        "spanish": True, "spanish_example": "mesa",
        "espeak_code": "e",
        "languages": ["Spanish", "French (été)", "German (Beet)", "Italian"],
    },
    "\u00f8": {  # ø
        "height": "close-mid", "backness": "front", "rounding": "rounded",
        "name": "close-mid front rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "Y\"",
        "languages": ["French (peu)", "German (schön)", "Danish", "Finnish"],
    },
    "\u0258": {  # ɘ
        "height": "close-mid", "backness": "central", "rounding": "unrounded",
        "name": "close-mid central unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": None,
        "languages": ["Korean", "Paicî"],
    },
    "\u0275": {  # ɵ
        "height": "close-mid", "backness": "central", "rounding": "rounded",
        "name": "close-mid central rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": None,
        "languages": ["Swedish", "Australian English"],
    },
    "\u0264": {  # ɤ
        "height": "close-mid", "backness": "back", "rounding": "unrounded",
        "name": "close-mid back unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "o-",
        "languages": ["Korean", "Vietnamese", "Thai"],
    },
    "o": {
        "height": "close-mid", "backness": "back", "rounding": "rounded",
        "name": "close-mid back rounded vowel",
        "spanish": True, "spanish_example": "como",
        "espeak_code": "o",
        "languages": ["Spanish", "French (beau)", "German (Boot)", "Italian"],
    },

    # === MID VOWEL ===
    "\u0259": {  # ə (schwa)
        "height": "mid", "backness": "central", "rounding": "unrounded",
        "name": "mid central vowel (schwa)",
        "spanish": False, "spanish_example": None,
        "espeak_code": "@",
        "languages": ["English (about)", "French (le)", "German (bitte)"],
    },

    # === OPEN-MID VOWELS ===
    "\u025b": {  # ɛ
        "height": "open-mid", "backness": "front", "rounding": "unrounded",
        "name": "open-mid front unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "E",
        "languages": ["English (bed)", "French (fête)", "Italian"],
    },
    "\u0153": {  # œ
        "height": "open-mid", "backness": "front", "rounding": "rounded",
        "name": "open-mid front rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "W",
        "languages": ["French (peur)", "German (Hölle)"],
    },
    "\u025c": {  # ɜ
        "height": "open-mid", "backness": "central", "rounding": "unrounded",
        "name": "open-mid central unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "3",
        "languages": ["English (bird - British)"],
    },
    "\u025e": {  # ɞ
        "height": "open-mid", "backness": "central", "rounding": "rounded",
        "name": "open-mid central rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": None,
        "languages": ["Irish", "some English dialects"],
    },
    "\u028c": {  # ʌ
        "height": "open-mid", "backness": "back", "rounding": "unrounded",
        "name": "open-mid back unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "V",
        "languages": ["English (but)", "Korean"],
    },
    "\u0254": {  # ɔ
        "height": "open-mid", "backness": "back", "rounding": "rounded",
        "name": "open-mid back rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "O",
        "languages": ["English (thought)", "French (mort)", "Italian"],
    },

    # === NEAR-OPEN VOWELS ===
    "\u00e6": {  # æ
        "height": "near-open", "backness": "front", "rounding": "unrounded",
        "name": "near-open front unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "a#",
        "languages": ["English (cat)", "Danish", "Arabic"],
    },
    "\u0250": {  # ɐ
        "height": "near-open", "backness": "central", "rounding": "unrounded",
        "name": "near-open central vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "a\"",
        "languages": ["German (bitter)", "Portuguese", "some English dialects"],
    },

    # === OPEN VOWELS ===
    "a": {
        "height": "open", "backness": "front", "rounding": "unrounded",
        "name": "open front unrounded vowel",
        "spanish": True, "spanish_example": "casa",
        "espeak_code": "a",
        "languages": ["Spanish", "French (patte)", "Italian", "German"],
    },
    "\u0276": {  # ɶ
        "height": "open", "backness": "front", "rounding": "rounded",
        "name": "open front rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": None,
        "languages": ["Danish", "some German dialects"],
    },
    "\u0251": {  # ɑ
        "height": "open", "backness": "back", "rounding": "unrounded",
        "name": "open back unrounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "A",
        "languages": ["English (father)", "French (pâte)", "Persian"],
    },
    "\u0252": {  # ɒ
        "height": "open", "backness": "back", "rounding": "rounded",
        "name": "open back rounded vowel",
        "spanish": False, "spanish_example": None,
        "espeak_code": "A.",
        "languages": ["British English (lot)", "Hungarian"],
    },
}
698
+
699
+
700
+ # =============================================================================
701
+ # VOWEL TRAPEZOID COORDINATES (for SVG rendering)
702
+ # =============================================================================
703
+
704
+ # SVG viewBox: 0 0 500 400
705
+ # The trapezoid is wider at the top (close) and narrower at the bottom (open)
706
+ # x increases left-to-right (front → back), y increases top-to-bottom (close → open)
707
VOWEL_TRAPEZOID_COORDS = {
    # Key: (height, backness). Value: (x, y) position on the trapezoid SVG.
    # One chart row per line; the x-span shrinks as y grows
    # (close row: x=80..420, open row: x=215..340).
    ("close", "front"): (80, 40), ("close", "central"): (250, 40), ("close", "back"): (420, 40),
    ("near-close", "front"): (110, 95), ("near-close", "back"): (400, 95),
    ("close-mid", "front"): (130, 145), ("close-mid", "central"): (255, 145), ("close-mid", "back"): (380, 145),
    ("mid", "central"): (260, 200),
    ("open-mid", "front"): (170, 250), ("open-mid", "central"): (265, 250), ("open-mid", "back"): (360, 250),
    ("near-open", "front"): (195, 305), ("near-open", "central"): (270, 305),
    ("open", "front"): (215, 355), ("open", "back"): (340, 355),
}
733
+
734
+
735
+ # =============================================================================
736
+ # HELPER FUNCTIONS
737
+ # =============================================================================
738
+
739
def get_consonant_at(place, manner, voicing):
    """Find the consonant symbol occupying one cell of the consonant chart.

    Scans CONSONANTS in insertion order and returns the first symbol whose
    "place", "manner" and "voicing" fields all equal the arguments, or None
    when no entry matches.
    """
    candidates = (
        ipa
        for ipa, features in CONSONANTS.items()
        if features["place"] == place
        and features["manner"] == manner
        and features["voicing"] == voicing
    )
    # next() with a default yields the first hit, or None for an empty cell.
    return next(candidates, None)
745
+
746
+
747
def get_vowel_at(height, backness, rounding):
    """Find the vowel symbol at a given height/backness/rounding position.

    Scans VOWELS in insertion order and returns the first symbol whose
    "height", "backness" and "rounding" fields all equal the arguments,
    or None when no entry matches.
    """
    for ipa, features in VOWELS.items():
        is_match = (
            features["height"] == height
            and features["backness"] == backness
            and features["rounding"] == rounding
        )
        if is_match:
            return ipa
    return None
753
+
754
+
755
def get_spanish_consonants():
    """Build a new dict holding only the CONSONANTS entries flagged "spanish".

    The values are the same entry dicts stored in CONSONANTS (not copies).
    """
    selected = {}
    for ipa, features in CONSONANTS.items():
        if features["spanish"]:
            selected[ipa] = features
    return selected
758
+
759
+
760
def get_spanish_vowels():
    """Build a new dict holding only the VOWELS entries flagged "spanish".

    The values are the same entry dicts stored in VOWELS (not copies).
    """
    selected = {}
    for ipa, features in VOWELS.items():
        if features["spanish"]:
            selected[ipa] = features
    return selected
763
+
764
+
765
def get_all_spanish_phonemes():
    """Merge the Spanish consonants and Spanish vowels into one dict.

    Consonants are inserted first, then vowels; a vowel would overwrite a
    consonant that shared the same symbol key.
    """
    combined = dict(get_spanish_consonants())
    combined.update(get_spanish_vowels())
    return combined
768
+
769
+
770
def get_phoneme_info(symbol):
    """Return a copy of a symbol's data tagged with its phoneme type.

    CONSONANTS is consulted before VOWELS. The result is a new dict whose
    first key is "type" ("consonant" or "vowel"), followed by the entry's
    own fields. Returns None when the symbol is in neither table.
    """
    for kind, table in (("consonant", CONSONANTS), ("vowel", VOWELS)):
        if symbol in table:
            return {"type": kind, **table[symbol]}
    return None