Spaces:
Sleeping
Sleeping
| """ | |
| IPA Sound Lab — Click any IPA symbol to hear its sound. | |
| Uses eSpeak-NG for phoneme audio synthesis. Includes a comparison mode | |
| to hear two phonemes side by side. | |
| """ | |
| import gradio as gr | |
| import subprocess | |
| import tempfile | |
| import os | |
| import numpy as np | |
| import struct | |
| import wave | |
| from ipa_data import ( | |
| CONSONANTS, VOWELS, | |
| CONSONANT_PLACES, CONSONANT_MANNERS, | |
| VOWEL_TRAPEZOID_COORDS, | |
| get_consonant_at, get_phoneme_info, | |
| ) | |
print("IPA Sound Lab loading...")

# =============================================================================
# AUDIO GENERATION WITH ESPEAK-NG
# =============================================================================

# Cache directory for generated audio files
AUDIO_CACHE_DIR = tempfile.mkdtemp(prefix="ipa_audio_")

# Files no larger than this many bytes are treated as "header only, no audio".
_WAV_HEADER_BYTES = 44


def _is_usable_wav(path):
    """Return True if *path* exists and is larger than a bare WAV header."""
    try:
        return os.path.getsize(path) > _WAV_HEADER_BYTES
    except OSError:
        return False


def generate_phoneme_audio(espeak_code, symbol=""):
    """Generate a WAV file for a single IPA phoneme using eSpeak-NG.

    Args:
        espeak_code: The eSpeak-NG phoneme code; it is passed to the
            ``espeak-ng`` binary wrapped in ``[[...]]``.
        symbol: Unused; kept so existing callers can keep passing it.

    Returns:
        The path of the WAV file inside ``AUDIO_CACHE_DIR``, or ``None`` when
        ``espeak_code`` is empty or no usable audio could be produced.
    """
    if not espeak_code:
        return None

    # Hex-encode the code for the filename: the result is filesystem-safe and
    # unique per code, so codes that differ only in case or in punctuation can
    # never end up sharing one cached file.
    cache_key = espeak_code.encode("utf-8").hex()
    cache_path = os.path.join(AUDIO_CACHE_DIR, f"phoneme_{cache_key}.wav")

    # Only trust a cached file that actually contains audio data.
    if _is_usable_wav(cache_path):
        return cache_path

    finished = False
    try:
        # eSpeak-NG phoneme notation: [[phoneme]]
        subprocess.run(
            ["espeak-ng", "-v", "en", "-w", cache_path, f"[[{espeak_code}]]"],
            capture_output=True,
            timeout=5,
        )
        finished = True
    except (subprocess.TimeoutExpired, OSError):
        # Best effort: a missing/unlaunchable binary or a hung process simply
        # means this phoneme has no audio.
        pass

    if finished and _is_usable_wav(cache_path):
        return cache_path

    # Drop empty or partially written output so a later call cannot mistake
    # it for a valid cache entry.
    try:
        os.remove(cache_path)
    except OSError:
        pass
    return None
def read_wav_as_numpy(wav_path):
    """Read a WAV file and return (sample_rate, numpy_array) for Gradio.

    Samples are returned as float32 normalized to [-1, 1]. For files with
    more than one channel only the first channel is kept.

    Args:
        wav_path: Path to a PCM WAV file.

    Returns:
        ``(sample_rate, samples)``, or ``None`` when the path is empty or
        missing, the file cannot be parsed as WAV, or the sample width is
        not 1, 2, 3 or 4 bytes.
    """
    if not wav_path or not os.path.exists(wav_path):
        return None

    try:
        with wave.open(wav_path, "rb") as wf:
            sample_rate = wf.getframerate()
            n_channels = wf.getnchannels()
            sample_width = wf.getsampwidth()
            raw_data = wf.readframes(wf.getnframes())
    except (wave.Error, EOFError, OSError):
        # A corrupt or unreadable file means "no audio", not a UI crash.
        return None

    frame_bytes = sample_width * n_channels
    if frame_bytes <= 0:
        return None
    # The header's frame count can exceed the data actually present; decode
    # only the whole frames that were read.
    raw_data = raw_data[: len(raw_data) - len(raw_data) % frame_bytes]

    if sample_width == 1:
        # 8-bit WAV PCM is unsigned with its midpoint at 128.
        samples = (np.frombuffer(raw_data, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif sample_width == 2:
        samples = np.frombuffer(raw_data, dtype="<i2").astype(np.float32) / 32768.0  # normalize to [-1, 1]
    elif sample_width == 3:
        # Assemble little-endian 24-bit samples, then sign-extend.
        triples = np.frombuffer(raw_data, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        values = triples[:, 0] | (triples[:, 1] << 8) | (triples[:, 2] << 16)
        values = values - ((values & 0x800000) << 1)
        samples = values.astype(np.float32) / 8388608.0
    elif sample_width == 4:
        samples = (np.frombuffer(raw_data, dtype="<i4") / 2147483648.0).astype(np.float32)
    else:
        return None

    if n_channels > 1:
        samples = samples[::n_channels]  # take first channel
    return (sample_rate, samples)
# Pre-generate audio for all phonemes at startup so that UI handlers only
# have to look up a ready-made WAV path in AUDIO_CACHE (symbol -> path).
print("Pre-generating phoneme audio...")
AUDIO_CACHE = {}
all_phonemes = dict(CONSONANTS)
all_phonemes.update(VOWELS)
for sym, data in all_phonemes.items():
    espeak_code = data.get("espeak_code")
    if not espeak_code:
        # No eSpeak code for this phoneme, so there is nothing to synthesize.
        continue
    wav_path = generate_phoneme_audio(espeak_code, sym)
    if not wav_path:
        continue
    AUDIO_CACHE[sym] = wav_path
# Every symbol is a unique key, so the cache size equals the success count.
generated_count = len(AUDIO_CACHE)
print(f"Generated audio for {generated_count}/{len(all_phonemes)} phonemes")
| # ============================================================================= | |
| # GRADIO INTERFACE FUNCTIONS | |
| # ============================================================================= | |
def play_phoneme(symbol):
    """Look up a phoneme and return its audio + description.

    Args:
        symbol: An IPA symbol; surrounding whitespace is ignored.

    Returns:
        ``(audio, markdown)``. ``audio`` is whatever ``read_wav_as_numpy``
        returns for the cached WAV, or ``None`` when the symbol is empty,
        unknown, or has no loadable audio. ``markdown`` is the description
        (or a hint/error message).
    """
    if not symbol or not symbol.strip():
        return None, "Type or paste an IPA symbol above"
    symbol = symbol.strip()

    info = get_phoneme_info(symbol)
    if not info:
        return None, f"Symbol '{symbol}' not found in IPA database"

    # Consonants and vowels are described by different feature triples.
    if info["type"] == "consonant":
        feature_keys = ("place", "manner", "voicing")
    else:
        feature_keys = ("height", "backness", "rounding")

    # Build description
    parts = [f"**/{symbol}/** — {info['name']}\n\n"]
    parts.extend(f"- **{key.capitalize()}:** {info[key]}\n" for key in feature_keys)
    if info["spanish"]:
        parts.append(f"\n**Spanish:** Yes — *{info['spanish_example']}*\n")
    else:
        parts.append("\n**Spanish:** No\n")
    parts.append(f"\n**Other languages:** {', '.join(info['languages'])}")

    # Get audio
    wav_path = AUDIO_CACHE.get(symbol)
    audio_data = read_wav_as_numpy(wav_path) if wav_path else None
    if audio_data is None:
        # Covers both "never generated" and "cached file could not be read",
        # so the user is always told why no sound plays.
        parts.append("\n\n*(Audio not available for this phoneme)*")

    return audio_data, "".join(parts)
def compare_phonemes(symbol1, symbol2):
    """Compare two phonemes side by side.

    Args:
        symbol1: First IPA symbol; surrounding whitespace is ignored.
        symbol2: Second IPA symbol; surrounding whitespace is ignored.

    Returns:
        ``(audio1, audio2, markdown)``. Each audio value is the result of
        ``read_wav_as_numpy`` for the cached WAV, or ``None`` if there is
        none. ``markdown`` is a feature table followed by a summary of the
        differences, or a hint/error message when a symbol is missing or
        unknown (both audio values are then ``None``).
    """
    # Strip before the emptiness check so whitespace-only input gets the
    # same hint as empty input (as play_phoneme does).
    symbol1 = (symbol1 or "").strip()
    symbol2 = (symbol2 or "").strip()
    if not symbol1 or not symbol2:
        return None, None, "Select two phonemes to compare"

    info1 = get_phoneme_info(symbol1)
    info2 = get_phoneme_info(symbol2)
    if not info1:
        return None, None, f"Symbol '{symbol1}' not found"
    if not info2:
        return None, None, f"Symbol '{symbol2}' not found"

    # Audio
    path1 = AUDIO_CACHE.get(symbol1)
    path2 = AUDIO_CACHE.get(symbol2)
    audio1 = read_wav_as_numpy(path1) if path1 else None
    audio2 = read_wav_as_numpy(path2) if path2 else None

    # Features are only comparable when both phonemes are of the same kind;
    # the same key list drives both the table rows and the difference list.
    type1, type2 = info1["type"], info2["type"]
    if type1 == type2 == "consonant":
        feature_keys = ("place", "manner", "voicing")
    elif type1 == type2 == "vowel":
        feature_keys = ("height", "backness", "rounding")
    else:
        feature_keys = ()

    # Comparison text
    parts = [
        f"## /{symbol1}/ vs /{symbol2}/\n\n",
        f"| Feature | /{symbol1}/ | /{symbol2}/ |\n",
        "|---------|-------|-------|\n",
    ]
    if feature_keys:
        parts.extend(
            f"| {key.capitalize()} | {info1[key]} | {info2[key]} |\n"
            for key in feature_keys
        )
    else:
        parts.append(f"| Type | {type1} | {type2} |\n")
    parts.append(f"| Name | {info1['name']} | {info2['name']} |\n")
    parts.append(
        f"| Spanish | {'Yes' if info1['spanish'] else 'No'} "
        f"| {'Yes' if info2['spanish'] else 'No'} |\n"
    )

    # Highlight what's different
    differences = [
        f"different {key} ({info1[key]} vs {info2[key]})"
        for key in feature_keys
        if info1[key] != info2[key]
    ]
    if differences:
        parts.append(f"\n**Key differences:** {'; '.join(differences)}")
    elif feature_keys:
        parts.append("\n**These phonemes share the same articulatory features!**")

    return audio1, audio2, "".join(parts)
| # ============================================================================= | |
| # BUILD PHONEME CHOOSER OPTIONS | |
| # ============================================================================= | |
def _choice_labels(phonemes, spanish_only=False):
    """Return sorted "symbol (name)" dropdown labels for a phoneme table.

    With ``spanish_only`` set, only entries whose ``"spanish"`` value is
    truthy are included.
    """
    return sorted(
        f"{sym} ({data['name']})"
        for sym, data in phonemes.items()
        if not spanish_only or data["spanish"]
    )


# Build dropdown choices grouped by type
consonant_choices = _choice_labels(CONSONANTS)
vowel_choices = _choice_labels(VOWELS)
all_choices = consonant_choices + vowel_choices

# Spanish subset for quick access
spanish_consonant_choices = _choice_labels(CONSONANTS, spanish_only=True)
spanish_vowel_choices = _choice_labels(VOWELS, spanish_only=True)

# Common comparison pairs: (first symbol, second symbol, description)
COMPARISON_PAIRS = [
    ("b", "β", "Spanish: /b/ (initial) vs /β/ (between vowels)"),
    ("d", "ð", "Spanish: /d/ (initial) vs /ð/ (between vowels)"),
    ("r", "ɾ", "Spanish: trilled /r/ (perro) vs tap /ɾ/ (pero)"),
    ("s", "θ", "Spanish: /s/ (Latin America) vs /θ/ (Castilian)"),
    ("b", "v", "English distinguishes these; Spanish doesn't"),
    ("i", "u", "Front vs back close vowels"),
    ("e", "o", "Front vs back mid vowels"),
    ("ʃ", "ʒ", "English: ship vs measure"),
    ("p", "b", "Same place/manner, different voicing"),
    ("n", "ɲ", "Spanish: /n/ vs /ñ/"),
]
def extract_symbol(choice_str):
    """Return the IPA symbol at the start of a "symbol (name)" dropdown label.

    Everything from the first " (" onward is discarded and the remainder is
    stripped; an empty or ``None`` label yields "".
    """
    if choice_str:
        symbol, _, _ = choice_str.partition(" (")
        return symbol.strip()
    return ""
def play_from_dropdown(choice):
    """Resolve a dropdown label to its IPA symbol and play that phoneme."""
    return play_phoneme(extract_symbol(choice))
def compare_from_dropdowns(choice1, choice2):
    """Resolve two dropdown labels to IPA symbols and compare those phonemes."""
    first, second = extract_symbol(choice1), extract_symbol(choice2)
    return compare_phonemes(first, second)
def load_comparison_pair(pair_index):
    """Load a pre-set comparison pair.

    Args:
        pair_index: Index into ``COMPARISON_PAIRS``; anything ``int()``
            accepts (e.g. an int or a numeric string).

    Returns:
        The two dropdown labels for the pair. A symbol with no matching
        entry in ``all_choices`` is returned as the bare symbol. Returns
        ``("", "")`` when the index is missing, not a number, or out of
        range.
    """
    if pair_index is None or pair_index == "":
        return "", ""
    try:
        idx = int(pair_index)
    except (TypeError, ValueError):
        # Treat an unparseable index like any other invalid selection.
        return "", ""
    if not 0 <= idx < len(COMPARISON_PAIRS):
        return "", ""

    sym1, sym2, _ = COMPARISON_PAIRS[idx]
    # Find the matching dropdown labels
    label1 = next((c for c in all_choices if c.startswith(sym1 + " (")), sym1)
    label2 = next((c for c in all_choices if c.startswith(sym2 + " (")), sym2)
    return label1, label2
| # ============================================================================= | |
| # GRADIO APP | |
| # ============================================================================= | |
# NOTE(review): the nesting of the layout below was reconstructed from
# flattened source; confirm it matches the intended UI.


def make_pair_loader(pair_idx):
    """Build a zero-argument click handler for the preset pair at *pair_idx*.

    The handler returns the two dropdown labels followed by the two audio
    values and the comparison markdown from ``compare_phonemes``.
    """

    def load_and_compare():
        sym1, sym2, _ = COMPARISON_PAIRS[pair_idx]
        # Same label lookup the dropdown helper uses, so the selection shown
        # matches the pair being played.
        label1, label2 = load_comparison_pair(pair_idx)
        a1, a2, desc = compare_phonemes(sym1, sym2)
        return label1, label2, a1, a2, desc

    return load_and_compare


with gr.Blocks(
    title="IPA Sound Lab",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        "# IPA Sound Lab\n"
        "Hear any IPA sound. Select a phoneme to play its sound, "
        "or compare two phonemes side by side."
    )

    with gr.Tabs():
        # --- Tab 1: Single phoneme explorer ---
        with gr.Tab("Explore Sounds"):
            gr.Markdown("### Select a phoneme to hear it")

            with gr.Row():
                phoneme_dropdown = gr.Dropdown(
                    choices=all_choices,
                    label="Choose a phoneme",
                    value=None,
                    filterable=True,
                )

            with gr.Row():
                gr.Markdown("**Quick picks (Spanish):**")

            # Spanish phonemes, sorted once and split over two button rows.
            spanish_all = sorted(
                sym
                for sym, data in {**CONSONANTS, **VOWELS}.items()
                if data["spanish"]
            )
            spanish_btns = []
            with gr.Row():
                # Show Spanish phonemes as quick-pick buttons (first 15)
                for sym in spanish_all[:15]:
                    btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
                    spanish_btns.append((btn, sym))
            with gr.Row():
                for sym in spanish_all[15:]:
                    btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
                    spanish_btns.append((btn, sym))

            audio_output = gr.Audio(label="Phoneme Audio", type="numpy")
            description_output = gr.Markdown(
                value="Select a phoneme above to hear it and see its description"
            )

            # Wire up dropdown
            phoneme_dropdown.change(
                fn=play_from_dropdown,
                inputs=[phoneme_dropdown],
                outputs=[audio_output, description_output],
            )

            # Wire up Spanish quick-pick buttons; each button feeds its own
            # fixed symbol to play_phoneme through a State input.
            for btn, sym in spanish_btns:
                btn.click(
                    fn=play_phoneme,
                    inputs=[gr.State(sym)],
                    outputs=[audio_output, description_output],
                )

        # --- Tab 2: Comparison mode ---
        with gr.Tab("Compare Sounds"):
            gr.Markdown(
                "### Compare two phonemes\n"
                "Hear two sounds side by side and see what makes them different."
            )
            gr.Markdown("**Try these interesting pairs:**")

            # Preset pair buttons, five per row; each remembers its index
            # into COMPARISON_PAIRS.
            pair_buttons = []
            with gr.Row():
                for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[:5]):
                    btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
                    pair_buttons.append((btn, i))
            with gr.Row():
                for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[5:], start=5):
                    btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
                    pair_buttons.append((btn, i))

            with gr.Row():
                with gr.Column():
                    dropdown1 = gr.Dropdown(
                        choices=all_choices,
                        label="First phoneme",
                        filterable=True,
                    )
                    audio1 = gr.Audio(label="Sound 1", type="numpy")
                with gr.Column():
                    dropdown2 = gr.Dropdown(
                        choices=all_choices,
                        label="Second phoneme",
                        filterable=True,
                    )
                    audio2 = gr.Audio(label="Sound 2", type="numpy")

            compare_btn = gr.Button("Compare", variant="primary")
            comparison_output = gr.Markdown(value="Select two phonemes and click Compare")

            compare_btn.click(
                fn=compare_from_dropdowns,
                inputs=[dropdown1, dropdown2],
                outputs=[audio1, audio2, comparison_output],
            )

            # Wire up pair buttons: one click fills both dropdowns and runs
            # the comparison.
            for btn, idx in pair_buttons:
                btn.click(
                    fn=make_pair_loader(idx),
                    inputs=[],
                    outputs=[dropdown1, dropdown2, audio1, audio2, comparison_output],
                )

    gr.Markdown(
        "---\n"
        "**Audio note:** Sounds are generated using eSpeak-NG (formant synthesis). "
        "They demonstrate the correct articulation but sound synthetic — real human "
        "pronunciation will have more natural variation.\n\n"
        "**Spanish learners:** The green-highlighted phonemes in the Chart Explorer "
        "are the ones you need to master. Use this tool to hear the difference between "
        "similar sounds (like /r/ vs /ɾ/ — *perro* vs *pero*)."
    )

print("IPA Sound Lab ready!")
demo.launch()