""" IPA Sound Lab — Click any IPA symbol to hear its sound. Uses eSpeak-NG for phoneme audio synthesis. Includes a comparison mode to hear two phonemes side by side. """ import gradio as gr import subprocess import tempfile import os import numpy as np import struct import wave from ipa_data import ( CONSONANTS, VOWELS, CONSONANT_PLACES, CONSONANT_MANNERS, VOWEL_TRAPEZOID_COORDS, get_consonant_at, get_phoneme_info, ) print("IPA Sound Lab loading...") # ============================================================================= # AUDIO GENERATION WITH ESPEAK-NG # ============================================================================= # Cache directory for generated audio files AUDIO_CACHE_DIR = tempfile.mkdtemp(prefix="ipa_audio_") def generate_phoneme_audio(espeak_code, symbol=""): """Generate a WAV file for a single IPA phoneme using eSpeak-NG.""" if not espeak_code: return None cache_key = espeak_code.replace("/", "_").replace("\\", "_").replace('"', "_") cache_path = os.path.join(AUDIO_CACHE_DIR, f"phoneme_{cache_key}.wav") if os.path.exists(cache_path): return cache_path try: # eSpeak-NG phoneme notation: [[phoneme]] phoneme_input = f"[[{espeak_code}]]" subprocess.run( ["espeak-ng", "-v", "en", "-w", cache_path, phoneme_input], capture_output=True, timeout=5, ) if os.path.exists(cache_path) and os.path.getsize(cache_path) > 44: return cache_path except (subprocess.TimeoutExpired, FileNotFoundError): pass return None def read_wav_as_numpy(wav_path): """Read a WAV file and return (sample_rate, numpy_array) for Gradio.""" if not wav_path or not os.path.exists(wav_path): return None with wave.open(wav_path, "rb") as wf: sample_rate = wf.getframerate() n_frames = wf.getnframes() n_channels = wf.getnchannels() sample_width = wf.getsampwidth() raw_data = wf.readframes(n_frames) if sample_width == 2: fmt = f"<{n_frames * n_channels}h" samples = np.array(struct.unpack(fmt, raw_data), dtype=np.float32) samples = samples / 32768.0 # normalize to [-1, 1] else: samples = np.frombuffer(raw_data, dtype=np.int16).astype(np.float32) / 32768.0 if n_channels > 1: samples = samples[::n_channels] # take first channel return (sample_rate, samples) # Pre-generate audio for all phonemes at startup print("Pre-generating phoneme audio...") AUDIO_CACHE = {} all_phonemes = {**CONSONANTS, **VOWELS} generated_count = 0 for sym, data in all_phonemes.items(): if data.get("espeak_code"): wav_path = generate_phoneme_audio(data["espeak_code"], sym) if wav_path: AUDIO_CACHE[sym] = wav_path generated_count += 1 print(f"Generated audio for {generated_count}/{len(all_phonemes)} phonemes") # ============================================================================= # GRADIO INTERFACE FUNCTIONS # ============================================================================= def play_phoneme(symbol): """Look up a phoneme and return its audio + description.""" if not symbol or not symbol.strip(): return None, "Type or paste an IPA symbol above" symbol = symbol.strip() info = get_phoneme_info(symbol) if not info: return None, f"Symbol '{symbol}' not found in IPA database" # Build description if info["type"] == "consonant": desc = f"**/{symbol}/** — {info['name']}\n\n" desc += f"- **Place:** {info['place']}\n" desc += f"- **Manner:** {info['manner']}\n" desc += f"- **Voicing:** {info['voicing']}\n" else: desc = f"**/{symbol}/** — {info['name']}\n\n" desc += f"- **Height:** {info['height']}\n" desc += f"- **Backness:** {info['backness']}\n" desc += f"- **Rounding:** {info['rounding']}\n" if info["spanish"]: desc += f"\n**Spanish:** Yes — *{info['spanish_example']}*\n" else: desc += "\n**Spanish:** No\n" desc += f"\n**Other languages:** {', '.join(info['languages'])}" # Get audio audio_data = None if symbol in AUDIO_CACHE: audio_data = read_wav_as_numpy(AUDIO_CACHE[symbol]) else: desc += "\n\n*(Audio not available for this phoneme)*" return audio_data, desc def compare_phonemes(symbol1, symbol2): """Compare two phonemes side by side.""" if not symbol1 or not symbol2: return None, None, "Select two phonemes to compare" symbol1 = symbol1.strip() symbol2 = symbol2.strip() info1 = get_phoneme_info(symbol1) info2 = get_phoneme_info(symbol2) if not info1: return None, None, f"Symbol '{symbol1}' not found" if not info2: return None, None, f"Symbol '{symbol2}' not found" # Audio audio1 = read_wav_as_numpy(AUDIO_CACHE.get(symbol1, "")) if symbol1 in AUDIO_CACHE else None audio2 = read_wav_as_numpy(AUDIO_CACHE.get(symbol2, "")) if symbol2 in AUDIO_CACHE else None # Comparison text desc = f"## /{symbol1}/ vs /{symbol2}/\n\n" desc += f"| Feature | /{symbol1}/ | /{symbol2}/ |\n" desc += f"|---------|-------|-------|\n" if info1["type"] == "consonant" and info2["type"] == "consonant": desc += f"| Place | {info1['place']} | {info2['place']} |\n" desc += f"| Manner | {info1['manner']} | {info2['manner']} |\n" desc += f"| Voicing | {info1['voicing']} | {info2['voicing']} |\n" elif info1["type"] == "vowel" and info2["type"] == "vowel": desc += f"| Height | {info1['height']} | {info2['height']} |\n" desc += f"| Backness | {info1['backness']} | {info2['backness']} |\n" desc += f"| Rounding | {info1['rounding']} | {info2['rounding']} |\n" else: desc += f"| Type | {info1['type']} | {info2['type']} |\n" desc += f"| Name | {info1['name']} | {info2['name']} |\n" desc += f"| Spanish | {'Yes' if info1['spanish'] else 'No'} | {'Yes' if info2['spanish'] else 'No'} |\n" # Highlight what's different differences = [] if info1["type"] == info2["type"]: if info1["type"] == "consonant": if info1["place"] != info2["place"]: differences.append(f"different place ({info1['place']} vs {info2['place']})") if info1["manner"] != info2["manner"]: differences.append(f"different manner ({info1['manner']} vs {info2['manner']})") if info1["voicing"] != info2["voicing"]: differences.append(f"different voicing ({info1['voicing']} vs {info2['voicing']})") else: if info1["height"] != info2["height"]: differences.append(f"different height ({info1['height']} vs {info2['height']})") if info1["backness"] != info2["backness"]: differences.append(f"different backness ({info1['backness']} vs {info2['backness']})") if info1["rounding"] != info2["rounding"]: differences.append(f"different rounding ({info1['rounding']} vs {info2['rounding']})") if differences: desc += f"\n**Key differences:** {'; '.join(differences)}" elif info1["type"] == info2["type"]: desc += "\n**These phonemes share the same articulatory features!**" return audio1, audio2, desc # ============================================================================= # BUILD PHONEME CHOOSER OPTIONS # ============================================================================= # Build dropdown choices grouped by type consonant_choices = sorted( [f"{sym} ({data['name']})" for sym, data in CONSONANTS.items()], key=lambda x: x ) vowel_choices = sorted( [f"{sym} ({data['name']})" for sym, data in VOWELS.items()], key=lambda x: x ) all_choices = consonant_choices + vowel_choices # Spanish subset for quick access spanish_consonant_choices = sorted( [f"{sym} ({data['name']})" for sym, data in CONSONANTS.items() if data["spanish"]], key=lambda x: x ) spanish_vowel_choices = sorted( [f"{sym} ({data['name']})" for sym, data in VOWELS.items() if data["spanish"]], key=lambda x: x ) # Common comparison pairs COMPARISON_PAIRS = [ ("b", "β", "Spanish: /b/ (initial) vs /β/ (between vowels)"), ("d", "ð", "Spanish: /d/ (initial) vs /ð/ (between vowels)"), ("r", "ɾ", "Spanish: trilled /r/ (perro) vs tap /ɾ/ (pero)"), ("s", "θ", "Spanish: /s/ (Latin America) vs /θ/ (Castilian)"), ("b", "v", "English distinguishes these; Spanish doesn't"), ("i", "u", "Front vs back close vowels"), ("e", "o", "Front vs back mid vowels"), ("ʃ", "ʒ", "English: ship vs measure"), ("p", "b", "Same place/manner, different voicing"), ("n", "ɲ", "Spanish: /n/ vs /ñ/"), ] def extract_symbol(choice_str): """Extract the IPA symbol from a dropdown choice string.""" if not choice_str: return "" return choice_str.split(" (")[0].strip() def play_from_dropdown(choice): """Play a phoneme selected from the dropdown.""" symbol = extract_symbol(choice) return play_phoneme(symbol) def compare_from_dropdowns(choice1, choice2): """Compare two phonemes from dropdown selections.""" sym1 = extract_symbol(choice1) sym2 = extract_symbol(choice2) return compare_phonemes(sym1, sym2) def load_comparison_pair(pair_index): """Load a pre-set comparison pair.""" if pair_index is None or pair_index == "": return "", "" idx = int(pair_index) if 0 <= idx < len(COMPARISON_PAIRS): sym1, sym2, _ = COMPARISON_PAIRS[idx] # Find the matching dropdown labels label1 = next((c for c in all_choices if c.startswith(sym1 + " (")), sym1) label2 = next((c for c in all_choices if c.startswith(sym2 + " (")), sym2) return label1, label2 return "", "" # ============================================================================= # GRADIO APP # ============================================================================= with gr.Blocks( title="IPA Sound Lab", theme=gr.themes.Soft(), ) as demo: gr.Markdown( "# IPA Sound Lab\n" "Hear any IPA sound. Select a phoneme to play its sound, " "or compare two phonemes side by side." ) with gr.Tabs(): # --- Tab 1: Single phoneme explorer --- with gr.Tab("Explore Sounds"): gr.Markdown("### Select a phoneme to hear it") with gr.Row(): phoneme_dropdown = gr.Dropdown( choices=all_choices, label="Choose a phoneme", value=None, filterable=True, ) with gr.Row(): gr.Markdown("**Quick picks (Spanish):**") with gr.Row(): spanish_btns = [] spanish_all = list( {sym: data for sym, data in {**CONSONANTS, **VOWELS}.items() if data["spanish"]}.keys() ) # Show Spanish phonemes as quick-pick buttons (first 15) for sym in sorted(spanish_all)[:15]: info = get_phoneme_info(sym) btn = gr.Button(f"/{sym}/", size="sm", min_width=50) spanish_btns.append((btn, sym)) with gr.Row(): for sym in sorted(spanish_all)[15:]: info = get_phoneme_info(sym) btn = gr.Button(f"/{sym}/", size="sm", min_width=50) spanish_btns.append((btn, sym)) audio_output = gr.Audio(label="Phoneme Audio", type="numpy") description_output = gr.Markdown(value="Select a phoneme above to hear it and see its description") # Wire up dropdown phoneme_dropdown.change( fn=play_from_dropdown, inputs=[phoneme_dropdown], outputs=[audio_output, description_output], ) # Wire up Spanish quick-pick buttons for btn, sym in spanish_btns: btn.click( fn=play_phoneme, inputs=[gr.State(sym)], outputs=[audio_output, description_output], ) # --- Tab 2: Comparison mode --- with gr.Tab("Compare Sounds"): gr.Markdown( "### Compare two phonemes\n" "Hear two sounds side by side and see what makes them different." ) gr.Markdown("**Try these interesting pairs:**") pair_buttons = [] with gr.Row(): for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[:5]): btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm") pair_buttons.append((btn, i)) with gr.Row(): for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[5:], start=5): btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm") pair_buttons.append((btn, i)) with gr.Row(): with gr.Column(): dropdown1 = gr.Dropdown( choices=all_choices, label="First phoneme", filterable=True, ) audio1 = gr.Audio(label="Sound 1", type="numpy") with gr.Column(): dropdown2 = gr.Dropdown( choices=all_choices, label="Second phoneme", filterable=True, ) audio2 = gr.Audio(label="Sound 2", type="numpy") compare_btn = gr.Button("Compare", variant="primary") comparison_output = gr.Markdown(value="Select two phonemes and click Compare") compare_btn.click( fn=compare_from_dropdowns, inputs=[dropdown1, dropdown2], outputs=[audio1, audio2, comparison_output], ) # Wire up pair buttons for btn, idx in pair_buttons: def make_pair_loader(pair_idx): def load_and_compare(): sym1, sym2, _ = COMPARISON_PAIRS[pair_idx] label1 = next((c for c in all_choices if c.startswith(sym1 + " (")), sym1) label2 = next((c for c in all_choices if c.startswith(sym2 + " (")), sym2) a1, a2, desc = compare_phonemes(sym1, sym2) return label1, label2, a1, a2, desc return load_and_compare btn.click( fn=make_pair_loader(idx), inputs=[], outputs=[dropdown1, dropdown2, audio1, audio2, comparison_output], ) gr.Markdown( "---\n" "**Audio note:** Sounds are generated using eSpeak-NG (formant synthesis). " "They demonstrate the correct articulation but sound synthetic — real human " "pronunciation will have more natural variation.\n\n" "**Spanish learners:** The green-highlighted phonemes in the Chart Explorer " "are the ones you need to master. Use this tool to hear the difference between " "similar sounds (like /r/ vs /ɾ/ — *perro* vs *pero*)." ) print("IPA Sound Lab ready!") demo.launch()