# ipa-sound-lab / app.py
# profplate's picture
# Update app.py
# 6624370 verified
"""
IPA Sound Lab — Click any IPA symbol to hear its sound.
Uses eSpeak-NG for phoneme audio synthesis. Includes a comparison mode
to hear two phonemes side by side.
"""
import gradio as gr
import subprocess
import tempfile
import os
import numpy as np
import struct
import wave
from ipa_data import (
CONSONANTS, VOWELS,
CONSONANT_PLACES, CONSONANT_MANNERS,
VOWEL_TRAPEZOID_COORDS,
get_consonant_at, get_phoneme_info,
)
# Startup banner, printed before the phoneme audio is pre-generated further down.
print("IPA Sound Lab loading...")
# =============================================================================
# AUDIO GENERATION WITH ESPEAK-NG
# =============================================================================
# Cache directory for generated audio files
AUDIO_CACHE_DIR = tempfile.mkdtemp(prefix="ipa_audio_")


def generate_phoneme_audio(espeak_code, symbol=""):
    """Generate a WAV file for a single IPA phoneme using eSpeak-NG.

    Results are cached on disk in ``AUDIO_CACHE_DIR``. A cached file is only
    reused when it actually contains audio, and output left behind by a
    failed synthesis attempt is deleted so it cannot be mistaken for a hit.

    Args:
        espeak_code: Phoneme code; it is wrapped in ``[[...]]`` before being
            passed to the ``espeak-ng`` executable.
        symbol: IPA symbol the code belongs to. Accepted for the caller's
            convenience; it does not influence the result.

    Returns:
        Path of the WAV file, or ``None`` when ``espeak_code`` is empty or no
        usable audio could be produced (executable missing, timeout, empty
        output).
    """
    if not espeak_code:
        return None

    cache_key = espeak_code.replace("/", "_").replace("\\", "_").replace('"', "_")
    cache_path = os.path.join(AUDIO_CACHE_DIR, f"phoneme_{cache_key}.wav")

    def has_audio():
        # 44 bytes is the size of a minimal PCM WAV header, so a file at or
        # below that size carries no samples.
        try:
            return os.path.getsize(cache_path) > 44
        except OSError:
            return False

    if has_audio():
        return cache_path

    try:
        # eSpeak-NG phoneme notation: [[phoneme]]
        subprocess.run(
            ["espeak-ng", "-v", "en", "-w", cache_path, f"[[{espeak_code}]]"],
            capture_output=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing executable (FileNotFoundError) as well as
        # e.g. permission problems; all of them mean "no audio".
        synthesized = False
    else:
        synthesized = has_audio()

    if synthesized:
        return cache_path

    # Remove header-only or partial output so later calls retry cleanly.
    try:
        os.remove(cache_path)
    except OSError:
        pass
    return None
def read_wav_as_numpy(wav_path):
    """Read a WAV file and return (sample_rate, numpy_array) for Gradio.

    Supports 8-, 16-, 24- and 32-bit integer PCM. Samples are returned as
    float32 scaled to [-1, 1); for multi-channel files only the first
    channel is kept.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        ``(sample_rate, samples)``, or ``None`` when the path is empty, the
        file does not exist, it cannot be parsed as WAV, or its sample width
        is not one of the supported ones.
    """
    if not wav_path or not os.path.exists(wav_path):
        return None

    try:
        with wave.open(wav_path, "rb") as wf:
            sample_rate = wf.getframerate()
            n_channels = wf.getnchannels()
            sample_width = wf.getsampwidth()
            raw_data = wf.readframes(wf.getnframes())
    except (wave.Error, EOFError, OSError):
        return None

    if sample_width not in (1, 2, 3, 4):
        return None

    # A truncated data chunk may end in a partial sample; drop it.
    raw_data = raw_data[: len(raw_data) - len(raw_data) % sample_width]

    if sample_width == 1:
        # 8-bit WAV samples are unsigned and centred on 128.
        samples = (np.frombuffer(raw_data, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif sample_width == 2:
        samples = np.frombuffer(raw_data, dtype="<i2").astype(np.float32) / 32768.0
    elif sample_width == 3:
        # No native 24-bit dtype: assemble little-endian bytes, then sign-extend.
        octets = np.frombuffer(raw_data, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        values = octets[:, 0] | (octets[:, 1] << 8) | (octets[:, 2] << 16)
        values = np.where(values >= (1 << 23), values - (1 << 24), values)
        samples = values.astype(np.float32) / 8388608.0
    else:
        samples = np.frombuffer(raw_data, dtype="<i4").astype(np.float32) / 2147483648.0

    if n_channels > 1:
        samples = samples[::n_channels]  # frames are interleaved; take first channel
    return (sample_rate, samples)
# Pre-generate audio for all phonemes at startup
print("Pre-generating phoneme audio...")
AUDIO_CACHE = {}  # IPA symbol -> path of its generated WAV file
all_phonemes = {**CONSONANTS, **VOWELS}
for sym, data in all_phonemes.items():
    espeak_code = data.get("espeak_code")
    if not espeak_code:
        # No eSpeak code for this phoneme, so there is nothing to synthesize.
        continue
    wav_path = generate_phoneme_audio(espeak_code, sym)
    if wav_path:
        AUDIO_CACHE[sym] = wav_path
# One cache entry is stored per successfully generated phoneme.
generated_count = len(AUDIO_CACHE)
print(f"Generated audio for {generated_count}/{len(all_phonemes)} phonemes")
# =============================================================================
# GRADIO INTERFACE FUNCTIONS
# =============================================================================
def play_phoneme(symbol):
    """Look up a phoneme and return its audio + description.

    Args:
        symbol: IPA symbol; surrounding whitespace is ignored.

    Returns:
        ``(audio, markdown)``. ``audio`` is whatever ``read_wav_as_numpy``
        returns for the cached file, or ``None`` when the symbol is empty,
        unknown, has no cached audio, or its cached file cannot be read.
        ``markdown`` is the description, or a hint/error message.
    """
    if not symbol or not symbol.strip():
        return None, "Type or paste an IPA symbol above"
    symbol = symbol.strip()

    info = get_phoneme_info(symbol)
    if not info:
        return None, f"Symbol '{symbol}' not found in IPA database"

    # Build description
    if info["type"] == "consonant":
        features = ("place", "manner", "voicing")
    else:
        features = ("height", "backness", "rounding")
    parts = [f"**/{symbol}/** — {info['name']}\n\n"]
    parts.extend(f"- **{feature.capitalize()}:** {info[feature]}\n" for feature in features)
    if info["spanish"]:
        parts.append(f"\n**Spanish:** Yes — *{info['spanish_example']}*\n")
    else:
        parts.append("\n**Spanish:** No\n")
    parts.append(f"\n**Other languages:** {', '.join(info['languages'])}")

    # Get audio
    wav_path = AUDIO_CACHE.get(symbol)
    audio_data = read_wav_as_numpy(wav_path) if wav_path else None
    if audio_data is None:
        # Also reached when the cached file has vanished or is unreadable,
        # so the user is never left with a silent player and no explanation.
        parts.append("\n\n*(Audio not available for this phoneme)*")
    return audio_data, "".join(parts)
def compare_phonemes(symbol1, symbol2):
    """Compare two phonemes side by side.

    Args:
        symbol1: First IPA symbol; surrounding whitespace is ignored.
        symbol2: Second IPA symbol; surrounding whitespace is ignored.

    Returns:
        ``(audio1, audio2, markdown)``. Each audio value is the result of
        ``read_wav_as_numpy`` for the cached file, or ``None`` when nothing is
        cached. ``markdown`` is a feature table plus a summary of the
        differences, or a hint/error message (then both audio values are
        ``None``).
    """
    # Strip before the emptiness check so whitespace-only input is treated as
    # "nothing selected" rather than as an unknown symbol ''.
    symbol1 = (symbol1 or "").strip()
    symbol2 = (symbol2 or "").strip()
    if not symbol1 or not symbol2:
        return None, None, "Select two phonemes to compare"

    info1 = get_phoneme_info(symbol1)
    info2 = get_phoneme_info(symbol2)
    if not info1:
        return None, None, f"Symbol '{symbol1}' not found"
    if not info2:
        return None, None, f"Symbol '{symbol2}' not found"

    # Audio
    audio1 = read_wav_as_numpy(AUDIO_CACHE[symbol1]) if symbol1 in AUDIO_CACHE else None
    audio2 = read_wav_as_numpy(AUDIO_CACHE[symbol2]) if symbol2 in AUDIO_CACHE else None

    consonant_features = ("place", "manner", "voicing")
    vowel_features = ("height", "backness", "rounding")
    same_type = info1["type"] == info2["type"]

    # Comparison text
    rows = [
        f"## /{symbol1}/ vs /{symbol2}/\n\n",
        f"| Feature | /{symbol1}/ | /{symbol2}/ |\n",
        "|---------|-------|-------|\n",
    ]
    if same_type and info1["type"] == "consonant":
        table_features = consonant_features
    elif same_type and info1["type"] == "vowel":
        table_features = vowel_features
    else:
        table_features = None
    if table_features is None:
        rows.append(f"| Type | {info1['type']} | {info2['type']} |\n")
    else:
        rows.extend(
            f"| {feature.capitalize()} | {info1[feature]} | {info2[feature]} |\n"
            for feature in table_features
        )
    # NOTE(review): Name/Spanish rows are emitted for every pair; the source
    # had lost its indentation, so confirm they were not meant to be limited
    # to mixed-type pairs.
    rows.append(f"| Name | {info1['name']} | {info2['name']} |\n")
    rows.append(
        f"| Spanish | {'Yes' if info1['spanish'] else 'No'} | {'Yes' if info2['spanish'] else 'No'} |\n"
    )

    # Highlight what's different
    differences = []
    if same_type:
        compared = consonant_features if info1["type"] == "consonant" else vowel_features
        differences = [
            f"different {feature} ({info1[feature]} vs {info2[feature]})"
            for feature in compared
            if info1[feature] != info2[feature]
        ]
    if differences:
        rows.append(f"\n**Key differences:** {'; '.join(differences)}")
    elif same_type:
        rows.append("\n**These phonemes share the same articulatory features!**")
    return audio1, audio2, "".join(rows)
# =============================================================================
# BUILD PHONEME CHOOSER OPTIONS
# =============================================================================
# Build dropdown choices grouped by type
def _choice_labels(phonemes, spanish_only=False):
    """Return the sorted "symbol (name)" labels for a phoneme table.

    With ``spanish_only`` set, only entries whose ``"spanish"`` value is
    truthy are included.
    """
    return sorted(
        f"{sym} ({data['name']})"
        for sym, data in phonemes.items()
        if not spanish_only or data["spanish"]
    )


consonant_choices = _choice_labels(CONSONANTS)
vowel_choices = _choice_labels(VOWELS)
all_choices = consonant_choices + vowel_choices
# Spanish subset for quick access
spanish_consonant_choices = _choice_labels(CONSONANTS, spanish_only=True)
spanish_vowel_choices = _choice_labels(VOWELS, spanish_only=True)
# Common comparison pairs
# Each entry is (first symbol, second symbol, description). The position in
# this list is the index used to identify a preset pair elsewhere in the file.
COMPARISON_PAIRS = [
("b", "β", "Spanish: /b/ (initial) vs /β/ (between vowels)"),
("d", "ð", "Spanish: /d/ (initial) vs /ð/ (between vowels)"),
("r", "ɾ", "Spanish: trilled /r/ (perro) vs tap /ɾ/ (pero)"),
("s", "θ", "Spanish: /s/ (Latin America) vs /θ/ (Castilian)"),
("b", "v", "English distinguishes these; Spanish doesn't"),
("i", "u", "Front vs back close vowels"),
("e", "o", "Front vs back mid vowels"),
("ʃ", "ʒ", "English: ship vs measure"),
("p", "b", "Same place/manner, different voicing"),
("n", "ɲ", "Spanish: /n/ vs /ñ/"),
]
def extract_symbol(choice_str):
    """Return the IPA symbol at the start of a "symbol (name)" dropdown label.

    Everything from the first " (" onwards is discarded and the remainder is
    stripped of whitespace. Falsy input yields an empty string.
    """
    if choice_str:
        symbol, _, _ = choice_str.partition(" (")
        return symbol.strip()
    return ""
def play_from_dropdown(choice):
    """Resolve a dropdown label to its IPA symbol and play that phoneme."""
    return play_phoneme(extract_symbol(choice))
def compare_from_dropdowns(choice1, choice2):
    """Resolve two dropdown labels to IPA symbols and compare those phonemes."""
    return compare_phonemes(extract_symbol(choice1), extract_symbol(choice2))
def load_comparison_pair(pair_index):
    """Load a pre-set comparison pair.

    Args:
        pair_index: Index into ``COMPARISON_PAIRS``, as an int or anything
            ``int()`` accepts.

    Returns:
        ``(label1, label2)``: the dropdown labels for the pair's two symbols,
        falling back to the bare symbol when no label starts with it. Returns
        ``("", "")`` when the index is missing, not a number, or out of range.
    """
    if pair_index is None or pair_index == "":
        return "", ""
    try:
        idx = int(pair_index)
    except (TypeError, ValueError):
        # Treat a non-numeric index like any other invalid index.
        return "", ""
    if not 0 <= idx < len(COMPARISON_PAIRS):
        return "", ""

    def label_for(sym):
        # Find the matching dropdown label
        prefix = sym + " ("
        return next((c for c in all_choices if c.startswith(prefix)), sym)

    sym1, sym2, _ = COMPARISON_PAIRS[idx]
    return label_for(sym1), label_for(sym2)
# =============================================================================
# GRADIO APP
# =============================================================================
# Gradio UI: an "Explore Sounds" tab for single phonemes and a "Compare
# Sounds" tab for pairs, followed by a footer note.
# NOTE(review): the source had lost its indentation; widget nesting below is
# reconstructed from the labels and wiring — confirm against the intended layout.
with gr.Blocks(
    title="IPA Sound Lab",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        "# IPA Sound Lab\n"
        "Hear any IPA sound. Select a phoneme to play its sound, "
        "or compare two phonemes side by side."
    )

    with gr.Tabs():
        # --- Tab 1: Single phoneme explorer ---
        with gr.Tab("Explore Sounds"):
            gr.Markdown("### Select a phoneme to hear it")
            with gr.Row():
                phoneme_dropdown = gr.Dropdown(
                    choices=all_choices,
                    label="Choose a phoneme",
                    value=None,
                    filterable=True,
                )
            with gr.Row():
                gr.Markdown("**Quick picks (Spanish):**")

            spanish_all = [
                sym for sym, data in {**CONSONANTS, **VOWELS}.items() if data["spanish"]
            ]
            spanish_sorted = sorted(spanish_all)
            spanish_btns = []
            # Show Spanish phonemes as quick-pick buttons: the first 15 in one
            # row, the remainder in a second row.
            for row_symbols in (spanish_sorted[:15], spanish_sorted[15:]):
                with gr.Row():
                    for sym in row_symbols:
                        btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
                        spanish_btns.append((btn, sym))

            audio_output = gr.Audio(label="Phoneme Audio", type="numpy")
            description_output = gr.Markdown(
                value="Select a phoneme above to hear it and see its description"
            )

            # Wire up dropdown
            phoneme_dropdown.change(
                fn=play_from_dropdown,
                inputs=[phoneme_dropdown],
                outputs=[audio_output, description_output],
            )

            # Wire up Spanish quick-pick buttons; each button passes its own
            # symbol to play_phoneme through a dedicated State input.
            for btn, sym in spanish_btns:
                btn.click(
                    fn=play_phoneme,
                    inputs=[gr.State(sym)],
                    outputs=[audio_output, description_output],
                )

        # --- Tab 2: Comparison mode ---
        with gr.Tab("Compare Sounds"):
            gr.Markdown(
                "### Compare two phonemes\n"
                "Hear two sounds side by side and see what makes them different."
            )
            gr.Markdown("**Try these interesting pairs:**")

            pair_buttons = []
            # Two rows of preset buttons; each remembers its index in COMPARISON_PAIRS.
            for offset, row_pairs in ((0, COMPARISON_PAIRS[:5]), (5, COMPARISON_PAIRS[5:])):
                with gr.Row():
                    for i, (sym1, sym2, _label) in enumerate(row_pairs, start=offset):
                        btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
                        pair_buttons.append((btn, i))

            with gr.Row():
                with gr.Column():
                    dropdown1 = gr.Dropdown(
                        choices=all_choices,
                        label="First phoneme",
                        filterable=True,
                    )
                    audio1 = gr.Audio(label="Sound 1", type="numpy")
                with gr.Column():
                    dropdown2 = gr.Dropdown(
                        choices=all_choices,
                        label="Second phoneme",
                        filterable=True,
                    )
                    audio2 = gr.Audio(label="Sound 2", type="numpy")

            compare_btn = gr.Button("Compare", variant="primary")
            comparison_output = gr.Markdown(value="Select two phonemes and click Compare")

            compare_btn.click(
                fn=compare_from_dropdowns,
                inputs=[dropdown1, dropdown2],
                outputs=[audio1, audio2, comparison_output],
            )

            def make_pair_loader(pair_idx):
                """Return a click handler bound to one preset pair.

                The factory fixes ``pair_idx`` per button, so handlers created
                in the loop below do not all share the loop's last index.
                """

                def load_and_compare():
                    pair_sym1, pair_sym2, _ = COMPARISON_PAIRS[pair_idx]
                    label1, label2 = load_comparison_pair(pair_idx)
                    a1, a2, desc = compare_phonemes(pair_sym1, pair_sym2)
                    return label1, label2, a1, a2, desc

                return load_and_compare

            # Wire up pair buttons
            for btn, idx in pair_buttons:
                btn.click(
                    fn=make_pair_loader(idx),
                    inputs=[],
                    outputs=[dropdown1, dropdown2, audio1, audio2, comparison_output],
                )

    gr.Markdown(
        "---\n"
        "**Audio note:** Sounds are generated using eSpeak-NG (formant synthesis). "
        "They demonstrate the correct articulation but sound synthetic — real human "
        "pronunciation will have more natural variation.\n\n"
        "**Spanish learners:** The green-highlighted phonemes in the Chart Explorer "
        "are the ones you need to master. Use this tool to hear the difference between "
        "similar sounds (like /r/ vs /ɾ/ — *perro* vs *pero*)."
    )
# The interface is fully built at this point; announce it and launch the app.
print("IPA Sound Lab ready!")
demo.launch()