# Hugging Face Space: profplate/ipa-sound-lab (status when captured: Sleeping)
# File size: 15,525 Bytes

"""
IPA Sound Lab — Click any IPA symbol to hear its sound.

Uses eSpeak-NG for phoneme audio synthesis. Includes a comparison mode
to hear two phonemes side by side.
"""

import gradio as gr
import subprocess
import tempfile
import os
import numpy as np
import struct
import wave
from ipa_data import (
    CONSONANTS, VOWELS,
    CONSONANT_PLACES, CONSONANT_MANNERS,
    VOWEL_TRAPEZOID_COORDS,
    get_consonant_at, get_phoneme_info,
)

print("IPA Sound Lab loading...")

# =============================================================================
# AUDIO GENERATION WITH ESPEAK-NG
# =============================================================================

# Cache directory for generated audio files.
# tempfile.mkdtemp() creates a brand-new directory each time this module is
# executed, so cached WAVs are not shared between runs. Nothing visible in
# this file removes the directory afterwards.
AUDIO_CACHE_DIR = tempfile.mkdtemp(prefix="ipa_audio_")


def generate_phoneme_audio(espeak_code, symbol=""):
    """Generate (or reuse) a WAV file for a single phoneme using eSpeak-NG.

    Args:
        espeak_code: Phoneme code that is wrapped in ``[[...]]`` and passed to
            the ``espeak-ng`` command line tool.
        symbol: IPA symbol the code belongs to. Not used by this function;
            kept so existing callers that pass it keep working.

    Returns:
        Path of a WAV file inside ``AUDIO_CACHE_DIR`` that holds more than a
        bare header, or ``None`` when ``espeak_code`` is empty, ``espeak-ng``
        cannot be started or times out, or no audio data was written.
    """
    if not espeak_code:
        return None

    cache_key = espeak_code.replace("/", "_").replace("\\", "_").replace('"', "_")
    cache_path = os.path.join(AUDIO_CACHE_DIR, f"phoneme_{cache_key}.wav")

    def has_audio():
        # A canonical PCM WAV header is 44 bytes; a file at or below that size
        # carries no samples. A missing file simply counts as "no audio".
        try:
            return os.path.getsize(cache_path) > 44
        except OSError:
            return False

    # Validate the cached file instead of trusting mere existence: a failed or
    # interrupted earlier run can leave an empty/header-only file behind.
    if has_audio():
        return cache_path

    try:
        # eSpeak-NG phoneme notation: [[phoneme]]
        subprocess.run(
            ["espeak-ng", "-v", "en", "-w", cache_path, f"[[{espeak_code}]]"],
            capture_output=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # Best effort: a missing/unlaunchable binary or a hung synthesis just
        # means this phoneme has no audio.
        pass

    if has_audio():
        return cache_path

    # Drop unusable output so the next call retries instead of finding a
    # stale file in the cache directory.
    try:
        os.remove(cache_path)
    except OSError:
        pass

    return None


def read_wav_as_numpy(wav_path):
    """Read a PCM WAV file and return ``(sample_rate, samples)`` for Gradio.

    Args:
        wav_path: Path to the WAV file. Falsy or non-existent paths are
            tolerated.

    Returns:
        ``None`` if ``wav_path`` is falsy, does not exist, or uses a sample
        width other than 1-4 bytes. Otherwise a tuple of the frame rate and a
        1-D ``float32`` array scaled to ``[-1, 1)``. For multi-channel files
        only the first channel is returned.

    Raises:
        wave.Error: Propagated from ``wave.open`` when the file is not a WAV
            file the ``wave`` module can parse.
    """
    if not wav_path or not os.path.exists(wav_path):
        return None

    with wave.open(wav_path, "rb") as wf:
        sample_rate = wf.getframerate()
        n_channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        raw_data = wf.readframes(wf.getnframes())

    if sample_width not in (1, 2, 3, 4):
        return None

    # Decode from the bytes actually read (not the header's frame count) and
    # ignore a trailing partial sample, so a truncated file still plays.
    raw_data = raw_data[: len(raw_data) - len(raw_data) % sample_width]

    if sample_width == 1:
        # 8-bit WAV PCM is unsigned with the zero level at 128.
        samples = (np.frombuffer(raw_data, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif sample_width == 2:
        samples = np.frombuffer(raw_data, dtype="<i2").astype(np.float32) / 32768.0
    elif sample_width == 3:
        # No native 24-bit dtype: assemble little-endian bytes, then sign-extend.
        octets = np.frombuffer(raw_data, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        ints = octets[:, 0] | (octets[:, 1] << 8) | (octets[:, 2] << 16)
        ints = np.where(ints >= (1 << 23), ints - (1 << 24), ints)
        samples = ints.astype(np.float32) / 8388608.0
    else:
        samples = np.frombuffer(raw_data, dtype="<i4").astype(np.float32) / 2147483648.0

    if n_channels > 1:
        samples = samples[::n_channels]  # frames are interleaved; keep channel 0

    return (sample_rate, samples)


# Warm the audio cache once at startup: every phoneme that has an eSpeak code
# is synthesized now, and AUDIO_CACHE maps its IPA symbol to the WAV path.
print("Pre-generating phoneme audio...")
all_phonemes = {**CONSONANTS, **VOWELS}
AUDIO_CACHE = {}
for sym, data in all_phonemes.items():
    espeak_code = data.get("espeak_code")
    if not espeak_code:
        continue  # no eSpeak code recorded for this symbol
    wav_path = generate_phoneme_audio(espeak_code, sym)
    if wav_path:
        AUDIO_CACHE[sym] = wav_path

# Symbols are unique dict keys, so the cache size equals the success count.
generated_count = len(AUDIO_CACHE)

print(f"Generated audio for {generated_count}/{len(all_phonemes)} phonemes")


# =============================================================================
# GRADIO INTERFACE FUNCTIONS
# =============================================================================

def play_phoneme(symbol):
    """Return ``(audio, markdown_description)`` for one IPA symbol.

    ``audio`` is whatever ``read_wav_as_numpy`` yields for the cached WAV, or
    ``None`` when the input is blank, the symbol is unknown, or no audio was
    cached for it. The description is a Markdown string (or a short hint /
    error message in the ``None`` cases).
    """
    cleaned = symbol.strip() if symbol else ""
    if not cleaned:
        return None, "Type or paste an IPA symbol above"

    info = get_phoneme_info(cleaned)
    if not info:
        return None, f"Symbol '{cleaned}' not found in IPA database"

    # (bullet label, info key) pairs; anything that is not a consonant is
    # described with the vowel features.
    if info["type"] == "consonant":
        feature_rows = (("Place", "place"), ("Manner", "manner"), ("Voicing", "voicing"))
    else:
        feature_rows = (("Height", "height"), ("Backness", "backness"), ("Rounding", "rounding"))

    parts = [f"**/{cleaned}/** — {info['name']}\n\n"]
    parts.extend(f"- **{label}:** {info[key]}\n" for label, key in feature_rows)

    if info["spanish"]:
        parts.append(f"\n**Spanish:** Yes — *{info['spanish_example']}*\n")
    else:
        parts.append("\n**Spanish:** No\n")

    parts.append(f"\n**Other languages:** {', '.join(info['languages'])}")

    audio_data = None
    if cleaned in AUDIO_CACHE:
        audio_data = read_wav_as_numpy(AUDIO_CACHE[cleaned])
    else:
        parts.append("\n\n*(Audio not available for this phoneme)*")

    return audio_data, "".join(parts)


def compare_phonemes(symbol1, symbol2):
    """Compare two phonemes side by side.

    Args:
        symbol1: First IPA symbol (surrounding whitespace is ignored).
        symbol2: Second IPA symbol (surrounding whitespace is ignored).

    Returns:
        ``(audio1, audio2, markdown)``. Each audio value is the result of
        ``read_wav_as_numpy`` for the cached WAV, or ``None`` when nothing is
        cached for that symbol. When either symbol is blank or unknown, both
        audio values are ``None`` and the third item is a short message.
    """
    # Strip before the emptiness check so whitespace-only input gets the same
    # prompt as empty input instead of "Symbol '' not found".
    symbol1 = (symbol1 or "").strip()
    symbol2 = (symbol2 or "").strip()
    if not symbol1 or not symbol2:
        return None, None, "Select two phonemes to compare"

    info1 = get_phoneme_info(symbol1)
    info2 = get_phoneme_info(symbol2)

    if not info1:
        return None, None, f"Symbol '{symbol1}' not found"
    if not info2:
        return None, None, f"Symbol '{symbol2}' not found"

    # Audio
    audio1 = read_wav_as_numpy(AUDIO_CACHE[symbol1]) if symbol1 in AUDIO_CACHE else None
    audio2 = read_wav_as_numpy(AUDIO_CACHE[symbol2]) if symbol2 in AUDIO_CACHE else None

    consonant_features = ("place", "manner", "voicing")
    vowel_features = ("height", "backness", "rounding")
    same_type = info1["type"] == info2["type"]

    # Comparison table
    parts = [
        f"## /{symbol1}/ vs /{symbol2}/\n\n",
        f"| Feature | /{symbol1}/ | /{symbol2}/ |\n",
        "|---------|-------|-------|\n",
    ]

    if same_type and info1["type"] == "consonant":
        table_features = consonant_features
    elif same_type and info1["type"] == "vowel":
        table_features = vowel_features
    else:
        table_features = None

    if table_features is not None:
        parts.extend(
            f"| {key.capitalize()} | {info1[key]} | {info2[key]} |\n"
            for key in table_features
        )
    else:
        # Features are not comparable across types; show type and name only.
        parts.append(f"| Type | {info1['type']} | {info2['type']} |\n")
        parts.append(f"| Name | {info1['name']} | {info2['name']} |\n")

    parts.append(
        f"| Spanish | {'Yes' if info1['spanish'] else 'No'} "
        f"| {'Yes' if info2['spanish'] else 'No'} |\n"
    )

    # Highlight what's different (only meaningful when both share a type;
    # any non-consonant type is compared on the vowel features).
    differences = []
    if same_type:
        compared = consonant_features if info1["type"] == "consonant" else vowel_features
        differences = [
            f"different {key} ({info1[key]} vs {info2[key]})"
            for key in compared
            if info1[key] != info2[key]
        ]

    if differences:
        parts.append(f"\n**Key differences:** {'; '.join(differences)}")
    elif same_type:
        parts.append("\n**These phonemes share the same articulatory features!**")

    return audio1, audio2, "".join(parts)


# =============================================================================
# BUILD PHONEME CHOOSER OPTIONS
# =============================================================================

def _choice_label(sym, data):
    """Format one dropdown entry as ``<symbol>  (<name>)``.

    The two spaces before the parenthesis are the separator that
    ``extract_symbol`` splits on.
    """
    return f"{sym}  ({data['name']})"


# Dropdown choices: consonants first, then vowels, each group sorted by label.
consonant_choices = sorted(_choice_label(sym, data) for sym, data in CONSONANTS.items())
vowel_choices = sorted(_choice_label(sym, data) for sym, data in VOWELS.items())
all_choices = consonant_choices + vowel_choices

# Spanish-only subsets of the same labels, for quick access.
spanish_consonant_choices = sorted(
    _choice_label(sym, data) for sym, data in CONSONANTS.items() if data["spanish"]
)
spanish_vowel_choices = sorted(
    _choice_label(sym, data) for sym, data in VOWELS.items() if data["spanish"]
)

# Common comparison pairs.
# Each entry is (first_symbol, second_symbol, description). The symbols drive
# the "/x/ vs /y/" preset buttons in the Compare tab; the description is
# unpacked but not displayed by the code in this file.
# Order matters: the UI lays out entries [0:5] in one button row and [5:] in
# a second row, and buttons refer to entries by index.
COMPARISON_PAIRS = [
    ("b", "β", "Spanish: /b/ (initial) vs /β/ (between vowels)"),
    ("d", "ð", "Spanish: /d/ (initial) vs /ð/ (between vowels)"),
    ("r", "ɾ", "Spanish: trilled /r/ (perro) vs tap /ɾ/ (pero)"),
    ("s", "θ", "Spanish: /s/ (Latin America) vs /θ/ (Castilian)"),
    ("b", "v", "English distinguishes these; Spanish doesn't"),
    ("i", "u", "Front vs back close vowels"),
    ("e", "o", "Front vs back mid vowels"),
    ("ʃ", "ʒ", "English: ship vs measure"),
    ("p", "b", "Same place/manner, different voicing"),
    ("n", "ɲ", "Spanish: /n/ vs /ñ/"),
]


def extract_symbol(choice_str):
    """Return the IPA symbol at the start of a dropdown label.

    Labels look like ``"<symbol>  (<name>)"``. Everything before the first
    ``"  ("`` separator is returned with surrounding whitespace removed; a
    label without the separator is returned stripped as-is. Falsy input
    yields an empty string.
    """
    if choice_str:
        symbol, _sep, _name = choice_str.partition("  (")
        return symbol.strip()
    return ""


def play_from_dropdown(choice):
    """Resolve a dropdown label to its IPA symbol and play that phoneme.

    Returns whatever ``play_phoneme`` returns for the extracted symbol.
    """
    return play_phoneme(extract_symbol(choice))


def compare_from_dropdowns(choice1, choice2):
    """Resolve two dropdown labels to IPA symbols and compare them.

    Returns whatever ``compare_phonemes`` returns for the extracted symbols.
    """
    first, second = (extract_symbol(choice) for choice in (choice1, choice2))
    return compare_phonemes(first, second)


def load_comparison_pair(pair_index):
    """Return the dropdown labels for a pre-set comparison pair.

    Args:
        pair_index: Index into ``COMPARISON_PAIRS``; an int or anything
            ``int()`` accepts (e.g. a numeric string).

    Returns:
        ``(label1, label2)`` — the entries of ``all_choices`` that start with
        each symbol of the pair, falling back to the bare symbol when no
        label matches. ``("", "")`` when ``pair_index`` is ``None``, empty,
        not convertible to an integer, or out of range.
    """
    if pair_index is None or pair_index == "":
        return "", ""

    try:
        idx = int(pair_index)
    except (TypeError, ValueError):
        # Treat unparseable input like any other invalid selection.
        return "", ""

    if not 0 <= idx < len(COMPARISON_PAIRS):
        return "", ""

    def label_for(sym):
        # Match on "<symbol>  (" so a symbol that is a prefix of another
        # symbol cannot pick up the wrong label.
        prefix = sym + "  ("
        return next((c for c in all_choices if c.startswith(prefix)), sym)

    sym1, sym2, _ = COMPARISON_PAIRS[idx]
    return label_for(sym1), label_for(sym2)


# =============================================================================
# GRADIO APP
# =============================================================================

# Build the two-tab UI: "Explore Sounds" (one phoneme at a time) and
# "Compare Sounds" (two phonemes side by side). Components are created in
# display order, so statement order inside each container is significant.
with gr.Blocks(
    title="IPA Sound Lab",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        "# IPA Sound Lab\n"
        "Hear any IPA sound. Select a phoneme to play its sound, "
        "or compare two phonemes side by side."
    )

    with gr.Tabs():
        # --- Tab 1: Single phoneme explorer ---
        with gr.Tab("Explore Sounds"):
            gr.Markdown("### Select a phoneme to hear it")

            with gr.Row():
                phoneme_dropdown = gr.Dropdown(
                    choices=all_choices,
                    label="Choose a phoneme",
                    value=None,
                    filterable=True,
                )

            with gr.Row():
                gr.Markdown("**Quick picks (Spanish):**")

            # Spanish phonemes as quick-pick buttons, sorted by symbol and
            # laid out in two rows: the first 15, then the remainder.
            spanish_all = [
                sym
                for sym, data in {**CONSONANTS, **VOWELS}.items()
                if data["spanish"]
            ]
            spanish_sorted = sorted(spanish_all)
            spanish_btns = []
            for row_symbols in (spanish_sorted[:15], spanish_sorted[15:]):
                with gr.Row():
                    for sym in row_symbols:
                        btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
                        spanish_btns.append((btn, sym))

            audio_output = gr.Audio(label="Phoneme Audio", type="numpy")
            description_output = gr.Markdown(value="Select a phoneme above to hear it and see its description")

            # Wire up dropdown
            phoneme_dropdown.change(
                fn=play_from_dropdown,
                inputs=[phoneme_dropdown],
                outputs=[audio_output, description_output],
            )

            # Wire up Spanish quick-pick buttons. Each button gets its own
            # gr.State holding its symbol, which is passed to play_phoneme.
            for btn, sym in spanish_btns:
                btn.click(
                    fn=play_phoneme,
                    inputs=[gr.State(sym)],
                    outputs=[audio_output, description_output],
                )

        # --- Tab 2: Comparison mode ---
        with gr.Tab("Compare Sounds"):
            gr.Markdown(
                "### Compare two phonemes\n"
                "Hear two sounds side by side and see what makes them different."
            )

            gr.Markdown("**Try these interesting pairs:**")
            # Preset buttons in two rows (pairs 0-4, then 5+); each button
            # remembers the index of its pair in COMPARISON_PAIRS.
            pair_buttons = []
            for first_index, row_pairs in ((0, COMPARISON_PAIRS[:5]), (5, COMPARISON_PAIRS[5:])):
                with gr.Row():
                    for i, (sym1, sym2, _) in enumerate(row_pairs, start=first_index):
                        btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
                        pair_buttons.append((btn, i))

            with gr.Row():
                with gr.Column():
                    dropdown1 = gr.Dropdown(
                        choices=all_choices,
                        label="First phoneme",
                        filterable=True,
                    )
                    audio1 = gr.Audio(label="Sound 1", type="numpy")
                with gr.Column():
                    dropdown2 = gr.Dropdown(
                        choices=all_choices,
                        label="Second phoneme",
                        filterable=True,
                    )
                    audio2 = gr.Audio(label="Sound 2", type="numpy")

            compare_btn = gr.Button("Compare", variant="primary")
            comparison_output = gr.Markdown(value="Select two phonemes and click Compare")

            compare_btn.click(
                fn=compare_from_dropdowns,
                inputs=[dropdown1, dropdown2],
                outputs=[audio1, audio2, comparison_output],
            )

            def make_pair_loader(pair_idx):
                """Return a no-argument click handler bound to one preset pair.

                The factory captures ``pair_idx`` per button, avoiding the
                late-binding problem a closure over the loop variable has.
                """
                def load_and_compare():
                    sym1, sym2, _ = COMPARISON_PAIRS[pair_idx]
                    # Dropdown labels for the pair, then the comparison itself.
                    label1, label2 = load_comparison_pair(pair_idx)
                    a1, a2, desc = compare_phonemes(sym1, sym2)
                    return label1, label2, a1, a2, desc
                return load_and_compare

            # Wire up pair buttons: fill both dropdowns and show the result.
            for btn, idx in pair_buttons:
                btn.click(
                    fn=make_pair_loader(idx),
                    inputs=[],
                    outputs=[dropdown1, dropdown2, audio1, audio2, comparison_output],
                )

    gr.Markdown(
        "---\n"
        "**Audio note:** Sounds are generated using eSpeak-NG (formant synthesis). "
        "They demonstrate the correct articulation but sound synthetic — real human "
        "pronunciation will have more natural variation.\n\n"
        "**Spanish learners:** The green-highlighted phonemes in the Chart Explorer "
        "are the ones you need to master. Use this tool to hear the difference between "
        "similar sounds (like /r/ vs /ɾ/ — *perro* vs *pero*)."
    )


# Runs unconditionally when this module is executed (there is no
# ``if __name__ == "__main__"`` guard): report readiness, then start the
# Gradio app with default launch settings.
print("IPA Sound Lab ready!")
demo.launch()