Spaces:

profplate
/

ipa-sound-lab

Sleeping

App Files Files Community

ipa-sound-lab / app.py

profplate

Update app.py

6624370 verified 7 days ago

raw

history blame contribute delete

15.5 kB

	"""
	IPA Sound Lab — Click any IPA symbol to hear its sound.

	Uses eSpeak-NG for phoneme audio synthesis. Includes a comparison mode
	to hear two phonemes side by side.
	"""

	import gradio as gr
	import subprocess
	import tempfile
	import os
	import numpy as np
	import struct
	import wave
	from ipa_data import (
	CONSONANTS, VOWELS,
	CONSONANT_PLACES, CONSONANT_MANNERS,
	VOWEL_TRAPEZOID_COORDS,
	get_consonant_at, get_phoneme_info,
	)

	print("IPA Sound Lab loading...")

	# =============================================================================
	# AUDIO GENERATION WITH ESPEAK-NG
	# =============================================================================

	# Scratch directory that holds one generated WAV file per phoneme.
	# tempfile.mkdtemp() creates a new, uniquely named directory each time this
	# module is loaded, so cached files are not reused across restarts.
	AUDIO_CACHE_DIR = tempfile.mkdtemp(prefix="ipa_audio_")


	def generate_phoneme_audio(espeak_code, symbol=""):
	    """Generate (or reuse) a WAV file for a single phoneme using eSpeak-NG.

	    Args:
	        espeak_code: Phoneme in eSpeak-NG notation. It is passed to the
	            ``espeak-ng`` command line wrapped as ``[[code]]``.
	        symbol: IPA symbol the code belongs to. Not used by this function;
	            kept so existing callers keep working.

	    Returns:
	        Path of the WAV file inside ``AUDIO_CACHE_DIR``, or ``None`` when the
	        code is empty, ``espeak-ng`` is not installed or times out, or the
	        file it produced contains no audio data.
	    """
	    if not espeak_code:
	        return None

	    # Path separators and quotes are replaced so the code is usable as a file name.
	    cache_key = espeak_code.replace("/", "_").replace("\\", "_").replace('"', "_")
	    cache_path = os.path.join(AUDIO_CACHE_DIR, f"phoneme_{cache_key}.wav")

	    def has_audio(path):
	        # A canonical WAV header is 44 bytes; a file that small has no samples.
	        try:
	            return os.path.getsize(path) > 44
	        except OSError:
	            return False

	    # Apply the same validity check to cache hits as to fresh output, so a
	    # header-only file left behind by a failed run is regenerated instead of
	    # being handed out as if it were playable audio.
	    if has_audio(cache_path):
	        return cache_path

	    try:
	        # eSpeak-NG phoneme notation: [[phoneme]]
	        subprocess.run(
	            ["espeak-ng", "-v", "en", "-w", cache_path, f"[[{espeak_code}]]"],
	            capture_output=True,
	            timeout=5,
	        )
	    except (subprocess.TimeoutExpired, FileNotFoundError):
	        # Best effort: a missing binary or a hung synthesis means "no audio".
	        return None

	    return cache_path if has_audio(cache_path) else None


	def read_wav_as_numpy(wav_path):
	    """Read a PCM WAV file and return ``(sample_rate, samples)`` for Gradio.

	    Args:
	        wav_path: Path of the WAV file. May be empty or ``None``.

	    Returns:
	        ``None`` when ``wav_path`` is falsy or the file does not exist.
	        Otherwise a tuple of the frame rate and a 1-D ``float32`` array scaled
	        to ``[-1, 1]``. For multi-channel files only the first channel is
	        returned.

	    Raises:
	        ValueError: If the file uses a sample width other than 1-4 bytes.
	    """
	    if not wav_path or not os.path.exists(wav_path):
	        return None

	    with wave.open(wav_path, "rb") as wf:
	        sample_rate = wf.getframerate()
	        n_channels = wf.getnchannels()
	        sample_width = wf.getsampwidth()
	        raw_data = wf.readframes(wf.getnframes())

	    # Decode from the bytes actually read rather than the header's frame count,
	    # and drop a trailing partial sample, so a truncated file still loads.
	    usable = len(raw_data) - len(raw_data) % sample_width
	    pcm = raw_data[:usable]

	    if sample_width == 1:
	        # 8-bit WAV samples are unsigned, with silence at 128.
	        samples = (np.frombuffer(pcm, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
	    elif sample_width == 2:
	        samples = np.frombuffer(pcm, dtype="<i2").astype(np.float32) / 32768.0
	    elif sample_width == 3:
	        # No 24-bit dtype exists: assemble little-endian bytes, then sign-extend.
	        octets = np.frombuffer(pcm, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
	        values = octets[:, 0] | (octets[:, 1] << 8) | (octets[:, 2] << 16)
	        values = np.where(values >= 1 << 23, values - (1 << 24), values)
	        samples = values.astype(np.float32) / 8388608.0
	    elif sample_width == 4:
	        samples = np.frombuffer(pcm, dtype="<i4").astype(np.float32) / 2147483648.0
	    else:
	        raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

	    if n_channels > 1:
	        samples = samples[::n_channels]  # samples are interleaved; keep channel 0

	    return (sample_rate, samples)


	# Pre-generate audio for all phonemes at startup
	print("Pre-generating phoneme audio...")
	# Maps IPA symbol -> path of its generated WAV file. Symbols without an
	# eSpeak code, or whose synthesis failed, are simply absent.
	AUDIO_CACHE = {}
	# Merge both tables into one symbol -> data mapping. This must be dict
	# unpacking: a bare {CONSONANTS, VOWELS} is a set literal of two dicts, which
	# raises TypeError (dicts are unhashable) and has no .items().
	all_phonemes = {**CONSONANTS, **VOWELS}
	generated_count = 0
	for sym, data in all_phonemes.items():
	    if data.get("espeak_code"):
	        wav_path = generate_phoneme_audio(data["espeak_code"], sym)
	        if wav_path:
	            AUDIO_CACHE[sym] = wav_path
	            generated_count += 1

	print(f"Generated audio for {generated_count}/{len(all_phonemes)} phonemes")


	# =============================================================================
	# GRADIO INTERFACE FUNCTIONS
	# =============================================================================

	def play_phoneme(symbol):
	    """Return ``(audio, description)`` for one IPA symbol.

	    ``audio`` is whatever ``read_wav_as_numpy`` yields for the symbol's cached
	    WAV file, or ``None`` when the symbol is empty, unknown, or has no cached
	    audio. ``description`` is the text shown next to the player: a prompt, a
	    "not found" message, or the phoneme's feature summary.
	    """
	    sym = symbol.strip() if symbol else ""
	    if not sym:
	        return None, "Type or paste an IPA symbol above"

	    info = get_phoneme_info(sym)
	    if not info:
	        return None, f"Symbol '{sym}' not found in IPA database"

	    # Consonants and vowels are described by different feature triples.
	    if info["type"] == "consonant":
	        feature_fields = (("Place", "place"), ("Manner", "manner"), ("Voicing", "voicing"))
	    else:
	        feature_fields = (("Height", "height"), ("Backness", "backness"), ("Rounding", "rounding"))

	    parts = [f"/{sym}/ — {info['name']}\n\n"]
	    for label, key in feature_fields:
	        parts.append(f"- {label}: {info[key]}\n")

	    if info["spanish"]:
	        parts.append(f"\nSpanish: Yes — {info['spanish_example']}\n")
	    else:
	        parts.append("\nSpanish: No\n")

	    parts.append(f"\nOther languages: {', '.join(info['languages'])}")

	    # Attach audio when it was pre-generated; otherwise say so in the text.
	    audio_data = None
	    if sym in AUDIO_CACHE:
	        audio_data = read_wav_as_numpy(AUDIO_CACHE[sym])
	    else:
	        parts.append("\n\n(Audio not available for this phoneme)")

	    return audio_data, "".join(parts)


	def compare_phonemes(symbol1, symbol2):
	    """Compare two phonemes: audio for each plus a Markdown feature table.

	    Args:
	        symbol1: First IPA symbol. Surrounding whitespace is ignored.
	        symbol2: Second IPA symbol. Surrounding whitespace is ignored.

	    Returns:
	        ``(audio1, audio2, text)``. Both audio values are ``None`` when a
	        symbol is missing or unknown, in which case ``text`` is a short
	        message. Otherwise each audio value is the result of
	        ``read_wav_as_numpy`` for that symbol's cached file (``None`` if there
	        is none) and ``text`` is a Markdown table followed by a summary of
	        the features that differ.
	    """
	    # Strip before testing for emptiness so whitespace-only input is treated
	    # as "nothing selected" rather than looked up as an empty symbol.
	    symbol1 = (symbol1 or "").strip()
	    symbol2 = (symbol2 or "").strip()
	    if not symbol1 or not symbol2:
	        return None, None, "Select two phonemes to compare"

	    info1 = get_phoneme_info(symbol1)
	    info2 = get_phoneme_info(symbol2)
	    if not info1:
	        return None, None, f"Symbol '{symbol1}' not found"
	    if not info2:
	        return None, None, f"Symbol '{symbol2}' not found"

	    # read_wav_as_numpy returns None for a falsy path, so a plain .get()
	    # already covers phonemes that have no cached audio.
	    audio1 = read_wav_as_numpy(AUDIO_CACHE.get(symbol1))
	    audio2 = read_wav_as_numpy(AUDIO_CACHE.get(symbol2))

	    features_by_type = {
	        "consonant": ("place", "manner", "voicing"),
	        "vowel": ("height", "backness", "rounding"),
	    }
	    same_type = info1["type"] == info2["type"]
	    # Feature-by-feature comparison only makes sense within one phoneme type.
	    features = features_by_type.get(info1["type"], ()) if same_type else ()

	    # Table cells are delimited with plain "|": a backslash-escaped pipe is
	    # both an invalid Python escape sequence and stops Markdown from seeing
	    # the lines as a table.
	    rows = [
	        f"## /{symbol1}/ vs /{symbol2}/\n",
	        f"| Feature | /{symbol1}/ | /{symbol2}/ |",
	        "|---------|-------|-------|",
	    ]
	    if features:
	        rows.extend(
	            f"| {key.capitalize()} | {info1[key]} | {info2[key]} |" for key in features
	        )
	    else:
	        rows.append(f"| Type | {info1['type']} | {info2['type']} |")
	        rows.append(f"| Name | {info1['name']} | {info2['name']} |")

	    spanish1 = "Yes" if info1["spanish"] else "No"
	    spanish2 = "Yes" if info2["spanish"] else "No"
	    rows.append(f"| Spanish | {spanish1} | {spanish2} |")
	    desc = "\n".join(rows) + "\n"

	    # Highlight what's different
	    differences = [
	        f"different {key} ({info1[key]} vs {info2[key]})"
	        for key in features
	        if info1[key] != info2[key]
	    ]
	    if differences:
	        desc += f"\nKey differences: {'; '.join(differences)}"
	    elif same_type:
	        desc += "\nThese phonemes share the same articulatory features!"

	    return audio1, audio2, desc


	# =============================================================================
	# BUILD PHONEME CHOOSER OPTIONS
	# =============================================================================

	# Dropdown labels have the form "<symbol> (<name>)", sorted as plain strings
	# within each group; consonants are listed before vowels.
	consonant_choices = sorted(
	    f"{sym} ({data['name']})" for sym, data in CONSONANTS.items()
	)
	vowel_choices = sorted(
	    f"{sym} ({data['name']})" for sym, data in VOWELS.items()
	)
	all_choices = [*consonant_choices, *vowel_choices]

	# The same labels, restricted to phonemes whose "spanish" entry is truthy.
	spanish_consonant_choices = sorted(
	    f"{sym} ({data['name']})"
	    for sym, data in CONSONANTS.items()
	    if data["spanish"]
	)
	spanish_vowel_choices = sorted(
	    f"{sym} ({data['name']})"
	    for sym, data in VOWELS.items()
	    if data["spanish"]
	)

	# Preset comparison pairs offered as one-click buttons in the "Compare Sounds"
	# tab. Each entry is (first symbol, second symbol, human-readable note).
	# The first five entries are rendered on one button row, the rest on a second.
	COMPARISON_PAIRS = [
	    ("b", "β", "Spanish: /b/ (initial) vs /β/ (between vowels)"),
	    ("d", "ð", "Spanish: /d/ (initial) vs /ð/ (between vowels)"),
	    ("r", "ɾ", "Spanish: trilled /r/ (perro) vs tap /ɾ/ (pero)"),
	    ("s", "θ", "Spanish: /s/ (Latin America) vs /θ/ (Castilian)"),
	    ("b", "v", "English distinguishes these; Spanish doesn't"),
	    ("i", "u", "Front vs back close vowels"),
	    ("e", "o", "Front vs back mid vowels"),
	    ("ʃ", "ʒ", "English: ship vs measure"),
	    ("p", "b", "Same place/manner, different voicing"),
	    ("n", "ɲ", "Spanish: /n/ vs /ñ/"),
	]


	def extract_symbol(choice_str):
	    """Return the IPA symbol at the start of a "<symbol> (<name>)" label.

	    Everything before the first " (" is taken and stripped of whitespace.
	    A falsy input (empty string or ``None``) yields an empty string.
	    """
	    if choice_str:
	        head, _, _ = choice_str.partition(" (")
	        return head.strip()
	    return ""


	def play_from_dropdown(choice):
	    """Dropdown callback: reduce the selected label to its symbol and play it."""
	    return play_phoneme(extract_symbol(choice))


	def compare_from_dropdowns(choice1, choice2):
	    """Compare button callback: reduce both labels to symbols and compare them."""
	    first, second = (extract_symbol(label) for label in (choice1, choice2))
	    return compare_phonemes(first, second)


	def load_comparison_pair(pair_index):
	    """Return the two dropdown labels for a preset comparison pair.

	    Args:
	        pair_index: Index into ``COMPARISON_PAIRS``, as an int or anything
	            ``int()`` accepts.

	    Returns:
	        ``(label1, label2)``: the entries of ``all_choices`` that start with
	        each symbol followed by " (", or the bare symbol when no label
	        matches. ``("", "")`` is returned when the index is ``None``, empty,
	        not convertible to an integer, or out of range.
	    """
	    if pair_index is None or pair_index == "":
	        return "", ""

	    try:
	        idx = int(pair_index)
	    except (TypeError, ValueError):
	        # Treat an unparseable index like every other invalid selection
	        # instead of letting the UI callback crash.
	        return "", ""

	    if not 0 <= idx < len(COMPARISON_PAIRS):
	        return "", ""

	    sym1, sym2, _ = COMPARISON_PAIRS[idx]
	    # Find the matching dropdown labels
	    label1 = next((c for c in all_choices if c.startswith(sym1 + " (")), sym1)
	    label2 = next((c for c in all_choices if c.startswith(sym2 + " (")), sym2)
	    return label1, label2


	# =============================================================================
	# GRADIO APP
	# =============================================================================

	# Layout: a two-tab interface ("Explore Sounds" and "Compare Sounds") followed
	# by a footer note. Components are created and wired inside the Blocks context.
	with gr.Blocks(
	    title="IPA Sound Lab",
	    theme=gr.themes.Soft(),
	) as demo:
	    gr.Markdown(
	        "# IPA Sound Lab\n"
	        "Hear any IPA sound. Select a phoneme to play its sound, "
	        "or compare two phonemes side by side."
	    )

	    with gr.Tabs():
	        # --- Tab 1: Single phoneme explorer ---
	        with gr.Tab("Explore Sounds"):
	            gr.Markdown("### Select a phoneme to hear it")

	            with gr.Row():
	                phoneme_dropdown = gr.Dropdown(
	                    choices=all_choices,
	                    label="Choose a phoneme",
	                    value=None,
	                    filterable=True,
	                )

	            with gr.Row():
	                gr.Markdown("Quick picks (Spanish):")

	            # Symbols flagged as Spanish across both tables. The tables are
	            # merged with dict unpacking; a bare {CONSONANTS, VOWELS} would be
	            # a set of dicts and raise TypeError.
	            spanish_all = [
	                sym
	                for sym, data in {**CONSONANTS, **VOWELS}.items()
	                if data["spanish"]
	            ]
	            spanish_sorted = sorted(spanish_all)
	            spanish_btns = []  # (button, symbol) pairs, wired up below

	            # Show Spanish phonemes as quick-pick buttons (first 15)
	            with gr.Row():
	                for sym in spanish_sorted[:15]:
	                    btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
	                    spanish_btns.append((btn, sym))

	            # Remaining Spanish phonemes go on a second row.
	            with gr.Row():
	                for sym in spanish_sorted[15:]:
	                    btn = gr.Button(f"/{sym}/", size="sm", min_width=50)
	                    spanish_btns.append((btn, sym))

	            audio_output = gr.Audio(label="Phoneme Audio", type="numpy")
	            description_output = gr.Markdown(value="Select a phoneme above to hear it and see its description")

	            # Wire up dropdown
	            phoneme_dropdown.change(
	                fn=play_from_dropdown,
	                inputs=[phoneme_dropdown],
	                outputs=[audio_output, description_output],
	            )

	            # Wire up Spanish quick-pick buttons; each button passes its own
	            # symbol to play_phoneme through a dedicated gr.State input.
	            for btn, sym in spanish_btns:
	                btn.click(
	                    fn=play_phoneme,
	                    inputs=[gr.State(sym)],
	                    outputs=[audio_output, description_output],
	                )

	        # --- Tab 2: Comparison mode ---
	        with gr.Tab("Compare Sounds"):
	            gr.Markdown(
	                "### Compare two phonemes\n"
	                "Hear two sounds side by side and see what makes them different."
	            )

	            gr.Markdown("Try these interesting pairs:")
	            pair_buttons = []  # (button, index into COMPARISON_PAIRS)
	            with gr.Row():
	                for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[:5]):
	                    btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
	                    pair_buttons.append((btn, i))
	            with gr.Row():
	                for i, (sym1, sym2, label) in enumerate(COMPARISON_PAIRS[5:], start=5):
	                    btn = gr.Button(f"/{sym1}/ vs /{sym2}/", size="sm")
	                    pair_buttons.append((btn, i))

	            with gr.Row():
	                with gr.Column():
	                    dropdown1 = gr.Dropdown(
	                        choices=all_choices,
	                        label="First phoneme",
	                        filterable=True,
	                    )
	                    audio1 = gr.Audio(label="Sound 1", type="numpy")
	                with gr.Column():
	                    dropdown2 = gr.Dropdown(
	                        choices=all_choices,
	                        label="Second phoneme",
	                        filterable=True,
	                    )
	                    audio2 = gr.Audio(label="Sound 2", type="numpy")

	            compare_btn = gr.Button("Compare", variant="primary")
	            comparison_output = gr.Markdown(value="Select two phonemes and click Compare")

	            compare_btn.click(
	                fn=compare_from_dropdowns,
	                inputs=[dropdown1, dropdown2],
	                outputs=[audio1, audio2, comparison_output],
	            )

	            def make_pair_loader(pair_idx):
	                """Build a no-argument click handler bound to one preset pair.

	                A factory is used so each handler keeps its own index instead
	                of sharing the loop variable.
	                """
	                def load_and_compare():
	                    sym1, sym2, _ = COMPARISON_PAIRS[pair_idx]
	                    label1 = next((c for c in all_choices if c.startswith(sym1 + " (")), sym1)
	                    label2 = next((c for c in all_choices if c.startswith(sym2 + " (")), sym2)
	                    a1, a2, desc = compare_phonemes(sym1, sym2)
	                    return label1, label2, a1, a2, desc
	                return load_and_compare

	            # Wire up pair buttons: one click fills both dropdowns and runs
	            # the comparison.
	            for btn, idx in pair_buttons:
	                btn.click(
	                    fn=make_pair_loader(idx),
	                    inputs=[],
	                    outputs=[dropdown1, dropdown2, audio1, audio2, comparison_output],
	                )

	    gr.Markdown(
	        "---\n"
	        "Audio note: Sounds are generated using eSpeak-NG (formant synthesis). "
	        "They demonstrate the correct articulation but sound synthetic — real human "
	        "pronunciation will have more natural variation.\n\n"
	        "Spanish learners: The green-highlighted phonemes in the Chart Explorer "
	        "are the ones you need to master. Use this tool to hear the difference between "
	        "similar sounds (like /r/ vs /ɾ/ — perro vs pero)."
	    )


	print("IPA Sound Lab ready!")
	# Start the Gradio server with default settings. There is no __main__ guard,
	# so this runs whenever the module is executed or imported.
	demo.launch()