Spaces:

yoavzamir
/

speechkid-api

Paused

App Files Files Community

speechkid-api / simulate.py

Nanny7

🚀 Force deploy new standalone AI engine

f3c6fc0 4 months ago

Raw

History Blame Contribute Delete

16.8 kB

	"""
	Speech Analysis MVP - Real-Time Simulation Script

	This script provides an interactive terminal interface for testing
	the speech analysis system in real-time using your microphone.

	Author: Speech AI Project
	"""

	import os
	import sys
	import time
	import threading

	import numpy as np
	import sounddevice as sd
	from scipy.io import wavfile

	# Import our score engine
	from score_engine import score_pronunciation


	# =============================================================================
	# Configuration
	# =============================================================================

	# Audio capture settings.
	SAMPLE_RATE = 44100  # sample rate handed to the recorder and the WAV writer
	RECORD_DURATION = 3  # seconds
	TEMP_FILE = "temp_test.wav"  # scratch file each recording is written to
	AUDIO_DIR = "audio_data"  # directory passed on to score_pronunciation

	# Words offered in the practice menu, in display order.
	AVAILABLE_WORDS = [
	    "shalom",
	    "shemesh",
	    "shir",
	    "shshshsh",
	    "shuk",
	    "geshem",
	]

	# Colors for terminal output (ANSI escape codes)
	class Colors:
	    """Namespace of ANSI escape sequences used to style terminal output."""

	    # Colour codes
	    HEADER = '\033[95m'
	    BLUE = '\033[94m'
	    CYAN = '\033[96m'
	    GREEN = '\033[92m'
	    YELLOW = '\033[93m'
	    RED = '\033[91m'

	    # Text attributes
	    BOLD = '\033[1m'
	    UNDERLINE = '\033[4m'

	    # Emitted after styled text to switch the styling back off
	    END = '\033[0m'


	# =============================================================================
	# Terminal UI Functions
	# =============================================================================

	def clear_screen():
	    """Wipe the terminal by running the platform's clear command."""
	    # Windows (os.name == 'nt') uses `cls`; everything else uses `clear`.
	    if os.name == 'nt':
	        command = 'cls'
	    else:
	        command = 'clear'
	    os.system(command)


	def print_header():
	    """Print the application banner in bold cyan."""
	    banner_rows = (
	        "╔═══════════════════════════════════════════════════════════════╗",
	        "║ 🎤 SPEECH ANALYSIS - 'SH' SOUND TRAINER 🎤 ║",
	        "╚═══════════════════════════════════════════════════════════════╝",
	    )

	    # Switch styling on, emit the banner, then reset the terminal style.
	    print(f"\n{Colors.CYAN}{Colors.BOLD}")
	    for row in banner_rows:
	        print(row)
	    print(f"{Colors.END}")


	def print_box(title: str, content: list, color: str = Colors.CYAN):
	    """Print a titled box around *content* on the terminal.

	    Args:
	        title: Heading, centred in the top section of the box.
	        content: Lines of text for the body; may be empty.
	        color: ANSI escape sequence applied to the border.
	    """
	    # `default=0` lets an empty `content` fall back to the title width
	    # instead of max() raising ValueError on an empty sequence.
	    widest_line = max((len(line) for line in content), default=0)
	    max_len = max(len(title), widest_line) + 4

	    print(f"\n{color}┌{'─' * max_len}┐{Colors.END}")
	    print(f"{color}│{Colors.BOLD} {title.center(max_len - 2)} {Colors.END}{color}│{Colors.END}")
	    print(f"{color}├{'─' * max_len}┤{Colors.END}")

	    for line in content:
	        # Pad each row to the inner width so the right border lines up.
	        padding = max_len - len(line) - 2
	        print(f"{color}│{Colors.END} {line}{' ' * padding} {color}│{Colors.END}")

	    print(f"{color}└{'─' * max_len}┘{Colors.END}")


	def print_word_menu():
	    """Print the numbered list of practice words followed by the exit option."""
	    # Entries are numbered from 1; 0 is reserved for "Exit".
	    entries = [
	        f" {Colors.BOLD}{i}.{Colors.END} {word}"
	        for i, word in enumerate(AVAILABLE_WORDS, 1)
	    ]

	    print(f"\n{Colors.YELLOW}Available words to practice:{Colors.END}\n")
	    for entry in entries:
	        print(entry)

	    print(f"\n {Colors.BOLD}0.{Colors.END} Exit")
	    print()


	def get_word_choice() -> "str | None":
	    """Prompt until the user picks a valid menu entry.

	    Returns:
	        The selected word from AVAILABLE_WORDS, or None when the user
	        enters 0 to exit.
	    """
	    while True:
	        # Strip surrounding whitespace so " 0 " exits, matching the way
	        # int() already tolerates padding around word numbers.
	        choice = input(
	            f"{Colors.CYAN}Enter your choice (1-{len(AVAILABLE_WORDS)}): {Colors.END}"
	        ).strip()

	        if choice == '0':
	            return None

	        try:
	            idx = int(choice) - 1
	        except ValueError:
	            print(f"{Colors.RED}Please enter a number.{Colors.END}")
	            continue

	        if 0 <= idx < len(AVAILABLE_WORDS):
	            return AVAILABLE_WORDS[idx]

	        print(f"{Colors.RED}Invalid choice. Please try again.{Colors.END}")


	def countdown_display(seconds: int):
	    """Display a countdown before recording starts.

	    Args:
	        seconds: Number to count down from. Each tick pauses 0.7 s, so
	            the countdown runs somewhat faster than wall-clock seconds.
	    """
	    print(f"\n{Colors.YELLOW}Get ready to say the word...{Colors.END}")

	    # Honour the caller's `seconds` rather than a hard-coded 3.
	    for i in range(seconds, 0, -1):
	        print(f" {Colors.BOLD}{i}...{Colors.END}", end='', flush=True)
	        time.sleep(0.7)

	    print(f"\n\n{Colors.GREEN}{Colors.BOLD}🔴 RECORDING NOW! Speak clearly...{Colors.END}\n")


	def recording_progress(duration: int):
	    """Animate a progress bar for *duration* seconds, then announce processing.

	    The bar is redrawn in place (carriage return, no newline) ten times
	    per second of `duration`.
	    """
	    bar_length = 40
	    total_ticks = duration * 10

	    for i in range(total_ticks):
	        progress = (i + 1) / total_ticks
	        filled = int(bar_length * progress)
	        # Solid cells for the elapsed share, shaded cells for the rest.
	        bar = ('█' * filled).ljust(bar_length, '░')
	        remaining = duration - i / 10

	        print(f"\r [{Colors.GREEN}{bar}{Colors.END}] {remaining:.1f}s remaining", end='', flush=True)
	        time.sleep(0.1)

	    print(f"\r [{Colors.GREEN}{'█' * bar_length}{Colors.END}] Done! ")
	    print(f"\n{Colors.CYAN}Processing your recording...{Colors.END}")


	# =============================================================================
	# Audio Recording Functions
	# =============================================================================

	def record_audio(duration: int = RECORD_DURATION, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
	    """
	    Capture mono audio from the default input while showing a progress bar.

	    Args:
	        duration: Recording duration in seconds
	        sample_rate: Audio sample rate

	    Returns:
	        The recording flattened to a one-dimensional float32 array
	    """
	    frame_count = int(duration * sample_rate)

	    # sd.rec() is used without blocking here, so the progress bar below
	    # runs while the capture is still in flight.
	    capture = sd.rec(frame_count, samplerate=sample_rate, channels=1, dtype='float32')
	    recording_progress(duration)

	    # Block until the capture has actually finished before reading it.
	    sd.wait()

	    return capture.flatten()


	def save_audio(audio: np.ndarray, filepath: str, sample_rate: int = SAMPLE_RATE):
	    """Save audio to a 16-bit PCM WAV file.

	    Args:
	        audio: Float samples nominally in [-1.0, 1.0] (record_audio
	            produces float32 data in this form).
	        filepath: Destination path of the WAV file.
	        sample_rate: Sample rate written into the WAV header.
	    """
	    # Clip first: a sample beyond +/-1.0 would exceed the int16 range once
	    # scaled and wrap around instead of saturating.
	    clipped = np.clip(audio, -1.0, 1.0)

	    # Convert to 16-bit PCM
	    audio_int16 = (clipped * 32767).astype(np.int16)
	    wavfile.write(filepath, sample_rate, audio_int16)


	# =============================================================================
	# Result Display Functions
	# =============================================================================

	# Display labels for the `error_type` codes found in a scoring result.
	# Codes missing from this table are shown upper-cased with underscores
	# replaced by spaces (see display_result).
	ERROR_TYPE_LABELS = {
	    # --- Diagnosis codes from the newer phoneme pipeline ---
	    "CORRECT": "CORRECT (תקין)",
	    "ERROR_OMISSION": "OMISSION (השמטה)",
	    "ERROR_S_SUBSTITUTION": "S SUBSTITUTION (החלפה ל-ס)",
	    "ERROR_LATERAL_LISP": "LATERAL LISP (ש' צידית/רטובה)",
	    "UNCLEAR_DISTORTION": "UNCLEAR (עיוות)",

	    # --- Codes from the legacy pipeline ---
	    "wet_ch": "WET CH (ש' רטובה)",
	    "s_substitution": "S SUBSTITUTION (החלפה ל-ס)",
	    "lateral_sh": "LATERAL SH (ש' צידית)",
	    "omission": "OMISSION (השמטה)",
	    "distortion": "DISTORTION (עיוות)",
	}


	def display_result(result: dict, word: str):
	    """Render a scoring result for *word* on the terminal.

	    Args:
	        result: Scoring dict. Must contain 'score', 'status', 'error_type',
	            'feedback' and 'details'; 'pipeline' and 'alignment' are
	            optional.
	        word: The practised word, shown upper-cased in the heading.

	    Raises:
	        KeyError: If one of the required keys is missing from `result`.
	    """
	    score = result['score']
	    status = result['status']
	    error_type = result['error_type']
	    feedback = result['feedback']
	    details = result['details']

	    def print_band_energies():
	        # Spectral lines that both pipelines list in the same order.
	        print(f" • Centroid: {details.get('centroid_hz', 0)} Hz")
	        print(f" • S-Band Energy: {details.get('s_band_percent', 0.0):.1f}%")
	        print(f" • Lateral Energy: {details.get('lateral_percent', 0.0):.1f}%")
	        print(f" • Sub-3kHz Energy: {details.get('sub3k_percent', 0.0):.1f}%")

	    # Score tiers, highest first: (minimum score, colour, marker).
	    # Anything below the lowest tier falls back to red.
	    score_color, emoji = Colors.RED, "✗"
	    for floor, tier_color, tier_emoji in (
	        (90, Colors.GREEN, "🌟"),
	        (70, Colors.YELLOW, "✓"),
	        (50, Colors.YELLOW, "⚠"),
	    ):
	        if score >= floor:
	            score_color, emoji = tier_color, tier_emoji
	            break

	    passed = status == "PASS"
	    status_color = Colors.GREEN if passed else Colors.RED

	    # Heading
	    print(f"\n{Colors.BOLD}{'═' * 60}{Colors.END}")
	    print(f"{Colors.BOLD} RESULTS FOR: '{word.upper()}'{Colors.END}")
	    print(f"{Colors.BOLD}{'═' * 60}{Colors.END}")

	    # Score
	    print(f"\n {emoji} {Colors.BOLD}SCORE:{Colors.END} {score_color}{Colors.BOLD}{score}/100{Colors.END}")

	    # Status: anything other than "PASS" is reported as a failure.
	    if not passed:
	        print(f" ✗ {Colors.RED}{Colors.BOLD}STATUS: FAIL{Colors.END}")
	    else:
	        print(f" ✓ {Colors.GREEN}{Colors.BOLD}STATUS: PASS{Colors.END}")

	    # Error type is shown whenever one is reported, even on a pass
	    # (then in yellow rather than red).
	    if error_type != "none":
	        error_color = Colors.YELLOW if status != "FAIL" else Colors.RED
	        fallback_label = error_type.replace('_', ' ').upper()
	        error_display = ERROR_TYPE_LABELS.get(error_type, fallback_label)
	        print(f"\n {error_color}{Colors.BOLD}⚠ ERROR TYPE: {error_display}{Colors.END}")

	    # Feedback is printed as-is on its own line (it may be Hebrew/RTL text).
	    print(f"\n {status_color}{Colors.BOLD}💬 FEEDBACK:{Colors.END}")
	    print(f" {feedback}")

	    # Which pipeline produced the result; absent means legacy.
	    pipeline = result.get('pipeline', 'legacy')
	    uses_phoneme = pipeline == "phoneme"
	    pipeline_label = "Phoneme (Wav2Vec2)" if uses_phoneme else "Legacy (DTW)"
	    print(f"\n {Colors.CYAN}Pipeline: {pipeline_label}{Colors.END}")

	    # Technical details differ per pipeline.
	    print(f"\n {Colors.CYAN}Technical Details:{Colors.END}")
	    if not uses_phoneme:
	        print(f" • Distance: {details.get('distance', 0.0):.4f} (raw: {details.get('raw_distance', 0.0):.4f}, modifier: x{details.get('modifier', 1.0):.1f})")
	        print_band_energies()
	        print(f" • Spectral Flatness: {details.get('spectral_flatness', 0.0):.4f}")
	        print(f" • Amp Modulation: {details.get('amp_modulation', 0.0):.3f}")
	        print(f" • Fricative Frames: {details.get('fricative_frames', 0)}")
	        print(f" • Stable: {'Yes' if details.get('is_stable', False) else 'No'}")
	    else:
	        print_band_energies()
	        print(f" • High/Mid Ratio: {details.get('high_mid_ratio', 0.0):.3f}")
	        print(f" • Bandwidth: {details.get('bandwidth_mean', 0.0):.1f} Hz")
	        print(f" • Spectral Skewness: {details.get('spectral_skewness', 0.0):.4f}")

	        # Alignment info, only when the result carries it.
	        alignment = result.get('alignment', {})
	        shin = alignment.get('shin', {})
	        if shin:
	            print(f"\n {Colors.CYAN}Alignment:{Colors.END}")
	            print(f" • Shin segment: {shin.get('start_sec', 0):.3f}s - {shin.get('end_sec', 0):.3f}s ({shin.get('duration', 0)*1000:.0f}ms)")
	            print(f" • Alignment confidence: {shin.get('score', 0):.2f}")

	        # NOTE(review): the source's indentation was lost; the segment
	        # listing is assumed to sit beside, not inside, the `if shin:`
	        # check — confirm against the original file.
	        segments = alignment.get('segments', [])
	        if segments:
	            seg_str = " ".join(
	                [f"{s['char']}[{s['start']:.2f}-{s['end']:.2f}]" for s in segments]
	            )
	            print(f" • All segments: {seg_str}")

	    print(f"\n{Colors.BOLD}{'═' * 60}{Colors.END}")


	def display_comparison(word: str):
	    """Print a short articulation cheat-sheet for practising *word*."""
	    tips = (
	        f" • {Colors.GREEN}Good 'Sh':{Colors.END} Tongue back, lips rounded, soft smooth airflow",
	        f" • {Colors.RED}Bad 'S':{Colors.END} Tongue forward, teeth close, sharp airflow",
	        f" • {Colors.RED}Bad 'wet CH':{Colors.END} Slushy/saliva sound — keep tongue centered, blow dry air",
	    )

	    print(f"\n{Colors.CYAN}Quick Reference for '{word}':{Colors.END}")
	    for tip in tips:
	        print(tip)


	# =============================================================================
	# Main Simulation Loop
	# =============================================================================

	def _record_and_score(word: str) -> bool:
	    """Record one attempt at *word*, score it and print the outcome.

	    Returns:
	        False when recording or saving the audio failed, True otherwise
	        (including when scoring reported or raised an error, which is
	        printed here).
	    """
	    try:
	        audio = record_audio(RECORD_DURATION)
	        save_audio(audio, TEMP_FILE)
	    except Exception as e:
	        print(f"\n{Colors.RED}Error recording audio: {e}{Colors.END}")
	        print(f"{Colors.YELLOW}Make sure your microphone is connected and working.{Colors.END}")
	        return False

	    try:
	        result = score_pronunciation(TEMP_FILE, word, AUDIO_DIR)

	        if result['status'] == 'ERROR':
	            print(f"\n{Colors.RED}Error: {result['feedback']}{Colors.END}")
	        else:
	            display_result(result, word)
	    except Exception as e:
	        print(f"\n{Colors.RED}Error scoring recording: {e}{Colors.END}")

	    return True


	def _remove_temp_file():
	    """Best-effort removal of the scratch recording file."""
	    if os.path.exists(TEMP_FILE):
	        try:
	            os.remove(TEMP_FILE)
	        except OSError as e:
	            # Failing to delete a scratch file must not abort shutdown.
	            print(f"{Colors.YELLOW}Could not remove {TEMP_FILE}: {e}{Colors.END}")


	def run_simulation():
	    """Run the interactive practice loop until the user chooses to exit.

	    Each round: pick a word, record it, show the score, then optionally
	    retry the same word once before returning to the word menu. The
	    scratch recording is removed on the way out, including when the loop
	    is left through an exception or Ctrl-C.
	    """
	    clear_screen()
	    print_header()

	    try:
	        while True:
	            print_word_menu()

	            # Get word choice
	            word = get_word_choice()

	            if word is None:
	                print(f"\n{Colors.CYAN}Thanks for practicing! Goodbye! 👋{Colors.END}\n")
	                break

	            # Show word info
	            print(f"\n{Colors.BOLD}Selected word: {Colors.CYAN}{word.upper()}{Colors.END}")
	            display_comparison(word)

	            # Wait for user to be ready
	            input(f"\n{Colors.YELLOW}Press ENTER when you're ready to record...{Colors.END}")

	            countdown_display(3)

	            # A failed recording goes straight back to the word menu.
	            if not _record_and_score(word):
	                input(f"\n{Colors.CYAN}Press ENTER to try again...{Colors.END}")
	                continue

	            # Ask to continue
	            print(f"\n{Colors.CYAN}Options:{Colors.END}")
	            print(f" {Colors.BOLD}1.{Colors.END} Try '{word}' again")
	            print(f" {Colors.BOLD}2.{Colors.END} Choose a different word")
	            print(f" {Colors.BOLD}0.{Colors.END} Exit")

	            choice = input(f"\n{Colors.CYAN}Your choice: {Colors.END}")

	            if choice == '0':
	                print(f"\n{Colors.CYAN}Thanks for practicing! Goodbye! 👋{Colors.END}\n")
	                break

	            if choice == '1':
	                # Try same word again; errors are reported the same way
	                # as on the first attempt instead of being dropped.
	                print(f"\n{Colors.YELLOW}Let's try '{word}' again!{Colors.END}")
	                input(f"\n{Colors.YELLOW}Press ENTER when you're ready to record...{Colors.END}")

	                countdown_display(3)
	                _record_and_score(word)

	                input(f"\n{Colors.CYAN}Press ENTER to continue...{Colors.END}")

	            # Any choice other than exit returns to a fresh word menu.
	            clear_screen()
	            print_header()
	    finally:
	        # Cleanup
	        _remove_temp_file()


	# =============================================================================
	# Entry Point
	# =============================================================================

	def main():
	    """Check that an input device is available, then start the simulation.

	    Exits with status 1 when `sounddevice` cannot be imported or no input
	    device can be found.
	    """
	    # Check for sounddevice
	    try:
	        import sounddevice as sd
	    except ImportError:
	        print(f"{Colors.RED}Error: 'sounddevice' is not installed.{Colors.END}")
	        print(f"Please install it with: pip install sounddevice")
	        sys.exit(1)

	    # Preferred path: ask for the default input device.
	    try:
	        default_input = sd.query_devices(kind='input')
	        print(f"{Colors.GREEN}✓ Microphone detected: {default_input['name']}{Colors.END}")
	    except Exception:
	        # Fallback: probe device indices one by one and take the first
	        # device that reports input channels.
	        try:
	            input_idx = None
	            input_name = None
	            for i in range(64):
	                try:
	                    dev = sd.query_devices(i)
	                    if not dev.get('max_input_channels', 0) > 0:
	                        continue
	                    input_idx = i
	                    input_name = dev.get('name', f'Device {i}')
	                    break
	                except Exception:
	                    # A failing query ends the probe.
	                    break

	            if input_idx is None or not input_name:
	                raise RuntimeError("No input device in device list")

	            # Best effort: make the found device the default input while
	            # keeping the current output device.
	            try:
	                if isinstance(sd.default.device, (list, tuple)):
	                    out_idx = sd.default.device[1]
	                else:
	                    out_idx = sd.default.device
	                sd.default.device = (input_idx, out_idx)
	            except Exception:
	                pass
	            print(f"{Colors.GREEN}✓ Microphone detected: {input_name}{Colors.END}")
	        except Exception as e2:
	            print(f"{Colors.RED}Error: No microphone detected.{Colors.END}")
	            print(f"Details: {e2!s}")
	            print(f"{Colors.YELLOW}On Windows: Set default microphone in Settings > System > Sound > Input.{Colors.END}")
	            sys.exit(1)

	    # Run simulation
	    run_simulation()


	# Run only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()