"""
Speech Analysis MVP - Real-Time Simulation Script

This script provides an interactive terminal interface for testing the
speech analysis system in real-time using your microphone.

Author: Speech AI Project
"""

import os
import sys
import time
import threading
from typing import Optional

import numpy as np
from scipy.io import wavfile

# The two imports below are guarded so that a missing dependency is reported
# by main() with a readable message instead of an import-time traceback.
# (Previously main() contained an ImportError check for sounddevice that could
# never trigger, because this module-level import would already have failed.)
_SOUNDDEVICE_IMPORT_ERROR: Optional[Exception] = None
try:
    import sounddevice as sd
except (ImportError, OSError) as exc:
    # NOTE(review): OSError is caught on the understanding that sounddevice
    # raises it when the PortAudio library cannot be found - confirm.
    sd = None
    _SOUNDDEVICE_IMPORT_ERROR = exc

# Import our score engine
_SCORE_ENGINE_IMPORT_ERROR: Optional[Exception] = None
try:
    from score_engine import score_pronunciation
except ImportError as exc:
    score_pronunciation = None
    _SCORE_ENGINE_IMPORT_ERROR = exc

# =============================================================================
# Configuration
# =============================================================================

SAMPLE_RATE = 44100
RECORD_DURATION = 3  # seconds
TEMP_FILE = "temp_test.wav"
AUDIO_DIR = "audio_data"

# Available words for practice
AVAILABLE_WORDS = ["shalom", "shemesh", "shir", "shshshsh", "shuk", "geshem"]

# Number of device indices probed when no default input device is reported.
_MAX_DEVICE_PROBE = 64

# Geometry and caption of the application banner.
_HEADER_INNER_WIDTH = 63
_HEADER_TITLE = "🎤 SPEECH ANALYSIS - 'SH' SOUND TRAINER 🎤"


# Colors for terminal output (ANSI escape codes)
class Colors:
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'


# =============================================================================
# Terminal UI Functions
# =============================================================================

def clear_screen():
    """Clear the terminal screen."""
    os.system('cls' if os.name == 'nt' else 'clear')


def print_header():
    """Print the application header as a three-line box."""
    rule = '═' * _HEADER_INNER_WIDTH
    # Assumes each of the two emoji occupies two terminal columns, hence the
    # "- 2"; on terminals that render them one column wide the right border
    # sits two columns short.
    title = _HEADER_TITLE.center(_HEADER_INNER_WIDTH - 2)
    print(f"\n{Colors.CYAN}{Colors.BOLD}")
    print(f"╔{rule}╗")
    print(f"║{title}║")
    print(f"╚{rule}╝")
    print(f"{Colors.END}")


def print_box(title: str, content: list, color: str = Colors.CYAN):
    """Print content in a nice box.

    Args:
        title: Caption shown centred in the first row of the box.
        content: Lines of text shown below the caption; may be empty.
        color: ANSI colour prefix used for the box frame.
    """
    # default=0 keeps an empty content list from raising ValueError.
    widest_line = max((len(line) for line in content), default=0)
    max_len = max(len(title), widest_line) + 4

    print(f"\n{color}┌{'─' * max_len}┐{Colors.END}")
    print(f"{color}│{Colors.BOLD} {title.center(max_len - 2)} {Colors.END}{color}│{Colors.END}")
    print(f"{color}├{'─' * max_len}┤{Colors.END}")
    for line in content:
        padding = max_len - len(line) - 2
        print(f"{color}│{Colors.END} {line}{' ' * padding} {color}│{Colors.END}")
    print(f"{color}└{'─' * max_len}┘{Colors.END}")


def print_word_menu():
    """Print the word selection menu."""
    print(f"\n{Colors.YELLOW}Available words to practice:{Colors.END}\n")
    for i, word in enumerate(AVAILABLE_WORDS, 1):
        print(f" {Colors.BOLD}{i}.{Colors.END} {word}")
    print(f"\n {Colors.BOLD}0.{Colors.END} Exit")
    print()


def get_word_choice() -> Optional[str]:
    """Prompt until the user picks a valid menu entry.

    Returns:
        The chosen word from AVAILABLE_WORDS, or None when the user enters
        '0' to exit.
    """
    while True:
        choice = input(
            f"{Colors.CYAN}Enter your choice (1-{len(AVAILABLE_WORDS)}): {Colors.END}"
        ).strip()
        if choice == '0':
            return None
        try:
            idx = int(choice) - 1
        except ValueError:
            print(f"{Colors.RED}Please enter a number.{Colors.END}")
            continue
        if 0 <= idx < len(AVAILABLE_WORDS):
            return AVAILABLE_WORDS[idx]
        print(f"{Colors.RED}Invalid choice. Please try again.{Colors.END}")


def countdown_display(seconds: int):
    """Display a countdown before recording starts.

    Args:
        seconds: Number the countdown starts from. Each step is shown for
            0.7 s, so this is a step count rather than wall-clock seconds.
    """
    print(f"\n{Colors.YELLOW}Get ready to say the word...{Colors.END}")
    for i in range(seconds, 0, -1):
        print(f" {Colors.BOLD}{i}...{Colors.END}", end='', flush=True)
        time.sleep(0.7)
    print(f"\n\n{Colors.GREEN}{Colors.BOLD}🔴 RECORDING NOW! Speak clearly...{Colors.END}\n")


def recording_progress(duration: float):
    """Display a progress bar during recording.

    Blocks for roughly `duration` seconds, redrawing the bar every 0.1 s.

    Args:
        duration: Length of the recording in seconds.
    """
    bar_length = 40
    total_ticks = max(int(round(duration * 10)), 0)

    for tick in range(total_ticks):
        progress = (tick + 1) / total_ticks
        filled = int(bar_length * progress)
        bar = '█' * filled + '░' * (bar_length - filled)
        remaining = duration - (tick / 10)
        print(f"\r [{Colors.GREEN}{bar}{Colors.END}] {remaining:.1f}s remaining", end='', flush=True)
        time.sleep(0.1)

    # Trailing spaces overwrite what is left of the "N.Ns remaining" text.
    print(f"\r [{Colors.GREEN}{'█' * bar_length}{Colors.END}] Done!{' ' * 20}")
    print(f"\n{Colors.CYAN}Processing your recording...{Colors.END}")


# =============================================================================
# Audio Recording Functions
# =============================================================================

def record_audio(duration: float = RECORD_DURATION, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
    """
    Record audio from the microphone.

    Args:
        duration: Recording duration in seconds
        sample_rate: Audio sample rate

    Returns:
        Recorded audio as a flattened (1-D) float32 numpy array
    """
    # sd.rec() is started first and sd.wait() is called afterwards, so the
    # progress bar is drawn while the recording is in flight.
    recording = sd.rec(
        int(duration * sample_rate),
        samplerate=sample_rate,
        channels=1,
        dtype='float32'
    )

    # Show progress while recording
    recording_progress(duration)

    # Wait for recording to complete
    sd.wait()

    return recording.flatten()


def save_audio(audio: np.ndarray, filepath: str, sample_rate: int = SAMPLE_RATE):
    """Save audio to a 16-bit PCM WAV file.

    Args:
        audio: Samples scaled so that full scale is [-1.0, 1.0].
        filepath: Destination path of the WAV file.
        sample_rate: Sample rate written to the WAV header.
    """
    # Clip before scaling: values outside [-1, 1] would otherwise overflow
    # int16 and wrap around instead of saturating.
    clipped = np.clip(audio, -1.0, 1.0)
    audio_int16 = (clipped * 32767).astype(np.int16)
    wavfile.write(filepath, sample_rate, audio_int16)


# =============================================================================
# Result Display Functions
# =============================================================================

ERROR_TYPE_LABELS = {
    # New phoneme pipeline diagnosis codes
    "CORRECT": "CORRECT (תקין)",
    "ERROR_OMISSION": "OMISSION (השמטה)",
    "ERROR_S_SUBSTITUTION": "S SUBSTITUTION (החלפה ל-ס)",
    "ERROR_LATERAL_LISP": "LATERAL LISP (ש' צידית/רטובה)",
    "UNCLEAR_DISTORTION": "UNCLEAR (עיוות)",
    # Legacy pipeline codes
    "wet_ch": "WET CH (ש' רטובה)",
    "s_substitution": "S SUBSTITUTION (החלפה ל-ס)",
    "lateral_sh": "LATERAL SH (ש' צידית)",
    "omission": "OMISSION (השמטה)",
    "distortion": "DISTORTION (עיוות)",
}


def _score_style(score) -> tuple:
    """Return the (colour, emoji) pair used to render a score."""
    if score >= 90:
        return Colors.GREEN, "🌟"
    if score >= 70:
        return Colors.YELLOW, "✓"
    if score >= 50:
        return Colors.YELLOW, "⚠"
    return Colors.RED, "✗"


def _print_shared_spectral_details(details: dict):
    """Print the four spectral detail lines that both pipelines report."""
    print(f" • Centroid: {details.get('centroid_hz', 0)} Hz")
    print(f" • S-Band Energy: {details.get('s_band_percent', 0.0):.1f}%")
    print(f" • Lateral Energy: {details.get('lateral_percent', 0.0):.1f}%")
    print(f" • Sub-3kHz Energy: {details.get('sub3k_percent', 0.0):.1f}%")


def _print_alignment(alignment: dict):
    """Print the alignment section of a phoneme-pipeline result, if present."""
    shin = alignment.get('shin', {})
    if shin:
        print(f"\n {Colors.CYAN}Alignment:{Colors.END}")
        print(f" • Shin segment: {shin.get('start_sec', 0):.3f}s - {shin.get('end_sec', 0):.3f}s ({shin.get('duration', 0)*1000:.0f}ms)")
        print(f" • Alignment confidence: {shin.get('score', 0):.2f}")

    segments = alignment.get('segments', [])
    if segments:
        seg_str = " ".join(
            f"{s['char']}[{s['start']:.2f}-{s['end']:.2f}]" for s in segments
        )
        print(f" • All segments: {seg_str}")


def display_result(result: dict, word: str):
    """Display the scoring result in a beautiful format.

    Args:
        result: Scoring result. The keys 'score', 'status', 'error_type',
            'feedback' and 'details' are required; 'pipeline' (defaults to
            'legacy') and 'alignment' are optional.
        word: The word that was practised; shown upper-cased in the banner.

    Raises:
        KeyError: If one of the required keys is missing from `result`.
    """
    score = result['score']
    status = result['status']
    error_type = result['error_type']
    feedback = result['feedback']
    details = result['details']

    # Determine color based on score
    score_color, emoji = _score_style(score)

    # Status box color
    status_color = Colors.GREEN if status == "PASS" else Colors.RED

    # Print result header
    print(f"\n{Colors.BOLD}{'═' * 60}{Colors.END}")
    print(f"{Colors.BOLD} RESULTS FOR: '{word.upper()}'{Colors.END}")
    print(f"{Colors.BOLD}{'═' * 60}{Colors.END}")

    # Score display
    print(f"\n {emoji} {Colors.BOLD}SCORE:{Colors.END} {score_color}{Colors.BOLD}{score}/100{Colors.END}")

    # Status display
    if status == "PASS":
        print(f" ✓ {Colors.GREEN}{Colors.BOLD}STATUS: PASS{Colors.END}")
    else:
        print(f" ✗ {Colors.RED}{Colors.BOLD}STATUS: FAIL{Colors.END}")

    # Error type (show for ALL detected errors, not just FAIL).
    # A missing/empty error type is treated like "none" so that the fallback
    # label below never calls .replace() on None.
    # NOTE(review): the phoneme pipeline's "CORRECT" code would be shown under
    # "ERROR TYPE" if the scorer passes it through - confirm intended.
    if error_type and error_type != "none":
        error_color = Colors.RED if status == "FAIL" else Colors.YELLOW
        fallback_label = str(error_type).replace('_', ' ').upper()
        error_display = ERROR_TYPE_LABELS.get(error_type, fallback_label)
        print(f"\n {error_color}{Colors.BOLD}⚠ ERROR TYPE: {error_display}{Colors.END}")

    # Feedback - printed on its own line, unpadded (it may contain Hebrew
    # right-to-left text).
    print(f"\n {status_color}{Colors.BOLD}💬 FEEDBACK:{Colors.END}")
    print(f" {feedback}")

    # Pipeline indicator
    pipeline = result.get('pipeline', 'legacy')
    pipeline_label = "Phoneme (Wav2Vec2)" if pipeline == "phoneme" else "Legacy (DTW)"
    print(f"\n {Colors.CYAN}Pipeline: {pipeline_label}{Colors.END}")

    # Technical details - adapt to pipeline
    print(f"\n {Colors.CYAN}Technical Details:{Colors.END}")
    if pipeline == "phoneme":
        _print_shared_spectral_details(details)
        print(f" • High/Mid Ratio: {details.get('high_mid_ratio', 0.0):.3f}")
        print(f" • Bandwidth: {details.get('bandwidth_mean', 0.0):.1f} Hz")
        print(f" • Spectral Skewness: {details.get('spectral_skewness', 0.0):.4f}")

        # Alignment info
        _print_alignment(result.get('alignment', {}))
    else:
        print(f" • Distance: {details.get('distance', 0.0):.4f} (raw: {details.get('raw_distance', 0.0):.4f}, modifier: x{details.get('modifier', 1.0):.1f})")
        _print_shared_spectral_details(details)
        print(f" • Spectral Flatness: {details.get('spectral_flatness', 0.0):.4f}")
        print(f" • Amp Modulation: {details.get('amp_modulation', 0.0):.3f}")
        print(f" • Fricative Frames: {details.get('fricative_frames', 0)}")
        print(f" • Stable: {'Yes' if details.get('is_stable', False) else 'No'}")

    print(f"\n{Colors.BOLD}{'═' * 60}{Colors.END}")


def display_comparison(word: str):
    """Show what a good vs bad pronunciation looks like."""
    print(f"\n{Colors.CYAN}Quick Reference for '{word}':{Colors.END}")
    print(f" • {Colors.GREEN}Good 'Sh':{Colors.END} Tongue back, lips rounded, soft smooth airflow")
    print(f" • {Colors.RED}Bad 'S':{Colors.END} Tongue forward, teeth close, sharp airflow")
    print(f" • {Colors.RED}Bad 'wet CH':{Colors.END} Slushy/saliva sound — keep tongue centered, blow dry air")


# =============================================================================
# Main Simulation Loop
# =============================================================================

def _record_and_score(word: str) -> bool:
    """Record one attempt at `word`, score it and print the outcome.

    Shared by the first attempt and the retry so that both report recording
    failures, scorer exceptions and ERROR-status results the same way.

    Returns:
        False if recording or saving the audio failed (nothing was scored),
        True otherwise - including when scoring itself reported an error.
    """
    try:
        audio = record_audio(RECORD_DURATION)
        save_audio(audio, TEMP_FILE)
    except Exception as e:
        # Broad on purpose: this is the UI boundary and any device or
        # file-system failure should return the user to the prompt.
        print(f"\n{Colors.RED}Error recording audio: {e}{Colors.END}")
        print(f"{Colors.YELLOW}Make sure your microphone is connected and working.{Colors.END}")
        return False

    try:
        result = score_pronunciation(TEMP_FILE, word, AUDIO_DIR)
        if result['status'] == 'ERROR':
            print(f"\n{Colors.RED}Error: {result['feedback']}{Colors.END}")
        else:
            display_result(result, word)
    except Exception as e:
        print(f"\n{Colors.RED}Error scoring recording: {e}{Colors.END}")
    return True


def _remove_temp_file():
    """Delete the temporary recording, ignoring file-system errors."""
    if os.path.exists(TEMP_FILE):
        try:
            os.remove(TEMP_FILE)
        except OSError:
            # Best effort: a leftover temp file is harmless and is simply
            # overwritten by the next recording.
            pass


def run_simulation():
    """Main simulation loop.

    Repeatedly lets the user pick a word, records and scores one attempt,
    and offers a single retry of the same word before returning to the menu.
    The temporary recording is removed when the loop ends, including when it
    is interrupted by an exception such as KeyboardInterrupt.
    """
    goodbye = f"\n{Colors.CYAN}Thanks for practicing! Goodbye! 👋{Colors.END}\n"
    ready_prompt = f"\n{Colors.YELLOW}Press ENTER when you're ready to record...{Colors.END}"

    clear_screen()
    print_header()

    try:
        while True:
            print_word_menu()

            # Get word choice
            word = get_word_choice()
            if word is None:
                print(goodbye)
                break

            # Show word info
            print(f"\n{Colors.BOLD}Selected word: {Colors.CYAN}{word.upper()}{Colors.END}")
            display_comparison(word)

            # Wait for user to be ready, then count down and record
            input(ready_prompt)
            countdown_display(3)
            if not _record_and_score(word):
                input(f"\n{Colors.CYAN}Press ENTER to try again...{Colors.END}")
                continue

            # Ask to continue
            print(f"\n{Colors.CYAN}Options:{Colors.END}")
            print(f" {Colors.BOLD}1.{Colors.END} Try '{word}' again")
            print(f" {Colors.BOLD}2.{Colors.END} Choose a different word")
            print(f" {Colors.BOLD}0.{Colors.END} Exit")

            choice = input(f"\n{Colors.CYAN}Your choice: {Colors.END}").strip()

            if choice == '0':
                print(goodbye)
                break

            if choice == '1':
                # Try same word again
                print(f"\n{Colors.YELLOW}Let's try '{word}' again!{Colors.END}")
                input(ready_prompt)
                countdown_display(3)
                _record_and_score(word)
                input(f"\n{Colors.CYAN}Press ENTER to continue...{Colors.END}")

            # Any choice other than '0' ends up back at the word menu
            clear_screen()
            print_header()
    finally:
        # Cleanup
        _remove_temp_file()


# =============================================================================
# Entry Point
# =============================================================================

def _find_fallback_input_device() -> Optional[tuple]:
    """Return (index, name) of the first device with input channels.

    Probes device indices in ascending order and stops at the first index
    that cannot be queried. Returns None when no input device is found.
    """
    for idx in range(_MAX_DEVICE_PROBE):
        try:
            dev = sd.query_devices(idx)
            if dev.get('max_input_channels', 0) > 0:
                return idx, dev.get('name', f'Device {idx}')
        except Exception:
            break
    return None


def main():
    """Main entry point.

    Verifies that the required modules were imported and that an input
    device is available, then starts the simulation loop. Exits with
    status 1 when a prerequisite is missing.
    """
    # Check for sounddevice
    if sd is None:
        if isinstance(_SOUNDDEVICE_IMPORT_ERROR, ImportError):
            print(f"{Colors.RED}Error: 'sounddevice' is not installed.{Colors.END}")
            print("Please install it with: pip install sounddevice")
        else:
            print(f"{Colors.RED}Error: 'sounddevice' could not be loaded.{Colors.END}")
            print(f"Details: {_SOUNDDEVICE_IMPORT_ERROR!s}")
        sys.exit(1)

    # Check for the score engine
    if score_pronunciation is None:
        print(f"{Colors.RED}Error: could not import 'score_engine'.{Colors.END}")
        print(f"Details: {_SCORE_ENGINE_IMPORT_ERROR!s}")
        sys.exit(1)

    # Check for available audio devices (Windows-friendly: try default, then list all inputs)
    try:
        default_input = sd.query_devices(kind='input')
        print(f"{Colors.GREEN}✓ Microphone detected: {default_input['name']}{Colors.END}")
    except Exception:
        # Fallback: iterate device indices to find any input device (works on Windows)
        try:
            found = _find_fallback_input_device()
            if found is None or not found[1]:
                raise RuntimeError("No input device in device list")
            input_idx, input_name = found

            try:
                out_idx = sd.default.device[1] if isinstance(sd.default.device, (list, tuple)) else sd.default.device
                sd.default.device = (input_idx, out_idx)
            except Exception as set_err:
                # Best effort: keep going, but tell the user why recording
                # may still fail later.
                print(f"{Colors.YELLOW}Warning: could not select this microphone as default: {set_err}{Colors.END}")

            print(f"{Colors.GREEN}✓ Microphone detected: {input_name}{Colors.END}")
        except Exception as e2:
            print(f"{Colors.RED}Error: No microphone detected.{Colors.END}")
            print(f"Details: {e2!s}")
            print(f"{Colors.YELLOW}On Windows: Set default microphone in Settings > System > Sound > Input.{Colors.END}")
            sys.exit(1)

    # Run simulation
    run_simulation()


if __name__ == "__main__":
    main()