Spaces:
Paused
Paused
| """ | |
| Speech Analysis MVP - Real-Time Simulation Script | |
| This script provides an interactive terminal interface for testing | |
| the speech analysis system in real-time using your microphone. | |
| Author: Speech AI Project | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import threading | |
| import numpy as np | |
| import sounddevice as sd | |
| from scipy.io import wavfile | |
| # Import our score engine | |
| from score_engine import score_pronunciation | |
| # ============================================================================= | |
| # Configuration | |
| # ============================================================================= | |
# Default sample rate (Hz) used by record_audio() and save_audio().
SAMPLE_RATE = 44100
# Default length of one recording, used by record_audio().
RECORD_DURATION = 3 # seconds
# Scratch WAV file each recording is written to; removed when the simulation ends.
TEMP_FILE = "temp_test.wav"
# Directory passed as the third argument to score_pronunciation().
# NOTE(review): presumably holds the reference recordings - confirm in score_engine.
AUDIO_DIR = "audio_data"
# Available words for practice
AVAILABLE_WORDS = ["shalom", "shemesh", "shir", "shshshsh", "shuk", "geshem"]
| # Colors for terminal output (ANSI escape codes) | |
class Colors:
    """ANSI escape sequences used to style terminal output.

    Each attribute is a string to be embedded in printed text; ``END``
    resets all styling that was switched on before it.
    """

    # Foreground colours
    HEADER = '\x1b[95m'
    BLUE = '\x1b[94m'
    CYAN = '\x1b[96m'
    GREEN = '\x1b[92m'
    YELLOW = '\x1b[93m'
    RED = '\x1b[91m'

    # Text attributes
    BOLD = '\x1b[1m'
    UNDERLINE = '\x1b[4m'

    # Reset
    END = '\x1b[0m'
| # ============================================================================= | |
| # Terminal UI Functions | |
| # ============================================================================= | |
def clear_screen():
    """Wipe the terminal by running the platform's clear command."""
    # os.name is 'nt' on Windows, where the command is `cls`; elsewhere use `clear`.
    command = 'clear'
    if os.name == 'nt':
        command = 'cls'
    os.system(command)
def print_header():
    """Print the three-line application banner in bold cyan."""
    banner_rows = (
        "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ",
        "โ ๐ค SPEECH ANALYSIS - 'SH' SOUND TRAINER ๐ค โ",
        "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ",
    )
    # Styling is switched on by one print and reset by the last, so every
    # banner row in between inherits it.
    print(f"\n{Colors.CYAN}{Colors.BOLD}")
    for row in banner_rows:
        print(row)
    print(f"{Colors.END}")
def print_box(title: str, content: list, color: str = Colors.CYAN):
    """Print a titled box around a list of text lines.

    Args:
        title: Heading shown centred in the first row of the box.
        content: Lines of text shown below the heading. May be empty, in
            which case only the heading row is framed.
        color: ANSI colour sequence applied to the frame.
    """
    # Include the title in the list handed to max() so an empty `content`
    # no longer raises ValueError ("max() arg is an empty sequence").
    max_len = max([len(title), *(len(line) for line in content)]) + 4
    horizontal = 'โ' * max_len

    print(f"\n{color}โ{horizontal}โ{Colors.END}")
    print(f"{color}โ{Colors.BOLD} {title.center(max_len - 2)} {Colors.END}{color}โ{Colors.END}")
    print(f"{color}โ{horizontal}โค{Colors.END}")
    for line in content:
        # One space of margin on each side, hence the "- 2".
        padding = max_len - len(line) - 2
        print(f"{color}โ{Colors.END} {line}{' ' * padding} {color}โ{Colors.END}")
    print(f"{color}โ{horizontal}โ{Colors.END}")
def print_word_menu():
    """List the practice words, numbered from 1, followed by the exit entry."""
    print(f"\n{Colors.YELLOW}Available words to practice:{Colors.END}\n")
    entries = [
        f" {Colors.BOLD}{number}.{Colors.END} {word}"
        for number, word in enumerate(AVAILABLE_WORDS, start=1)
    ]
    for entry in entries:
        print(entry)
    print(f"\n {Colors.BOLD}0.{Colors.END} Exit")
    print()
def get_word_choice() -> "str | None":
    """Prompt until the user picks a word from the menu or chooses to exit.

    Returns:
        The selected entry of ``AVAILABLE_WORDS``, or ``None`` when the user
        enters ``0`` or standard input is closed.
    """
    while True:
        try:
            raw = input(f"{Colors.CYAN}Enter your choice (1-{len(AVAILABLE_WORDS)}): {Colors.END}")
        except EOFError:
            # No more input can arrive; treat it as a request to exit
            # instead of crashing with a traceback.
            return None

        # Ignore surrounding whitespace so that e.g. "0 " still exits.
        choice = raw.strip()
        if choice == '0':
            return None

        try:
            idx = int(choice) - 1
        except ValueError:
            print(f"{Colors.RED}Please enter a number.{Colors.END}")
            continue

        if 0 <= idx < len(AVAILABLE_WORDS):
            return AVAILABLE_WORDS[idx]
        print(f"{Colors.RED}Invalid choice. Please try again.{Colors.END}")
def countdown_display(seconds: int):
    """Display a countdown before recording starts.

    Args:
        seconds: Number of ticks to count down from. Each tick is shown for
            0.7 s. A value of 0 or less skips straight to the recording
            banner.
    """
    print(f"\n{Colors.YELLOW}Get ready to say the word...{Colors.END}")
    # Honour the caller's `seconds`; previously the count was fixed at 3
    # and the argument was ignored.
    for i in range(seconds, 0, -1):
        print(f" {Colors.BOLD}{i}...{Colors.END}", end='', flush=True)
        time.sleep(0.7)
    print(f"\n\n{Colors.GREEN}{Colors.BOLD}๐ด RECORDING NOW! Speak clearly...{Colors.END}\n")
def recording_progress(duration: int):
    """Animate a 40-cell progress bar for ``duration`` seconds.

    The bar is redrawn in place every 0.1 s using a carriage return, then
    replaced by a full bar and a "processing" message.
    """
    bar_length = 40
    total_ticks = duration * 10  # ten redraws per second

    for tick in range(total_ticks):
        fraction_done = (tick + 1) / total_ticks
        filled = int(bar_length * fraction_done)
        empty = bar_length - filled
        bar = 'โ' * filled + 'โ' * empty
        remaining = duration - (tick / 10)
        print(f"\r [{Colors.GREEN}{bar}{Colors.END}] {remaining:.1f}s remaining", end='', flush=True)
        time.sleep(0.1)

    full_bar = 'โ' * bar_length
    print(f"\r [{Colors.GREEN}{full_bar}{Colors.END}] Done! ")
    print(f"\n{Colors.CYAN}Processing your recording...{Colors.END}")
| # ============================================================================= | |
| # Audio Recording Functions | |
| # ============================================================================= | |
def record_audio(duration: int = RECORD_DURATION, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
    """
    Capture mono float32 audio from the microphone while showing progress.

    Args:
        duration: Recording duration in seconds
        sample_rate: Audio sample rate
    Returns:
        The recorded samples flattened to a one-dimensional numpy array
    """
    frame_count = int(duration * sample_rate)

    # Kick off the capture, then draw the progress bar while it runs.
    captured = sd.rec(
        frame_count,
        samplerate=sample_rate,
        channels=1,
        dtype='float32',
    )
    recording_progress(duration)

    # Block until the capture has finished before handing the buffer back.
    sd.wait()
    samples = captured.flatten()
    return samples
def save_audio(audio: np.ndarray, filepath: str, sample_rate: int = SAMPLE_RATE):
    """Save audio to a 16-bit PCM WAV file.

    Args:
        audio: Samples scaled so that full scale is the range [-1.0, 1.0].
        filepath: Destination path of the WAV file.
        sample_rate: Sample rate written to the WAV header.
    """
    # Clamp to full scale first: a sample even slightly outside [-1, 1]
    # would otherwise wrap around when cast to int16 and produce a loud
    # click instead of a saturated peak.
    clipped = np.clip(audio, -1.0, 1.0)
    # Convert to 16-bit PCM
    audio_int16 = np.int16(clipped * 32767)
    wavfile.write(filepath, sample_rate, audio_int16)
| # ============================================================================= | |
| # Result Display Functions | |
| # ============================================================================= | |
# Maps the `error_type` code found in a scoring result to the label shown by
# display_result(). Codes missing from this table are displayed by
# upper-casing the code and replacing underscores with spaces.
ERROR_TYPE_LABELS = {
    # New phoneme pipeline diagnosis codes
    "CORRECT": "CORRECT (ืชืงืื)",
    "ERROR_OMISSION": "OMISSION (ืืฉืืื)",
    "ERROR_S_SUBSTITUTION": "S SUBSTITUTION (ืืืืคื ื-ืก)",
    "ERROR_LATERAL_LISP": "LATERAL LISP (ืฉ' ืฆืืืืช/ืจืืืื)",
    "UNCLEAR_DISTORTION": "UNCLEAR (ืขืืืืช)",
    # Legacy pipeline codes
    "wet_ch": "WET CH (ืฉ' ืจืืืื)",
    "s_substitution": "S SUBSTITUTION (ืืืืคื ื-ืก)",
    "lateral_sh": "LATERAL SH (ืฉ' ืฆืืืืช)",
    "omission": "OMISSION (ืืฉืืื)",
    "distortion": "DISTORTION (ืขืืืืช)",
}
def display_result(result: dict, word: str):
    """Pretty-print one scoring result for ``word``.

    Args:
        result: Scoring result. The keys ``score``, ``status``,
            ``error_type``, ``feedback`` and ``details`` are required;
            ``pipeline`` and ``alignment`` are optional.
        word: The word that was practised; shown upper-cased in the heading.
    """
    score = result['score']
    status = result['status']
    error_type = result['error_type']
    feedback = result['feedback']
    details = result['details']

    # Score tiers, highest first: (minimum score, colour, icon).
    score_tiers = (
        (90, Colors.GREEN, "๐"),
        (70, Colors.YELLOW, "โ"),
        (50, Colors.YELLOW, "โ "),
    )
    score_color, emoji = Colors.RED, "โ"
    for floor, tier_color, tier_icon in score_tiers:
        if score >= floor:
            score_color, emoji = tier_color, tier_icon
            break

    passed = status == "PASS"
    status_color = Colors.GREEN if passed else Colors.RED

    # Heading
    rule = f"{Colors.BOLD}{'โ' * 60}{Colors.END}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD} RESULTS FOR: '{word.upper()}'{Colors.END}")
    print(rule)

    # Score and verdict
    print(f"\n {emoji} {Colors.BOLD}SCORE:{Colors.END} {score_color}{Colors.BOLD}{score}/100{Colors.END}")
    if passed:
        print(f" โ {Colors.GREEN}{Colors.BOLD}STATUS: PASS{Colors.END}")
    else:
        print(f" โ {Colors.RED}{Colors.BOLD}STATUS: FAIL{Colors.END}")

    # The error type is reported for any value other than "none", including
    # on results that did not fail (shown in yellow instead of red).
    if error_type != "none":
        error_color = Colors.YELLOW if status != "FAIL" else Colors.RED
        error_display = ERROR_TYPE_LABELS.get(error_type, error_type.replace('_', ' ').upper())
        print(f"\n {error_color}{Colors.BOLD}โ ERROR TYPE: {error_display}{Colors.END}")

    # Feedback is printed on its own line, unstyled.
    print(f"\n {status_color}{Colors.BOLD}๐ฌ FEEDBACK:{Colors.END}")
    print(f" {feedback}")

    # Which scoring pipeline produced the result (defaults to legacy).
    pipeline = result.get('pipeline', 'legacy')
    is_phoneme = pipeline == "phoneme"
    pipeline_label = "Phoneme (Wav2Vec2)" if is_phoneme else "Legacy (DTW)"
    print(f"\n {Colors.CYAN}Pipeline: {pipeline_label}{Colors.END}")

    # Technical details: rows are (format template, details key, default).
    print(f"\n {Colors.CYAN}Technical Details:{Colors.END}")
    shared_rows = (
        (" โข Centroid: {} Hz", 'centroid_hz', 0),
        (" โข S-Band Energy: {:.1f}%", 's_band_percent', 0.0),
        (" โข Lateral Energy: {:.1f}%", 'lateral_percent', 0.0),
        (" โข Sub-3kHz Energy: {:.1f}%", 'sub3k_percent', 0.0),
    )

    if is_phoneme:
        phoneme_rows = shared_rows + (
            (" โข High/Mid Ratio: {:.3f}", 'high_mid_ratio', 0.0),
            (" โข Bandwidth: {:.1f} Hz", 'bandwidth_mean', 0.0),
            (" โข Spectral Skewness: {:.4f}", 'spectral_skewness', 0.0),
        )
        for template, key, default in phoneme_rows:
            print(template.format(details.get(key, default)))

        # Alignment info is only shown when a shin segment is present.
        alignment = result.get('alignment', {})
        shin = alignment.get('shin', {})
        if shin:
            print(f"\n {Colors.CYAN}Alignment:{Colors.END}")
            start_sec = shin.get('start_sec', 0)
            end_sec = shin.get('end_sec', 0)
            duration_ms = shin.get('duration', 0) * 1000
            print(f" โข Shin segment: {start_sec:.3f}s - {end_sec:.3f}s ({duration_ms:.0f}ms)")
            print(f" โข Alignment confidence: {shin.get('score', 0):.2f}")
            segments = alignment.get('segments', [])
            if segments:
                pieces = []
                for seg in segments:
                    pieces.append(f"{seg['char']}[{seg['start']:.2f}-{seg['end']:.2f}]")
                seg_str = " ".join(pieces)
                print(f" โข All segments: {seg_str}")
    else:
        distance = details.get('distance', 0.0)
        raw_distance = details.get('raw_distance', 0.0)
        modifier = details.get('modifier', 1.0)
        print(f" โข Distance: {distance:.4f} (raw: {raw_distance:.4f}, modifier: x{modifier:.1f})")
        legacy_rows = shared_rows + (
            (" โข Spectral Flatness: {:.4f}", 'spectral_flatness', 0.0),
            (" โข Amp Modulation: {:.3f}", 'amp_modulation', 0.0),
            (" โข Fricative Frames: {}", 'fricative_frames', 0),
        )
        for template, key, default in legacy_rows:
            print(template.format(details.get(key, default)))
        stable_text = 'Yes' if details.get('is_stable', False) else 'No'
        print(f" โข Stable: {stable_text}")

    print(f"\n{rule}")
def display_comparison(word: str):
    """Print a short articulation cheat sheet under a heading naming ``word``."""
    print(f"\n{Colors.CYAN}Quick Reference for '{word}':{Colors.END}")
    # (label colour, label, description) for each reference line.
    tips = (
        (Colors.GREEN, "Good 'Sh':", "Tongue back, lips rounded, soft smooth airflow"),
        (Colors.RED, "Bad 'S':", "Tongue forward, teeth close, sharp airflow"),
        (Colors.RED, "Bad 'wet CH':", "Slushy/saliva sound โ keep tongue centered, blow dry air"),
    )
    for label_color, label, description in tips:
        print(f" โข {label_color}{label}{Colors.END} {description}")
| # ============================================================================= | |
| # Main Simulation Loop | |
| # ============================================================================= | |
def _record_and_score(word: str) -> bool:
    """Record one attempt at ``word``, score it and print the outcome.

    Returns:
        ``False`` when recording or saving the audio failed (nothing was
        scored), ``True`` otherwise, including when scoring itself failed.
    """
    try:
        audio = record_audio(RECORD_DURATION)
        save_audio(audio, TEMP_FILE)
    except Exception as e:
        print(f"\n{Colors.RED}Error recording audio: {e}{Colors.END}")
        print(f"{Colors.YELLOW}Make sure your microphone is connected and working.{Colors.END}")
        return False

    try:
        result = score_pronunciation(TEMP_FILE, word, AUDIO_DIR)
        if result['status'] == 'ERROR':
            print(f"\n{Colors.RED}Error: {result['feedback']}{Colors.END}")
        else:
            display_result(result, word)
    except Exception as e:
        # A scoring failure should not end the session; report it and let
        # the user decide what to do next.
        print(f"\n{Colors.RED}Error scoring recording: {e}{Colors.END}")
    return True


def _practice_word(word: str):
    """Let the user attempt ``word`` repeatedly until they choose otherwise.

    Returns:
        The user's menu answer once it is anything other than ``'1'``
        (``'0'`` means exit), or ``None`` if a recording failed.
    """
    while True:
        input(f"\n{Colors.YELLOW}Press ENTER when you're ready to record...{Colors.END}")
        countdown_display(3)

        if not _record_and_score(word):
            input(f"\n{Colors.CYAN}Press ENTER to try again...{Colors.END}")
            return None

        print(f"\n{Colors.CYAN}Options:{Colors.END}")
        print(f" {Colors.BOLD}1.{Colors.END} Try '{word}' again")
        print(f" {Colors.BOLD}2.{Colors.END} Choose a different word")
        print(f" {Colors.BOLD}0.{Colors.END} Exit")
        choice = input(f"\n{Colors.CYAN}Your choice: {Colors.END}")
        if choice != '1':
            return choice
        print(f"\n{Colors.YELLOW}Let's try '{word}' again!{Colors.END}")


def _remove_temp_file():
    """Delete the scratch recording, ignoring a missing or locked file."""
    try:
        os.remove(TEMP_FILE)
    except OSError:
        # Best-effort cleanup: a leftover temp file is harmless.
        pass


def run_simulation():
    """Main simulation loop.

    Repeatedly shows the word menu, lets the user practise the chosen word
    (as many attempts as they like) and prints the score for each attempt.
    The scratch recording is removed when the loop ends, including when it
    ends because of an exception or Ctrl+C.
    """
    clear_screen()
    print_header()
    try:
        while True:
            print_word_menu()
            word = get_word_choice()
            if word is None:
                break

            print(f"\n{Colors.BOLD}Selected word: {Colors.CYAN}{word.upper()}{Colors.END}")
            display_comparison(word)

            choice = _practice_word(word)
            if choice == '0':
                break
            if choice is not None:
                # Normal return to the menu: start from a clean screen. After
                # a recording failure the error text is left visible instead.
                clear_screen()
                print_header()

        print(f"\n{Colors.CYAN}Thanks for practicing! Goodbye! ๐{Colors.END}\n")
    finally:
        _remove_temp_file()
| # ============================================================================= | |
| # Entry Point | |
| # ============================================================================= | |
def _find_input_device():
    """Return ``(index, name)`` of the first device with input channels.

    Returns ``None`` when no listed device reports any input channel.
    """
    for index, device in enumerate(sd.query_devices()):
        if device.get('max_input_channels', 0) > 0:
            return index, device.get('name') or f'Device {index}'
    return None


def main():
    """Main entry point: verify a microphone is available, then run the UI.

    Exits with status 1 if no input device can be found.
    """
    # sounddevice is imported at module level, so reaching this point already
    # proves it is installed; no separate ImportError check is needed here.

    # Prefer the system default input device.
    try:
        mic_name = sd.query_devices(kind='input')['name']
    except Exception:
        mic_name = None

    if mic_name is None:
        # Fallback (Windows-friendly): scan the full device list for any
        # device that can record.
        try:
            found = _find_input_device()
            reason = "No input device in device list"
        except Exception as exc:
            found = None
            reason = str(exc)

        if found is None:
            print(f"{Colors.RED}Error: No microphone detected.{Colors.END}")
            print(f"Details: {reason}")
            print(f"{Colors.YELLOW}On Windows: Set default microphone in Settings > System > Sound > Input.{Colors.END}")
            sys.exit(1)

        input_idx, mic_name = found
        try:
            current = sd.default.device
            out_idx = current[1] if isinstance(current, (list, tuple)) else current
            sd.default.device = (input_idx, out_idx)
        except Exception:
            # Best effort: if the default cannot be changed, continue anyway.
            pass

    print(f"{Colors.GREEN}โ Microphone detected: {mic_name}{Colors.END}")

    # Run simulation
    run_simulation()


if __name__ == "__main__":
    main()