"""
Remote Speech Simulator — Records from microphone, sends to HF Spaces API.

Usage:
    python simulate_remote.py
"""

import os
import sys
import time

import numpy as np
import sounddevice as sd
from scipy.io import wavfile
import requests

# =============================================================================
# Configuration
# =============================================================================
API_URL = "https://yoavzamir-speechkid-api.hf.space/evaluate"
SAMPLE_RATE = 16000  # match server expectation
RECORD_DURATION = 3  # seconds
TEMP_FILE = "temp_remote.wav"

AVAILABLE_WORDS = [
    # ש (Shin) words
    "shalom", "shemesh", "shir", "shshshsh", "shuk", "geshem", "shaon",
    "shulchan",
    # ק (Kuf) words — K vs T substitution
    "kof", "kir", "kubiya", "kalmar",
    # Isolated sounds — for children who can't yet say full words
    "k_sound", "t_sound",
]


# ANSI colors
class C:
    """ANSI escape sequences used to colorize terminal output."""

    GREEN = '\033[92m'
    RED = '\033[91m'
    CYAN = '\033[96m'
    YELLOW = '\033[93m'
    BOLD = '\033[1m'
    END = '\033[0m'  # resets all attributes


# =============================================================================
# Core Functions
# =============================================================================
def record_audio() -> np.ndarray:
    """Record RECORD_DURATION seconds of mono audio from the microphone.

    Prints a 3-2-1 countdown, starts the capture, and draws a text progress
    bar while the capture runs.

    Returns:
        The recording as a flattened (1-D) array; float32 samples are
        requested from sounddevice.
    """
    print(f"\n{C.YELLOW}Get ready...{C.END}")
    for i in range(3, 0, -1):
        print(f" {C.BOLD}{i}...{C.END}", end='', flush=True)
        time.sleep(0.7)

    print(f"\n\n{C.GREEN}{C.BOLD} RECORDING! Speak now...{C.END}\n")
    # The capture runs in the background while the progress bar below is
    # drawn; sd.wait() afterwards blocks until it has finished.
    audio = sd.rec(
        int(RECORD_DURATION * SAMPLE_RATE),
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='float32',
    )

    # Progress bar: one tick every 0.1 s. int() keeps range() valid even if
    # RECORD_DURATION is configured as a fractional number of seconds.
    bar_len = 40
    steps = int(RECORD_DURATION * 10)
    for i in range(steps):
        p = (i + 1) / steps
        filled = int(bar_len * p)
        bar = '#' * filled + '-' * (bar_len - filled)
        remaining = RECORD_DURATION - (i / 10)
        print(f"\r [{bar}] {remaining:.1f}s", end='', flush=True)
        time.sleep(0.1)

    sd.wait()
    print(f"\r [{'#' * bar_len}] Done! \n")
    return audio.flatten()


def save_wav(audio: np.ndarray, path: str) -> None:
    """Save as 16-bit PCM WAV at SAMPLE_RATE.

    Args:
        audio: Float samples, nominally in [-1.0, 1.0].
        path: Destination file path.
    """
    # Clip before scaling: a sample outside [-1, 1] would otherwise overflow
    # the int16 range and wrap around instead of saturating.
    clipped = np.clip(audio, -1.0, 1.0)
    audio_int16 = (clipped * 32767).astype(np.int16)
    wavfile.write(path, SAMPLE_RATE, audio_int16)


def send_to_api(wav_path: str, word: str) -> dict:
    """Send recording to HF Spaces API.

    Args:
        wav_path: Path of the WAV file to upload.
        word: Target word, sent as the ``word`` form field.

    Returns:
        The decoded JSON object from the server, or a dict with
        ``status == "ERROR"`` and a ``feedback`` message when the server
        answers with a non-200 status or a body that is not a JSON object.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            timeouts (left for the caller to report).
    """
    print(f"{C.CYAN} Sending to server...{C.END}", flush=True)
    with open(wav_path, 'rb') as f:
        resp = requests.post(
            API_URL,
            files={"file": ("recording.wav", f, "audio/wav")},
            data={"word": word},
            timeout=60,
        )

    if resp.status_code != 200:
        return {
            "status": "ERROR",
            "feedback": f"Server error {resp.status_code}: {resp.text}",
        }

    # A 200 response can still carry a non-JSON body (e.g. an HTML page);
    # report that the same way as any other server-side failure.
    try:
        payload = resp.json()
    except ValueError:
        return {
            "status": "ERROR",
            "feedback": "Server returned a response that is not valid JSON",
        }
    if not isinstance(payload, dict):
        return {
            "status": "ERROR",
            "feedback": "Server returned an unexpected response format",
        }
    return payload


def play_recording(path: str) -> None:
    """Play back a WAV file through the default output device.

    Playback is best-effort: any failure is printed, never raised.
    """
    try:
        sr, data = wavfile.read(path)
        if data.dtype == np.int16:
            # Convert 16-bit PCM back to floats in roughly [-1, 1].
            data = data.astype(np.float32) / 32767.0
        duration = len(data) / sr
        print(f"\n{C.CYAN} Playing back ({duration:.1f}s)...{C.END}")
        sd.play(data, sr)
        sd.wait()
        print(f" {C.CYAN}Done.{C.END}\n")
    except Exception as e:
        print(f"\n {C.RED}Playback failed: {e}{C.END}\n")


def display_result(result: dict, word: str) -> None:
    """Show result in terminal.

    Args:
        result: Response dict from the API. Reads the keys ``diagnosis``,
            ``status``, ``feedback``, ``evidence`` and ``details``; missing
            keys fall back to placeholders.
        word: The word that was evaluated.
    """
    diagnosis = result.get("diagnosis", "?")
    status = result.get("status", "?")
    feedback = result.get("feedback", "")
    # "or {}" also covers an explicit JSON null for these fields.
    evidence = result.get("evidence") or {}
    details = result.get("details") or {}

    passed = status == "PASS"
    color = C.GREEN if passed else C.RED
    icon = "PASS" if passed else "FAIL"

    # Each metric is looked up in "details" first, then in "evidence".
    score = details.get(
        'alignment_score', evidence.get('alignment_score', '?'))
    centroid = details.get(
        'centroid_hz', evidence.get('centroid_mean', '?'))

    print(f"\n {'=' * 50}")
    print(f" {C.BOLD}Word: {word.upper()}{C.END}")
    print(f" {color}{C.BOLD} [{icon}] {diagnosis}{C.END}")
    print(f" {feedback}")
    print()
    print(f" {C.CYAN}Details:{C.END}")
    print(f" AI score: {score}")
    print(f" Centroid: {centroid} Hz")
    print(f" {'=' * 50}")


# =============================================================================
# Main Loop
# =============================================================================
def resolve_word_choice(choice: str, words: "list[str]") -> "str | None":
    """Map a 1-based menu choice onto an entry of *words*.

    Args:
        choice: Raw menu input, e.g. ``"3"``.
        words: The words shown in the menu, in display order.

    Returns:
        The selected word, or ``None`` when *choice* is not an integer in
        ``1..len(words)``.
    """
    try:
        number = int(choice)
    except ValueError:
        return None
    # Explicit range check: plain indexing would accept negative numbers
    # ("-1" -> index -2) and silently pick a word from the end of the list.
    if 1 <= number <= len(words):
        return words[number - 1]
    return None


def main() -> None:
    """Run the interactive record -> upload -> report loop.

    Exits with status 1 when no input device can be queried. The temporary
    recording is removed when the loop ends, including when it ends because
    of an exception such as Ctrl+C.
    """
    # Check microphone
    try:
        sd.query_devices(kind='input')
    except Exception:
        print(f"{C.RED}No microphone detected.{C.END}")
        sys.exit(1)

    print(f"\n{C.CYAN}{C.BOLD}")
    print(" =============================================")
    print(" SPEECH TRAINER — Remote API Simulator")
    print(" =============================================")
    print(f"{C.END}")
    print(f" Server: {API_URL}\n")

    try:
        while True:
            # Word menu
            print(f"{C.YELLOW} Choose a word:{C.END}")
            for i, w in enumerate(AVAILABLE_WORDS, 1):
                print(f" {i}. {w}")
            has_recording = os.path.exists(TEMP_FILE)
            if has_recording:
                print(" L. Listen to last recording")
            print(" 0. Exit\n")

            choice = input(f" {C.CYAN}Choice: {C.END}").strip().upper()

            if choice == '0':
                print(f"\n {C.CYAN}Goodbye!{C.END}\n")
                break

            if choice == 'L':
                if has_recording:
                    play_recording(TEMP_FILE)
                else:
                    print(f" {C.RED}No recording yet.{C.END}\n")
                continue

            word = resolve_word_choice(choice, AVAILABLE_WORDS)
            if word is None:
                print(f" {C.RED}Invalid choice.{C.END}\n")
                continue

            # Record
            input(
                f"\n Press ENTER when ready to record "
                f"'{C.BOLD}{word}{C.END}'..."
            )
            audio = record_audio()
            save_wav(audio, TEMP_FILE)

            # Send to API
            try:
                result = send_to_api(TEMP_FILE, word)
                if result.get("status") == "ERROR":
                    feedback = result.get('feedback', 'Unknown error')
                    print(f"\n {C.RED}Error: {feedback}{C.END}")
                else:
                    display_result(result, word)
            except requests.exceptions.ConnectionError:
                print(
                    f"\n {C.RED}Cannot reach server. "
                    f"Is the HF Space running?{C.END}"
                )
            except requests.exceptions.Timeout:
                print(f"\n {C.RED}Server timeout. Try again.{C.END}")
            except Exception as e:
                # Last-resort boundary: report and return to the menu.
                print(f"\n {C.RED}Error: {e}{C.END}")

            print()
    finally:
        # Cleanup — runs on normal exit and when an exception ends the loop.
        try:
            os.remove(TEMP_FILE)
        except FileNotFoundError:
            pass


if __name__ == "__main__":
    main()