speechkid-api / simulate_remote.py
yoavzamir's picture
Fix isolated plosive detection: skip denoise, fix trim, fix burst detection
076a657
Raw
History Blame Contribute Delete
6.69 kB
"""
Remote Speech Simulator — Records from microphone, sends to HF Spaces API.
Usage:
python simulate_remote.py
"""
import os
import sys
import time
import numpy as np
import sounddevice as sd
from scipy.io import wavfile
import requests
# =============================================================================
# Configuration
# =============================================================================
# Endpoint of the hosted evaluation service.
API_URL = "https://yoavzamir-speechkid-api.hf.space/evaluate"

# Recording parameters.
SAMPLE_RATE = 16_000  # Hz; match server expectation
RECORD_DURATION = 3  # seconds
TEMP_FILE = "temp_remote.wav"  # scratch file holding the latest recording

# Practice targets offered in the menu, in display order.
AVAILABLE_WORDS = (
    # Words exercising the Hebrew letter Shin
    [
        "shalom",
        "shemesh",
        "shir",
        "shshshsh",
        "shuk",
        "geshem",
        "shaon",
        "shulchan",
    ]
    # Words exercising the Hebrew letter Kuf (K vs T substitution)
    + [
        "kof",
        "kir",
        "kubiya",
        "kalmar",
    ]
    # Isolated sounds, for children who can't yet say full words
    + [
        "k_sound",
        "t_sound",
    ]
)
# ANSI colors
class C:
    """Namespace of ANSI escape sequences used to style terminal output."""

    # Foreground colors (bright variants).
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    CYAN = "\033[96m"

    # Text attributes.
    BOLD = "\033[1m"

    # Reset: restores the terminal's default styling.
    END = "\033[0m"
# =============================================================================
# Core Functions
# =============================================================================
def record_audio() -> np.ndarray:
    """Capture a mono clip from the default input device.

    Prints a short countdown, starts a recording of RECORD_DURATION seconds
    at SAMPLE_RATE, and redraws a textual progress bar roughly ten times per
    second while the capture runs.

    Returns:
        The recorded samples as a flattened (1-D) float32 array.
    """
    # Countdown so the speaker has time to get ready.
    print(f"\n{C.YELLOW}Get ready...{C.END}")
    countdown = 3
    while countdown > 0:
        print(f" {C.BOLD}{countdown}...{C.END}", end='', flush=True)
        time.sleep(0.7)
        countdown -= 1
    print(f"\n\n{C.GREEN}{C.BOLD} RECORDING! Speak now...{C.END}\n")

    # sd.rec returns immediately; the capture continues in the background
    # while the loop below draws the progress bar.
    frame_count = int(RECORD_DURATION * SAMPLE_RATE)
    audio = sd.rec(
        frame_count,
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype='float32',
    )

    # Progress bar: one redraw per 0.1 s tick.
    bar_len = 40
    total_ticks = RECORD_DURATION * 10
    for tick in range(total_ticks):
        fraction = (tick + 1) / total_ticks
        done = int(bar_len * fraction)
        bar = '#' * done + '-' * (bar_len - done)
        remaining = RECORD_DURATION - (tick / 10)
        print(f"\r [{bar}] {remaining:.1f}s", end='', flush=True)
        time.sleep(0.1)

    # Block until the device has delivered every requested frame.
    sd.wait()
    full_bar = '#' * bar_len
    print(f"\r [{full_bar}] Done! \n")
    return audio.flatten()
def save_wav(audio: np.ndarray, path: str):
    """Save float audio as a 16-bit PCM WAV file at SAMPLE_RATE.

    Args:
        audio: Samples as floats, nominally in the range [-1.0, 1.0].
        path: Destination file path; an existing file is overwritten.
    """
    # Clamp before scaling: a sample even slightly outside [-1, 1] would
    # otherwise overflow int16 and wrap to the opposite polarity, which is
    # audible as a loud click and can distort the server-side analysis.
    clipped = np.clip(audio, -1.0, 1.0)
    audio_int16 = (clipped * 32767).astype(np.int16)
    wavfile.write(path, SAMPLE_RATE, audio_int16)
def send_to_api(wav_path: str, word: str) -> dict:
    """Upload a recording and its target word to API_URL.

    Args:
        wav_path: Path of the WAV file to upload.
        word: Target word, sent as the ``word`` form field.

    Returns:
        The decoded JSON body when the server answers 200; otherwise a dict
        with ``status`` set to ``"ERROR"`` and a ``feedback`` message that
        carries the HTTP status code and response text.
    """
    print(f"{C.CYAN} Sending to server...{C.END}", flush=True)

    with open(wav_path, 'rb') as wav_file:
        upload = {"file": ("recording.wav", wav_file, "audio/wav")}
        form_fields = {"word": word}
        response = requests.post(
            API_URL,
            files=upload,
            data=form_fields,
            timeout=60,
        )

    if response.status_code == 200:
        return response.json()

    return {
        "status": "ERROR",
        "feedback": f"Server error {response.status_code}: {response.text}",
    }
def play_recording(path: str):
    """Play a WAV file through the default output device.

    Playback is best-effort: any failure while reading or playing the file
    is reported on the terminal instead of being raised.

    Args:
        path: Path of the WAV file to play.
    """
    try:
        sample_rate, samples = wavfile.read(path)

        # 16-bit PCM is rescaled to floats before playback.
        if samples.dtype == np.int16:
            samples = samples.astype(np.float32) / 32767.0

        seconds = len(samples) / sample_rate
        print(f"\n{C.CYAN} Playing back ({seconds:.1f}s)...{C.END}")

        sd.play(samples, sample_rate)
        sd.wait()  # block until playback has finished
        print(f" {C.CYAN}Done.{C.END}\n")
    except Exception as exc:
        print(f"\n {C.RED}Playback failed: {exc}{C.END}\n")
def display_result(result: dict, word: str):
    """Print an evaluation result to the terminal.

    Args:
        result: Decoded server response. The keys read are ``diagnosis``,
            ``status``, ``feedback``, ``details`` and ``evidence``; all are
            optional.
        word: The word that was evaluated, shown upper-cased as a heading.
    """
    diagnosis = result.get("diagnosis", "?")
    status = result.get("status", "?")
    feedback = result.get("feedback", "")

    # A default of {} only covers a *missing* key. A JSON null (None) or any
    # other non-object value would make the .get() lookups below raise, so
    # normalise both sections to dicts first.
    evidence = result.get("evidence")
    if not isinstance(evidence, dict):
        evidence = {}
    details = result.get("details")
    if not isinstance(details, dict):
        details = {}

    passed = status == "PASS"
    color = C.GREEN if passed else C.RED
    icon = "PASS" if passed else "FAIL"

    # Each metric is looked up in "details" first, with "evidence" (under a
    # possibly different key) as the fallback and "?" when neither has it.
    ai_score = details.get('alignment_score', evidence.get('alignment_score', '?'))
    centroid = details.get('centroid_hz', evidence.get('centroid_mean', '?'))

    rule = '=' * 50
    print(f"\n {rule}")
    print(f" {C.BOLD}Word: {word.upper()}{C.END}")
    print(f" {color}{C.BOLD} [{icon}] {diagnosis}{C.END}")
    print(f" {feedback}")
    print()
    print(f" {C.CYAN}Details:{C.END}")
    print(f" AI score: {ai_score}")
    print(f" Centroid: {centroid} Hz")
    print(f" {rule}")
# =============================================================================
# Main Loop
# =============================================================================
def main():
    """Run the interactive record-and-evaluate loop.

    Exits with status 1 when no input device can be queried. Otherwise it
    repeatedly shows the word menu, records the chosen word, uploads the
    recording and prints the server's verdict until the user enters ``0``.
    The temporary recording is removed when the loop ends, including when
    it ends through Ctrl+C, end-of-input or an unexpected error.
    """
    # Check microphone
    try:
        sd.query_devices(kind='input')
    except Exception:
        print(f"{C.RED}No microphone detected.{C.END}")
        sys.exit(1)

    print(f"\n{C.CYAN}{C.BOLD}")
    print(" =============================================")
    print(" SPEECH TRAINER — Remote API Simulator")
    print(" =============================================")
    print(f"{C.END}")
    print(f" Server: {API_URL}\n")

    try:
        while True:
            # Word menu
            print(f"{C.YELLOW} Choose a word:{C.END}")
            for number, name in enumerate(AVAILABLE_WORDS, 1):
                print(f" {number}. {name}")
            has_recording = os.path.exists(TEMP_FILE)
            if has_recording:
                print(" L. Listen to last recording")
            print(" 0. Exit\n")

            choice = input(f" {C.CYAN}Choice: {C.END}").strip().upper()

            if choice == '0':
                print(f"\n {C.CYAN}Goodbye!{C.END}\n")
                break

            if choice == 'L':
                if has_recording:
                    play_recording(TEMP_FILE)
                else:
                    print(f" {C.RED}No recording yet.{C.END}\n")
                continue

            # Validate the range explicitly. Plain indexing with
            # int(choice) - 1 would accept "00" or negative numbers via
            # Python's negative indexing and silently pick a word from the
            # end of the list.
            try:
                selection = int(choice)
            except ValueError:
                selection = None
            if selection is None or not 1 <= selection <= len(AVAILABLE_WORDS):
                print(f" {C.RED}Invalid choice.{C.END}\n")
                continue
            word = AVAILABLE_WORDS[selection - 1]

            # Record
            input(f"\n Press ENTER when ready to record '{C.BOLD}{word}{C.END}'...")
            audio = record_audio()
            save_wav(audio, TEMP_FILE)

            # Send to API
            try:
                result = send_to_api(TEMP_FILE, word)
                if result.get("status") == "ERROR":
                    print(f"\n {C.RED}Error: {result.get('feedback', 'Unknown error')}{C.END}")
                else:
                    display_result(result, word)
            except requests.exceptions.ConnectionError:
                print(f"\n {C.RED}Cannot reach server. Is the HF Space running?{C.END}")
            except requests.exceptions.Timeout:
                print(f"\n {C.RED}Server timeout. Try again.{C.END}")
            except Exception as e:
                # Top-level boundary of the interactive loop: report the
                # problem and keep the session alive.
                print(f"\n {C.RED}Error: {e}{C.END}")
            print()
    finally:
        # Cleanup runs on every exit path so an interrupted session does not
        # leave the scratch recording behind.
        try:
            os.remove(TEMP_FILE)
        except FileNotFoundError:
            pass


# Start the simulator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()