Spaces:

yoavzamir
/

speechkid-api

Paused

App Files Files Community

speechkid-api / simulate_remote.py

yoavzamir

Fix isolated plosive detection: skip denoise, fix trim, fix burst detection

076a657 3 months ago

Raw

History Blame Contribute Delete

6.69 kB

	"""
	Remote Speech Simulator — Records from microphone, sends to HF Spaces API.

	Usage:
	python simulate_remote.py
	"""

	import os
	import sys
	import time

	import numpy as np
	import sounddevice as sd
	from scipy.io import wavfile
	import requests

	# =============================================================================
	# Configuration
	# =============================================================================

	# URL that recordings are POSTed to for evaluation.
	API_URL = "https://yoavzamir-speechkid-api.hf.space/evaluate"
	# Recording parameters.
	SAMPLE_RATE = 16_000  # Hz; match server expectation
	RECORD_DURATION = 3  # seconds
	# Scratch file that holds the most recent recording.
	TEMP_FILE = "temp_remote.wav"

	# Menu entries, in the order they are shown to the user.
	AVAILABLE_WORDS = [
	    # ש (Shin) words
	    "shalom",
	    "shemesh",
	    "shir",
	    "shshshsh",
	    "shuk",
	    "geshem",
	    "shaon",
	    "shulchan",
	    # ק (Kuf) words — K vs T substitution
	    "kof",
	    "kir",
	    "kubiya",
	    "kalmar",
	    # Isolated sounds — for children who can't yet say full words
	    "k_sound",
	    "t_sound",
	]

	# ANSI colors
	class C:
	    """ANSI escape sequences used to style terminal output."""

	    # Foreground colours.
	    RED = "\033[91m"
	    GREEN = "\033[92m"
	    YELLOW = "\033[93m"
	    CYAN = "\033[96m"
	    # Text attribute.
	    BOLD = "\033[1m"
	    # Emitted after styled text to switch styling off again.
	    END = "\033[0m"

	# =============================================================================
	# Core Functions
	# =============================================================================

	def record_audio() -> np.ndarray:
	    """Capture RECORD_DURATION seconds of mono audio from the microphone.

	    Prints a 3-2-1 countdown, starts the recording, and redraws a text
	    progress bar while the capture is running.

	    Returns:
	        The recorded float32 samples flattened to a 1-D array.
	    """
	    print(f"\n{C.YELLOW}Get ready...{C.END}")
	    for count in (3, 2, 1):
	        print(f" {C.BOLD}{count}...{C.END}", end='', flush=True)
	        time.sleep(0.7)
	    print(f"\n\n{C.GREEN}{C.BOLD} RECORDING! Speak now...{C.END}\n")

	    # Start the capture; completion is awaited with sd.wait() further down.
	    n_frames = int(RECORD_DURATION * SAMPLE_RATE)
	    recording = sd.rec(n_frames, samplerate=SAMPLE_RATE, channels=1, dtype='float32')

	    # Redraw the bar ten times per second for the nominal duration.
	    width = 40
	    total_ticks = RECORD_DURATION * 10
	    for tick in range(total_ticks):
	        done = int(width * ((tick + 1) / total_ticks))
	        bar = ('#' * done).ljust(width, '-')
	        left = RECORD_DURATION - (tick / 10)
	        print(f"\r [{bar}] {left:.1f}s", end='', flush=True)
	        time.sleep(0.1)

	    sd.wait()  # make sure the recording has finished before reading it
	    print(f"\r [{'#' * width}] Done! \n")
	    return recording.flatten()


	def save_wav(audio: np.ndarray, path: str, sample_rate=None):
	    """Write float audio to *path* as a 16-bit PCM WAV file.

	    Args:
	        audio: Samples with a nominal range of [-1.0, 1.0]. Values outside
	            that range are clipped to it.
	        path: Destination file path.
	        sample_rate: Sample rate in Hz written to the WAV header. Defaults
	            to the module-level SAMPLE_RATE when omitted.
	    """
	    if sample_rate is None:
	        sample_rate = SAMPLE_RATE
	    # Clamp before scaling: a sample outside [-1, 1] would exceed the int16
	    # range after multiplication and wrap around on the cast, which is
	    # audible as a loud click.
	    clipped = np.clip(audio, -1.0, 1.0)
	    pcm = (clipped * 32767).astype(np.int16)
	    wavfile.write(path, sample_rate, pcm)


	def send_to_api(wav_path: str, word: str) -> dict:
	    """Upload a recording together with its target word to API_URL.

	    Args:
	        wav_path: Path of the WAV file to upload.
	        word: Word identifier sent alongside the audio as form data.

	    Returns:
	        The decoded JSON body when the server answers with HTTP 200;
	        otherwise a dict with ``status`` set to ``"ERROR"`` and the status
	        code plus response text in ``feedback``.
	    """
	    print(f"{C.CYAN} Sending to server...{C.END}", flush=True)

	    form_fields = {"word": word}
	    with open(wav_path, 'rb') as wav_file:
	        upload = {"file": ("recording.wav", wav_file, "audio/wav")}
	        response = requests.post(API_URL, files=upload, data=form_fields, timeout=60)

	    if response.status_code == 200:
	        return response.json()
	    return {
	        "status": "ERROR",
	        "feedback": f"Server error {response.status_code}: {response.text}",
	    }


	def play_recording(path: str):
	    """Play the WAV file at *path* and wait until playback has finished.

	    Any failure while reading or playing the file is reported on the
	    terminal rather than raised.
	    """
	    try:
	        from scipy.io import wavfile as wav_reader

	        rate, samples = wav_reader.read(path)
	        # 16-bit PCM is rescaled to float32 before being handed to the device.
	        is_pcm16 = samples.dtype == np.int16
	        if is_pcm16:
	            samples = samples.astype(np.float32) / 32767.0
	        seconds = len(samples) / rate
	        print(f"\n{C.CYAN} Playing back ({seconds:.1f}s)...{C.END}")
	        sd.play(samples, rate)
	        sd.wait()
	        print(f" {C.CYAN}Done.{C.END}\n")
	    except Exception as err:
	        print(f"\n {C.RED}Playback failed: {err}{C.END}\n")


	def display_result(result: dict, word: str):
	    """Print a formatted summary of one evaluation result.

	    Args:
	        result: Response dict. Reads the ``status``, ``diagnosis`` and
	            ``feedback`` keys plus the optional ``details`` and
	            ``evidence`` sub-dicts; missing keys fall back to placeholders.
	        word: The attempted word; shown upper-cased in the header.
	    """
	    status = result.get("status", "?")
	    diagnosis = result.get("diagnosis", "?")
	    feedback = result.get("feedback", "")
	    details = result.get("details", {})
	    evidence = result.get("evidence", {})

	    # Anything other than an explicit PASS is rendered as a failure.
	    if status == "PASS":
	        color, icon = C.GREEN, "PASS"
	    else:
	        color, icon = C.RED, "FAIL"
	    rule = '=' * 50

	    print(f"\n {rule}")
	    print(f" {C.BOLD}Word: {word.upper()}{C.END}")
	    print(f" {color}{C.BOLD} [{icon}] {diagnosis}{C.END}")
	    print(f" {feedback}")
	    print()
	    print(f" {C.CYAN}Details:{C.END}")
	    # Each metric is taken from `details` first, then from `evidence`
	    # (under its own key there), and shown as '?' when neither has it.
	    score = details.get('alignment_score', evidence.get('alignment_score', '?'))
	    print(f" AI score: {score}")
	    centroid = details.get('centroid_hz', evidence.get('centroid_mean', '?'))
	    print(f" Centroid: {centroid} Hz")
	    print(f" {rule}")


	# =============================================================================
	# Main Loop
	# =============================================================================

	def _pick_word(choice: str):
	    """Return the word for a 1-based menu number, or None if *choice* is invalid."""
	    try:
	        number = int(choice)
	    except ValueError:
	        return None
	    # Explicit bounds check: plain list indexing would accept negative
	    # numbers (e.g. "-1") and silently pick a word from the end of the list.
	    if 1 <= number <= len(AVAILABLE_WORDS):
	        return AVAILABLE_WORDS[number - 1]
	    return None


	def _run_menu_loop():
	    """Show the word menu repeatedly until the user chooses to exit."""
	    while True:
	        # Word menu
	        print(f"{C.YELLOW} Choose a word:{C.END}")
	        for number, name in enumerate(AVAILABLE_WORDS, 1):
	            print(f" {number}. {name}")
	        has_recording = os.path.exists(TEMP_FILE)
	        if has_recording:
	            print(" L. Listen to last recording")
	        print(" 0. Exit\n")

	        choice = input(f" {C.CYAN}Choice: {C.END}").strip().upper()
	        if choice == '0':
	            print(f"\n {C.CYAN}Goodbye!{C.END}\n")
	            return
	        if choice == 'L':
	            if has_recording:
	                play_recording(TEMP_FILE)
	            else:
	                print(f" {C.RED}No recording yet.{C.END}\n")
	            continue

	        word = _pick_word(choice)
	        if word is None:
	            print(f" {C.RED}Invalid choice.{C.END}\n")
	            continue

	        # Record
	        input(f"\n Press ENTER when ready to record '{C.BOLD}{word}{C.END}'...")
	        audio = record_audio()
	        save_wav(audio, TEMP_FILE)

	        # Send to API
	        try:
	            result = send_to_api(TEMP_FILE, word)
	            if result.get("status") == "ERROR":
	                print(f"\n {C.RED}Error: {result.get('feedback', 'Unknown error')}{C.END}")
	            else:
	                display_result(result, word)
	        except requests.exceptions.ConnectionError:
	            print(f"\n {C.RED}Cannot reach server. Is the HF Space running?{C.END}")
	        except requests.exceptions.Timeout:
	            print(f"\n {C.RED}Server timeout. Try again.{C.END}")
	        except Exception as e:
	            print(f"\n {C.RED}Error: {e}{C.END}")

	        print()


	def main():
	    """Run the interactive record -> upload -> report session.

	    Exits with status 1 when no input device can be queried. The scratch
	    recording (TEMP_FILE) is removed on every exit path, including Ctrl+C.
	    """
	    # Check microphone
	    try:
	        sd.query_devices(kind='input')
	    except Exception:
	        print(f"{C.RED}No microphone detected.{C.END}")
	        sys.exit(1)

	    print(f"\n{C.CYAN}{C.BOLD}")
	    print(" =============================================")
	    print(" SPEECH TRAINER — Remote API Simulator")
	    print(" =============================================")
	    print(f"{C.END}")
	    print(f" Server: {API_URL}\n")

	    try:
	        _run_menu_loop()
	    except (KeyboardInterrupt, EOFError):
	        # Ctrl+C or closed stdin: leave quietly instead of with a traceback.
	        print(f"\n\n {C.CYAN}Goodbye!{C.END}\n")
	    finally:
	        # Cleanup runs in `finally` so the recording never lingers on disk,
	        # whichever way the session ends.
	        if os.path.exists(TEMP_FILE):
	            os.remove(TEMP_FILE)


	# Start the interactive simulator only when this file is run as a script.
	if __name__ == "__main__":
	    main()