Spaces:

AI-Solutions-KK
/

mango-disease-api

Sleeping

App Files Files Community

mango-disease-api / voice.py

AI-Solutions-KK

backend added

6709e9b 3 months ago

raw

history blame contribute delete

4.89 kB

	# api/voice.py
	# Intelligent Multi-Device Text-to-Speech
	# Supports: Bluetooth, Wired, Built-in speakers (Pi/PC/Mobile)

	import threading
	import sys
	import subprocess


	# ==================== DEVICE DETECTION ====================
	def _detect_audio_device():
	"""
	Auto-detect available audio output device.

	Priority:
	1. Bluetooth speaker (if connected)
	2. Wired/USB speaker
	3. Built-in speaker (PC/Mobile)
	4. Raspberry Pi audio jack
	5. HDMI audio output
	"""
	try:
	# Linux (Raspberry Pi / Ubuntu)
	if sys.platform.startswith("linux"):
	result = subprocess.run(
	["pactl", "list", "sinks", "short"],
	capture_output=True, text=True, timeout=2
	)

	if result.returncode == 0 and result.stdout.strip():
	# Found PulseAudio devices
	return "pulseaudio"

	# Check if any audio device exists (fallback)
	return "default"

	except Exception:
	return "default"


	# ==================== TTS ENGINE SELECTION ====================
	def _get_tts_engine():
	"""
	Select best available TTS engine based on platform.

	Fallback chain:
	1. pyttsx3 (Cross-platform - Windows/Mac/Linux)
	2. espeak (Linux/Pi - lightweight)
	3. Festival (Linux fallback)
	4. Silent fail (no crash)
	"""
	audio_device = _detect_audio_device()

	# Try pyttsx3 first (most compatible)
	try:
	import pyttsx3
	engine = pyttsx3.init()

	# Platform-specific audio routing
	if sys.platform.startswith("linux"):
	# Force audio to detected device on Linux
	try:
	engine.setProperty("driver", "espeak") # More reliable on Pi
	except Exception:
	pass # Use default driver

	engine.setProperty("rate", 150) # Speech rate
	engine.setProperty("volume", 0.9) # Volume

	return ("pyttsx3", engine)

	except Exception:
	pass

	# Fallback to espeak (Linux/Pi)
	if sys.platform.startswith("linux"):
	try:
	subprocess.run(["espeak", "--version"],
	capture_output=True, timeout=1)
	return ("espeak", None)
	except Exception:
	pass

	# Last resort: Festival (Linux)
	if sys.platform.startswith("linux"):
	try:
	subprocess.run(["festival", "--version"],
	capture_output=True, timeout=1)
	return ("festival", None)
	except Exception:
	pass

	# No TTS available - silent fail
	return (None, None)


	# ==================== WORKER THREAD ====================
	def _speak_worker(text: str):
	"""Background thread for non-blocking speech"""
	tts_type, engine = _get_tts_engine()

	try:
	if tts_type == "pyttsx3":
	# Standard pyttsx3
	engine.say(text)
	engine.runAndWait()
	engine.stop()

	elif tts_type == "espeak":
	# Direct espeak (Linux/Pi)
	subprocess.run(
	["espeak", text],
	stdout=subprocess.DEVNULL,
	stderr=subprocess.DEVNULL,
	timeout=30
	)

	elif tts_type == "festival":
	# Festival TTS (Linux fallback)
	subprocess.run(
	["festival", "--tts"],
	input=text.encode(),
	stdout=subprocess.DEVNULL,
	stderr=subprocess.DEVNULL,
	timeout=30
	)

	# Silent if no TTS available

	except Exception:
	# Absolute silent fail - never crash the API
	pass


	# ==================== PUBLIC API ====================
	def speak(text: str):
	"""
	Non-blocking, multi-device Text-to-Speech.

	Works on:
	- Raspberry Pi (Bluetooth, 3.5mm jack, HDMI)
	- Desktop PC (Built-in speakers)
	- Mobile devices (If API runs locally)

	Args:
	text: Text to speak

	Features:
	- Auto-detects available audio device
	- Non-blocking (runs in background)
	- Silent fail (never crashes API)
	- Multi-platform (Linux/Windows/Mac)
	"""
	if not text or not text.strip():
	return

	# Run in daemon thread (non-blocking)
	threading.Thread(
	target=_speak_worker,
	args=(text,),
	daemon=True
	).start()


	# ==================== MANUAL DEVICE SELECTION ====================
	def speak_with_device(text: str, device: str = "auto"):
	"""
	Advanced: Speak with specific device override.

	Args:
	text: Text to speak
	device: "auto", "bluetooth", "builtin", "hdmi"

	Note: Device selection requires platform-specific setup.
	For production, use `speak()` with auto-detection.
	"""
	# For future enhancement - specific device routing
	# Currently uses auto-detection
	speak(text)