import asyncio
import functools
import logging
import os
import tempfile  # noqa: F401  (no longer used below; retained so existing imports are not dropped)

import requests

logger = logging.getLogger(__name__)

# Hosted Whisper model on the Hugging Face Inference API.
_HF_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"

# Environment variable holding the Hugging Face access token. When it is
# unset the request is sent without an Authorization header.
_HF_TOKEN_ENV_VAR = "HF_TOKEN"

# `requests` never times out by default; bound the call so a stalled
# connection cannot hang the caller indefinitely.
_REQUEST_TIMEOUT_SECONDS = 60

_NOT_UNDERSTOOD_MESSAGE = "Sorry, I couldn't understand the audio."
_PROCESSING_ERROR_MESSAGE = "Sorry, there was an error processing your audio."
_FALLBACK_MESSAGE = (
    "Audio received but transcription service is temporarily unavailable."
)


def _build_headers() -> dict:
    """Return the HTTP headers for the API call, with auth only if configured."""
    token = os.environ.get(_HF_TOKEN_ENV_VAR, "").strip()
    return {"Authorization": f"Bearer {token}"} if token else {}


def _extract_text(payload: object) -> str:
    """Return the stripped ``"text"`` field of a JSON payload, or ``""``."""
    if not isinstance(payload, dict):
        return ""
    # `or ""` also covers an explicit JSON null in the "text" field.
    return str(payload.get("text") or "").strip()


async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the Hugging Face Inference API.

    The blocking HTTP request runs in the event loop's default executor so
    that other coroutines keep running while the transcription is pending.
    This coroutine never raises: every failure is logged and reported through
    the returned message.

    Args:
        audio_bytes: Raw audio file bytes
        filename: Name of the audio file (used for logging only)

    Returns:
        The transcribed text; the ``fallback_stt`` result when the API answers
        with a non-200 status; or a user-facing apology when the audio is
        empty, nothing was transcribed, or an error occurred.
    """
    try:
        logger.info("Converting audio to text using Hugging Face API")

        if not audio_bytes:
            # Nothing to transcribe; skip the pointless network round trip.
            logger.warning("Empty audio payload received for '%s'", filename)
            return _NOT_UNDERSTOOD_MESSAGE

        logger.debug("STT input '%s' (%d bytes)", filename, len(audio_bytes))

        send_request = functools.partial(
            requests.post,
            _HF_API_URL,
            headers=_build_headers(),
            data=audio_bytes,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # requests is synchronous; calling it directly would block the loop.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, send_request)

        if response.status_code == 200:
            transcribed_text = _extract_text(response.json())
            if transcribed_text:
                logger.info("✓ STT successful: '%s'", transcribed_text)
        else:
            logger.warning(
                "Hugging Face API returned status %s; using fallback STT",
                response.status_code,
            )
            transcribed_text = await fallback_stt(audio_bytes, filename)

        return transcribed_text or _NOT_UNDERSTOOD_MESSAGE
    except Exception as exc:
        # Deliberate catch-all boundary: callers rely on always getting a
        # string back rather than an exception.
        logger.exception("✗ STT failed: %s", exc)
        return _PROCESSING_ERROR_MESSAGE


async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """
    Fallback STT used when the hosted API does not return a transcription.

    No local speech recognition is implemented yet, so both arguments are
    currently unused and a fixed placeholder message is returned.

    Args:
        audio_bytes: Raw audio file bytes
        filename: Name of the audio file

    Returns:
        A placeholder message saying transcription is unavailable.
    """
    return _FALLBACK_MESSAGE