malek-messaoudii
update stt
47a3efb
raw
history blame
2.44 kB
import asyncio
import functools
import logging
import os
import tempfile

import requests
# Module-level logger, named after this module, shared by the STT helpers below.
logger = logging.getLogger(__name__)
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the Hugging Face Inference API.

    The audio is posted as the raw request body to the hosted
    ``openai/whisper-medium`` model. The blocking ``requests`` call is run in
    the event loop's default executor so other coroutines keep running while
    the HTTP request is in flight.

    If the ``HF_TOKEN`` environment variable is set it is sent as a bearer
    token; otherwise the request carries no ``Authorization`` header.

    Args:
        audio_bytes: Raw audio file bytes.
        filename: Name of the audio file; forwarded to ``fallback_stt``.

    Returns:
        The transcribed text, the fallback's message when the API answers
        with a non-200 status, or a fixed apology message when the audio is
        empty, nothing was transcribed, or processing raised an exception.
        Ordinary exceptions are caught and logged rather than propagated.
    """
    no_speech_message = "Sorry, I couldn't understand the audio."

    # Nothing to transcribe: skip the network round trip entirely.
    if not audio_bytes:
        return no_speech_message

    api_url = "https://api-inference.huggingface.co/models/openai/whisper-medium"
    # Upper bound so a stalled connection cannot hang the request forever.
    request_timeout_seconds = 60

    try:
        logger.info("Converting audio to text using Hugging Face API")

        # Only authenticate with a real token; a placeholder value would be
        # sent to the API as if it were a credential.
        headers = {}
        token = os.environ.get("HF_TOKEN")
        if token:
            headers["Authorization"] = f"Bearer {token}"

        # requests is synchronous, so run it in a worker thread instead of
        # blocking the event loop. The bytes are already in memory, so they
        # are sent directly rather than via a temporary file.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(
                requests.post,
                api_url,
                headers=headers,
                data=audio_bytes,
                timeout=request_timeout_seconds,
            ),
        )

        if response.status_code != 200:
            # Fallback if the API fails.
            logger.warning(
                "Hugging Face API returned status %s; using fallback STT",
                response.status_code,
            )
            fallback_text = await fallback_stt(audio_bytes, filename)
            return fallback_text or no_speech_message

        result = response.json()
        # Guard against a non-dict payload or a null "text" field.
        text = result.get("text") if isinstance(result, dict) else None
        transcribed_text = (text or "").strip()

        if not transcribed_text:
            logger.warning("STT returned no text")
            return no_speech_message

        logger.info("✓ STT successful: '%s'", transcribed_text)
        return transcribed_text

    except Exception as e:
        # Top-level boundary: callers always get a user-facing string back.
        logger.error(f"✗ STT failed: {str(e)}")
        return "Sorry, there was an error processing your audio."
async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """
    Fallback STT used when the primary transcription request fails.

    No local speech recognition is implemented yet: this placeholder ignores
    both arguments and always returns the same status message.

    Args:
        audio_bytes: Raw audio file bytes (currently unused).
        filename: Name of the audio file (currently unused).

    Returns:
        A fixed message saying the transcription service is unavailable.
    """
    # Placeholder: a basic local speech recognizer could be plugged in here.
    # Returning a constant cannot raise, so no exception handling is needed.
    return "Audio received but transcription service is temporarily unavailable."