Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

FastAPI-Backend-Models / services /stt_service.py

malek-messaoudii

update stt

47a3efb about 2 months ago

2.44 kB

	import requests
	import logging
	import tempfile
	import os

	logger = logging.getLogger(__name__)

	async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
	    """Convert audio bytes to text using the Hugging Face Inference API.

	    The audio is posted as the raw request body to the hosted
	    ``openai/whisper-medium`` model. When the API answers with a non-200
	    status, ``fallback_stt`` is used instead. This function never raises:
	    every failure is logged and turned into a user-facing message.

	    Authentication is optional. If the ``HF_TOKEN`` environment variable
	    is set, it is sent as a bearer token; otherwise the request is made
	    anonymously.

	    Args:
	        audio_bytes: Raw audio file bytes.
	        filename: Name of the audio file (only forwarded to the fallback).

	    Returns:
	        The transcribed text, or an apology/fallback message when the
	        audio is empty, cannot be transcribed, or processing fails.
	    """
	    # Function-scope imports keep this block self-contained.
	    import asyncio
	    import functools

	    # Nothing to transcribe: skip the network round trip entirely.
	    if not audio_bytes:
	        return "Sorry, I couldn't understand the audio."

	    api_url = "https://api-inference.huggingface.co/models/openai/whisper-medium"
	    request_timeout_seconds = 60

	    try:
	        logger.info("Converting audio to text using Hugging Face API")

	        # Never send a placeholder credential: an invalid bearer token is
	        # worse than no token at all.
	        token = os.environ.get("HF_TOKEN")
	        headers = {"Authorization": f"Bearer {token}"} if token else {}

	        # requests is blocking, so run it in the default executor to keep
	        # the event loop responsive, and bound the wait with a timeout.
	        send_request = functools.partial(
	            requests.post,
	            api_url,
	            headers=headers,
	            data=audio_bytes,
	            timeout=request_timeout_seconds,
	        )
	        loop = asyncio.get_running_loop()
	        response = await loop.run_in_executor(None, send_request)

	        if response.status_code == 200:
	            payload = response.json()
	            # Guard against unexpected payload shapes (list, null text, ...).
	            text = payload.get("text") if isinstance(payload, dict) else None
	            transcribed_text = text.strip() if isinstance(text, str) else ""
	        else:
	            logger.warning(
	                "Hugging Face API returned HTTP %s; using fallback STT",
	                response.status_code,
	            )
	            transcribed_text = await fallback_stt(audio_bytes, filename)

	        if not transcribed_text:
	            transcribed_text = "Sorry, I couldn't understand the audio."

	        logger.info("✓ STT successful: '%s'", transcribed_text)
	        return transcribed_text

	    except Exception as e:
	        # Top-level boundary: log with traceback and degrade gracefully.
	        logger.exception("✗ STT failed: %s", e)
	        return "Sorry, there was an error processing your audio."


	async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
	    """Return a placeholder transcript when the primary STT service fails.

	    No local speech recognition is implemented here; both arguments are
	    accepted for interface compatibility and are currently ignored.

	    Args:
	        audio_bytes: Raw audio file bytes (unused).
	        filename: Name of the audio file (unused).

	    Returns:
	        A fixed message saying transcription is temporarily unavailable.
	    """
	    # Returning a constant cannot fail, so no error handling is needed.
	    return "Audio received but transcription service is temporarily unavailable."