Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

File size: 2,440 Bytes

47a3efb
4a13628
95cb26e
 
4a13628
 
 
d4b6133
 
47a3efb
4a13628
 
 
95cb26e
4a13628
 
 
d4b6133
4a13628
47a3efb
4a13628
95cb26e
 
 
 
4a13628
95cb26e
47a3efb
 
 
95cb26e
47a3efb
 
 
 
 
 
 
 
 
 
95cb26e
 
 
 
 
 
 
 
 
 
 
4a13628
 
47a3efb

import asyncio
import logging
import os
import tempfile

import requests

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """Transcribe audio with the hosted Hugging Face Whisper model.

    The raw bytes are POSTed to the ``openai/whisper-medium`` endpoint of the
    Hugging Face Inference API. The blocking ``requests`` call is run in the
    event loop's default executor so other coroutines keep being served while
    the HTTP request is in flight.

    If the ``HF_TOKEN`` environment variable is set, it is sent as a bearer
    token; otherwise the request is made anonymously.

    Args:
        audio_bytes: Raw contents of the audio file.
        filename: Name of the audio file. It is only forwarded to
            ``fallback_stt``; the upload itself does not use it.

    Returns:
        The transcribed text when the API answers with HTTP 200 and a
        non-empty transcript. Otherwise a human-readable message: the
        ``fallback_stt`` result on a non-200 response, an "I couldn't
        understand" notice for empty audio or an empty transcript, or a
        generic error notice when an exception occurs (the exception is
        logged, not propagated).
    """
    api_url = "https://api-inference.huggingface.co/models/openai/whisper-medium"
    # (connect, read) timeouts in seconds: without them a stalled connection
    # would keep the request hanging forever.
    request_timeout = (10, 120)

    try:
        logger.info("Converting audio to text using Hugging Face API")

        # Nothing to transcribe; skip the network round trip entirely.
        if not audio_bytes:
            logger.warning("STT called with empty audio payload")
            return "Sorry, I couldn't understand the audio."

        # Only authenticate when a real token is configured. Sending a
        # placeholder credential makes the API reject every request.
        headers = {}
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            headers["Authorization"] = f"Bearer {hf_token}"

        # requests is synchronous: run it in a worker thread so the event
        # loop is not blocked for the duration of the upload/transcription.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                api_url,
                headers=headers,
                data=audio_bytes,
                timeout=request_timeout,
            ),
        )

        if response.status_code == 200:
            payload = response.json()
            # Be tolerant of unexpected payload shapes (non-dict, null text).
            raw_text = payload.get("text") if isinstance(payload, dict) else None
            transcribed_text = (raw_text or "").strip()
            from_api = True
        else:
            logger.warning(
                "Hugging Face API returned HTTP %s; using fallback STT",
                response.status_code,
            )
            transcribed_text = await fallback_stt(audio_bytes, filename)
            from_api = False

        if not transcribed_text:
            logger.warning("STT produced no text")
            return "Sorry, I couldn't understand the audio."

        # Only claim success when the text really came from the API.
        if from_api:
            logger.info("✓ STT successful: '%s'", transcribed_text)
        return transcribed_text

    except Exception as exc:
        # Boundary handler: callers always get a string back. Keep the
        # traceback in the log so failures remain diagnosable.
        logger.exception("✗ STT failed: %s", exc)
        return "Sorry, there was an error processing your audio."


async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """Return a placeholder message in place of a real fallback transcription.

    No speech recognition is performed here yet: both arguments are accepted
    but currently ignored, and a fixed notice is returned.

    Args:
        audio_bytes: Raw contents of the audio file (currently unused).
        filename: Name of the audio file (currently unused).

    Returns:
        A fixed notice that the transcription service is temporarily
        unavailable.
    """
    # TODO: implement an actual fallback speech-recognition path here.
    return "Audio received but transcription service is temporarily unavailable."