File size: 2,226 Bytes
e8aa76b 4a13628 95cb26e 4a13628 d4b6133 e8aa76b d4b6133 4a13628 e8aa76b a8c8142 e8aa76b 4a13628 e8aa76b 4a13628 e8aa76b a8c8142 95cb26e e8aa76b 95cb26e e8aa76b 95cb26e 4a13628 e8aa76b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import asyncio
import functools
import logging
import os
import tempfile

import requests
logger = logging.getLogger(__name__)
async def speech_to_text(
    audio_bytes: bytes, filename: str, *, timeout: float = 60.0
) -> str:
    """Convert audio bytes to text using the Hugging Face Inference API.

    The raw audio is POSTed to the hosted ``openai/whisper-medium`` model, so
    no local ffmpeg installation is required. The blocking ``requests`` call
    is run in the event loop's default executor so that other coroutines keep
    running while the HTTP request is in flight.

    Args:
        audio_bytes: Raw contents of the audio file to transcribe.
        filename: Original file name; only forwarded to ``fallback_stt``.
        timeout: Value passed to ``requests.post`` as its ``timeout``
            (seconds). Without it a stalled connection would never return.

    Returns:
        The transcribed text; the placeholder sentence
        "No speech detected in the audio." when the API answers with empty
        text; or the message produced by ``fallback_stt`` when the API
        responds with a non-200 status or any exception is raised.
    """
    api_url = "https://api-inference.huggingface.co/models/openai/whisper-medium"
    # No auth header is sent. On Hugging Face Spaces an API key might not be
    # needed for public models.
    headers = {}

    try:
        logger.info("Converting audio to text using Hugging Face API")

        # requests is synchronous: calling it directly inside a coroutine
        # would freeze the event loop for the whole round-trip, so hand it
        # to a worker thread instead.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(
                requests.post,
                api_url,
                headers=headers,
                data=audio_bytes,
                timeout=timeout,
            ),
        )

        if response.status_code != 200:
            logger.error("Hugging Face API error: %s", response.status_code)
            return await fallback_stt(audio_bytes, filename)

        # A payload that is not a JSON object (or not JSON at all) raises
        # here and is routed to the fallback by the handler below.
        result = response.json()
        transcribed_text = result.get("text", "").strip()
        if not transcribed_text:
            transcribed_text = "No speech detected in the audio."
        logger.info("✓ STT successful: '%s'", transcribed_text)
        return transcribed_text
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network error,
        # timeout, malformed response) degrades to the fallback message.
        logger.error("✗ STT failed: %s", e)
        return await fallback_stt(audio_bytes, filename)
async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
    """Build a placeholder reply describing the audio that was received.

    No speech recognition is performed here. The returned sentence reports
    the file name, the text after the last dot in the name ('unknown' when
    the name contains no dot) and the payload size in bytes. If building
    that sentence raises, the error is logged and a generic failure message
    is returned instead.
    """
    try:
        byte_count = len(audio_bytes)
        if '.' in filename:
            # Everything after the final dot, e.g. "gz" for "a.tar.gz".
            extension = filename.rsplit('.', 1)[1]
        else:
            extension = 'unknown'
        summary = (
            f"Audio file '{filename}' ({extension}, {byte_count} bytes) received successfully. "
            "For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
        )
    except Exception as err:
        logger.error(f"Fallback STT also failed: {str(err)}")
        return "Audio processing failed. Please try a different audio format or install ffmpeg."
    else:
        return summary