Pranav Mishra
Initial backend deployment - Flask API with ML models
1772a46
import requests
import os
import re
import logging
from typing import Optional
from .base_processor import AudioProcessor
logger = logging.getLogger(__name__)
class ExternalAPIProcessor(AudioProcessor):
"""
Hugging Face Whisper API integration for digit classification.
Uses openai/whisper-base model for speech-to-text conversion.
"""
def __init__(self):
super().__init__("External API (Whisper)")
# Try alternative Whisper model that should be available
self.api_url = "https://api-inference.huggingface.co/models/openai/whisper-small"
self.token = os.getenv('HUGGING_FACE_TOKEN')
self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
if not self.token:
logger.warning("HUGGING_FACE_TOKEN not found in environment variables")
def process_audio(self, audio_data: bytes) -> str:
"""
Process audio using Hugging Face Whisper API.
Args:
audio_data: Raw audio bytes (WAV format preferred)
Returns:
Predicted digit as string ('0'-'9')
Raises:
Exception: If API call fails or no digit found in response
"""
if not self.token:
raise Exception("Hugging Face API token not configured")
try:
# Make API request
response = requests.post(
self.api_url,
headers=self.headers,
data=audio_data,
timeout=15 # Increased timeout
)
if response.status_code == 401:
logger.error("Hugging Face API token is invalid or expired")
raise Exception("Invalid or expired API token - please update HUGGING_FACE_TOKEN")
elif response.status_code == 404:
logger.error(f"Model not found or unavailable: {self.api_url}")
raise Exception("API model unavailable - may be loading or deprecated")
elif response.status_code == 503:
logger.warning("Model is loading, this may take a few moments")
raise Exception("API model is loading - please try again in a moment")
elif response.status_code != 200:
logger.error(f"API request failed: {response.status_code} - {response.text}")
raise Exception(f"API error {response.status_code}: {response.text[:100]}")
# Parse response
result = response.json()
if 'text' not in result:
logger.error(f"Unexpected API response format: {result}")
raise Exception("Invalid API response format")
transcribed_text = result['text'].strip().lower()
logger.debug(f"Whisper transcription: '{transcribed_text}'")
# Extract digit from transcription
predicted_digit = self._extract_digit(transcribed_text)
if predicted_digit is None:
logger.warning(f"No digit found in transcription: '{transcribed_text}'")
return "?"
return predicted_digit
except requests.exceptions.Timeout:
raise Exception("API request timeout (15s) - service may be slow")
except requests.exceptions.RequestException as e:
raise Exception(f"API request failed: {str(e)}")
except Exception as e:
logger.error(f"Unexpected error in external API processing: {str(e)}")
raise
def _extract_digit(self, text: str) -> Optional[str]:
"""
Extract digit from transcribed text.
Handles both numerical ('1', '2') and word forms ('one', 'two').
Args:
text: Transcribed text from Whisper
Returns:
Digit as string ('0'-'9') or None if not found
"""
# Word to digit mapping
word_to_digit = {
'zero': '0', 'oh': '0',
'one': '1', 'won': '1',
'two': '2', 'to': '2', 'too': '2',
'three': '3', 'tree': '3',
'four': '4', 'for': '4', 'fore': '4',
'five': '5',
'six': '6', 'sick': '6',
'seven': '7',
'eight': '8', 'ate': '8',
'nine': '9', 'niner': '9'
}
# First, try to find a direct digit
digit_match = re.search(r'\b([0-9])\b', text)
if digit_match:
return digit_match.group(1)
# Then try word forms
words = text.split()
for word in words:
clean_word = re.sub(r'[^\w]', '', word.lower())
if clean_word in word_to_digit:
return word_to_digit[clean_word]
# Try partial matches for robustness
for word, digit in word_to_digit.items():
if word in text:
return digit
return None
def is_configured(self) -> bool:
"""Check if API is properly configured."""
return bool(self.token)
def test_connection(self) -> bool:
"""Test API connection with a simple request."""
if not self.is_configured():
return False
try:
# Test with minimal audio data
test_response = requests.get(
self.api_url,
headers=self.headers,
timeout=5
)
return test_response.status_code == 200
except:
return False