""" Audio Digit Classification API for Hugging Face Spaces Backend API for spoken digit recognition (0-9) - HF Spaces deployment """ from flask import Flask, request, jsonify from flask_cors import CORS import os import time import logging from typing import Dict, Any, Optional from dotenv import load_dotenv import numpy as np # Import audio processors (only essential ones for deployment) from audio_processors.external_api import ExternalAPIProcessor from audio_processors.whisper_digit_processor import WhisperDigitProcessor from audio_processors.ml_mfcc_processor import MLMFCCProcessor from audio_processors.ml_mel_cnn_processor import MLMelCNNProcessor from audio_processors.ml_raw_cnn_processor import MLRawCNNProcessor # Import utilities from utils.audio_utils import validate_audio_format, convert_audio_format, get_audio_duration, convert_for_ml_models from utils.logging_utils import performance_logger, setup_flask_logging # Load environment variables load_dotenv() # Initialize Flask app app = Flask(__name__) app.secret_key = os.getenv('SECRET_KEY', 'hf_spaces_deployment_key') # Enable CORS for frontend requests from Vercel CORS(app, origins=['*']) # In production, specify your Vercel domain # Setup logging setup_flask_logging(app) # Configuration for HF Spaces MAX_AUDIO_DURATION = 10 # seconds MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg', 'm4a', 'webm'} def allowed_file(filename: str) -> bool: """Check if file extension is allowed.""" return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS # Global processor cache for model persistence _processor_cache = {} def initialize_processors(): """Initialize audio processors optimized for HF Spaces deployment with caching.""" global _processor_cache # Return cached processors if already initialized if _processor_cache: app.logger.info(f"Using cached processors: {len(_processor_cache)} available") return _processor_cache procs = {} # ML-trained processors (high priority - use best models only) ml_processors = [ ('ml_mfcc', MLMFCCProcessor, 'ML MFCC + Dense NN (Best - 98.52%)'), ('ml_mel_cnn', MLMelCNNProcessor, 'ML Mel CNN (Good - 97.22%)'), ('ml_raw_cnn', MLRawCNNProcessor, 'ML Raw CNN (Fair - 91.30%)') ] ml_working_count = 0 for proc_key, proc_class, proc_name in ml_processors: try: # Initialize once and cache app.logger.info(f"Loading {proc_name}...") processor = proc_class() if processor.is_configured(): procs[proc_key] = processor ml_working_count += 1 app.logger.info(f"[OK] {proc_name} loaded successfully (cached)") else: app.logger.warning(f"[WARN] {proc_name} not configured (model files missing)") except Exception as e: app.logger.error(f"[FAIL] Failed to initialize {proc_name}: {str(e)}") # External API processor as fallback try: external_processor = ExternalAPIProcessor() if external_processor.is_configured(): procs['external_api'] = external_processor app.logger.info("[OK] External API processor initialized (cached)") else: app.logger.warning("[WARN] External API not configured") except Exception as e: app.logger.error(f"[FAIL] Failed to initialize External API: {str(e)}") # Whisper digit processor as another fallback try: whisper_processor = WhisperDigitProcessor() if whisper_processor.is_configured(): procs['whisper_digit'] = whisper_processor app.logger.info("[OK] Whisper digit processor initialized (cached)") except Exception as e: app.logger.error(f"[FAIL] Failed to initialize Whisper: {str(e)}") # Cache the processors globally _processor_cache = procs app.logger.info(f"Processor initialization complete:") app.logger.info(f" ML Models loaded: {ml_working_count}/3") app.logger.info(f" Total processors cached: {len(procs)}") return procs # Initialize processors on startup (cached globally) processors = initialize_processors() @app.route('/') def index(): """API status endpoint.""" return jsonify({ 'message': 'Streaming Digit Classifier API', 'status': 'running', 'version': '1.0.0', 'available_processors': list(processors.keys()), 'documentation': 'Frontend at Vercel, Backend API at HF Spaces' }) @app.route('/api/process_audio', methods=['POST']) def process_audio(): """ Process audio file with selected method and return digit prediction. Expects multipart form data with 'audio' file and 'method' selection. """ try: # Validate request if 'audio' not in request.files: return jsonify({'error': 'No audio file provided'}), 400 if 'method' not in request.form: return jsonify({'error': 'No processing method specified'}), 400 audio_file = request.files['audio'] method = request.form['method'] # Validate audio file if audio_file.filename == '': return jsonify({'error': 'No file selected'}), 400 if not allowed_file(audio_file.filename): return jsonify({'error': 'Unsupported file format'}), 400 # Validate method if method not in processors: return jsonify({'error': f'Unknown processing method: {method}'}), 400 # Read audio data audio_data = audio_file.read() # Check file size if len(audio_data) > MAX_FILE_SIZE: return jsonify({'error': 'Audio file too large'}), 400 # Convert to standard format try: app.logger.debug(f"Converting audio format. Original size: {len(audio_data)} bytes") standardized_audio = convert_audio_format(audio_data) app.logger.debug(f"Converted audio size: {len(standardized_audio)} bytes") except Exception as e: app.logger.error(f"Audio conversion failed: {str(e)}") return jsonify({'error': 'Failed to process audio format - unsupported format or corrupted file'}), 400 # Check audio duration duration = get_audio_duration(standardized_audio) if duration > MAX_AUDIO_DURATION: return jsonify({ 'error': f'Audio too long: {duration:.1f}s (max: {MAX_AUDIO_DURATION}s)' }), 400 if duration < 0.1: return jsonify({'error': 'Audio too short (minimum: 0.1s)'}), 400 # Log audio input info performance_logger.log_audio_info(duration, { 'filename': audio_file.filename, 'size_bytes': len(audio_data), 'converted_size': len(standardized_audio), 'method': method }) # Process with selected method processor = processors[method] result = processor.predict_with_timing(standardized_audio) # Log performance performance_logger.log_prediction(method, result) # Add additional metadata result.update({ 'audio_duration': round(duration, 3), 'file_size': len(audio_data), 'api_version': '1.0.0' }) app.logger.info(f"Processed audio with {method}: '{result['predicted_digit']}' in {result['inference_time']}s") return jsonify(result) except Exception as e: app.logger.error(f"Audio processing error: {str(e)}") return jsonify({ 'error': 'Internal processing error', 'success': False, 'timestamp': time.time() }), 500 @app.route('/api/process_audio_chunk', methods=['POST']) def process_audio_chunk(): """ Process streaming audio chunk for real-time digit recognition. """ try: # Validate request if 'audio' not in request.files: return jsonify({'error': 'No audio chunk provided'}), 400 audio_file = request.files['audio'] method = request.form.get('method', 'ml_mfcc') # Default to best ML model # Validate method if method not in processors: return jsonify({'error': f'Unknown processing method: {method}'}), 400 # Read audio data audio_data = audio_file.read() # Check chunk size if len(audio_data) > MAX_FILE_SIZE: return jsonify({'error': 'Audio chunk too large'}), 400 if len(audio_data) < 100: return jsonify({'error': 'Audio chunk too small'}), 400 # Convert to standardized format try: standardized_audio = convert_for_ml_models(audio_data, 'streaming') except Exception as e: app.logger.error(f"Audio conversion failed for chunk: {str(e)}") return jsonify({'error': 'Failed to process audio chunk format'}), 400 # Process audio chunk processor = processors[method] result = processor.predict_with_timing(standardized_audio) # Add streaming metadata result.update({ 'segment_index': 0, 'segment_size': len(standardized_audio), 'is_streaming': True, 'api_version': '1.0.0' }) app.logger.info(f"Streaming prediction: '{result['predicted_digit']}' " f"(Inference: {result['inference_time']}s)") return jsonify({ 'success': True, 'segments_detected': 1, 'total_results': 1, 'results': [result], 'timestamp': time.time(), 'has_fallback': False }) except Exception as e: app.logger.error(f"Streaming audio processing error: {str(e)}") return jsonify({ 'error': 'Internal streaming processing error', 'success': False, 'timestamp': time.time() }), 500 @app.route('/api/processors') def get_processors(): """Get information about available processors.""" try: processor_info = {} for name, processor in processors.items(): info = { 'name': processor.name, 'method': name, 'configured': getattr(processor, 'is_configured', lambda: True)() } # Add model-specific info if available if hasattr(processor, 'get_model_info'): info.update(processor.get_model_info()) processor_info[name] = info return jsonify(processor_info) except Exception as e: app.logger.error(f"Error getting processors: {str(e)}") return jsonify({'error': 'Failed to retrieve processor information'}), 500 @app.route('/api/health') def health_check(): """Health check endpoint.""" try: # Check processor availability processor_health = {} for name, processor in processors.items(): processor_health[name] = { 'available': True, 'configured': getattr(processor, 'is_configured', lambda: True)() } return jsonify({ 'status': 'healthy', 'timestamp': time.time(), 'processors': processor_health, 'version': '1.0.0', 'deployment': 'huggingface-spaces' }) except Exception as e: app.logger.error(f"Health check failed: {str(e)}") return jsonify({ 'status': 'unhealthy', 'error': str(e), 'timestamp': time.time() }), 500 @app.errorhandler(404) def not_found_error(error): """Handle 404 errors.""" return jsonify({'error': 'Endpoint not found', 'status': 404}), 404 @app.errorhandler(500) def internal_error(error): """Handle 500 errors.""" app.logger.error(f"Internal error: {str(error)}") return jsonify({'error': 'Internal server error', 'status': 500}), 500 @app.errorhandler(413) def too_large_error(error): """Handle file too large errors.""" return jsonify({'error': 'File too large', 'status': 413}), 413 if __name__ == '__main__': # Log startup information try: import importlib.metadata flask_version = importlib.metadata.version('flask') except: flask_version = 'unknown' performance_logger.log_system_info({ 'python_version': os.sys.version, 'flask_version': flask_version, 'processors_loaded': list(processors.keys()), 'max_audio_duration': MAX_AUDIO_DURATION, 'max_file_size': MAX_FILE_SIZE, 'deployment': 'huggingface-spaces' }) # Run server (HF Spaces requires port 7860) port = int(os.getenv('PORT', 7860)) app.logger.info(f"Starting Audio Digit Classifier API on port {port}") app.logger.info("Deployment: Hugging Face Spaces") app.run( host='0.0.0.0', port=port, debug=False, # Disable debug in production threaded=True )