Spaces:
Sleeping
Sleeping
| """ | |
| Voice Detection API - Flask Application (HuggingFace Spaces Version) | |
| Accepts Base64-encoded MP3 audio and returns AI vs Human classification | |
| """ | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| from functools import wraps | |
| import os | |
| import logging | |
| from datetime import datetime | |
| # Import the detector | |
| from detector import HybridEnsembleDetector | |
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app and enable CORS with the flask-cors defaults
app = Flask(__name__)
CORS(app)

# Load API key from environment variable (HuggingFace Secrets).
# The literal fallback is kept so existing local setups keep working, but it
# is a publicly known value and must not be relied on in a deployment.
API_KEY = os.environ.get('API_KEY', 'sk_test_123456789')

# Never write any part of the key to the log: logs are commonly readable by
# more people than the secret store is. Report only where the key came from.
if 'API_KEY' in os.environ:
    logger.info("API initialized with key from the API_KEY environment variable")
else:
    logger.warning(
        "API_KEY environment variable is not set; using the built-in test key. "
        "Configure the API_KEY secret before exposing this service."
    )

# The detector is created by init_detector(); None means "not loaded yet".
logger.info("Loading AI detection models...")
detector = None
def init_detector():
    """Create the shared HybridEnsembleDetector and store it in ``detector``.

    The module-level ``detector`` is only rebound when construction succeeds,
    so a failed attempt leaves the previous value (initially ``None``) intact.

    Returns:
        bool: True if the detector was constructed, False if construction
        raised any exception (the exception is logged, not propagated).
    """
    global detector
    try:
        detector = HybridEnsembleDetector(
            deepfake_model_path="garystafford/wav2vec2-deepfake-voice-detector",
            whisper_model_path="openai/whisper-base",
            physics_weight=0.4,
            dl_weight=0.6,
            use_local_deepfake_model=False,
            use_local_whisper_model=False,
            max_audio_duration=30,
            # API uses client-provided language; skip Whisper to save GPU
            # memory and startup time
            load_whisper=False,
        )
    except Exception as exc:
        # Deliberately best-effort: callers decide what to do with False.
        # logger.exception records the traceback, which the message alone
        # does not carry.
        logger.exception("Failed to initialize detector: %s", exc)
        return False

    logger.info("Detector initialized successfully")
    return True
# Initialize detector at startup. A failure here is tolerated: the request
# handlers call init_detector() again when ``detector`` is still None.
if not init_detector():
    logger.warning("API starting without detector - models will be loaded on first request")
| # ========================================================== | |
| # AUTHENTICATION DECORATOR | |
| # ========================================================== | |
def require_api_key(f):
    """Decorator to validate the API key from the ``x-api-key`` request header.

    Args:
        f: The view function to protect.

    Returns:
        A wrapper that responds with a JSON error and HTTP 401 when the header
        is missing or empty, HTTP 403 when it does not match ``API_KEY``, and
        otherwise returns ``f(*args, **kwargs)``.
    """
    # Local import keeps this block self-contained; it executes once per
    # decorated view, not once per request.
    import hmac

    # wraps() copies f.__name__ onto the wrapper. Flask derives the endpoint
    # name from the view's __name__, so without it every protected view would
    # be registered as "decorated_function" and collide.
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Get API key from headers
        provided_key = request.headers.get('x-api-key')
        if not provided_key:
            logger.warning(f"Request without API key from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Missing API key. Please provide 'x-api-key' in request headers."
            }), 401

        # Constant-time comparison so response timing does not reveal how many
        # leading characters of a guess are correct. Compared as bytes because
        # compare_digest rejects str values containing non-ASCII characters.
        keys_match = hmac.compare_digest(
            provided_key.encode('utf-8'),
            API_KEY.encode('utf-8'),
        )
        if not keys_match:
            logger.warning(f"Invalid API key attempt from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Invalid API key"
            }), 403

        return f(*args, **kwargs)

    return decorated_function
| # ========================================================== | |
| # ROOT ENDPOINT (HuggingFace Spaces Homepage) | |
| # ========================================================== | |
# Register the view with the app; a plain function definition is never served.
@app.route('/', methods=['GET'])
def home():
    """Root endpoint - API information.

    Returns:
        tuple: A JSON response describing the service (name, version,
        endpoint paths, supported languages, authentication hint) and the
        HTTP status code 200.
    """
    return jsonify({
        "name": "Voice Detection API",
        "version": "1.0.0",
        "description": "AI-powered voice detection system for identifying AI-generated vs human voices",
        "endpoints": {
            "health": "/health",
            "detect": "/detect",
            "detection": "/api/voice-detection"
        },
        "supported_languages": ["Tamil", "English", "Hindi", "Malayalam", "Telugu"],
        "authentication": "Required - use 'x-api-key' header",
        "documentation": "See README for full API documentation"
    }), 200
| # ========================================================== | |
| # HEALTH CHECK ENDPOINT | |
| # ========================================================== | |
# Register the view with the app; a plain function definition is never served.
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint (no authentication required).

    Returns:
        tuple: A JSON response with the service status, a UTC timestamp,
        whether the global ``detector`` is loaded, and the platform name,
        together with the HTTP status code 200.
    """
    # Local import keeps this block self-contained.
    from datetime import timezone

    # datetime.utcnow() is deprecated. Taking an aware UTC "now" and dropping
    # tzinfo yields the same naive value, so the ISO string keeps its previous
    # shape (no "+00:00" suffix) for existing clients.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return jsonify({
        "status": "healthy",
        "service": "Voice Detection API",
        "timestamp": now_utc.isoformat(),
        "models_loaded": detector is not None,
        "platform": "HuggingFace Spaces"
    }), 200
| # ========================================================== | |
| # EVALUATION ENDPOINT: /detect (hackathon evaluator format) | |
| # Returns exactly: status, classification, confidenceScore | |
| # ========================================================== | |
# The endpoint name is given explicitly so registration does not depend on the
# auth decorator preserving the view's __name__.
@app.route('/detect', methods=['POST'], endpoint='detect')
@require_api_key
def detect():
    """
    Hackathon evaluation endpoint. Request/response format per evaluation guide.

    Request: { "language": "English", "audioFormat": "mp3", "audioBase64": "..." }
    Response: { "status": "success", "classification": "HUMAN"|"AI_GENERATED", "confidenceScore": 0.0-1.0 }

    Returns:
        tuple: (JSON response, HTTP status). 400 for a malformed or incomplete
        request, 503 when the detector cannot be loaded, 500 when analysis
        fails or an unexpected error occurs, 200 on success.
    """
    global detector
    try:
        if not request.is_json:
            return jsonify({"status": "error", "message": "Content-Type must be application/json"}), 400

        # silent=True returns None for an unparseable body instead of raising,
        # so a client mistake is answered with 400 rather than being caught by
        # the generic handler below and reported as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"status": "error", "message": "Request body must be a JSON object"}), 400

        required_fields = ('language', 'audioFormat', 'audioBase64')
        missing = [name for name in required_fields if name not in data]
        if missing:
            return jsonify({"status": "error", "message": f"Missing required fields: {', '.join(missing)}"}), 400

        # Reject non-string payloads (numbers, lists, null) up front; len()
        # on them would either raise or measure the wrong thing.
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({"status": "error", "message": "Invalid or empty audio data"}), 400

        if str(data['audioFormat']).lower() != 'mp3':
            return jsonify({"status": "error", "message": "Only MP3 audio format is supported"}), 400

        # Lazy load in case startup initialization failed.
        if detector is None and not init_detector():
            return jsonify({"status": "error", "message": "Failed to load AI detection models. Please try again later."}), 503

        result = detector.analyze(audio_base64, input_type="base64")
        if result['status'] != 'success':
            return jsonify({"status": "error", "message": result.get('error', 'Analysis failed')}), 500

        raw_score = max(0.0, min(1.0, float(result['confidenceScore'])))
        classification = result['classification']

        # confidenceScore = confidence in the prediction (0-1).
        # NOTE(review): this treats the detector's score as the AI likelihood
        # and inverts it for HUMAN - confirm against detector.analyze.
        if classification == 'AI_GENERATED':
            confidence_score = raw_score
        else:
            confidence_score = 1.0 - raw_score
        confidence_score = max(0.0, min(1.0, round(confidence_score, 2)))

        return jsonify({
            "status": "success",
            "classification": classification,
            "confidenceScore": confidence_score
        }), 200
    except Exception as exc:
        # Top-level boundary: log with traceback, hide details from the client.
        logger.exception("Error in /detect: %s", exc)
        return jsonify({"status": "error", "message": "Internal server error"}), 500
| # ========================================================== | |
| # MAIN VOICE DETECTION ENDPOINT (extended response) | |
| # ========================================================== | |
# The endpoint name is given explicitly so registration does not depend on the
# auth decorator preserving the view's __name__.
@app.route('/api/voice-detection', methods=['POST'], endpoint='voice_detection')
@require_api_key
def voice_detection():
    """
    Main voice detection endpoint

    Expected JSON Body:
    {
        "language": "Tamil" | "English" | "Hindi" | "Malayalam" | "Telugu",
        "audioFormat": "mp3",
        "audioBase64": "base64_encoded_audio_string"
    }

    Returns:
    {
        "status": "success",
        "language": "Tamil",
        "classification": "AI_GENERATED" | "HUMAN",
        "confidenceScore": 0.0-1.0,
        "explanation": "..."
    }

    Error responses use {"status": "error", "message": "..."} with HTTP 400
    for a malformed or unsupported request, 503 when the detector cannot be
    loaded, and 500 when analysis fails or an unexpected error occurs.
    """
    global detector
    try:
        # Validate Content-Type
        if not request.is_json:
            return jsonify({
                "status": "error",
                "message": "Content-Type must be application/json"
            }), 400

        # silent=True returns None for an unparseable body instead of raising,
        # so a client mistake is answered with 400 rather than being caught by
        # the generic handler below and reported as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "status": "error",
                "message": "Request body must be a JSON object"
            }), 400

        # Validate required fields
        required_fields = ('language', 'audioFormat', 'audioBase64')
        missing_fields = [name for name in required_fields if name not in data]
        if missing_fields:
            return jsonify({
                "status": "error",
                "message": f"Missing required fields: {', '.join(missing_fields)}"
            }), 400

        # Validate language
        supported_languages = ['Tamil', 'English', 'Hindi', 'Malayalam', 'Telugu']
        language = data['language']
        if language not in supported_languages:
            return jsonify({
                "status": "error",
                "message": f"Unsupported language. Must be one of: {', '.join(supported_languages)}"
            }), 400

        # Validate audio format. str() guards against non-string JSON values
        # (numbers, null), which have no .lower().
        if str(data['audioFormat']).lower() != 'mp3':
            return jsonify({
                "status": "error",
                "message": "Only MP3 audio format is supported"
            }), 400

        # Validate base64 string; non-string payloads are rejected up front.
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({
                "status": "error",
                "message": "Invalid or empty audio data"
            }), 400

        # Initialize detector if not already loaded
        if detector is None:
            logger.info("Lazy loading detector on first request...")
            if not init_detector():
                return jsonify({
                    "status": "error",
                    "message": "Failed to load AI detection models. Please try again later."
                }), 503

        # Log request
        logger.info(f"Processing voice detection request for language: {language}")

        # Analyze audio
        result = detector.analyze(audio_base64, input_type="base64")

        # Check if analysis was successful
        if result['status'] != 'success':
            error_msg = result.get('error', 'Unknown error during analysis')
            logger.error(f"Analysis failed: {error_msg}")
            return jsonify({
                "status": "error",
                "message": f"Audio analysis failed: {error_msg}"
            }), 500

        # Prepare response (API compliant format - NO DEBUG INFO in production)
        response = {
            "status": "success",
            "language": language,  # Use requested language from input
            "classification": result['classification'],
            "confidenceScore": result['confidenceScore'],
            "explanation": result['explanation']
        }
        logger.info(f"Analysis complete: {result['classification']} (confidence: {result['confidenceScore']})")
        return jsonify(response), 200
    except Exception as exc:
        # Top-level boundary: log with traceback, hide details from the client.
        logger.exception("Unexpected error in voice_detection: %s", exc)
        return jsonify({
            "status": "error",
            "message": "Internal server error occurred during processing"
        }), 500
| # ========================================================== | |
| # ERROR HANDLERS | |
| # ========================================================== | |
# Register with the app; an unregistered handler is never invoked.
@app.errorhandler(404)
def not_found(error):
    """Handle 404 errors.

    Args:
        error: The error object passed in by Flask (unused).

    Returns:
        tuple: A JSON error body and the HTTP status code 404.
    """
    return jsonify({
        "status": "error",
        "message": "Endpoint not found"
    }), 404
# Register with the app; an unregistered handler is never invoked.
@app.errorhandler(405)
def method_not_allowed(error):
    """Handle 405 errors.

    Args:
        error: The error object passed in by Flask (unused).

    Returns:
        tuple: A JSON error body and the HTTP status code 405.
    """
    return jsonify({
        "status": "error",
        "message": "Method not allowed for this endpoint"
    }), 405
# Register with the app; an unregistered handler is never invoked.
@app.errorhandler(500)
def internal_error(error):
    """Handle 500 errors.

    Args:
        error: The error object passed in by Flask; its string form is logged.

    Returns:
        tuple: A JSON error body and the HTTP status code 500. The error
        details are logged but not sent to the client.
    """
    logger.error(f"Internal server error: {str(error)}")
    return jsonify({
        "status": "error",
        "message": "Internal server error"
    }), 500
| # ========================================================== | |
| # RUN APPLICATION | |
| # ========================================================== | |
if __name__ == '__main__':
    # HuggingFace Spaces uses port 7860
    port = int(os.environ.get('PORT', 7860))

    logger.info(f"Starting Voice Detection API on port {port}")
    logger.info(f"Evaluation endpoint: http://0.0.0.0:{port}/detect")
    logger.info(f"Extended endpoint: http://0.0.0.0:{port}/api/voice-detection")

    # Never log the API key itself - startup logs are not a secret store.
    # Report only whether a key was supplied through the environment.
    key_source = "API_KEY environment variable" if 'API_KEY' in os.environ else "built-in default"
    logger.info(f"API key source: {key_source}")
    logger.info("Platform: HuggingFace Spaces")

    # Run the app
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False  # Always False in production
    )