Spaces:

arshan123
/

voice-detection-api

Running

File size: 12,462 Bytes

"""
Voice Detection API - Flask Application (HuggingFace Spaces Version)
Accepts Base64-encoded MP3 audio and returns AI vs Human classification
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
from functools import wraps
import os
import logging
from datetime import datetime

# Import the detector
from detector import HybridEnsembleDetector

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)
CORS(app)

# Load API key from environment variable (HuggingFace Secrets)
API_KEY = os.environ.get('API_KEY', 'sk_test_123456789')
logger.info(f"API initialized with key: {API_KEY[:10]}...")

# Initialize the detector globally (load models once at startup)
logger.info("Loading AI detection models...")
detector = None

def init_detector():
    """Initialize the detector with models"""
    global detector
    try:
        detector = HybridEnsembleDetector(
            deepfake_model_path="garystafford/wav2vec2-deepfake-voice-detector",
            whisper_model_path="openai/whisper-base",
            physics_weight=0.4,
            dl_weight=0.6,
            use_local_deepfake_model=False,
            use_local_whisper_model=False,
            max_audio_duration=30,
            load_whisper=False,  # API uses client-provided language; skip Whisper to save GPU memory and startup time
        )
        logger.info("✅ Detector initialized successfully")
        return True
    except Exception as e:
        logger.error(f"❌ Failed to initialize detector: {str(e)}")
        return False

# Initialize detector at startup
if not init_detector():
    logger.warning("⚠️ API starting without detector - models will be loaded on first request")


# ==========================================================
# AUTHENTICATION DECORATOR
# ==========================================================
def require_api_key(f):
    """Decorator to validate API key from request headers"""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Get API key from headers
        provided_key = request.headers.get('x-api-key')
        
        if not provided_key:
            logger.warning(f"Request without API key from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Missing API key. Please provide 'x-api-key' in request headers."
            }), 401
        
        if provided_key != API_KEY:
            logger.warning(f"Invalid API key attempt from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Invalid API key"
            }), 403
        
        return f(*args, **kwargs)
    
    return decorated_function


# ==========================================================
# ROOT ENDPOINT (HuggingFace Spaces Homepage)
# ==========================================================
@app.route('/', methods=['GET'])
def home():
    """Root endpoint - API information"""
    return jsonify({
        "name": "Voice Detection API",
        "version": "1.0.0",
        "description": "AI-powered voice detection system for identifying AI-generated vs human voices",
        "endpoints": {
            "health": "/health",
            "detect": "/detect",
            "detection": "/api/voice-detection"
        },
        "supported_languages": ["Tamil", "English", "Hindi", "Malayalam", "Telugu"],
        "authentication": "Required - use 'x-api-key' header",
        "documentation": "See README for full API documentation"
    }), 200


# ==========================================================
# HEALTH CHECK ENDPOINT
# ==========================================================
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint (no authentication required)"""
    return jsonify({
        "status": "healthy",
        "service": "Voice Detection API",
        "timestamp": datetime.utcnow().isoformat(),
        "models_loaded": detector is not None,
        "platform": "HuggingFace Spaces"
    }), 200


# ==========================================================
# EVALUATION ENDPOINT: /detect (hackathon evaluator format)
# Returns exactly: status, classification, confidenceScore
# ==========================================================
@app.route('/detect', methods=['POST'])
@require_api_key
def detect():
    """
    Hackathon evaluation endpoint. Request/response format per evaluation guide.
    Request: { "language": "English", "audioFormat": "mp3", "audioBase64": "..." }
    Response: { "status": "success", "classification": "HUMAN"|"AI_GENERATED", "confidenceScore": 0.0-1.0 }
    """
    global detector
    try:
        if not request.is_json:
            return jsonify({"status": "error", "message": "Content-Type must be application/json"}), 400

        data = request.get_json()
        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"status": "error", "message": f"Missing required fields: {', '.join(missing)}"}), 400

        if not data.get('audioBase64') or len(data['audioBase64']) < 100:
            return jsonify({"status": "error", "message": "Invalid or empty audio data"}), 400

        if str(data.get('audioFormat', '')).lower() != 'mp3':
            return jsonify({"status": "error", "message": "Only MP3 audio format is supported"}), 400

        if detector is None:
            if not init_detector():
                return jsonify({"status": "error", "message": "Failed to load AI detection models. Please try again later."}), 503

        result = detector.analyze(data['audioBase64'], input_type="base64")

        if result['status'] != 'success':
            return jsonify({"status": "error", "message": result.get('error', 'Analysis failed')}), 500

        raw_score = float(result['confidenceScore'])
        raw_score = max(0.0, min(1.0, raw_score))
        classification = result['classification']
        # confidenceScore = confidence in the prediction (0-1)
        if classification == 'AI_GENERATED':
            confidence_score = raw_score
        else:
            confidence_score = 1.0 - raw_score
        confidence_score = max(0.0, min(1.0, round(confidence_score, 2)))

        response = {
            "status": "success",
            "classification": classification,
            "confidenceScore": confidence_score
        }
        return jsonify(response), 200

    except Exception as e:
        logger.error(f"Error in /detect: {str(e)}", exc_info=True)
        return jsonify({"status": "error", "message": "Internal server error"}), 500


# ==========================================================
# MAIN VOICE DETECTION ENDPOINT (extended response)
# ==========================================================
@app.route('/api/voice-detection', methods=['POST'])
@require_api_key
def voice_detection():
    """
    Main voice detection endpoint
    
    Expected JSON Body:
    {
        "language": "Tamil" | "English" | "Hindi" | "Malayalam" | "Telugu",
        "audioFormat": "mp3",
        "audioBase64": "base64_encoded_audio_string"
    }
    
    Returns:
    {
        "status": "success",
        "language": "Tamil",
        "classification": "AI_GENERATED" | "HUMAN",
        "confidenceScore": 0.0-1.0,
        "explanation": "..."
    }
    """
    global detector
    
    try:
        # Validate Content-Type
        if not request.is_json:
            return jsonify({
                "status": "error",
                "message": "Content-Type must be application/json"
            }), 400
        
        # Get request data
        data = request.get_json()
        
        # Validate required fields
        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing_fields = [field for field in required_fields if field not in data]
        
        if missing_fields:
            return jsonify({
                "status": "error",
                "message": f"Missing required fields: {', '.join(missing_fields)}"
            }), 400
        
        # Validate language
        supported_languages = ['Tamil', 'English', 'Hindi', 'Malayalam', 'Telugu']
        if data['language'] not in supported_languages:
            return jsonify({
                "status": "error",
                "message": f"Unsupported language. Must be one of: {', '.join(supported_languages)}"
            }), 400
        
        # Validate audio format
        if data['audioFormat'].lower() != 'mp3':
            return jsonify({
                "status": "error",
                "message": "Only MP3 audio format is supported"
            }), 400
        
        # Validate base64 string
        audio_base64 = data['audioBase64']
        if not audio_base64 or len(audio_base64) < 100:
            return jsonify({
                "status": "error",
                "message": "Invalid or empty audio data"
            }), 400
        
        # Initialize detector if not already loaded
        if detector is None:
            logger.info("Lazy loading detector on first request...")
            if not init_detector():
                return jsonify({
                    "status": "error",
                    "message": "Failed to load AI detection models. Please try again later."
                }), 503
        
        # Log request
        logger.info(f"Processing voice detection request for language: {data['language']}")
        
        # Analyze audio
        result = detector.analyze(audio_base64, input_type="base64")
        
        # Check if analysis was successful
        if result['status'] != 'success':
            error_msg = result.get('error', 'Unknown error during analysis')
            logger.error(f"Analysis failed: {error_msg}")
            return jsonify({
                "status": "error",
                "message": f"Audio analysis failed: {error_msg}"
            }), 500
        
        # Prepare response (API compliant format - NO DEBUG INFO in production)
        response = {
            "status": "success",
            "language": data['language'],  # Use requested language from input
            "classification": result['classification'],
            "confidenceScore": result['confidenceScore'],
            "explanation": result['explanation']
        }
        
        logger.info(f"✅ Analysis complete: {result['classification']} (confidence: {result['confidenceScore']})")
        
        return jsonify(response), 200
        
    except Exception as e:
        logger.error(f"Unexpected error in voice_detection: {str(e)}", exc_info=True)
        return jsonify({
            "status": "error",
            "message": "Internal server error occurred during processing"
        }), 500


# ==========================================================
# ERROR HANDLERS
# ==========================================================
@app.errorhandler(404)
def not_found(error):
    """Handle 404 errors"""
    return jsonify({
        "status": "error",
        "message": "Endpoint not found"
    }), 404


@app.errorhandler(405)
def method_not_allowed(error):
    """Handle 405 errors"""
    return jsonify({
        "status": "error",
        "message": "Method not allowed for this endpoint"
    }), 405


@app.errorhandler(500)
def internal_error(error):
    """Handle 500 errors"""
    logger.error(f"Internal server error: {str(error)}")
    return jsonify({
        "status": "error",
        "message": "Internal server error"
    }), 500


# ==========================================================
# RUN APPLICATION
# ==========================================================
if __name__ == '__main__':
    # HuggingFace Spaces uses port 7860
    port = int(os.environ.get('PORT', 7860))
    
    # Run the app
    logger.info(f"🚀 Starting Voice Detection API on port {port}")
    logger.info(f"📍 Evaluation endpoint: http://0.0.0.0:{port}/detect")
    logger.info(f"📍 Extended endpoint: http://0.0.0.0:{port}/api/voice-detection")
    logger.info(f"🔑 API Key: {API_KEY}")
    logger.info(f"🌐 Platform: HuggingFace Spaces")
    
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False  # Always False in production
    )