arshan123's picture
Modified according to final solution
f2e7b12
"""
Voice Detection API - Flask Application (HuggingFace Spaces Version)
Accepts Base64-encoded MP3 audio and returns AI vs Human classification
"""
import hmac
import logging
import os
from datetime import datetime
from functools import wraps

from flask import Flask, request, jsonify
from flask_cors import CORS

# Import the detector
from detector import HybridEnsembleDetector
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app and enable CORS with the flask-cors defaults
app = Flask(__name__)
CORS(app)

# Load API key from environment variable (HuggingFace Secrets).
# NOTE(review): the fallback below is a publicly visible test key; make sure
# API_KEY is set as a secret in any real deployment.
API_KEY = os.environ.get('API_KEY', 'sk_test_123456789')

# Never write key material to the logs (not even a prefix): logs are often
# readable by more people than the secret store is. Only report the source.
if 'API_KEY' in os.environ:
    logger.info("API initialized with key from the API_KEY environment variable")
else:
    logger.warning("API_KEY environment variable not set - using built-in test key")

# The detector is created once and shared by all requests. It stays None
# until init_detector() succeeds, so endpoints can detect a failed load.
logger.info("Loading AI detection models...")
detector = None
def init_detector():
    """Build the shared ``HybridEnsembleDetector`` and store it in ``detector``.

    The module-level ``detector`` global is only assigned when construction
    succeeds; on failure it keeps its previous value.

    Returns:
        bool: True if the detector was constructed, False if construction
        raised (the exception is logged with its traceback, not re-raised).
    """
    global detector
    try:
        detector = HybridEnsembleDetector(
            deepfake_model_path="garystafford/wav2vec2-deepfake-voice-detector",
            whisper_model_path="openai/whisper-base",
            physics_weight=0.4,
            dl_weight=0.6,
            use_local_deepfake_model=False,
            use_local_whisper_model=False,
            max_audio_duration=30,
            load_whisper=False,  # API uses client-provided language; skip Whisper to save GPU memory and startup time
        )
        logger.info("✅ Detector initialized successfully")
        return True
    except Exception as e:
        # Deliberately broad: any load failure must leave the API able to
        # start. Include the traceback so the root cause is diagnosable,
        # matching how the request handlers log their errors.
        logger.error(f"❌ Failed to initialize detector: {str(e)}", exc_info=True)
        return False
# Try to load the models eagerly at import time; a failure is not fatal
# because the endpoints retry the load when they find no detector.
_detector_loaded_at_startup = init_detector()
if not _detector_loaded_at_startup:
    logger.warning("⚠️ API starting without detector - models will be loaded on first request")
# ==========================================================
# AUTHENTICATION DECORATOR
# ==========================================================
def require_api_key(f):
    """Decorator that rejects requests lacking a valid ``x-api-key`` header.

    Args:
        f: The view function to protect.

    Returns:
        A wrapper that responds with HTTP 401 when the header is missing or
        empty, HTTP 403 when it does not match ``API_KEY``, and otherwise
        returns whatever ``f`` returns.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Get API key from headers
        provided_key = request.headers.get('x-api-key')
        if not provided_key:
            logger.warning(f"Request without API key from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Missing API key. Please provide 'x-api-key' in request headers."
            }), 401

        # Constant-time comparison so response timing does not reveal how
        # many leading characters of a guess were correct. Compare bytes:
        # compare_digest raises TypeError for non-ASCII str arguments.
        keys_match = hmac.compare_digest(
            provided_key.encode('utf-8'),
            API_KEY.encode('utf-8'),
        )
        if not keys_match:
            logger.warning(f"Invalid API key attempt from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Invalid API key"
            }), 403

        return f(*args, **kwargs)
    return decorated_function
# ==========================================================
# ROOT ENDPOINT (HuggingFace Spaces Homepage)
# ==========================================================
@app.route('/', methods=['GET'])
def home():
    """Describe the API: name, version, routes, languages and auth scheme.

    Returns:
        A ``(response, 200)`` tuple whose JSON body is a static description
        of the service.
    """
    routes = {
        "health": "/health",
        "detect": "/detect",
        "detection": "/api/voice-detection"
    }
    languages = ["Tamil", "English", "Hindi", "Malayalam", "Telugu"]
    api_info = {
        "name": "Voice Detection API",
        "version": "1.0.0",
        "description": "AI-powered voice detection system for identifying AI-generated vs human voices",
        "endpoints": routes,
        "supported_languages": languages,
        "authentication": "Required - use 'x-api-key' header",
        "documentation": "See README for full API documentation"
    }
    return jsonify(api_info), 200
# ==========================================================
# HEALTH CHECK ENDPOINT
# ==========================================================
@app.route('/health', methods=['GET'])
def health_check():
    """Report service liveness without requiring an API key.

    Returns:
        A ``(response, 200)`` tuple. ``models_loaded`` is True once the
        module-level ``detector`` has been created; ``timestamp`` is the
        current naive UTC time in ISO format.
    """
    current_time = datetime.utcnow().isoformat()
    detector_ready = detector is not None
    status_report = {
        "status": "healthy",
        "service": "Voice Detection API",
        "timestamp": current_time,
        "models_loaded": detector_ready,
        "platform": "HuggingFace Spaces"
    }
    return jsonify(status_report), 200
# ==========================================================
# EVALUATION ENDPOINT: /detect (hackathon evaluator format)
# Returns exactly: status, classification, confidenceScore
# ==========================================================
@app.route('/detect', methods=['POST'])
@require_api_key
def detect():
    """
    Hackathon evaluation endpoint. Request/response format per evaluation guide.
    Request: { "language": "English", "audioFormat": "mp3", "audioBase64": "..." }
    Response: { "status": "success", "classification": "HUMAN"|"AI_GENERATED", "confidenceScore": 0.0-1.0 }

    Error responses are ``{"status": "error", "message": "..."}`` with:
    400 for a non-JSON / malformed / incomplete request, 503 when the models
    cannot be loaded, and 500 when analysis fails or an unexpected error occurs.
    """
    global detector
    try:
        if not request.is_json:
            return jsonify({"status": "error", "message": "Content-Type must be application/json"}), 400

        # silent=True returns None for a malformed body instead of raising
        # BadRequest, which the broad except below would report as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"status": "error", "message": "Request body must be a valid JSON object"}), 400

        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"status": "error", "message": f"Missing required fields: {', '.join(missing)}"}), 400

        # Must be a string: other JSON types would either crash len() or
        # reach the detector in a shape it was not given before.
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({"status": "error", "message": "Invalid or empty audio data"}), 400

        if str(data.get('audioFormat', '')).lower() != 'mp3':
            return jsonify({"status": "error", "message": "Only MP3 audio format is supported"}), 400

        # Lazy load if the startup initialization failed
        if detector is None:
            if not init_detector():
                return jsonify({"status": "error", "message": "Failed to load AI detection models. Please try again later."}), 503

        result = detector.analyze(audio_base64, input_type="base64")
        if result['status'] != 'success':
            return jsonify({"status": "error", "message": result.get('error', 'Analysis failed')}), 500

        raw_score = float(result['confidenceScore'])
        raw_score = max(0.0, min(1.0, raw_score))
        classification = result['classification']

        # confidenceScore = confidence in the prediction (0-1)
        # NOTE(review): this treats the detector's score as "likelihood of
        # AI" and inverts it for HUMAN results - confirm against the detector.
        if classification == 'AI_GENERATED':
            confidence_score = raw_score
        else:
            confidence_score = 1.0 - raw_score
        confidence_score = max(0.0, min(1.0, round(confidence_score, 2)))

        response = {
            "status": "success",
            "classification": classification,
            "confidenceScore": confidence_score
        }
        return jsonify(response), 200
    except Exception as e:
        logger.error(f"Error in /detect: {str(e)}", exc_info=True)
        return jsonify({"status": "error", "message": "Internal server error"}), 500
# ==========================================================
# MAIN VOICE DETECTION ENDPOINT (extended response)
# ==========================================================
@app.route('/api/voice-detection', methods=['POST'])
@require_api_key
def voice_detection():
    """
    Main voice detection endpoint

    Expected JSON Body:
    {
        "language": "Tamil" | "English" | "Hindi" | "Malayalam" | "Telugu",
        "audioFormat": "mp3",
        "audioBase64": "base64_encoded_audio_string"
    }

    Returns:
    {
        "status": "success",
        "language": "Tamil",
        "classification": "AI_GENERATED" | "HUMAN",
        "confidenceScore": 0.0-1.0,
        "explanation": "..."
    }

    Error responses are {"status": "error", "message": "..."} with HTTP 400
    for an invalid request, 503 when the models cannot be loaded, and 500
    when analysis fails or an unexpected error occurs.
    """
    global detector
    try:
        # Validate Content-Type
        if not request.is_json:
            return jsonify({
                "status": "error",
                "message": "Content-Type must be application/json"
            }), 400

        # Get request data. silent=True returns None for a malformed body
        # instead of raising BadRequest, which the broad except below would
        # otherwise report as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "status": "error",
                "message": "Request body must be a valid JSON object"
            }), 400

        # Validate required fields
        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing_fields = [field for field in required_fields if field not in data]
        if missing_fields:
            return jsonify({
                "status": "error",
                "message": f"Missing required fields: {', '.join(missing_fields)}"
            }), 400

        # Validate language
        supported_languages = ['Tamil', 'English', 'Hindi', 'Malayalam', 'Telugu']
        if data['language'] not in supported_languages:
            return jsonify({
                "status": "error",
                "message": f"Unsupported language. Must be one of: {', '.join(supported_languages)}"
            }), 400

        # Validate audio format; str() keeps a non-string value (number,
        # null, list) a 400 instead of an AttributeError -> 500.
        if str(data['audioFormat']).lower() != 'mp3':
            return jsonify({
                "status": "error",
                "message": "Only MP3 audio format is supported"
            }), 400

        # Validate base64 string (must actually be a string)
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({
                "status": "error",
                "message": "Invalid or empty audio data"
            }), 400

        # Initialize detector if not already loaded
        if detector is None:
            logger.info("Lazy loading detector on first request...")
            if not init_detector():
                return jsonify({
                    "status": "error",
                    "message": "Failed to load AI detection models. Please try again later."
                }), 503

        # Log request
        logger.info(f"Processing voice detection request for language: {data['language']}")

        # Analyze audio
        result = detector.analyze(audio_base64, input_type="base64")

        # Check if analysis was successful
        if result['status'] != 'success':
            error_msg = result.get('error', 'Unknown error during analysis')
            logger.error(f"Analysis failed: {error_msg}")
            return jsonify({
                "status": "error",
                "message": f"Audio analysis failed: {error_msg}"
            }), 500

        # Prepare response (API compliant format - NO DEBUG INFO in production)
        response = {
            "status": "success",
            "language": data['language'],  # Use requested language from input
            "classification": result['classification'],
            "confidenceScore": result['confidenceScore'],
            "explanation": result['explanation']
        }
        logger.info(f"✅ Analysis complete: {result['classification']} (confidence: {result['confidenceScore']})")
        return jsonify(response), 200
    except Exception as e:
        logger.error(f"Unexpected error in voice_detection: {str(e)}", exc_info=True)
        return jsonify({
            "status": "error",
            "message": "Internal server error occurred during processing"
        }), 500
# ==========================================================
# ERROR HANDLERS
# ==========================================================
@app.errorhandler(404)
def not_found(error):
    """Return the JSON error envelope for unknown routes (HTTP 404)."""
    payload = dict(status="error", message="Endpoint not found")
    return jsonify(payload), 404
@app.errorhandler(405)
def method_not_allowed(error):
    """Return the JSON error envelope for unsupported HTTP methods (HTTP 405)."""
    payload = dict(status="error", message="Method not allowed for this endpoint")
    return jsonify(payload), 405
@app.errorhandler(500)
def internal_error(error):
    """Log the error and return the JSON error envelope (HTTP 500)."""
    logger.error(f"Internal server error: {error!s}")
    payload = dict(status="error", message="Internal server error")
    return jsonify(payload), 500
# ==========================================================
# RUN APPLICATION
# ==========================================================
if __name__ == '__main__':
    # HuggingFace Spaces uses port 7860; PORT overrides it when set
    port = int(os.environ.get('PORT', 7860))

    # Startup banner
    logger.info(f"🚀 Starting Voice Detection API on port {port}")
    logger.info(f"📍 Evaluation endpoint: http://0.0.0.0:{port}/detect")
    logger.info(f"📍 Extended endpoint: http://0.0.0.0:{port}/api/voice-detection")
    # The API key itself is intentionally not logged: printing a credential
    # exposes it to anyone who can read the application logs.
    logger.info("🔑 API Key: configured (value not logged)")
    logger.info("🌐 Platform: HuggingFace Spaces")

    # Run the app on all interfaces
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False  # Always False in production
    )