File size: 12,462 Bytes
410f9d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0b35ca
 
410f9d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e7b12
410f9d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e7b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410f9d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e7b12
 
410f9d4
 
 
 
 
 
 
aaa8c2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
"""
Voice Detection API - Flask Application (HuggingFace Spaces Version)
Accepts Base64-encoded MP3 audio and returns AI vs Human classification
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
from functools import wraps
import os
import logging
from datetime import datetime

# Import the detector
from detector import HybridEnsembleDetector

# Configure logging once for the whole process; loggers created later
# inherit this level and format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app and apply flask_cors with its default settings
app = Flask(__name__)
CORS(app)

# Load API key from environment variable (HuggingFace Secrets).
# The fallback is a publicly visible test key, so it must never be relied on
# in a real deployment.
_DEFAULT_API_KEY = 'sk_test_123456789'
API_KEY = os.environ.get('API_KEY', _DEFAULT_API_KEY)

# Never write any part of the secret to the logs; report only where it came
# from so operators can spot a missing secret.
if 'API_KEY' in os.environ:
    logger.info("API initialized with key from the API_KEY environment variable")
else:
    logger.warning(
        "API_KEY environment variable is not set; falling back to the built-in test key"
    )

# Module-level handle for the detector. It stays None until init_detector()
# succeeds, which lets the endpoints retry loading on demand.
logger.info("Loading AI detection models...")
detector = None

def init_detector():
    """Build the HybridEnsembleDetector and store it in the module-level ``detector``.

    Returns:
        bool: True when the detector was constructed and assigned; False when
        construction raised. On failure ``detector`` keeps its previous value
        and the error is logged with its traceback.
    """
    global detector
    try:
        detector = HybridEnsembleDetector(
            deepfake_model_path="garystafford/wav2vec2-deepfake-voice-detector",
            whisper_model_path="openai/whisper-base",
            physics_weight=0.4,
            dl_weight=0.6,
            use_local_deepfake_model=False,
            use_local_whisper_model=False,
            max_audio_duration=30,
            load_whisper=False,  # API uses client-provided language; skip Whisper to save GPU memory and startup time
        )
    except Exception as e:
        # Deliberate catch-all: a model-loading failure must not take the web
        # app down, because the endpoints retry the load on demand. Keep the
        # traceback so the root cause is visible in the logs.
        logger.error(f"❌ Failed to initialize detector: {str(e)}", exc_info=True)
        return False
    else:
        logger.info("✅ Detector initialized successfully")
        return True

# Initialize detector at startup; a failure here is non-fatal because the
# request handlers call init_detector() again while ``detector`` is None.
if not init_detector():
    logger.warning("⚠️ API starting without detector - models will be loaded on first request")


# ==========================================================
# AUTHENTICATION DECORATOR
# ==========================================================
def require_api_key(f):
    """Decorator that rejects requests lacking a valid ``x-api-key`` header.

    Args:
        f: The view function to protect.

    Returns:
        A wrapper (metadata preserved via ``functools.wraps``) that returns a
        JSON error with HTTP 401 when the header is missing or empty, HTTP 403
        when it does not match ``API_KEY``, and otherwise calls ``f`` with the
        original arguments.
    """
    # Function-scope import so the block carries its own dependency.
    import hmac

    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Get API key from headers
        provided_key = request.headers.get('x-api-key')

        if not provided_key:
            logger.warning(f"Request without API key from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Missing API key. Please provide 'x-api-key' in request headers."
            }), 401

        # Constant-time comparison so response timing does not reveal how many
        # leading characters of a guess were correct. Compare bytes because
        # compare_digest raises TypeError for non-ASCII str arguments.
        keys_match = hmac.compare_digest(
            provided_key.encode('utf-8'),
            API_KEY.encode('utf-8'),
        )
        if not keys_match:
            logger.warning(f"Invalid API key attempt from {request.remote_addr}")
            return jsonify({
                "status": "error",
                "message": "Invalid API key"
            }), 403

        return f(*args, **kwargs)

    return decorated_function


# ==========================================================
# ROOT ENDPOINT (HuggingFace Spaces Homepage)
# ==========================================================
@app.route('/', methods=['GET'])
def home():
    """Describe the service: name, version, routes, languages and auth scheme."""
    routes = {
        "health": "/health",
        "detect": "/detect",
        "detection": "/api/voice-detection"
    }
    languages = ["Tamil", "English", "Hindi", "Malayalam", "Telugu"]

    # Static metadata only; nothing here depends on the request.
    service_info = {
        "name": "Voice Detection API",
        "version": "1.0.0",
        "description": "AI-powered voice detection system for identifying AI-generated vs human voices",
        "endpoints": routes,
        "supported_languages": languages,
        "authentication": "Required - use 'x-api-key' header",
        "documentation": "See README for full API documentation"
    }
    return jsonify(service_info), 200


# ==========================================================
# HEALTH CHECK ENDPOINT
# ==========================================================
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint (no authentication required).

    Returns:
        A JSON body with the service name, the current UTC time as a naive
        ISO-8601 string, and whether the module-level detector is loaded,
        together with HTTP 200.
    """
    # Function-scope import so the block carries its own dependency.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the serialized format stays exactly what
    # existing clients already receive (no "+00:00" suffix).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return jsonify({
        "status": "healthy",
        "service": "Voice Detection API",
        "timestamp": now_utc.isoformat(),
        "models_loaded": detector is not None,
        "platform": "HuggingFace Spaces"
    }), 200


# ==========================================================
# EVALUATION ENDPOINT: /detect (hackathon evaluator format)
# Returns exactly: status, classification, confidenceScore
# ==========================================================
@app.route('/detect', methods=['POST'])
@require_api_key
def detect():
    """
    Hackathon evaluation endpoint. Request/response format per evaluation guide.
    Request: { "language": "English", "audioFormat": "mp3", "audioBase64": "..." }
    Response: { "status": "success", "classification": "HUMAN"|"AI_GENERATED", "confidenceScore": 0.0-1.0 }

    Error responses are JSON objects with "status": "error" and a "message":
    400 for a malformed or incomplete request, 503 when the detector cannot be
    loaded, 500 when analysis fails or an unexpected exception occurs.
    """
    global detector
    try:
        if not request.is_json:
            return jsonify({"status": "error", "message": "Content-Type must be application/json"}), 400

        # silent=True returns None for an unparsable body instead of raising
        # BadRequest, which the catch-all below would misreport as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"status": "error", "message": "Request body must be a valid JSON object"}), 400

        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"status": "error", "message": f"Missing required fields: {', '.join(missing)}"}), 400

        # Reject non-string payloads (null, numbers, ...) up front so they get
        # a 400 rather than crashing len() and surfacing as a 500.
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({"status": "error", "message": "Invalid or empty audio data"}), 400

        if str(data.get('audioFormat', '')).lower() != 'mp3':
            return jsonify({"status": "error", "message": "Only MP3 audio format is supported"}), 400

        # Retry model loading on demand when the startup load failed.
        if detector is None:
            if not init_detector():
                return jsonify({"status": "error", "message": "Failed to load AI detection models. Please try again later."}), 503

        result = detector.analyze(audio_base64, input_type="base64")

        if result['status'] != 'success':
            return jsonify({"status": "error", "message": result.get('error', 'Analysis failed')}), 500

        # Clamp the detector's score into [0, 1] before deriving the reported
        # confidence.
        raw_score = float(result['confidenceScore'])
        raw_score = max(0.0, min(1.0, raw_score))
        classification = result['classification']

        # confidenceScore = confidence in the prediction (0-1): the raw score
        # is used as-is for AI_GENERATED and inverted for any other label.
        # NOTE(review): this presumes the detector's score is an AI-likelihood;
        # confirm against HybridEnsembleDetector.analyze.
        if classification == 'AI_GENERATED':
            confidence_score = raw_score
        else:
            confidence_score = 1.0 - raw_score
        # raw_score is already within [0, 1], so rounding cannot leave the
        # range and no second clamp is needed.
        confidence_score = round(confidence_score, 2)

        response = {
            "status": "success",
            "classification": classification,
            "confidenceScore": confidence_score
        }
        return jsonify(response), 200

    except Exception as e:
        # Request boundary: log the full traceback, return a generic message.
        logger.error(f"Error in /detect: {str(e)}", exc_info=True)
        return jsonify({"status": "error", "message": "Internal server error"}), 500


# ==========================================================
# MAIN VOICE DETECTION ENDPOINT (extended response)
# ==========================================================
@app.route('/api/voice-detection', methods=['POST'])
@require_api_key
def voice_detection():
    """
    Main voice detection endpoint (extended response)

    Expected JSON Body:
    {
        "language": "Tamil" | "English" | "Hindi" | "Malayalam" | "Telugu",
        "audioFormat": "mp3",
        "audioBase64": "base64_encoded_audio_string"
    }

    Returns:
    {
        "status": "success",
        "language": "Tamil",
        "classification": "AI_GENERATED" | "HUMAN",
        "confidenceScore": 0.0-1.0,
        "explanation": "..."
    }

    Error responses are JSON objects with "status": "error" and a "message":
    400 for a malformed or incomplete request, 503 when the detector cannot be
    loaded, 500 when analysis fails or an unexpected exception occurs.
    """
    global detector

    try:
        # Validate Content-Type
        if not request.is_json:
            return jsonify({
                "status": "error",
                "message": "Content-Type must be application/json"
            }), 400

        # Get request data. silent=True returns None for an unparsable body
        # instead of raising BadRequest, which the catch-all below would
        # misreport as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "status": "error",
                "message": "Request body must be a valid JSON object"
            }), 400

        # Validate required fields
        required_fields = ['language', 'audioFormat', 'audioBase64']
        missing_fields = [field for field in required_fields if field not in data]

        if missing_fields:
            return jsonify({
                "status": "error",
                "message": f"Missing required fields: {', '.join(missing_fields)}"
            }), 400

        # Validate language
        supported_languages = ['Tamil', 'English', 'Hindi', 'Malayalam', 'Telugu']
        if data['language'] not in supported_languages:
            return jsonify({
                "status": "error",
                "message": f"Unsupported language. Must be one of: {', '.join(supported_languages)}"
            }), 400

        # Validate audio format. str() keeps a non-string value (number, null)
        # from raising AttributeError on .lower(); it simply fails the check.
        if str(data['audioFormat']).lower() != 'mp3':
            return jsonify({
                "status": "error",
                "message": "Only MP3 audio format is supported"
            }), 400

        # Validate base64 string; non-string payloads are rejected here rather
        # than crashing len() and surfacing as a 500.
        audio_base64 = data['audioBase64']
        if not isinstance(audio_base64, str) or len(audio_base64) < 100:
            return jsonify({
                "status": "error",
                "message": "Invalid or empty audio data"
            }), 400

        # Initialize detector if not already loaded
        if detector is None:
            logger.info("Lazy loading detector on first request...")
            if not init_detector():
                return jsonify({
                    "status": "error",
                    "message": "Failed to load AI detection models. Please try again later."
                }), 503

        # Log request
        logger.info(f"Processing voice detection request for language: {data['language']}")

        # Analyze audio
        result = detector.analyze(audio_base64, input_type="base64")

        # Check if analysis was successful
        if result['status'] != 'success':
            error_msg = result.get('error', 'Unknown error during analysis')
            logger.error(f"Analysis failed: {error_msg}")
            return jsonify({
                "status": "error",
                "message": f"Audio analysis failed: {error_msg}"
            }), 500

        # Prepare response (API compliant format - NO DEBUG INFO in production).
        # The detector's confidenceScore is passed through unchanged here.
        response = {
            "status": "success",
            "language": data['language'],  # Use requested language from input
            "classification": result['classification'],
            "confidenceScore": result['confidenceScore'],
            "explanation": result['explanation']
        }

        logger.info(f"✅ Analysis complete: {result['classification']} (confidence: {result['confidenceScore']})")

        return jsonify(response), 200

    except Exception as e:
        # Request boundary: log the full traceback, return a generic message.
        logger.error(f"Unexpected error in voice_detection: {str(e)}", exc_info=True)
        return jsonify({
            "status": "error",
            "message": "Internal server error occurred during processing"
        }), 500


# ==========================================================
# ERROR HANDLERS
# ==========================================================
@app.errorhandler(404)
def not_found(error):
    """Return the JSON error body for unknown routes with HTTP 404."""
    payload = {"status": "error", "message": "Endpoint not found"}
    return jsonify(payload), 404


@app.errorhandler(405)
def method_not_allowed(error):
    """Return the JSON error body for unsupported HTTP methods with HTTP 405."""
    payload = {"status": "error", "message": "Method not allowed for this endpoint"}
    return jsonify(payload), 405


@app.errorhandler(500)
def internal_error(error):
    """Log the error, then return a generic JSON error body with HTTP 500."""
    # Only the log receives the error text; the client gets a fixed message.
    logger.error(f"Internal server error: {error!s}")
    payload = {"status": "error", "message": "Internal server error"}
    return jsonify(payload), 500


# ==========================================================
# RUN APPLICATION
# ==========================================================
if __name__ == '__main__':
    # HuggingFace Spaces uses port 7860; PORT overrides it when set.
    port = int(os.environ.get('PORT', 7860))

    # Startup banner
    logger.info(f"🚀 Starting Voice Detection API on port {port}")
    logger.info(f"📍 Evaluation endpoint: http://0.0.0.0:{port}/detect")
    logger.info(f"📍 Extended endpoint: http://0.0.0.0:{port}/api/voice-detection")
    # The API key is a secret: state that authentication is on, but never
    # print the key itself to the logs.
    logger.info("🔑 API key authentication enabled ('x-api-key' header)")
    logger.info("🌐 Platform: HuggingFace Spaces")

    # Run the app, listening on all interfaces
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False  # Always False in production
    )