rahulrajeshjain05 committed on
Commit
2d5acea
·
verified ·
1 Parent(s): c336244

Create app_new.py

Browse files
Files changed (1) hide show
  1. app_new.py +284 -0
app_new.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import logging
4
+ import tempfile
5
+ import numpy as np
6
+ import torch
7
+ import uvicorn
8
+
9
+ from fastapi import FastAPI, HTTPException, Depends, Header
10
+ from pydantic import BaseModel
11
+ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
12
+ from pydub import AudioSegment
13
+
14
+ # ======================================================
15
+ # CONFIGURATION
16
+ # ======================================================
17
+
18
# Hugging Face model used for classification, plus an optional access token
# (None when HF_TOKEN is not set).
MODEL_ID = "Hemgg/Deepfake-audio-detection"
HF_TOKEN = os.getenv("HF_TOKEN")

# Fallback key used only when the API_KEY environment variable is absent.
# It is hard-coded in source, so it must be treated as publicly known.
_DEFAULT_API_KEY = "sk_test_123456789"
API_KEY_VALUE = os.getenv("API_KEY", _DEFAULT_API_KEY)

# Sample rate the audio is resampled to, and the fixed window length
# (in seconds and in samples) that waveforms are cut or padded to.
TARGET_SR = 16000
MAX_AUDIO_SECONDS = 8
MAX_LEN = TARGET_SR * MAX_AUDIO_SECONDS

SUPPORTED_LANGUAGES = ["Tamil", "English", "Hindi", "Malayalam", "Telugu"]

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("voice-detection")

# Make the insecure fallback visible instead of silently accepting a key
# that anyone reading the source can present.
if API_KEY_VALUE == _DEFAULT_API_KEY:
    logger.warning(
        "API_KEY is not set (or equals the built-in test key); "
        "set a private API_KEY before exposing this service"
    )
33
+
34
+ # ======================================================
35
+ # FASTAPI INIT
36
+ # ======================================================
37
+
38
# Application object that the startup hook and the route decorators attach to.
app = FastAPI(title="AI Voice Detection API")

# Module-level placeholders; the startup hook assigns the loaded objects and
# the endpoint refuses to serve while `model` is still None.
feature_extractor = None
model = None
42
+
43
+ # ======================================================
44
+ # REQUEST MODEL
45
+ # ======================================================
46
+
47
class VoiceRequest(BaseModel):
    # JSON body accepted by POST /api/voice-detection.

    # Spoken language of the clip; the endpoint rejects values that are not
    # in SUPPORTED_LANGUAGES.
    language: str
    # Container format; the endpoint accepts only "mp3" (case-insensitive).
    audioFormat: str
    # Base64-encoded audio; text up to the first comma is stripped before
    # decoding, so a data-URI style prefix is tolerated.
    audioBase64: str
51
+
52
+ # ======================================================
53
+ # STARTUP: LOAD MODEL ONCE
54
+ # ======================================================
55
+
56
@app.on_event("startup")
def load_model():
    """Load the feature extractor and classifier once at application startup.

    On success the module-level ``feature_extractor`` and ``model`` are
    assigned, with the model moved to ``DEVICE`` and put in eval mode.

    On any failure the error (with traceback) is logged and BOTH globals are
    reset to ``None``, so the endpoint's ``model is None`` check reports the
    service as unavailable rather than running with a half-initialised pair.
    The exception is deliberately not re-raised: the app keeps starting.
    """
    global model, feature_extractor

    try:
        logger.info("Loading model...")

        # Build into locals first so the globals are only published once
        # every step has succeeded.
        extractor = AutoFeatureExtractor.from_pretrained(
            MODEL_ID, token=HF_TOKEN
        )
        classifier = AutoModelForAudioClassification.from_pretrained(
            MODEL_ID, token=HF_TOKEN
        ).to(DEVICE)
        classifier.eval()

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Failed to load model: %s", e)
        model = None
        feature_extractor = None
        return

    feature_extractor = extractor
    model = classifier
    logger.info("Model loaded successfully")
78
+
79
+ # ======================================================
80
+ # API KEY VALIDATION
81
+ # ======================================================
82
+
83
async def verify_api_key(x_api_key: str = Header(None)):
    """FastAPI dependency that authenticates a request by its API key header.

    Args:
        x_api_key: Value of the API key header, or ``None`` when the header
            is absent (the ``Header(None)`` default).

    Returns:
        The supplied key, unchanged, when it matches ``API_KEY_VALUE``.

    Raises:
        HTTPException: 403 when the header is missing or does not match.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    # Compare as bytes: compare_digest rejects non-ASCII str arguments, and a
    # constant-time comparison avoids leaking how many leading characters of
    # the key were correct.
    supplied = (x_api_key or "").encode("utf-8")
    expected = API_KEY_VALUE.encode("utf-8")

    if x_api_key is None or not hmac.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=403,
            detail="Invalid API key or malformed request"
        )
    return x_api_key
90
+
91
+ # ======================================================
92
+ # AUDIO PREPROCESSING (ROBUST)
93
+ # ======================================================
94
+
95
def preprocess_audio(b64_string: str):
    """Decode base64 audio into a fixed-length, peak-normalised mono waveform.

    Steps: strip an optional prefix ending in a comma, base64-decode, let
    pydub parse the bytes from a temporary ``.mp3`` file, convert to mono at
    ``TARGET_SR``, keep at most ``MAX_LEN`` samples, scale by the peak of
    that kept window, then zero-pad up to ``MAX_LEN``.

    Args:
        b64_string: Base64 audio payload, optionally preceded by a
            data-URI style prefix such as ``data:audio/mp3;base64,``.

    Returns:
        A float32 NumPy array of exactly ``MAX_LEN`` samples.

    Raises:
        HTTPException: 400 for any decoding or processing failure, including
            audio that decodes to zero samples.
    """
    try:
        if "," in b64_string:
            b64_string = b64_string.split(",")[1]

        audio_bytes = base64.b64decode(b64_string)

        # Write to temporary file (handles malformed MP3). The file is removed
        # when the `with` exits, so it must be parsed inside the block.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=True) as tmp:
            tmp.write(audio_bytes)
            tmp.flush()
            audio = AudioSegment.from_file(tmp.name)

        # convert to mono + 16kHz
        audio = audio.set_channels(1).set_frame_rate(TARGET_SR)

        samples = np.array(audio.get_array_of_samples()).astype(np.float32)

        # Truncate BEFORE normalising: the peak must come from the window that
        # is actually analysed, otherwise a loud event after the cut-off
        # scales the kept samples down and skews the variance thresholds used
        # downstream.
        samples = samples[:MAX_LEN]

        if samples.size == 0:
            # Explicit error instead of relying on np.max failing on an
            # empty array.
            raise ValueError("decoded audio contains no samples")

        # normalize safely (all-zero audio is left untouched)
        peak = np.max(np.abs(samples))
        if peak > 0:
            samples /= peak

        # Zero-pad short clips so every waveform has the same length.
        samples = np.pad(samples, (0, max(0, MAX_LEN - len(samples))))

        return samples

    except Exception as e:
        logger.error(f"Audio preprocessing failed: {e}")
        raise HTTPException(
            status_code=400,
            detail="Invalid audio data"
        ) from e
132
+
133
+ # ======================================================
134
+ # ACOUSTIC ANOMALY DETECTOR (SECOND SIGNAL)
135
+ # ======================================================
136
+
137
def acoustic_anomaly_score(waveform):
    """Score how "flat" a waveform is, as a secondary synthetic-speech signal.

    Two variance checks each contribute 0.5 when they fall below their
    threshold: the variance of the absolute amplitude (< 0.003) and the
    variance of the raw signal (< 0.01).

    Args:
        waveform: Array-like of audio samples.

    Returns:
        A float: 0.0, 0.5 or 1.0.
    """
    # (measured value, threshold below which it counts as anomalous)
    checks = (
        (np.var(np.abs(waveform)), 0.003),
        (np.var(waveform), 0.01),
    )

    # low variance often indicates synthetic speech
    triggered = sum(1 for measured, limit in checks if measured < limit)

    return min(0.5 * triggered, 1.0)
152
+
153
+ # ======================================================
154
+ # DYNAMIC EXPLANATION
155
+ # ======================================================
156
+
157
def generate_explanation(waveform, classification):
    """Pick a human-readable explanation for a classification result.

    The wording depends on the classification and on the variance of the
    waveform's absolute amplitude.

    Args:
        waveform: Array-like of audio samples.
        classification: ``"AI_GENERATED"`` selects the synthetic-voice
            messages; any other value selects the human-voice messages.

    Returns:
        One of four fixed explanation strings.
    """
    # Only the amplitude-envelope variance drives the wording; the raw signal
    # variance was previously computed here but never used.
    energy_variance = np.var(np.abs(waveform))

    if classification == "AI_GENERATED":
        if energy_variance < 0.003:
            return "Very uniform energy distribution and smooth spectral structure indicate synthetic voice characteristics"
        return "Unnatural spectral consistency and low vocal variation detected"

    if energy_variance > 0.01:
        return "Natural vocal fluctuations and human prosody patterns detected"
    return "Human-like frequency variation observed"
175
+
176
+ # ======================================================
177
+ # MAIN ENDPOINT
178
+ # ======================================================
179
+
180
def _is_synthetic_label(label) -> bool:
    """Guess from a label's name whether it denotes the AI/fake class."""
    name = str(label).lower()
    # Check the human-side markers first so a name mentioning both sides is
    # not counted as synthetic.
    if any(marker in name for marker in ("human", "real", "bonafide", "bona-fide")):
        return False
    return any(
        marker in name
        for marker in ("ai", "fake", "spoof", "synthetic", "generated")
    )


@app.post("/api/voice-detection")
async def voice_detection(
    request: VoiceRequest,
    auth: str = Depends(verify_api_key)
):
    """Classify an uploaded voice clip as AI-generated or human.

    The model's probability for its synthetic class(es) is blended 80/20 with
    the acoustic anomaly score; a blended score above 0.5 yields
    ``"AI_GENERATED"``, otherwise ``"HUMAN"``.

    Args:
        request: Parsed request body (language, audio format, base64 audio).
        auth: API key returned by the ``verify_api_key`` dependency; unused
            beyond enforcing authentication.

    Returns:
        A dict with ``status``, ``language``, ``classification``,
        ``confidenceScore`` (confidence in the returned classification,
        rounded to 3 decimals) and ``explanation``.

    Raises:
        HTTPException: 500 when the model is not loaded; 400 for an
            unsupported language or format, invalid audio, or any processing
            error.
    """
    if model is None:
        raise HTTPException(
            status_code=500,
            detail="Model not available"
        )

    # -----------------------------
    # INPUT VALIDATION
    # -----------------------------

    if request.language not in SUPPORTED_LANGUAGES:
        raise HTTPException(
            status_code=400,
            detail="Unsupported language"
        )

    if request.audioFormat.lower() != "mp3":
        raise HTTPException(
            status_code=400,
            detail="Only mp3 format supported"
        )

    try:
        # -----------------------------
        # PREPROCESS AUDIO
        # -----------------------------

        waveform = preprocess_audio(request.audioBase64)

        # -----------------------------
        # MODEL INFERENCE
        # -----------------------------
        # NOTE(review): inference runs synchronously inside an async handler,
        # so it occupies the event loop for its duration.

        inputs = feature_extractor(
            waveform,
            sampling_rate=TARGET_SR,
            return_tensors="pt"
        ).to(DEVICE)

        with torch.no_grad():
            logits = model(**inputs).logits
            # One clip per request, so take the single row of the batch.
            probs = torch.softmax(logits, dim=-1)[0]

        # The score must be the probability of the SYNTHETIC class, not the
        # probability of whichever class won: a confident "human" prediction
        # would otherwise push the result towards AI_GENERATED.
        # NOTE(review): the synthetic class is identified by label name;
        # confirm against this model's id2label.
        synthetic_ids = [
            int(idx)
            for idx, label in model.config.id2label.items()
            if _is_synthetic_label(label)
        ]

        if synthetic_ids:
            model_score = float(sum(probs[idx].item() for idx in synthetic_ids))
        else:
            # Labels are not recognisable (e.g. generic names): fall back to
            # the previous behaviour rather than failing the request.
            logger.warning(
                "No synthetic label recognised in id2label=%s; "
                "using top-class probability as the AI score",
                model.config.id2label,
            )
            model_score = float(probs.max().item())

        # -----------------------------
        # SECOND SIGNAL: ACOUSTIC CHECK
        # -----------------------------

        anomaly_score = acoustic_anomaly_score(waveform)

        # ensemble scoring: estimated likelihood that the clip is synthetic
        final_score = 0.8 * model_score + 0.2 * anomaly_score

        is_synthetic = final_score > 0.5
        classification = "AI_GENERATED" if is_synthetic else "HUMAN"

        # Report confidence in the class actually returned.
        confidence = round(
            float(final_score if is_synthetic else 1.0 - final_score), 3
        )

        # -----------------------------
        # EXPLANATION
        # -----------------------------

        explanation = generate_explanation(waveform, classification)

        # -----------------------------
        # RESPONSE
        # -----------------------------

        return {
            "status": "success",
            "language": request.language,
            "classification": classification,
            "confidenceScore": confidence,
            "explanation": explanation
        }

    except HTTPException:
        # Already a well-formed API error (e.g. from preprocess_audio).
        raise

    except Exception as e:
        logger.exception("Inference error: %s", e)
        raise HTTPException(
            status_code=400,
            detail="Malformed request or processing error"
        )
278
+
279
+ # ======================================================
280
+ # RUN SERVER
281
+ # ======================================================
282
+
283
if __name__ == "__main__":
    # Pass the application object itself instead of the import string
    # "app:app": the string only resolves when this file is named app.py,
    # whereas the object works under any filename (no reload/workers are
    # requested, so an import string is not needed).
    uvicorn.run(app, host="0.0.0.0", port=7860)