kun7x committed on
Commit
6bed605
·
verified ·
1 Parent(s): f99e73c

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +9 -66
  2. requirements.txt +0 -3
app.py CHANGED
@@ -4,7 +4,6 @@ Pure FastAPI - No Gradio
4
  """
5
 
6
  import os
7
- import random
8
  import base64
9
  import tempfile
10
  import numpy as np
@@ -17,8 +16,6 @@ from transformers import Wav2Vec2Model
17
  from pydub import AudioSegment
18
  import librosa
19
  import uvicorn
20
- import onnxruntime as ort
21
- import scipy.special
22
 
23
  # Configuration
24
  MODEL_REPO = "kimnamjoon0007/lkht-v440"
@@ -64,25 +61,6 @@ model.to(DEVICE)
64
  model.eval()
65
  print(f"Ready on {DEVICE}")
66
 
67
- # Warm-up: eliminate first-request latency
68
- with torch.no_grad():
69
- dummy = torch.randn(1, 16000).to(DEVICE)
70
- model(dummy)
71
- print("Warm-up done")
72
-
73
- # Export to ONNX for faster inference
74
- onnx_path = "/tmp/model.onnx"
75
- model.to("cpu")
76
- dummy_export = torch.randn(1, 48000)
77
- torch.onnx.export(
78
- model, dummy_export, onnx_path, opset_version=14,
79
- input_names=["input_values"],
80
- output_names=["logits"],
81
- dynamic_axes={"input_values": {1: "audio_length"}}
82
- )
83
- session = ort.InferenceSession(onnx_path)
84
- print("ONNX session ready")
85
-
86
 
87
  # FastAPI app
88
  app = FastAPI(title="AI Voice Detection API", version="2.0")
@@ -96,10 +74,8 @@ class DetectionRequest(BaseModel):
96
 
97
  class DetectionResponse(BaseModel):
98
  status: str
99
- language: str
100
  classification: str
101
  confidenceScore: float
102
- explanation: str
103
 
104
 
105
  def load_audio(audio_path):
@@ -163,10 +139,8 @@ def home():
163
  <h2>Response Format</h2>
164
  <pre>{{
165
  "status": "success",
166
- "language": "English",
167
  "classification": "AI_GENERATED" or "HUMAN",
168
- "confidenceScore": 0.97,
169
- "explanation": "Detected synthetic voice characteristics"
170
  }}</pre>
171
  </div>
172
 
@@ -190,10 +164,6 @@ def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
190
  if x_api_key != API_KEY:
191
  raise HTTPException(status_code=401, detail="Invalid API key")
192
 
193
- # Validate format
194
- if request.audioFormat.lower() != "mp3":
195
- raise HTTPException(status_code=400, detail="Only mp3 format supported")
196
-
197
  # Decode audio
198
  try:
199
  audio_bytes = base64.b64decode(request.audioBase64)
@@ -206,49 +176,22 @@ def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
206
  temp_file.close()
207
 
208
  try:
209
- # Process with ONNX Runtime
210
  waveform = load_audio(temp_file.name)
211
- ort_inputs = {"input_values": waveform.unsqueeze(0).numpy()}
212
- ort_outputs = session.run(None, ort_inputs)
213
- logits = ort_outputs[0]
214
- probs = scipy.special.softmax(logits, axis=-1)
215
- pred = int(probs.argmax(axis=-1)[0])
216
- conf = float(probs[0, pred])
217
 
218
- classification = "AI_GENERATED" if pred == 1 else "HUMAN"
 
 
 
 
219
 
220
- ai_explanations = [
221
- "Detected synthetic voice characteristics and artificial patterns",
222
- "Audio exhibits signs of AI-based speech synthesis",
223
- "Voice patterns are consistent with machine-generated speech",
224
- "Identified artificial spectral features typical of synthetic voices",
225
- "Analysis reveals digitally synthesized vocal characteristics",
226
- "Audio signature matches known AI voice generation patterns",
227
- "Detected unnatural prosody and robotic tonal artifacts",
228
- "Voice lacks micro-variations found in natural human speech",
229
- ]
230
- human_explanations = [
231
- "Detected natural speech patterns and organic voice characteristics",
232
- "Voice exhibits natural human vocal tract resonances",
233
- "Audio contains organic micro-variations consistent with human speech",
234
- "Speech patterns align with natural human voice production",
235
- "Identified genuine vocal characteristics and natural prosody",
236
- "Analysis confirms authentic human speech signatures",
237
- "Voice displays natural breathing patterns and tonal variations",
238
- "Audio shows no signs of synthetic generation or manipulation",
239
- ]
240
-
241
- if classification == "AI_GENERATED":
242
- explanation = random.choice(ai_explanations)
243
- else:
244
- explanation = random.choice(human_explanations)
245
 
246
  return DetectionResponse(
247
  status="success",
248
- language=request.language,
249
  classification=classification,
250
  confidenceScore=round(conf, 2),
251
- explanation=explanation
252
  )
253
 
254
  finally:
 
4
  """
5
 
6
  import os
 
7
  import base64
8
  import tempfile
9
  import numpy as np
 
16
  from pydub import AudioSegment
17
  import librosa
18
  import uvicorn
 
 
19
 
20
  # Configuration
21
  MODEL_REPO = "kimnamjoon0007/lkht-v440"
 
61
  model.eval()
62
  print(f"Ready on {DEVICE}")
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # FastAPI app
66
  app = FastAPI(title="AI Voice Detection API", version="2.0")
 
74
 
75
  class DetectionResponse(BaseModel):
76
  status: str
 
77
  classification: str
78
  confidenceScore: float
 
79
 
80
 
81
  def load_audio(audio_path):
 
139
  <h2>Response Format</h2>
140
  <pre>{{
141
  "status": "success",
 
142
  "classification": "AI_GENERATED" or "HUMAN",
143
+ "confidenceScore": 0.97
 
144
  }}</pre>
145
  </div>
146
 
 
164
  if x_api_key != API_KEY:
165
  raise HTTPException(status_code=401, detail="Invalid API key")
166
 
 
 
 
 
167
  # Decode audio
168
  try:
169
  audio_bytes = base64.b64decode(request.audioBase64)
 
176
  temp_file.close()
177
 
178
  try:
179
+ # Process
180
  waveform = load_audio(temp_file.name)
181
+ input_values = waveform.unsqueeze(0).to(DEVICE)
 
 
 
 
 
182
 
183
+ with torch.no_grad():
184
+ logits = model(input_values)
185
+ probs = torch.softmax(logits, dim=-1)
186
+ pred = torch.argmax(probs, dim=-1).item()
187
+ conf = probs[0, pred].item()
188
 
189
+ classification = "AI_GENERATED" if pred == 1 else "HUMAN"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  return DetectionResponse(
192
  status="success",
 
193
  classification=classification,
194
  confidenceScore=round(conf, 2),
 
195
  )
196
 
197
  finally:
requirements.txt CHANGED
@@ -9,6 +9,3 @@ pydub>=0.25.1
9
  numpy>=1.24.0
10
  scipy>=1.10.0
11
  soundfile>=0.12.0
12
- onnxruntime>=1.16.0
13
- onnx>=1.14.0
14
- onnxscript>=0.1.0
 
9
  numpy>=1.24.0
10
  scipy>=1.10.0
11
  soundfile>=0.12.0