Upload folder using huggingface_hub
- app.py: +9 -66
- requirements.txt: +0 -3
app.py CHANGED

@@ -4,7 +4,6 @@ Pure FastAPI - No Gradio
 """
 
 import os
-import random
 import base64
 import tempfile
 import numpy as np
@@ -17,8 +16,6 @@ from transformers import Wav2Vec2Model
 from pydub import AudioSegment
 import librosa
 import uvicorn
-import onnxruntime as ort
-import scipy.special
 
 # Configuration
 MODEL_REPO = "kimnamjoon0007/lkht-v440"
@@ -64,25 +61,6 @@ model.to(DEVICE)
 model.eval()
 print(f"Ready on {DEVICE}")
 
-# Warm-up: eliminate first-request latency
-with torch.no_grad():
-    dummy = torch.randn(1, 16000).to(DEVICE)
-    model(dummy)
-print("Warm-up done")
-
-# Export to ONNX for faster inference
-onnx_path = "/tmp/model.onnx"
-model.to("cpu")
-dummy_export = torch.randn(1, 48000)
-torch.onnx.export(
-    model, dummy_export, onnx_path, opset_version=14,
-    input_names=["input_values"],
-    output_names=["logits"],
-    dynamic_axes={"input_values": {1: "audio_length"}}
-)
-session = ort.InferenceSession(onnx_path)
-print("ONNX session ready")
-
 
 # FastAPI app
 app = FastAPI(title="AI Voice Detection API", version="2.0")
@@ -96,10 +74,8 @@ class DetectionRequest(BaseModel):
 
 class DetectionResponse(BaseModel):
     status: str
-    language: str
     classification: str
     confidenceScore: float
-    explanation: str
 
 
 def load_audio(audio_path):
@@ -163,10 +139,8 @@ def home():
     <h2>Response Format</h2>
     <pre>{{
     "status": "success",
-    "language": "English",
     "classification": "AI_GENERATED" or "HUMAN",
-    "confidenceScore": 0.97
-    "explanation": "Detected synthetic voice characteristics"
+    "confidenceScore": 0.97
     }}</pre>
     </div>
@@ -190,10 +164,6 @@ def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
     if x_api_key != API_KEY:
         raise HTTPException(status_code=401, detail="Invalid API key")
 
-    # Validate format
-    if request.audioFormat.lower() != "mp3":
-        raise HTTPException(status_code=400, detail="Only mp3 format supported")
-
     # Decode audio
     try:
         audio_bytes = base64.b64decode(request.audioBase64)
@@ -206,49 +176,22 @@ def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
     temp_file.close()
 
     try:
         # Process
         waveform = load_audio(temp_file.name)
+        input_values = waveform.unsqueeze(0).to(DEVICE)
 
-        ort_outputs = session.run(None, ort_inputs)
-        logits = ort_outputs[0]
-        probs = scipy.special.softmax(logits, axis=-1)
-        pred = int(probs.argmax(axis=-1)[0])
-        conf = float(probs[0, pred])
+        with torch.no_grad():
+            logits = model(input_values)
+        probs = torch.softmax(logits, dim=-1)
+        pred = torch.argmax(probs, dim=-1).item()
+        conf = probs[0, pred].item()
 
+        classification = "AI_GENERATED" if pred == 1 else "HUMAN"
 
-        ai_explanations = [
-            "Detected synthetic voice characteristics and artificial patterns",
-            "Audio exhibits signs of AI-based speech synthesis",
-            "Voice patterns are consistent with machine-generated speech",
-            "Identified artificial spectral features typical of synthetic voices",
-            "Analysis reveals digitally synthesized vocal characteristics",
-            "Audio signature matches known AI voice generation patterns",
-            "Detected unnatural prosody and robotic tonal artifacts",
-            "Voice lacks micro-variations found in natural human speech",
-        ]
-        human_explanations = [
-            "Detected natural speech patterns and organic voice characteristics",
-            "Voice exhibits natural human vocal tract resonances",
-            "Audio contains organic micro-variations consistent with human speech",
-            "Speech patterns align with natural human voice production",
-            "Identified genuine vocal characteristics and natural prosody",
-            "Analysis confirms authentic human speech signatures",
-            "Voice displays natural breathing patterns and tonal variations",
-            "Audio shows no signs of synthetic generation or manipulation",
-        ]
-
-        if classification == "AI_GENERATED":
-            explanation = random.choice(ai_explanations)
-        else:
-            explanation = random.choice(human_explanations)
 
         return DetectionResponse(
             status="success",
-            language=request.language,
             classification=classification,
             confidenceScore=round(conf, 2),
-            explanation=explanation
        )
 
     finally:
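After this commit the endpoint runs the PyTorch model directly (the ONNX export, warm-up, and canned explanation text are gone) and the response carries only status, classification, and confidenceScore. For reference, a minimal client sketch against the updated API follows; the Space URL and the /detect route path are assumptions, since the route decorator is not part of this diff, while the X-API-Key header, the audioBase64 field, and the response fields come from the code above.

import base64

import requests

# Assumptions: the route path ("/detect") and the Space hostname are placeholders,
# not taken from this diff; API_KEY is whatever the server-side API_KEY is set to.
API_URL = "https://<your-space>.hf.space/detect"
API_KEY = "<your-api-key>"

# DetectionRequest.audioBase64 expects the raw audio file encoded as base64.
with open("sample.mp3", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

# The mp3-only validation was removed in this commit; audioFormat is sent here
# only for compatibility with existing clients.
response = requests.post(
    API_URL,
    json={"audioBase64": audio_b64, "audioFormat": "mp3"},
    headers={"X-API-Key": API_KEY},
    timeout=60,
)
response.raise_for_status()
result = response.json()
print(result["classification"], result["confidenceScore"])

Note that clients which previously read the language or explanation fields must drop them, since DetectionResponse no longer includes either.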
requirements.txt CHANGED

@@ -9,6 +9,3 @@ pydub>=0.25.1
 numpy>=1.24.0
 scipy>=1.10.0
 soundfile>=0.12.0
-onnxruntime>=1.16.0
-onnx>=1.14.0
-onnxscript>=0.1.0