AK-Gaming-92 committed on
Commit
0354504
·
verified ·
1 Parent(s): f1565c0

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +20 -0
  2. detector.py +107 -0
  3. main.py +62 -0
  4. requirements.txt +10 -0
  5. utils.py +23 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.9
2
+ FROM python:3.9
3
+
4
+ # Set working directory
5
+ WORKDIR /code
6
+
7
+ # Copy requirements and install dependencies
8
+ COPY ./requirements.txt /code/requirements.txt
9
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
10
+
11
+ # Copy the application code
12
+ COPY . /code
13
+
14
+ # Create a writable cache directory for the AI model
15
+ # (Hugging Face needs this permission setup)
16
+ RUN mkdir -p /code/cache && chmod 777 /code/cache
17
+ ENV XDG_CACHE_HOME=/code/cache
18
+
19
+ # Start the server on port 7860 (Hugging Face default)
20
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
detector.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import librosa
3
+ import numpy as np
4
+ import io
5
+ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
6
+
7
+ class VoiceDetector:
8
+ def __init__(self):
9
+ print("Loading AI Detection Model... (this may take a moment)")
10
+ # We use a pre-trained model specifically fine-tuned for Deepfake detection
11
+ # Source: https://huggingface.co/MelodyMachine/Deepfake-audio-detection-V2
12
+ # Alternative robust model: "padmalcom/wav2vec2-large-fake-voice-detection-v2"
13
+ self.model_name = "MelodyMachine/Deepfake-audio-detection-V2"
14
+
15
+ try:
16
+ self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.model_name)
17
+ self.model = Wav2Vec2ForSequenceClassification.from_pretrained(self.model_name)
18
+ self.model.eval() # Set to evaluation mode
19
+ except Exception as e:
20
+ print(f"CRITICAL ERROR: Failed to load AI model. {e}")
21
+ raise e
22
+
23
+ def preprocess_audio(self, audio_buffer: io.BytesIO, target_sr=16000):
24
+ """
25
+ Loads audio from bytes and resamples it to 16kHz (required by Wav2Vec2).
26
+ """
27
+ audio_buffer.seek(0)
28
+ # Load with librosa (automatically handles MP3/WAV)
29
+ y, sr = librosa.load(audio_buffer, sr=target_sr)
30
+
31
+ # Ensure we have enough audio for the model (pad if too short)
32
+ if len(y) < target_sr: # Less than 1 second
33
+ padding = target_sr - len(y)
34
+ y = np.pad(y, (0, padding), 'constant')
35
+
36
+ # Normalize audio volume
37
+ y = librosa.util.normalize(y)
38
+ return y
39
+
40
+ def analyze(self, audio_buffer: io.BytesIO, language: str):
41
+ """
42
+ Analyzes audio using the Deep Learning model.
43
+ Returns classification, confidence, and explanation.
44
+ """
45
+ try:
46
+ # 1. Preprocess Audio
47
+ audio_input = self.preprocess_audio(audio_buffer)
48
+
49
+ # 2. Prepare inputs for the model
50
+ inputs = self.feature_extractor(
51
+ audio_input,
52
+ sampling_rate=16000,
53
+ return_tensors="pt",
54
+ padding=True
55
+ )
56
+
57
+ # 3. Inference (Prediction)
58
+ with torch.no_grad():
59
+ logits = self.model(**inputs).logits
60
+
61
+ # 4. Convert logits to probabilities (Softmax)
62
+ probabilities = torch.nn.functional.softmax(logits, dim=-1)
63
+
64
+ # Get the predicted class (0 or 1) and score
65
+ # Note: Model specific labels need to be checked.
66
+ # Usually Index 0 = Real/Human, Index 1 = Fake/AI for this specific model family
67
+ # We verify via the model config id2label if available, otherwise assume standard.
68
+
69
+ # Let's dynamically check the label map if possible
70
+ id2label = self.model.config.id2label
71
+ predicted_id = torch.argmax(probabilities, dim=-1).item()
72
+ confidence = probabilities[0][predicted_id].item()
73
+ predicted_label = id2label[predicted_id]
74
+
75
+ # Map model output to API requirements
76
+ # The model labels might be "real"/"fake" or "bonafide"/"spoof"
77
+ is_ai = False
78
+ if "fake" in predicted_label.lower() or "spoof" in predicted_label.lower() or "ai" in predicted_label.lower():
79
+ is_ai = True
80
+ elif "real" in predicted_label.lower() or "bonafide" in predicted_label.lower() or "human" in predicted_label.lower():
81
+ is_ai = False
82
+ else:
83
+ # Fallback based on index (usually 1 is fake)
84
+ is_ai = (predicted_id == 1)
85
+
86
+ # 5. Construct Response
87
+ if is_ai:
88
+ classification = "AI_GENERATED"
89
+ explanation = "Deep learning model detected synthetic vocal artifacts and unnatural spectral patterns."
90
+ else:
91
+ classification = "HUMAN"
92
+ explanation = "Deep learning model verified natural micro-prosody and human vocal characteristics."
93
+
94
+ return {
95
+ "classification": classification,
96
+ "confidenceScore": round(confidence, 2),
97
+ "explanation": explanation
98
+ }
99
+
100
+ except Exception as e:
101
+ # Fallback for debugging
102
+ print(f"Analysis Error: {e}")
103
+ return {
104
+ "classification": "HUMAN", # Fail-safe default
105
+ "confidenceScore": 0.0,
106
+ "explanation": f"Error during analysis: {str(e)}"
107
+ }
main.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Header, HTTPException, Request
2
+ from pydantic import BaseModel
3
+ import uvicorn
4
+ import os
5
+ import traceback
6
+ from utils import decode_base64_audio, convert_mp3_to_wav
7
+ from detector import VoiceDetector
8
+
9
+ app = FastAPI(title="AI Voice Detection API")
10
+ detector = VoiceDetector()
11
+
12
+ # Configuration
13
+ API_KEY = os.getenv("API_KEY", "sk_test_123456789")
14
+ SUPPORTED_LANGUAGES = ["Tamil", "English", "Hindi", "Malayalam", "Telugu"]
15
+
16
+ class DetectionRequest(BaseModel):
17
+ language: str
18
+ audioFormat: str
19
+ audioBase64: str
20
+
21
+ @app.post("/api/voice-detection")
22
+ async def detect_voice(
23
+ request: DetectionRequest,
24
+ x_api_key: str = Header(None)
25
+ ):
26
+ # 1. Authentication
27
+ if x_api_key != API_KEY:
28
+ raise HTTPException(status_code=401, detail="Invalid API key or malformed request")
29
+
30
+ # 2. Validation
31
+ if request.language not in SUPPORTED_LANGUAGES:
32
+ raise HTTPException(status_code=400, detail=f"Language {request.language} not supported")
33
+
34
+ if request.audioFormat.lower() != "mp3":
35
+ raise HTTPException(status_code=400, detail="Only MP3 format is supported")
36
+
37
+ try:
38
+ # 3. Process Audio
39
+ mp3_buffer = decode_base64_audio(request.audioBase64)
40
+ wav_buffer = convert_mp3_to_wav(mp3_buffer)
41
+
42
+ # 4. Analyze
43
+ result = detector.analyze(wav_buffer, request.language)
44
+
45
+ # 5. Return Response
46
+ return {
47
+ "status": "success",
48
+ "language": request.language,
49
+ "classification": result["classification"],
50
+ "confidenceScore": result["confidenceScore"],
51
+ "explanation": result["explanation"]
52
+ }
53
+
54
+ except Exception as e:
55
+ traceback.print_exc()
56
+ return {
57
+ "status": "error",
58
+ "message": str(e)
59
+ }
60
+
61
+ if __name__ == "__main__":
62
+ uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ requests
5
+ torch
6
+ transformers
7
+ librosa
8
+ numpy
9
+ scipy
10
+ pydub
utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ from pydub import AudioSegment
4
+ import tempfile
5
+ import os
6
+
7
+ def decode_base64_audio(base64_string: str) -> io.BytesIO:
8
+ """Decodes a base64 string into a bytes buffer."""
9
+ audio_data = base64.b64decode(base64_string)
10
+ return io.BytesIO(audio_data)
11
+
12
+ def convert_mp3_to_wav(mp3_buffer: io.BytesIO) -> io.BytesIO:
13
+ """Converts MP3 audio buffer to WAV format for processing."""
14
+ try:
15
+ audio = AudioSegment.from_mp3(mp3_buffer)
16
+ wav_buffer = io.BytesIO()
17
+ audio.export(wav_buffer, format="wav")
18
+ wav_buffer.seek(0)
19
+ return wav_buffer
20
+ except Exception as e:
21
+ # Fallback: return the buffer as-is, librosa can handle MP3
22
+ mp3_buffer.seek(0)
23
+ return mp3_buffer