# Krish1440's picture
# Upload 5 files
# 60d70d0 verified
import io
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
class AudioDetector:
    """Deepfake-audio detector backed by a weighted ensemble of classifiers.

    On construction every entry of ``models_config`` is loaded as a
    ``transformers`` ``audio-classification`` pipeline. Models that fail to
    load are skipped, so ``pipelines`` may hold fewer models than configured
    (or none at all). ``analyze_audio`` runs each loaded model on a clip and
    combines their "fake" probabilities into a weighted average.
    """

    # Lower-cased model labels that are interpreted as "AI generated".
    _AI_LABELS = frozenset({"fake", "spoof", "aivoice", "artificial", "generated"})
    # A score strictly above this value is classified as AI.
    _AI_THRESHOLD = 0.5
    # Mean spectral centroid above which the "vocoder artifacts" wording is used.
    _CENTROID_THRESHOLD = 2000
    # Sample rate the audio is resampled to when it is decoded.
    _SAMPLE_RATE = 16000

    def __init__(self):
        """Load every configured model, keeping the ones that load successfully."""
        # The Committee of Experts
        self.models_config = [
            {
                "id": "MelodyMachine/Deepfake-audio-detection-V2",
                "name": "MelodyMachine",
                "weight": 1.0,
            },
            {
                "id": "mo-thecreator/Deepfake-audio-detection",
                "name": "Mo-Creator",
                "weight": 1.0,
            },
            {
                "id": "Hemgg/Deepfake-audio-detection",
                "name": "Hemgg",
                "weight": 1.0,
            },
            {
                "id": "Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
                "name": "Gustking-XLSR",
                "weight": 1.2,  # Higher weight for the large model
            },
        ]
        print(
            f"--- [AudioDetector] Initializing {len(self.models_config)}-Model "
            "Ensemble System... ---"
        )

        self.pipelines = []
        for cfg in self.models_config:
            # Best effort: one model failing to load must not prevent the
            # remaining models from being used.
            try:
                print(f"--- Loading Model: {cfg['name']} ({cfg['id']}) ---")
                p = pipeline("audio-classification", model=cfg['id'])
            except Exception as e:
                print(f"[-] Failed to load {cfg['name']}: {e}")
            else:
                self.pipelines.append({"pipe": p, "config": cfg})
                print(f"[+] Loaded {cfg['name']}")

        if not self.pipelines:
            print("CRITICAL: No models could be loaded. Ensemble is empty.")

    @staticmethod
    def _failure_response(error: str) -> dict:
        """Log ``error`` and build the fail-safe response (HUMAN, confidence 0.0)."""
        print(f"Analysis Failed: {error}")
        return {
            "classification": "HUMAN",  # Fail safe
            "confidenceScore": 0.0,
            "error": error,
            "explanation": "Analysis failed due to internal error.",
        }

    @classmethod
    def _extract_ai_score(cls, results) -> float:
        """Return the score of the first result whose label means "fake".

        If no recognised AI label is present (e.g. only 'real'/'human'),
        the score is 0.0, i.e. the model counts as a HUMAN vote.
        """
        return next(
            (r['score'] for r in results
             if r['label'].lower().strip() in cls._AI_LABELS),
            0.0,
        )

    def _run_ensemble(self, y) -> list:
        """Run every loaded model on waveform ``y``; return one vote per success."""
        votes = []
        print(f"\n--- Running Ensemble Inference on {len(self.pipelines)} models ---")
        for item in self.pipelines:
            cfg = item['config']
            weight = cfg['weight']
            try:
                # top_k=None asks the pipeline for the scores of all labels.
                results = item['pipe'](y, top_k=None)
                ai_score = self._extract_ai_score(results)
                verdict = "AI" if ai_score > self._AI_THRESHOLD else "HUMAN"
                print(f" > {cfg['name']}: {ai_score:.4f} ({verdict})")
            except Exception as e:
                # A model that fails at inference contributes no vote.
                print(f"Error inferencing {cfg['name']}: {e}")
                continue
            votes.append({
                "name": cfg['name'],
                "ai_prob": ai_score,
                "verdict": verdict,
                "weight": weight,
            })
        return votes

    def _build_explanation(self, votes, final_score, is_ai, centroid) -> str:
        """Compose the human-readable justification for the final verdict."""
        ai_votes_count = sum(1 for v in votes if v['verdict'] == 'AI')
        parts = [
            f"Ensemble Analysis: {ai_votes_count}/{len(votes)} models flagged this audio as AI-generated.",
            f"Aggregated Score: {final_score*100:.1f}%.",
        ]
        if not is_ai:
            parts.append("Acoustic analysis confirms natural vocal resonance and organic production.")
        elif centroid > self._CENTROID_THRESHOLD:
            parts.append("High-frequency spectral artifacts consistent with neural vocoders detected.")
        else:
            parts.append("Deep learning pattern matching identified non-biological features.")
        return " ".join(parts)

    def analyze_audio(self, audio_data: bytes, language: str):
        """Classify an audio clip as AI-generated or human.

        Args:
            audio_data: Encoded audio file contents; decoded with
                ``librosa.load`` from an in-memory buffer.
            language: Accepted for interface compatibility; not used by the
                current implementation.

        Returns:
            A dict with ``classification`` ("AI_GENERATED" or "HUMAN"),
            ``confidenceScore`` (probability of the chosen class, rounded to
            two decimals) and ``explanation``. On failure, including an
            empty ensemble or every model failing at inference, the dict is
            the fail-safe response: "HUMAN", confidence 0.0 and an extra
            ``error`` key. The method never raises.
        """
        # Without any model there is nothing to base a verdict on. Report a
        # failure instead of a HUMAN verdict with full confidence.
        if not self.pipelines:
            return self._failure_response("No models are loaded; the ensemble is empty.")

        try:
            # 1. Load Audio
            buffer = io.BytesIO(audio_data)
            y, sr = librosa.load(buffer, sr=self._SAMPLE_RATE)

            # 2. Extract Features (For Explanation Context Only)
            # The DECISION is purely model-based; the centroid only selects
            # the wording of the explanation.
            centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

            # 3. Running The Ensemble
            votes = self._run_ensemble(y)

            # 4. Final Aggregation (weighted mean of the AI probabilities)
            total_weight = sum(v['weight'] for v in votes)
            if not votes or total_weight <= 0:
                return self._failure_response("All models failed during inference.")
            total_score = sum(v['ai_prob'] * v['weight'] for v in votes)
            final_ensemble_score = total_score / total_weight

            is_ai = final_ensemble_score > self._AI_THRESHOLD
            final_classification = "AI_GENERATED" if is_ai else "HUMAN"
            # Confidence Score: probability of the winning class.
            class_confidence = final_ensemble_score if is_ai else (1.0 - final_ensemble_score)
            print(f"--- Final Ensemble Score: {final_ensemble_score:.4f} => {final_classification} (Conf: {class_confidence:.2f}) ---\n")

            # 5. Construct Explanation
            final_explanation = self._build_explanation(
                votes, final_ensemble_score, is_ai, centroid
            )
            return {
                "classification": final_classification,
                # Return logical confidence (prob of the chosen class)
                "confidenceScore": round(float(class_confidence), 2),
                "explanation": final_explanation,
            }
        except Exception as e:
            # Top-level boundary: callers always receive a response dict.
            return self._failure_response(str(e))
# Global Instance
# Module-level singleton: constructing it here runs AudioDetector.__init__ at
# import time, which attempts to load every model listed in models_config.
detector = AudioDetector()