|
|
import io |
|
|
import librosa |
|
|
import numpy as np |
|
|
import soundfile as sf |
|
|
import torch |
|
|
from transformers import pipeline |
|
|
|
|
|
class AudioDetector:
    """Weighted ensemble of audio-classification models for AI-voice detection.

    Every entry in ``models_config`` is loaded into a ``transformers``
    ``pipeline("audio-classification", ...)`` when the detector is
    constructed.  A model that fails to load is logged and skipped, so
    ``pipelines`` may hold fewer entries than ``models_config`` (possibly
    none).
    """

    # Lower-cased, stripped labels that are treated as the "AI-generated" class.
    AI_LABELS = frozenset({"fake", "spoof", "aivoice", "artificial", "generated"})
    # Sample rate (Hz) the input audio is resampled to while decoding.
    TARGET_SAMPLE_RATE = 16000
    # A probability strictly above this is an "AI" verdict (per model and overall).
    AI_THRESHOLD = 0.5
    # Mean spectral centroid (Hz) above which the explanation mentions
    # high-frequency artifacts.
    CENTROID_THRESHOLD_HZ = 2000

    def __init__(self):
        """Load every configured model, skipping the ones that fail."""
        print("--- [AudioDetector] Initializing 4-Model Ensemble System... ---")

        # "weight" is the model's relative influence on the ensemble average.
        self.models_config = [
            {
                "id": "MelodyMachine/Deepfake-audio-detection-V2",
                "name": "MelodyMachine",
                "weight": 1.0,
            },
            {
                "id": "mo-thecreator/Deepfake-audio-detection",
                "name": "Mo-Creator",
                "weight": 1.0,
            },
            {
                "id": "Hemgg/Deepfake-audio-detection",
                "name": "Hemgg",
                "weight": 1.0,
            },
            {
                "id": "Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
                "name": "Gustking-XLSR",
                "weight": 1.2,
            },
        ]

        # Each entry: {"pipe": <loaded pipeline>, "config": <models_config item>}.
        self.pipelines = []

        for cfg in self.models_config:
            try:
                print(f"--- Loading Model: {cfg['name']} ({cfg['id']}) ---")
                p = pipeline("audio-classification", model=cfg['id'])
                self.pipelines.append({"pipe": p, "config": cfg})
                print(f"[+] Loaded {cfg['name']}")
            except Exception as e:
                # Best effort: one broken model must not take down the ensemble.
                print(f"[-] Failed to load {cfg['name']}: {e}")

        if not self.pipelines:
            print("CRITICAL: No models could be loaded. Ensemble is empty.")

    @staticmethod
    def _failure_result(reason):
        """Log *reason* and return the result dict used for failed analyses."""
        print(f"Analysis Failed: {reason}")
        return {
            "classification": "HUMAN",
            "confidenceScore": 0.0,
            "error": str(reason),
            "explanation": "Analysis failed due to internal error.",
        }

    @classmethod
    def _extract_ai_score(cls, results):
        """Return the score of the first AI-class label in *results*, else None.

        *results* is an iterable of ``{"label": ..., "score": ...}`` dicts.
        ``None`` means no label matched ``AI_LABELS``, i.e. the model's
        output cannot be interpreted as an AI probability.
        """
        for entry in results:
            if entry['label'].lower().strip() in cls.AI_LABELS:
                return entry['score']
        return None

    def analyze_audio(self, audio_data: bytes, language: str):
        """Classify an audio clip as AI-generated or human.

        Args:
            audio_data: Encoded audio file contents; decoded with
                ``librosa.load`` and resampled to ``TARGET_SAMPLE_RATE``.
            language: Accepted for interface compatibility; not used.

        Returns:
            A dict with ``classification`` (``"AI_GENERATED"`` or
            ``"HUMAN"``), ``confidenceScore`` (confidence in that class,
            rounded to two decimals) and ``explanation``.  This method does
            not raise: on any failure -- including an empty ensemble or no
            model yielding a usable score -- it returns ``"HUMAN"`` with
            ``confidenceScore`` 0.0 and an additional ``error`` key.
        """
        # Without any model there is nothing to base a verdict on; report a
        # failure instead of a fully confident "HUMAN".
        if not self.pipelines:
            return self._failure_result("No models are loaded; ensemble is empty.")

        try:
            buffer = io.BytesIO(audio_data)
            y, sr = librosa.load(buffer, sr=self.TARGET_SAMPLE_RATE)

            votes = []
            total_score = 0.0
            total_weight = 0.0

            print(f"\n--- Running Ensemble Inference on {len(self.pipelines)} models ---")

            for item in self.pipelines:
                p = item['pipe']
                cfg = item['config']
                weight = cfg['weight']

                try:
                    # top_k=None asks the pipeline for the score of every label.
                    results = p(y, top_k=None)

                    ai_score = self._extract_ai_score(results)
                    if ai_score is None:
                        # Unknown label scheme: skip the model rather than
                        # counting it as a confident human vote.
                        print(f"[-] {cfg['name']}: no recognized AI label in output; skipping.")
                        continue

                    verdict = "AI" if ai_score > self.AI_THRESHOLD else "HUMAN"
                    votes.append({
                        "name": cfg['name'],
                        "ai_prob": ai_score,
                        "verdict": verdict,
                    })

                    total_score += ai_score * weight
                    total_weight += weight

                    print(f" > {cfg['name']}: {ai_score:.4f} ({verdict})")
                except Exception as e:
                    # A failing model is dropped from this run's average.
                    print(f"Error inferencing {cfg['name']}: {e}")

            if total_weight <= 0:
                return self._failure_result("No model produced a usable score.")

            final_ensemble_score = total_score / total_weight
            is_ai = final_ensemble_score > self.AI_THRESHOLD
            final_classification = "AI_GENERATED" if is_ai else "HUMAN"

            # Confidence is reported for the chosen class, so a human verdict
            # uses the complement of the AI probability.
            class_confidence = final_ensemble_score if is_ai else (1.0 - final_ensemble_score)

            print(f"--- Final Ensemble Score: {final_ensemble_score:.4f} => {final_classification} (Conf: {class_confidence:.2f}) ---\n")

            ai_votes_count = sum(1 for v in votes if v['verdict'] == 'AI')
            total_models = len(votes)

            explanations = [
                f"Ensemble Analysis: {ai_votes_count}/{total_models} models flagged this audio as AI-generated.",
                f"Aggregated Score: {final_ensemble_score*100:.1f}%.",
            ]

            if is_ai:
                # The centroid only influences the AI explanation, so it is
                # computed only on this branch.
                centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
                if centroid > self.CENTROID_THRESHOLD_HZ:
                    explanations.append("High-frequency spectral artifacts consistent with neural vocoders detected.")
                else:
                    explanations.append("Deep learning pattern matching identified non-biological features.")
            else:
                explanations.append("Acoustic analysis confirms natural vocal resonance and organic production.")

            final_explanation = " ".join(explanations)

            return {
                "classification": final_classification,
                "confidenceScore": round(float(class_confidence), 2),
                "explanation": final_explanation,
            }

        except Exception as e:
            return self._failure_result(e)
|
|
|
|
|
|
|
|
# Shared module-level detector instance. Constructing it runs
# AudioDetector.__init__, which attempts to load every configured model,
# so the loading happens once, when this module is first executed.
detector = AudioDetector()
|
|
|