# NOTE(review): this file was scraped from a Hugging Face Space page;
# the "Spaces: Running" status banner that preceded the code has been
# converted to this comment so the module parses.
import io

import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
class AudioDetector:
    """Deepfake-audio detector backed by a weighted ensemble of
    Hugging Face ``audio-classification`` models.

    Each configured model votes with an "AI probability"; votes are
    combined as a weighted mean and thresholded at 0.5. Models that
    fail to load are skipped, so the ensemble degrades gracefully to
    whatever subset loaded successfully.
    """

    # Labels (lower-cased) that a model may use to mean "synthetic audio".
    # If a model reports none of these, its AI probability is taken as 0.0.
    _AI_LABELS = ("fake", "spoof", "aivoice", "artificial", "generated")

    def __init__(self):
        """Load every model in the ensemble; failures are logged and skipped."""
        print("--- [AudioDetector] Initializing 4-Model Ensemble System... ---")
        # The Committee of Experts
        self.models_config = [
            {
                "id": "MelodyMachine/Deepfake-audio-detection-V2",
                "name": "MelodyMachine",
                "weight": 1.0,
            },
            {
                "id": "mo-thecreator/Deepfake-audio-detection",
                "name": "Mo-Creator",
                "weight": 1.0,
            },
            {
                "id": "Hemgg/Deepfake-audio-detection",
                "name": "Hemgg",
                "weight": 1.0,
            },
            {
                "id": "Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
                "name": "Gustking-XLSR",
                "weight": 1.2,  # Higher weight for the large model
            },
        ]
        self.pipelines = []
        for cfg in self.models_config:
            try:
                print(f"--- Loading Model: {cfg['name']} ({cfg['id']}) ---")
                # Load pipeline (downloads weights on first use).
                p = pipeline("audio-classification", model=cfg['id'])
                self.pipelines.append({"pipe": p, "config": cfg})
                print(f"[+] Loaded {cfg['name']}")
            except Exception as e:
                # Best effort: a missing model shrinks the ensemble
                # rather than aborting initialization.
                print(f"[-] Failed to load {cfg['name']}: {e}")
        if not self.pipelines:
            print("CRITICAL: No models could be loaded. Ensemble is empty.")

    def analyze_audio(self, audio_data: bytes, language: str) -> dict:
        """Classify raw audio bytes as AI-generated or human.

        Args:
            audio_data: Encoded audio (any format librosa can decode).
            language: Currently unused; kept for interface compatibility
                with callers that pass it.

        Returns:
            Dict with ``classification`` ("AI_GENERATED" or "HUMAN"),
            ``confidenceScore`` (probability of the chosen class, 0-1),
            and ``explanation``. On any internal failure the fail-safe
            verdict is "HUMAN" with confidence 0.0 and an ``error`` key.
        """
        try:
            # 1. Load Audio — resample to 16 kHz mono, the rate the
            # wav2vec2-style detectors expect.
            buffer = io.BytesIO(audio_data)
            y, sr = librosa.load(buffer, sr=16000)

            # 2. Extract Features (For Explanation Context Only)
            # We preserve this for generating professional justifications,
            # but the DECISION is purely model-based.
            centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

            # 3. Running The Ensemble
            votes = []
            total_score = 0.0
            total_weight = 0.0
            print(f"\n--- Running Ensemble Inference on {len(self.pipelines)} models ---")
            for item in self.pipelines:
                p = item['pipe']
                cfg = item['config']
                weight = cfg['weight']
                try:
                    # Run Inference; top_k=None returns scores for all labels.
                    results = p(y, top_k=None)

                    # Parsing Result for AI Probability: take the score of
                    # the first label that means "Fake".
                    ai_score = 0.0
                    for r in results:
                        if r['label'].lower().strip() in self._AI_LABELS:
                            ai_score = r['score']
                            break
                    # Note: If no AI label is found (e.g. only 'real'/'human'),
                    # ai_score stays 0.0 (Human). This logic covers
                    # {0: 'real', 1: 'fake'} where 'fake' is present.
                    verdict = "AI" if ai_score > 0.5 else "HUMAN"

                    # Weighted contribution
                    votes.append({
                        "name": cfg['name'],
                        "ai_prob": ai_score,
                        "verdict": verdict,
                    })
                    total_score += ai_score * weight
                    total_weight += weight
                    print(f" > {cfg['name']}: {ai_score:.4f} ({verdict})")
                except Exception as e:
                    # One failing model must not sink the whole ensemble.
                    print(f"Error inferencing {cfg['name']}: {e}")

            # 4. Final Aggregation — weighted mean of per-model AI scores.
            if total_weight > 0:
                final_ensemble_score = total_score / total_weight
            else:
                final_ensemble_score = 0.0  # Fail safe: no model produced a score
            is_ai = final_ensemble_score > 0.5
            final_classification = "AI_GENERATED" if is_ai else "HUMAN"
            # Confidence is the probability of the winning class.
            class_confidence = final_ensemble_score if is_ai else (1.0 - final_ensemble_score)
            print(f"--- Final Ensemble Score: {final_ensemble_score:.4f} => {final_classification} (Conf: {class_confidence:.2f}) ---\n")

            # 5. Construct Explanation
            # "3 out of 4 models detected deepfake artifacts..."
            ai_votes_count = sum(1 for v in votes if v['verdict'] == 'AI')
            total_models = len(votes)
            explanations = []
            explanations.append(f"Ensemble Analysis: {ai_votes_count}/{total_models} models flagged this audio as AI-generated.")
            explanations.append(f"Aggregated Score: {final_ensemble_score*100:.1f}%.")
            if is_ai:
                # Heuristic narrative only: 2 kHz centroid is an arbitrary
                # cut for the explanation text, not part of the decision.
                if centroid > 2000:
                    explanations.append("High-frequency spectral artifacts consistent with neural vocoders detected.")
                else:
                    explanations.append("Deep learning pattern matching identified non-biological features.")
            else:
                explanations.append("Acoustic analysis confirms natural vocal resonance and organic production.")
            final_explanation = " ".join(explanations)

            return {
                "classification": final_classification,
                # Return logical confidence (prob of the chosen class)
                "confidenceScore": round(float(class_confidence), 2),
                "explanation": final_explanation,
            }
        except Exception as e:
            print(f"Analysis Failed: {e}")
            return {
                "classification": "HUMAN",  # Fail safe
                "confidenceScore": 0.0,
                "error": str(e),
                "explanation": "Analysis failed due to internal error.",
            }
# Global Instance: module-level singleton; loading all ensemble models
# happens once, at import time.
detector = AudioDetector()