Spaces:
Sleeping
Sleeping
| import torch | |
| import librosa | |
| import numpy as np | |
| import io | |
| import torch.nn.functional as F | |
| from transformers import AutoModelForAudioClassification, AutoFeatureExtractor | |
class VoiceDetector:
    """Classify an audio clip as human speech or AI-generated (deepfake) speech.

    Wraps the ``MelodyMachine/Deepfake-audio-detection-V2`` checkpoint from
    the HuggingFace hub. The single public entry point is :meth:`analyze`,
    which returns a JSON-serializable verdict dict with keys
    ``classification``, ``confidenceScore`` and ``explanation``.
    """

    def __init__(self):
        """Download/load the feature extractor and classifier head.

        Raises:
            Exception: re-raised from ``from_pretrained`` if the model
                cannot be loaded (network failure, bad checkpoint, ...).
        """
        print("⏳ Loading Robust AI Detection Model...")
        # FIX: Using the verified MelodyMachine model
        self.model_name = "MelodyMachine/Deepfake-audio-detection-V2"
        try:
            self.feature_extractor = AutoFeatureExtractor.from_pretrained(self.model_name)
            self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
            self.model.eval()  # inference mode (disables dropout etc.)
            # Print labels to debug (make sure we know what 0 and 1 mean).
            print(f"✅ Model Labels: {self.model.config.id2label}")
        except Exception as e:
            print(f"❌ CRITICAL ERROR: Failed to load AI model. {e}")
            # Bare `raise` keeps the original traceback (was `raise e`).
            raise

    def preprocess_audio(self, audio_buffer: io.BytesIO, target_sr: int = 16000) -> np.ndarray:
        """Robust preprocessing: resample, normalize, and fix duration.

        Args:
            audio_buffer: In-memory audio file (any format librosa can decode).
            target_sr: Sample rate the model expects (Hz).

        Returns:
            1-D float waveform between 1.5 s and 5 s long at ``target_sr``.

        Raises:
            ValueError: if the buffer decodes to zero samples (previously
                this crashed with ``ZeroDivisionError`` in the tiling step).
        """
        audio_buffer.seek(0)
        y, sr = librosa.load(audio_buffer, sr=target_sr)

        # Guard: an empty decode would make the tile-factor division below
        # divide by zero.
        if len(y) == 0:
            raise ValueError("Decoded audio is empty.")

        # 1. Normalize volume (crucial for quiet clips).
        y = librosa.util.normalize(y)

        # 2. Fix duration (model expects ~3-5 seconds).
        #    If too short (< 1.5 s), loop the clip until it is long enough.
        min_len = int(target_sr * 1.5)
        if len(y) < min_len:
            # FIX: plain built-in int() — no need for the module-level
            # `int_` shim to coerce the numpy scalar.
            tile_factor = int(np.ceil(min_len / len(y)))
            y = np.tile(y, tile_factor)

        # 3. Limit duration: if longer than 5 s, take the middle 5 s.
        #    Long files confuse the model logic if not chunked.
        max_len = target_sr * 5
        if len(y) > max_len:
            start = (len(y) - max_len) // 2
            y = y[start : start + max_len]
        return y

    def analyze(self, audio_buffer: io.BytesIO, language: str) -> dict:
        """Run deepfake detection on an audio clip.

        Args:
            audio_buffer: In-memory audio file.
            language: Currently unused; kept for interface compatibility
                with callers.

        Returns:
            Dict with ``classification`` ("AI_GENERATED" or "HUMAN"),
            ``confidenceScore`` (0.0-1.0, rounded to 2 dp) and a human
            readable ``explanation``. On any internal error, returns a
            best-effort "HUMAN" verdict with score 0.0 (never raises).
        """
        try:
            # 1. Preprocess
            audio_input = self.preprocess_audio(audio_buffer)

            # 2. Prepare input tensors
            inputs = self.feature_extractor(
                audio_input,
                sampling_rate=16000,
                return_tensors="pt",
                padding=True
            )

            # 3. Inference (no gradients needed)
            with torch.no_grad():
                logits = self.model(**inputs).logits

            # 4. Convert logits to probabilities
            probs = F.softmax(logits, dim=-1)

            # 5. Dynamic label mapping (safe method): inspect config.id2label
            #    rather than hard-coding which index means "fake".
            id2label = self.model.config.id2label
            fake_score = 0.0
            real_score = 0.0
            labels_matched = False
            for idx, label in id2label.items():
                label_lower = str(label).lower()
                if "fake" in label_lower or "spoof" in label_lower:
                    fake_score = probs[0][idx].item()
                    labels_matched = True
                elif "real" in label_lower or "bonafide" in label_lower:
                    real_score = probs[0][idx].item()
                    labels_matched = True

            # Fallback for generic labels like "LABEL_0"/"LABEL_1".
            # FIX: flag-based check — the old `fake_score == 0.0 and
            # real_score == 0.0` test misfired when a matched label
            # legitimately scored (near) zero.
            if not labels_matched:
                # Default mapping for MelodyMachine: index 0 = real, 1 = fake.
                real_score = probs[0][0].item()
                fake_score = probs[0][1].item()

            print(f"🔍 DEBUG: Real Score: {real_score:.4f} | Fake Score: {fake_score:.4f}")

            # 6. Decision logic: whichever class is more probable wins
            #    (for a 2-class softmax this is the ">50%" rule).
            if fake_score > real_score:
                classification = "AI_GENERATED"
                confidence = fake_score
                explanation = f"Detected synthetic artifacts with {int(fake_score*100)}% confidence."
            else:
                classification = "HUMAN"
                confidence = real_score
                explanation = f"Verified human vocal characteristics with {int(real_score*100)}% confidence."

            return {
                "classification": classification,
                "confidenceScore": round(confidence, 2),
                "explanation": explanation
            }
        except Exception as e:
            # Deliberate best-effort boundary: API callers always get a dict.
            print(f"Analysis Error: {e}")
            return {
                "classification": "HUMAN",
                "confidenceScore": 0.0,
                "explanation": f"Error: {str(e)}"
            }
# Helper to fix numpy integer issue
def int_(val):
    """Coerce *val* (e.g. a numpy scalar such as ``np.ceil``'s output) to a built-in ``int``."""
    coerced = int(val)
    return coerced