Spaces:
Sleeping
Sleeping
ranar110
Fix: Update response format to match Multi-Language problem statement (classification key)
import os
import warnings

# Suppress warnings (transformers/librosa are noisy at import and load time)
warnings.filterwarnings("ignore")

# Global model cache: holds "model" and "feature_extractor" once load_model()
# has run, so the expensive download/load happens at most once per process.
MODEL_CACHE = {}
MODEL_NAME = "MelodyMachine/Deepfake-audio-detection"  # A good starting model from HF
def load_model():
    """Return ``(model, feature_extractor)``, loading and caching them on first use.

    On the first call the Hugging Face model named by ``MODEL_NAME`` is
    downloaded/loaded and stored in ``MODEL_CACHE``; later calls reuse the
    cached objects. Returns ``(None, None)`` when loading fails (e.g. no
    network access or insufficient memory).
    """
    if MODEL_CACHE.get("model") is not None:
        return MODEL_CACHE["model"], MODEL_CACHE["feature_extractor"]

    print(f"Loading model: {MODEL_NAME}...")
    try:
        # Imported lazily so importing this module stays fast
        # (prevents startup timeout).
        from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

        extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
        # low_cpu_mem_usage keeps peak RAM down while the weights stream in.
        classifier = AutoModelForAudioClassification.from_pretrained(
            MODEL_NAME,
            low_cpu_mem_usage=True,
        )
        MODEL_CACHE["feature_extractor"] = extractor
        MODEL_CACHE["model"] = classifier
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

    return MODEL_CACHE["model"], MODEL_CACHE["feature_extractor"]
def preprocess_audio(file_path, max_duration=10):
    """Decode an audio file into a 16 kHz waveform suitable for the model.

    Args:
        file_path: path to the audio file to decode.
        max_duration: maximum number of seconds to read from the file.

    Returns:
        ``(samples, sample_rate)`` on success, ``(None, None)`` on any
        decoding failure.
    """
    try:
        # Imported lazily to keep module import cheap.
        import librosa
        import numpy as np

        # Resample to 16 kHz, the rate Wav2Vec2-style models typically expect,
        # and cap the clip length so long files don't blow up memory/latency.
        waveform, rate = librosa.load(file_path, sr=16000, duration=max_duration)
    except Exception as e:
        print(f"Error preprocessing audio: {e}")
        return None, None
    return waveform, rate
def analyze_audio_real(metadata):
    """
    Run actual AI inference on the audio file.
    Replaces the mock logic with real Deep Learning model predictions.

    Args:
        metadata: dict carrying at least 'file_path'; 'duration_seconds'
            (when present) bounds the reported segment end time.

    Returns:
        On success, a dict with 'is_human', 'classification', 'confidence',
        'detected_language', 'model_used', 'raw_label' and 'segments'.
        On failure, a dict with 'error', 'is_human' (None) and
        'confidence' (0.0) — all failure returns share this schema.
    """
    # Lazy import to prevent startup timeout.
    import torch

    file_path = metadata.get('file_path')
    if not file_path or not os.path.exists(file_path):
        return {
            "error": "File not found",
            "is_human": None,
            "confidence": 0.0
        }

    # Load (or reuse the cached) model and feature extractor.
    model, feature_extractor = load_model()
    # Explicit None checks: model objects may define __bool__/__len__, so
    # plain truthiness testing ("not model") is not reliable here.
    if model is None or feature_extractor is None:
        # Fallback if model fails to load (e.g. no internet/memory)
        return {
            "error": "Model failed to load",
            "is_human": None,
            "confidence": 0.0
        }

    try:
        # Decode and resample to the model's expected 16 kHz.
        audio, sr = preprocess_audio(file_path)
        if audio is None:
            # Include 'confidence' so this error matches the schema of the
            # other failure returns (it was previously omitted here).
            return {"error": "Invalid audio file", "is_human": None, "confidence": 0.0}

        # Normalize the waveform into model-ready tensors.
        inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt")

        # Inference without gradient tracking (less memory, faster).
        with torch.no_grad():
            logits = model(**inputs).logits

        # Softmax for the score, argmax for the predicted class id.
        probs = torch.nn.functional.softmax(logits, dim=-1)
        predicted_id = torch.argmax(logits, dim=-1).item()
        confidence = probs[0][predicted_id].item()
        predicted_label = model.config.id2label[predicted_id]

        # Heuristic: labels containing "real" or "bona" (bona-fide) denote
        # genuine human speech; everything else is treated as AI-generated.
        is_human = "real" in predicted_label.lower() or "bona" in predicted_label.lower()

        return {
            "is_human": is_human,
            "classification": "HUMAN" if is_human else "AI_GENERATED",
            "confidence": round(confidence, 4),
            "detected_language": "analyzed",
            "model_used": MODEL_NAME,
            "raw_label": predicted_label,
            "segments": [
                # Only the first 10 s are analyzed (preprocess_audio caps the
                # clip), so the segment end is capped at 10.0. NOTE(review):
                # a missing 'duration_seconds' yields a zero-length segment.
                {"start": 0.0, "end": min(metadata.get('duration_seconds', 0), 10.0), "label": predicted_label}
            ]
        }
    except Exception as e:
        print(f"Inference error: {e}")
        return {
            "error": str(e),
            "is_human": None,
            "confidence": 0.0
        }