Spaces:
Sleeping
Sleeping
| """ | |
| Audio Deepfake Detection - Model Utilities | |
| Handles loading and caching of the HuggingFace audio classification pipeline. | |
| Uses MelodyMachine/Deepfake-audio-detection-V2 (Wav2Vec2-based model). | |
| This module is completely separate from the video detection model_utils.py. | |
| """ | |
| from typing import Optional | |
| import logging | |
# Configure logging. basicConfig runs at import time; it sets up the root
# logger at INFO level when no handlers have been installed yet.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration: the model identifier and the pipeline task name that
# load_audio_pipeline() passes to transformers.pipeline().
AUDIO_MODEL_ID = "MelodyMachine/Deepfake-audio-detection-V2"
AUDIO_MODEL_TASK = "audio-classification"

# Global pipeline cache: None until load_audio_pipeline() succeeds, and reset
# to None again by unload_audio_pipeline().
_audio_pipeline = None
def load_audio_pipeline():
    """
    Load and cache the audio classification pipeline.

    The first successful call builds the pipeline and stores it in the
    module-level ``_audio_pipeline`` cache; later calls return that same
    object without loading anything again.

    Uses HuggingFace's pipeline API which handles:
    - Model downloading (first run only)
    - Feature extraction (Wav2Vec2FeatureExtractor)
    - Automatic resampling to 16kHz
    - Input normalization

    Returns:
        Pipeline object for audio classification

    Raises:
        RuntimeError: If pipeline loading fails for any reason, including
            ``transformers`` not being importable. The original exception
            is attached as ``__cause__``.
    """
    global _audio_pipeline

    if _audio_pipeline is not None:
        logger.info("Using cached audio classification pipeline")
        return _audio_pipeline

    try:
        # Imported inside the try so that an import failure is reported as
        # a RuntimeError like every other load failure.
        from transformers import pipeline

        logger.info(f"Loading audio classification pipeline: {AUDIO_MODEL_ID}")

        # device=-1 forces CPU for compatibility. The pipeline handles all
        # preprocessing automatically.
        _audio_pipeline = pipeline(
            task=AUDIO_MODEL_TASK,
            model=AUDIO_MODEL_ID,
            device=-1,
        )
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"Failed to load audio pipeline: {e}")
        # Chain explicitly so callers can inspect the root cause.
        raise RuntimeError(
            f"Failed to load audio classification model: {e}"
        ) from e

    logger.info("Audio classification pipeline loaded successfully")
    return _audio_pipeline
def get_audio_model_info() -> dict:
    """
    Describe the audio detection model.

    Returns:
        A newly built dictionary of static model metadata. The model id
        and task come from the module constants; the remaining fields are
        fixed values.
    """
    # Start with the fields that mirror the module-level configuration.
    info = {"model_id": AUDIO_MODEL_ID, "task": AUDIO_MODEL_TASK}

    # Append the fixed descriptive fields, keeping the original key order.
    info.update(
        architecture="Wav2Vec2ForSequenceClassification",
        base_model="facebook/wav2vec2-base",
        sample_rate=16000,
        labels=["fake", "real"],
        reported_accuracy=0.997,
    )
    return info
def is_audio_pipeline_loaded() -> bool:
    """Report whether the module-level pipeline cache currently holds a pipeline."""
    if _audio_pipeline is None:
        return False
    return True
def unload_audio_pipeline() -> None:
    """
    Clear the cached audio pipeline.

    Resets the module-level cache to ``None`` and logs the unload. Does
    nothing (and logs nothing) when no pipeline is cached.
    """
    global _audio_pipeline

    # Nothing cached: leave silently.
    if _audio_pipeline is None:
        return

    _audio_pipeline = None
    logger.info("Audio pipeline unloaded from memory")