"""Audio deepfake detection using a HuggingFace audio-classification model."""

import os
import subprocess
import tempfile
import traceback

import numpy as np
import soundfile as sf
import torch
import librosa  # noqa: F401 -- kept: may be relied on elsewhere in the project
import imageio_ffmpeg  # ⚡ FFmpeg engine (bundled binary, no system install needed)
from transformers import AutoModelForAudioClassification, AutoFeatureExtractor


class AudioDeepfakeDetector:
    """Detects AI-generated (deepfake) speech in an audio file.

    Loads a pretrained audio-classification model once at construction,
    then classifies the first few seconds of any audio file via ``predict``.
    """

    CLIP_SECONDS = 4          # only the clip head is analyzed (speed)
    TARGET_SR = 16000         # sample rate the feature extractor is fed
    FAKE_KEYWORDS = ("ai", "fake", "spoof")  # label substrings meaning "fake"

    def __init__(self):
        self.model_name = "Hemgg/Deepfake-audio-detection"
        self.model = None
        self.extractor = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Limit CPU threads to prevent bottlenecking
        if self.device == "cpu":
            torch.set_num_threads(4)

        print(f"⚡ Loading Audio AI Model: {self.model_name}...")
        try:
            self.extractor = AutoFeatureExtractor.from_pretrained(self.model_name)
            self.model = AutoModelForAudioClassification.from_pretrained(
                self.model_name
            ).to(self.device)
            self.model.eval()
            print(f" ℹ️ Labels: {self.model.config.id2label}")
            print("✅ Audio Model Loaded Successfully.")
        except Exception as e:
            # Best-effort load: predict() reports "MODEL NOT LOADED" later.
            print(f"❌ Failed to load Audio Model: {e}")
            traceback.print_exc()

    def _extract_clip(self, audio_path):
        """Transcode the first ``CLIP_SECONDS`` of *audio_path* to mono
        16 kHz WAV and return ``(float32 ndarray, sample_rate)``.

        Uses a unique temp file so concurrent calls cannot clobber each
        other, and always removes it.  Raises ``RuntimeError`` on failure.
        """
        ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
        # BUGFIX: the old fixed name "temp_fast_audio.wav" raced between
        # concurrent calls, and a stale file from an earlier failed run could
        # be mistaken for fresh FFmpeg output.  mkstemp gives a unique path.
        fd, temp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            command = [
                ffmpeg_exe,
                "-y",                           # overwrite output file
                "-i", audio_path,               # input file (mp3, wav, etc.)
                "-t", str(self.CLIP_SECONDS),   # ⚡ only grab the clip head
                "-ac", "1",                     # ⚡ force mono (1 channel)
                "-ar", str(self.TARGET_SR),     # ⚡ force 16000 Hz sample rate
                temp_wav,                       # perfectly formatted output
            ]
            # Run silently; output is irrelevant, only success matters.
            result = subprocess.run(
                command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
            # BUGFIX: the return code was previously ignored, so a failed
            # (silenced) FFmpeg run could go unnoticed.
            if result.returncode != 0 or not os.path.exists(temp_wav):
                raise RuntimeError("FFmpeg failed to process audio.")
            # Soundfile loads instantly: zero resampling work left in Python.
            audio, sr = sf.read(temp_wav)
        finally:
            # BUGFIX: cleanup now also happens when an exception is raised
            # (the old code leaked the temp file on any error path).
            if os.path.exists(temp_wav):
                os.remove(temp_wav)

        if len(audio) == 0:
            # Guard: feeding an empty array to the extractor is a confusing
            # downstream failure; fail with a clear message instead.
            raise RuntimeError("FFmpeg produced an empty audio clip.")
        return audio.astype(np.float32), sr

    def predict(self, audio_path):
        """Classify *audio_path* as real or deepfake speech.

        Returns ``(label, confidence)`` where label is "DEEPFAKE DETECTED",
        "REAL", or an "ERROR..." string and confidence is a float in [0, 1].
        Never raises; all failures are reported as ``("ERROR", 0.0)``.
        """
        if not self.model:
            return "ERROR: MODEL NOT LOADED", 0.0

        try:
            print(f"🔍 Analyzing audio: {audio_path}")

            # ⚡ Ultra-fast FFmpeg pre-processing (4 s, mono, 16 kHz).
            audio, sr = self._extract_clip(audio_path)

            inputs = self.extractor(
                audio, sampling_rate=sr, return_tensors="pt", padding=True
            )
            inputs = {key: val.to(self.device) for key, val in inputs.items()}

            # Fast AI inference (no autograd bookkeeping).
            with torch.inference_mode():
                logits = self.model(**inputs).logits
                probs = torch.nn.functional.softmax(logits, dim=-1)
                confidence, predicted_class_id = torch.max(probs, dim=-1)

            raw_label = self.model.config.id2label[predicted_class_id.item()]
            # The model's label names vary; any "fake"-ish keyword wins.
            check_label = raw_label.lower()
            is_fake = any(kw in check_label for kw in self.FAKE_KEYWORDS)

            label = "DEEPFAKE DETECTED" if is_fake else "REAL"
            score = confidence.item()
            print(f"✅ AI Verdict: {raw_label} -> {label} ({score*100:.1f}%)")
            return label, score

        except Exception as e:
            print(f"❌ AUDIO ERROR: {e}")
            traceback.print_exc()
            return "ERROR", 0.0


if __name__ == "__main__":
    detector = AudioDeepfakeDetector()