import os
import shutil
import subprocess
import tempfile

import gradio as gr
import torch
import torchaudio
import yt_dlp
from speechbrain.pretrained import EncoderClassifier

def download_video(url, out_path):
    """Download a video from YouTube or a direct link and save it to ``out_path``.

    URLs containing ``youtube.com`` or ``youtu.be`` are fetched with ``yt_dlp``;
    every other URL is fetched with the external ``wget`` program.

    Args:
        url: Address of the video to download (treated as untrusted input).
        out_path: Destination file path.

    Returns:
        ``out_path`` on success, otherwise a string starting with
        ``"ERROR: Video download failed:"`` describing the failure. Callers
        detect failure by that ``"ERROR"`` prefix.
    """
    try:
        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {'outtmpl': out_path}
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        else:
            # Pass an argument list instead of a shell string so the URL can
            # never be interpreted as shell syntax; "--" stops wget from
            # reading a URL that starts with "-" as an option. check=True
            # turns a non-zero wget exit status into an exception so a failed
            # download is reported instead of silently returning out_path.
            subprocess.run(["wget", "-O", out_path, "--", url], check=True)
        return out_path
    except Exception as e:
        return f"ERROR: Video download failed: {str(e)}"

def extract_audio(video_path, audio_path):
    """Write the audio track of ``video_path`` to ``audio_path`` using ffmpeg.

    The output is requested as 16-bit PCM, 16 kHz, single channel, and any
    existing file at ``audio_path`` is overwritten. ffmpeg's own console
    output is discarded.

    Returns ``audio_path`` when ffmpeg exits successfully; otherwise returns
    a string beginning with "ERROR: Audio extraction failed:".
    """
    ffmpeg_args = ["ffmpeg", "-y"]            # -y: overwrite without prompting
    ffmpeg_args += ["-i", video_path]
    ffmpeg_args += ["-vn"]                    # drop the video stream
    ffmpeg_args += ["-acodec", "pcm_s16le"]
    ffmpeg_args += ["-ar", "16000"]
    ffmpeg_args += ["-ac", "1"]
    ffmpeg_args += [audio_path]

    try:
        subprocess.run(
            ffmpeg_args,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as err:
        return f"ERROR: Audio extraction failed: {err}"
    return audio_path

# Holds the loaded classifier so the model is only loaded on the first request.
_CLASSIFIER_CACHE = {}


def _get_language_classifier():
    """Return the SpeechBrain language-ID classifier, loading it on first use."""
    if "model" not in _CLASSIFIER_CACHE:
        _CLASSIFIER_CACHE["model"] = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-voxlingua107-ecapa",
            savedir="pretrained_models/lang-id-voxlingua107-ecapa"
        )
    return _CLASSIFIER_CACHE["model"]


def analyze_accent(audio_path):
    """Classify the spoken language of an audio file with SpeechBrain.

    Args:
        audio_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        ``(label, confidence)`` on success, where ``label`` is the first text
        label reported by the classifier and ``confidence`` is a float
        likelihood on a linear scale. On failure returns
        ``("ERROR: Accent analysis failed: ...", None)``.
    """
    try:
        classifier = _get_language_classifier()
        signal, fs = torchaudio.load(audio_path)
        prediction = classifier.classify_batch(signal)
        predicted_lang = prediction[3][0]
        # Per the model card for this classifier, prediction[1] is a
        # log-likelihood; exponentiate it so callers get a linear-scale value
        # that is meaningful when displayed as a percentage.
        confidence = float(prediction[1].max().exp().item())
        return predicted_lang, confidence
    except Exception as e:
        return f"ERROR: Accent analysis failed: {str(e)}", None

def _resolve_upload(uploaded_video, temp_dir):
    """Return an on-disk path for an uploaded video without clobbering it."""
    # The upload may already be a plain filesystem path.
    if isinstance(uploaded_video, (str, os.PathLike)):
        return os.fspath(uploaded_video)
    source = uploaded_video.name
    # When the object's name already points at a real file, use it directly.
    # Re-opening that same path for writing would truncate the upload.
    if os.path.isfile(source):
        return source
    # Otherwise treat it as an in-memory file object and persist its bytes.
    target = os.path.join(temp_dir, os.path.basename(source))
    with open(target, "wb") as f:
        f.write(uploaded_video.read())
    return target


def process_input(video_link, uploaded_video):
    """Run the full pipeline: obtain a video, extract audio, classify language.

    An uploaded file takes priority over a link when both are given.

    Args:
        video_link: YouTube or direct video URL; may be empty or ``None``.
        uploaded_video: Uploaded file (a path, or an object exposing
            ``.name`` and ``.read()``), or ``None``.

    Returns:
        A 4-tuple ``(message, video_path, audio_path, accent)``. On any
        failure, or when no input was supplied, ``message`` explains the
        problem and the other three items are ``None``.
    """
    link = (video_link or "").strip()
    # Validate before creating anything on disk so nothing is leaked.
    if uploaded_video is None and not link:
        return "Please provide a YouTube/MP4 link or upload a video file.", None, None, None

    temp_dir = tempfile.mkdtemp()
    # Set to True only on success: the returned paths must outlive this call
    # so the UI can play them back.
    keep_temp_dir = False
    try:
        # Prioritize file upload if both provided
        if uploaded_video is not None:
            video_path = _resolve_upload(uploaded_video, temp_dir)
        else:
            video_path = os.path.join(temp_dir, "input_video.mp4")
            result = download_video(link, video_path)
            if isinstance(result, str) and result.startswith("ERROR"):
                return result, None, None, None

        # Extract audio
        audio_path = os.path.join(temp_dir, "audio.wav")
        result = extract_audio(video_path, audio_path)
        if isinstance(result, str) and result.startswith("ERROR"):
            return result, None, None, None

        # Analyze accent
        accent, confidence = analyze_accent(audio_path)
        if isinstance(accent, str) and accent.startswith("ERROR"):
            return accent, None, None, None

        keep_temp_dir = True
        # For playback in Gradio
        return (
            f"**Detected Language/Accent:** {accent}\n\n**Confidence:** {confidence*100:.2f}%",
            video_path,
            audio_path,
            accent
        )
    finally:
        if not keep_temp_dir:
            # Failure path: remove the scratch directory and its contents.
            shutil.rmtree(temp_dir, ignore_errors=True)

# Assemble the Gradio page: heading and description, one row of inputs,
# the trigger button, then the result widgets.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Accent/Language Detection from Video")
    gr.Markdown(
        "Upload a video or provide a YouTube/direct MP4 link. This app will extract the audio, "
        "detect the spoken language/accent, and estimate confidence using a SpeechBrain pre-trained model."
    )

    # Either input may be left empty; process_input decides which one to use.
    with gr.Row():
        video_link = gr.Textbox(label="YouTube or MP4 Link (optional)")
        uploaded_video = gr.File(
            label="Upload Video File (optional)",
            file_types=[".mp4", ".mov", ".avi", ".mkv"],
        )

    btn = gr.Button("Analyze")

    output_text = gr.Markdown()
    video_output = gr.Video(label="Video Preview")
    audio_output = gr.Audio(label="Extracted Audio", type="filepath")
    # Invisible sink for the fourth value returned by process_input.
    hidden_accent = gr.Textbox(visible=False)

    btn.click(
        fn=process_input,
        inputs=[video_link, uploaded_video],
        outputs=[output_text, video_output, audio_output, hidden_accent],
    )

# Start the web server for the interface.
demo.launch()