import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# ===============================
# Whisper Model (lazy load)
# ===============================
model = None  # process-wide cache, filled in by the first load_model() call

def load_model():
    """Return the shared Whisper model, constructing it on first use.

    The instance is kept in the module-level ``model`` variable so that
    subsequent calls hand back the same object instead of building a new one.
    """
    global model
    if model is not None:
        return model
    # First call: build the "base" model for CPU with int8 compute.
    model = WhisperModel("base", device="cpu", compute_type="int8")
    return model

# ===============================
# FFmpeg path
# ===============================
def get_ffmpeg():
    """Return the ffmpeg executable path.

    Uses whatever ``shutil.which`` finds on PATH; when nothing is found,
    falls back to the fixed location ``/usr/bin/ffmpeg``.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"

# ===============================
# SAFE: Download video only (NO postprocessing)
# ===============================
def download_video_only(url):
    """Download the media at ``url`` with yt_dlp and return the local path.

    The download is always written to ``downloaded_video.mp4`` in the current
    working directory; no yt_dlp post-processing options are configured.

    Args:
        url: Address of the video to fetch.

    Returns:
        The fixed output path, ``"downloaded_video.mp4"``.
    """
    target = "downloaded_video.mp4"

    # Remove any leftover file from a previous run before downloading.
    leftover_present = os.path.exists(target)
    if leftover_present:
        os.remove(target)

    options = dict(
        format="best",
        outtmpl=target,
        quiet=True,
        nocheckcertificate=True,
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    return target

# ===============================
# SAFE: Extract audio manually (NO ffprobe)
# ===============================
def extract_audio_safe(video_path):
    """Convert ``video_path`` to a WAV file by invoking ffmpeg directly.

    The output is always written to ``extracted_audio.wav`` in the current
    working directory. ffmpeg's exit status and output are not inspected, so
    callers must verify that the returned file actually exists.

    Args:
        video_path: Path of the media file to read.

    Returns:
        The fixed output path, ``"extracted_audio.wav"``.
    """
    wav_target = "extracted_audio.wav"

    # Remove any leftover file from a previous run.
    if os.path.exists(wav_target):
        os.remove(wav_target)

    command = [get_ffmpeg(), "-y", "-i", video_path]
    # -vn: no video stream, -ac 1: single channel, -ar 16000: 16 kHz sampling.
    command += ["-vn", "-ac", "1", "-ar", "16000"]
    command.append(wav_target)

    # Both output streams are discarded; failures are silent by design here.
    silenced = subprocess.DEVNULL
    subprocess.run(command, stdout=silenced, stderr=silenced)

    return wav_target

# ===============================
# Hindi script normalizer
# ===============================
def normalize_script(text, lang):
    """Best-effort conversion of Hindi transcripts to Devanagari script.

    Args:
        text: Transcript text to normalise.
        lang: Language code of the transcript; only ``"hi"`` triggers any
            conversion.

    Returns:
        The transliterated text when ``lang`` is ``"hi"`` and the conversion
        succeeds; otherwise ``text`` unchanged.
    """
    if lang != "hi":
        return text

    try:
        # NOTE(review): confirm the installed indic_transliteration exposes
        # ``sanscript.ARABIC``; if it does not, the resulting AttributeError
        # is swallowed below and the text is returned untouched.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Deliberate best-effort: any library failure falls back to the raw
        # text. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return text

# ===============================
# Transcription logic (STABLE)
# ===============================
def _resolve_upload_path(file):
    """Return a filesystem path for an uploaded-file value.

    Plain paths (``str`` / ``os.PathLike``) are returned as strings. Any other
    object is assumed to be a file wrapper exposing its path as ``.name``
    (NOTE(review): this is what older Gradio ``File`` components pass —
    confirm against the installed version).
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return getattr(file, "name", file)


def transcribe(url, file, lang_choice):
    """Transcribe an uploaded file or a video URL and return display text.

    Args:
        url: Video URL to download, or ``None``/empty when a file is given.
            Surrounding whitespace is ignored.
        file: Uploaded media, either a path or an object exposing its path as
            ``.name``; ``None`` when a URL is given. Takes priority over
            ``url`` when both are supplied.
        lang_choice: Language code handed to the model, or ``"Auto Detect"``
            to pass no language at all.

    Returns:
        A string for the UI: the detected language followed by the
        transcript on success, otherwise a warning or error message. All
        exceptions are caught and reported in the returned text.
    """
    try:
        # Whitespace-only input counts as "no URL".
        if isinstance(url, str):
            url = url.strip()

        # -------- FILE MODE --------
        if file:
            upload_path = _resolve_upload_path(file)
            ext = os.path.splitext(upload_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already an audio file: pass it to the model untouched.
                audio = upload_path
            else:
                audio = extract_audio_safe(upload_path)

        # -------- URL MODE --------
        elif url:
            video = download_video_only(url)
            audio = extract_audio_safe(video)

        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check: ffmpeg errors are not inspected upstream, so a
        # missing or tiny (< 10000 bytes) file is treated as a failure.
        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
            return "❌ Audio extraction failed. Please try again."

        # Named differently from the module-level ``model`` cache to avoid
        # confusion with it.
        whisper_model = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice

        segments, info = whisper_model.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language
        )

        raw_text = " ".join(s.text for s in segments)
        final_text = normalize_script(raw_text, info.language)

        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"

    except Exception as e:
        # Top-level UI boundary: surface every failure as text.
        if "instagram" in str(e).lower():
            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
        return f"❌ Error: {str(e)}"

# ===============================
# MODERN UI
# ===============================
# Custom stylesheet handed to gr.Blocks(css=...) below: dark gradient page
# background, a frosted-glass ".glass" container (applied to the main column
# via elem_classes), white headings/labels, and a hidden footer.
# NOTE(review): the .gr-button-primary / .gr-input / .gr-textarea selectors
# depend on Gradio's generated class names — confirm they still match the
# installed Gradio version.
css = """
body {
    background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
}
.glass {
    background: rgba(255,255,255,0.08);
    backdrop-filter: blur(18px);
    border-radius: 18px;
    padding: 25px;
    box-shadow: 0 20px 40px rgba(0,0,0,0.4);
}
.gr-button-primary {
    background: linear-gradient(135deg,#00c6ff,#0072ff);
    border: none;
    color: white;
    font-weight: 600;
}
.gr-input, .gr-textarea {
    background: rgba(255,255,255,0.12) !important;
    color: white !important;
}
h1, h2, label, .markdown-text {
    color: #ffffff !important;
}
footer {display:none;}
"""

# Build the interface: one glass-styled column holding the header, two input
# tabs (link / upload), a language selector and the transcript output.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    with gr.Column(elem_classes="glass"):
        gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
        gr.Markdown(
            "✔ YouTube ✔ TikTok ✔ Facebook ✔ Twitter/X\n\n"
            "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
            "**No random ffprobe errors. Ever.**"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url = gr.Textbox(label="Video URL")
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")

            with gr.TabItem("📂 Upload File"):
                # ".m4a" is listed because transcribe() accepts it as direct
                # audio input alongside ".mp3" and ".wav".
                file = gr.File(
                    label="Upload Video / Audio",
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ]
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")

        # "Auto Detect" is mapped to "no language" inside transcribe(); every
        # other choice is passed through as a language code.
        lang = gr.Dropdown(
            label="🌍 Transcript Language",
            choices=[
                "Auto Detect",
                "hi",
                "ur",
                "en",
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh"
            ],
            value="Auto Detect"
        )

        output = gr.Code(label="Transcript Output", lines=14)

    # Each button feeds only its own input; the other transcribe() argument
    # is pinned to None through a constant gr.State.
    btn_url.click(transcribe, [url, gr.State(None), lang], output)
    btn_file.click(transcribe, [gr.State(None), file, lang], output)

demo.launch()