import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import snapshot_download

# Hugging Face Hub repository id passed to snapshot_download below.
REPO_ID = "Daumee/Qwen3-ASR-0.6B-ONNX-CPU"

# Maps the mode name chosen in the UI to the value passed to the inference
# script's --language flag. A value of None means the flag is omitted.
LANGUAGE_MAP = {
    "English": "English",
    "Chinese": "Chinese",
    "Bilingual": None,  # auto-detect
}

# Local directory of the model snapshot; it is used both as the location of
# onnx_inference.py and as the working directory of the inference subprocess.
# NOTE: this call runs at import time, so importing this module presumably
# needs network access (or a warm Hub cache) — confirm for offline deployments.
MODEL_DIR = snapshot_download(repo_id=REPO_ID)


def normalize_audio(input_path: str, progress: gr.Progress | None = None) -> str:
    """Convert uploaded audio to mono 16 kHz WAV. No trimming, no denoising.

    Args:
        input_path: Path of the audio file to convert.
        progress: Optional Gradio progress tracker; when given it is advanced
            to 15% before the conversion starts.

    Returns:
        Path of ``normalized.wav`` inside a freshly created temporary
        directory. The caller owns the file and should delete it when done.

    Raises:
        gr.Error: If ffmpeg is not on PATH or the conversion fails.
    """
    if progress:
        progress(0.15, desc="Preparing audio...")

    if shutil.which("ffmpeg") is None:
        raise gr.Error("ffmpeg is not installed.")

    out_dir = Path(tempfile.mkdtemp())
    out_path = out_dir / "normalized.wav"

    cmd = [
        "ffmpeg",
        "-y",  # overwrite the output without prompting
        "-i", input_path,
        "-ac", "1",  # mono
        "-ar", "16000",  # 16 kHz sample rate
        "-vn",  # drop any video stream
        str(out_path),
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            # Detach stdin so ffmpeg never reads from the server's terminal.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as e:
        # Nothing usable was produced: do not leave the temp directory behind.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise gr.Error("Failed to process the uploaded audio file.") from e

    return str(out_path)


def _parse_asr_stdout(stdout: str) -> dict:
    """Extract the JSON result object from the inference script's stdout.

    The whole output is tried first (covers pretty-printed, multi-line JSON),
    then each line from last to first. Only JSON *objects* are accepted, so a
    stray line that happens to be valid JSON (a number, a quoted string) does
    not hide the real result. If no object is found, the raw output is
    returned as the transcript text with an unknown language.
    """
    raw = (stdout or "").strip()
    candidates = [raw, *(line.strip() for line in reversed(raw.splitlines()))]
    for candidate in candidates:
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    return {
        "text": raw,
        "language": None,
    }


def run_onnx_asr(audio_path: str, mode: str, progress: gr.Progress | None = None) -> dict:
    """Run the model repo's ``onnx_inference.py`` on an audio file.

    Args:
        audio_path: Path of the audio file handed to the inference script.
        mode: A key of ``LANGUAGE_MAP``. If it maps to ``None`` no
            ``--language`` flag is passed to the script.
        progress: Optional Gradio progress tracker; when given it is advanced
            to 45% right before the subprocess starts.

    Returns:
        The JSON object printed by the script, or
        ``{"text": <raw stdout>, "language": None}`` when the output contains
        no JSON object.

    Raises:
        gr.Error: If ``mode`` is unknown, the script is missing from the model
            directory, the process cannot be started, or it exits non-zero
            (the message then carries up to 1500 characters of its
            stderr/stdout).
    """
    if mode not in LANGUAGE_MAP:
        raise gr.Error("Invalid mode selected.")

    language = LANGUAGE_MAP[mode]

    script_path = Path(MODEL_DIR) / "onnx_inference.py"
    if not script_path.exists():
        raise gr.Error("onnx_inference.py was not found in the downloaded model repo.")

    # Use the interpreter running this app so the child sees the same
    # installed packages; a bare "python" may resolve to another environment
    # or not exist at all.
    cmd = [sys.executable or "python", str(script_path), audio_path, "--json"]
    if language is not None:
        cmd.extend(["--language", language])

    if progress:
        progress(0.45, desc="Running transcription...")

    try:
        proc = subprocess.run(
            cmd,
            cwd=MODEL_DIR,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or "").strip()
        stdout = (e.stdout or "").strip()
        detail = stderr or stdout or "Unknown ASR error."
        # Truncate so a long traceback does not flood the UI.
        raise gr.Error(detail[:1500]) from e
    except OSError as e:
        # The interpreter itself could not be launched.
        raise gr.Error("Could not start the ASR inference process.") from e

    return _parse_asr_stdout(proc.stdout)


def make_txt_file(text: str, original_audio_path: str) -> str:
    """Write ``text`` to a UTF-8 ``.txt`` file in a new temporary directory.

    The file is named after the stem of ``original_audio_path``; when that
    stem is empty the name falls back to ``transcript``. Returns the path of
    the written file as a string.
    """
    base_name = Path(original_audio_path).stem
    if not base_name:
        base_name = "transcript"

    target = Path(tempfile.mkdtemp(), base_name + ".txt")
    target.write_text(text, encoding="utf-8")
    return str(target)


def transcribe(audio_file: str, mode: str, progress=gr.Progress()):
    """Gradio click handler: normalize the upload, transcribe it, build outputs.

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.
        mode: Mode name selected in the UI, forwarded to ``run_onnx_asr``.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept as-is on purpose — presumably Gradio relies on it to inject
            the tracker; confirm against the Gradio docs before changing.

    Returns:
        A ``(text, txt_file, info)`` tuple: the transcript, the path of a
        ``.txt`` file containing it, and a short info string with the mode
        and, when reported, the detected language.

    Raises:
        gr.Error: If no file was uploaded, or propagated from the
            normalization / inference steps.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file.")

    progress(0.05, desc="Starting...")

    normalized_path = None
    try:
        normalized_path = normalize_audio(audio_file, progress=progress)
        result = run_onnx_asr(normalized_path, mode=mode, progress=progress)

        # The script's output may use either key name; accept both.
        text = (result.get("text") or result.get("transcript") or "").strip()
        txt_file = make_txt_file(text, audio_file)

        detected_language = result.get("language") or result.get("detected_language")
        info = f"Mode: {mode}"
        if detected_language:
            info += f"\nDetected language: {detected_language}"

        progress(1.0, desc="Done")
        return text, txt_file, info

    finally:
        # Best-effort cleanup of the intermediate WAV; failures are ignored
        # deliberately so they never mask the real result or error.
        if normalized_path:
            try:
                os.remove(normalized_path)
            except OSError:
                pass
            # Also drop the per-request temp directory that held the WAV.
            # os.rmdir only removes an empty directory, so this cannot delete
            # anything else.
            parent_dir = os.path.dirname(normalized_path)
            if parent_dir:
                try:
                    os.rmdir(parent_dir)
                except OSError:
                    pass


# Single-page UI: an audio upload and a mode selector, a button that runs
# `transcribe`, and three outputs (transcript text, downloadable .txt, info).
with gr.Blocks(title="Qwen3 ASR ONNX CPU") as demo:
    gr.Markdown("# Qwen3 ASR ONNX CPU")
    gr.Markdown(
        "Upload audio, choose a mode, transcribe with Qwen3-ASR ONNX on CPU, and download the transcript."
    )

    with gr.Row():
        audio = gr.Audio(
            sources=["upload"],
            type="filepath",  # handler receives a path string, not raw samples
            label="Upload audio file",
        )
        mode = gr.Dropdown(
            # Derived from LANGUAGE_MAP so the offered modes can never drift
            # from the ones run_onnx_asr accepts.
            choices=list(LANGUAGE_MAP),
            value="Bilingual",
            label="Mode",
            info="Bilingual means auto-detect.",
        )

    transcribe_btn = gr.Button("Transcribe")

    transcript = gr.Textbox(label="Transcript", lines=14)
    download_file = gr.File(label="Download transcript")
    metadata = gr.Textbox(label="Info", lines=2, interactive=False)

    # Output order must match the tuple returned by `transcribe`.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio, mode],
        outputs=[transcript, download_file, metadata],
    )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()