# app.py
"""Gradio demo Space for the ATD TriPyr Whisper ASR checkpoints on the HF Hub.

Lets the user pick a fine-tuned checkpoint, upload or record audio, and get a
transcription. Pipelines are cached per checkpoint so weights download once.
"""

import logging
import os

import gradio as gr
import torch
from transformers import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
    pipeline,
)

logger = logging.getLogger(__name__)

# === Model configuration ===
HF_REPO = "aciang/ATD-TriPyr-ASR"
SUBROOT = "refined_20251014_013556"
CKPTS = ["checkpoint-200", "checkpoint-400", "checkpoint-600", "checkpoint-776"]
DEFAULT = "checkpoint-400"

# Read the token from a Secret (recommended for a private Space).
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Auto-detect CPU / GPU: transformers pipeline expects a CUDA index or -1 for CPU.
DEVICE = 0 if torch.cuda.is_available() else -1

# Cache pipelines keyed by checkpoint (and device) to avoid rebuilding per call.
_CACHE = {}


def build_asr(ckpt: str):
    """Build (or fetch from cache) the ASR pipeline for checkpoint *ckpt*.

    Only the weights of the selected checkpoint subfolder are downloaded.

    Args:
        ckpt: one of the names in CKPTS.

    Returns:
        A transformers automatic-speech-recognition pipeline.
    """
    key = f"{ckpt}_{DEVICE}"
    if key in _CACHE:
        return _CACHE[key]

    subfolder = f"{SUBROOT}/{ckpt}"

    # 1) Processor (tokenizer + feature_extractor): try the repo root first.
    #    If preprocessor_config.json is missing (or any load error occurs),
    #    fall back to openai/whisper-small — but log why, so a mismatched
    #    tokenizer fallback is never silent.
    try:
        processor = WhisperProcessor.from_pretrained(HF_REPO, token=HF_TOKEN)
    except Exception as exc:
        logger.warning(
            "Could not load processor from %s (%s: %s); "
            "falling back to openai/whisper-small",
            HF_REPO, type(exc).__name__, exc,
        )
        processor = WhisperProcessor.from_pretrained("openai/whisper-small")

    # 2) Model: load weights from the checkpoint subfolder of the repo.
    model = WhisperForConditionalGeneration.from_pretrained(
        HF_REPO,
        subfolder=subfolder,
        torch_dtype="auto",  # let transformers choose the dtype (fp16 on most GPUs)
        low_cpu_mem_usage=True,
        token=HF_TOKEN,
    )

    asr = pipeline(
        task="automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=DEVICE,
        chunk_length_s=30,  # split long audio into 30 s chunks
        return_timestamps=False,
    )
    _CACHE[key] = asr
    return asr


def transcribe(audio_path, ckpt, lang_choice):
    """Transcribe one audio file with the selected checkpoint.

    Args:
        audio_path: file path from gr.Audio(type='filepath'); may be None/empty.
        ckpt: checkpoint name (one of CKPTS).
        lang_choice: 'auto' for language auto-detection, or an explicit
            language code (e.g. 'tay', 'pwn', 'ami').

    Returns:
        The transcription text, or a human-readable error message string —
        this function never raises, so the UI always shows something.
    """
    if not audio_path:
        return "請先上傳或錄製一段音檔"

    try:
        asr = build_asr(ckpt)
    except Exception as e:
        return f"(初始化模型時出錯){e}"

    # Whisper: if a language is specified, force it via generate_kwargs;
    # otherwise leave detection to the model.
    call_kwargs = {}
    if lang_choice and lang_choice != "auto":
        call_kwargs["generate_kwargs"] = {"language": lang_choice, "task": "transcribe"}

    try:
        result = asr(audio_path, **call_kwargs)
    except Exception as e:
        return f"(轉錄時出錯){e}"

    if isinstance(result, dict) and "text" in result:
        return result["text"]
    return str(result)


# === Gradio UI ===
with gr.Blocks(title="ATD TriPyr ASR (HF Space)") as demo:
    gr.Markdown("## ATD TriPyr ASR\n選擇 checkpoint、上傳或錄音,一鍵轉文字。")
    with gr.Row():
        ckpt = gr.Dropdown(choices=CKPTS, value=DEFAULT, label="Checkpoint")
        # Adjust choices as needed; auto = auto-detect,
        # tay = Atayal (Squliq), pwn = Paiwan, ami = Amis.
        lang = gr.Dropdown(
            choices=["auto", "tay", "pwn", "ami"], value="auto", label="Language"
        )
    with gr.Row():
        audio = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Audio(16~48kHz 皆可)",
        )
        text_out = gr.Textbox(label="Transcription", lines=8)
    btn = gr.Button("Transcribe", variant="primary")
    btn.click(fn=transcribe, inputs=[audio, ckpt, lang], outputs=text_out)
    gr.Markdown(
        """
**小提醒:**
- 第一次選擇某個 checkpoint 會下載權重;看到 Busy 請稍等。
- 若 Space 出現 storage limit exceeded,請到模型庫刪除訓練用大檔(optimizer/scheduler 等),只保留 `model.safetensors` 與必要設定檔。
- 若你的模型庫根目錄暫時沒有 `preprocessor_config.json`,本程式會自動改用 `openai/whisper-small` 的 processor。
"""
    )

if __name__ == "__main__":
    # SSR is on by default on HF Spaces; runs on ZeroGPU/CPU as well.
    demo.launch()