# app.py — Batch file transcription (up to 10 files)

import os
import gc
import zipfile
import tempfile

import gradio as gr
import spaces
from transformers import pipeline
import torch

# Keep CPU-side math single-threaded. The environment variable covers any
# library initialised later (and child processes); torch is already imported
# above, so its OpenMP runtime may have read the environment before this line
# runs — cap its intra-op thread pool explicitly as well.
os.environ["OMP_NUM_THREADS"] = "1"
torch.set_num_threads(1)

# Limit how large a cached CUDA block the allocator may split, to reduce
# fragmentation. Must be set before the first CUDA allocation.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"


# ——————————————————————————————
# ZeroGPU worker – model is loaded on each call and released afterwards
# ——————————————————————————————
def _upload_path(upload):
    """Return the filesystem path of an upload (a path string or an object with ``.name``)."""
    return upload if isinstance(upload, str) else upload.name


def _unique_stem(stem, taken):
    """Return *stem*, suffixed with ``_2``, ``_3``, ... if it is already in *taken*.

    The returned name is added to *taken* so later calls cannot reuse it.
    """
    candidate = stem
    suffix = 2
    while candidate in taken:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    taken.add(candidate)
    return candidate


@spaces.GPU(duration=180)
def transcribe_files(audio_files):
    """Transcribe up to 10 uploaded audio files and bundle the text into a zip.

    Args:
        audio_files: Sequence of uploads; each item is either a path string
            or an object whose ``name`` attribute is the path. Only the first
            10 items are processed.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is the path of
        ``transcripts.zip`` containing one UTF-8 ``<stem>.txt`` per input,
        or ``None`` when nothing was uploaded. ``status`` is the message
        shown in the UI.

    Raises:
        Whatever the ASR pipeline raises for an unreadable file; the model is
        still released from GPU memory in that case.
    """
    if not audio_files:
        return None, "Hlaðið upp hljóðskrám"

    audio_files = audio_files[:10]

    workdir = tempfile.mkdtemp()
    zip_path = os.path.join(workdir, "transcripts.zip")

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        # Uploads from different folders may share a base name; without
        # de-duplication the later transcript would replace the earlier one.
        used_stems = set()

        # Write each transcript straight into the archive (str data is stored
        # as UTF-8), so no intermediate .txt files are left on disk.
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for upload in audio_files:
                audio_path = _upload_path(upload)
                stem = os.path.splitext(os.path.basename(audio_path))[0]
                stem = _unique_stem(stem, used_stems)

                result = pipe(
                    audio_path,
                    chunk_length_s=30,
                    batch_size=8,
                    return_timestamps=False,
                    generate_kwargs={
                        "num_beams": 5,
                        "repetition_penalty": 1.2,
                        "no_repeat_ngram_size": 3,
                        "temperature": 0.0,
                    },
                )

                archive.writestr(f"{stem}.txt", result["text"].strip())
    finally:
        # Release the model even when a file fails, so an error does not
        # leave GPU memory occupied.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return zip_path, "Lokið ✅"


# ——————————————————————————————
# UI
# ——————————————————————————————
with gr.Blocks() as demo:
    # Page header: title, then a line naming the model and accepted formats.
    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3")

    # Multi-file upload restricted to the two listed extensions.
    audio_in = gr.File(
        file_count="multiple",
        file_types=[".wav", ".mp3"],
        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
    )

    btn = gr.Button("Transcribe", size="lg", variant="primary")

    # Outputs: the downloadable archive and a read-only status line.
    zip_out = gr.File(label="Niðurhal – transcripts.zip")
    status = gr.Textbox(interactive=False, label="Staða")

    # Clicking the button sends the uploads to transcribe_files and routes
    # its (zip_path, status) result to the two output components.
    btn.click(transcribe_files, inputs=audio_in, outputs=[zip_out, status])


# ——————————————————————————————
# Launch
# ——————————————————————————————
# Serve on all network interfaces at port 7860 and request a public share link.
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)