# app.py — Batch file transcription (up to 10 files)
import os
import gc
import zipfile
import tempfile

import gradio as gr
import spaces
from transformers import pipeline
import torch

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# Upper bound on how many uploaded files are processed per request.
MAX_FILES = 10
# Hugging Face model id used for every transcription.
MODEL_ID = "palli23/whisper-small-sam_spjall"


# ——————————————————————————————
# ZeroGPU worker – the pipeline is built inside each call and released
# again before the call returns
# ——————————————————————————————
def _unique_stem(stem, taken):
    """Return *stem*, suffixed with ``_2``, ``_3``, ... if it is already taken.

    Comparison is case-insensitive so the resulting archive also extracts
    cleanly on case-insensitive file systems. The chosen name is recorded
    in *taken* (a set of case-folded names) before returning.
    """
    candidate = stem
    counter = 2
    while candidate.casefold() in taken:
        candidate = f"{stem}_{counter}"
        counter += 1
    taken.add(candidate.casefold())
    return candidate


@spaces.GPU(duration=180)
def transcribe_files(audio_files):
    """Transcribe the uploaded audio files and bundle the results in a zip.

    Args:
        audio_files: Uploaded files as delivered by the ``gr.File`` input.
            Each item may be a plain path string or an object exposing the
            path through a ``name`` attribute. Only the first ``MAX_FILES``
            items are processed.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is the path of
        ``transcripts.zip`` containing one ``<stem>.txt`` per processed
        file, or ``None`` when nothing was uploaded. ``status`` is the
        message shown in the status textbox.
    """
    if not audio_files:
        return None, "Hlaðið upp hljóðskrám"

    audio_files = audio_files[:MAX_FILES]

    # The work directory is intentionally left on disk: the returned zip
    # lives inside it and must still exist when the UI serves the download.
    workdir = tempfile.mkdtemp()
    outdir = os.path.join(workdir, "transcripts")
    os.makedirs(outdir, exist_ok=True)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        taken = set()
        for file in audio_files:
            # Accept both file-like uploads (path in ``.name``) and plain
            # path strings.
            audio_path = getattr(file, "name", file)
            stem = os.path.splitext(os.path.basename(audio_path))[0]
            # Uploads such as "a.wav" and "a.mp3" share a stem; without
            # de-duplication the later transcript would overwrite the
            # earlier one.
            txt_path = os.path.join(outdir, f"{_unique_stem(stem, taken)}.txt")

            result = pipe(
                audio_path,
                chunk_length_s=30,
                batch_size=8,
                return_timestamps=False,
                generate_kwargs={
                    "num_beams": 5,
                    "repetition_penalty": 1.2,
                    "no_repeat_ngram_size": 3,
                    "temperature": 0.0,
                },
            )

            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(result["text"].strip())
    finally:
        # Release the model even when a transcription fails, so an error
        # does not leave GPU memory allocated.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    # Zip outputs (sorted so the archive layout is deterministic).
    zip_path = os.path.join(workdir, "transcripts.zip")
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
        for fname in sorted(os.listdir(outdir)):
            z.write(os.path.join(outdir, fname), arcname=fname)

    return zip_path, "Lokið ✅"


# ——————————————————————————————
# UI
# ——————————————————————————————
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
    gr.Markdown(
        "**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3"
    )

    audio_in = gr.File(
        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
        file_types=[".wav", ".mp3"],
        file_count="multiple",
    )
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    zip_out = gr.File(label="Niðurhal – transcripts.zip")
    status = gr.Textbox(label="Staða", interactive=False)

    btn.click(
        fn=transcribe_files,
        inputs=audio_in,
        outputs=[zip_out, status],
    )

# ——————————————————————————————
# Launch
# ——————————————————————————————
demo.launch(
    share=True,
    server_name="0.0.0.0",
    server_port=7860,
)