palli23's picture
Update app.py
10659a4 verified
# app.py — Batch file transcription (up to 10 files)
import os
import gc
import zipfile
import tempfile
import gradio as gr
import spaces
from transformers import pipeline
import torch
# Runtime tuning knobs exported through the process environment:
#   OMP_NUM_THREADS         - set to a single thread.
#   PYTORCH_CUDA_ALLOC_CONF - allocator option string "max_split_size_mb:128".
# NOTE(review): these are assigned after `import torch`; confirm that both
# settings are still picked up at that point.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
# ——————————————————————————————
# ZeroGPU worker – the ASR pipeline is built inside every GPU call
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_files(audio_files):
    """Transcribe uploaded audio files and return a zip of the transcripts.

    At most the first 10 entries of ``audio_files`` are processed. Each one is
    run through the ``palli23/whisper-small-sam_spjall`` speech-recognition
    pipeline and its text is written to ``<stem>.txt``. All transcripts are
    then packed into ``transcripts.zip`` inside a fresh temporary directory.

    Args:
        audio_files: Uploaded files. Each entry is either a path string or an
            object whose ``name`` attribute holds the path.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is ``None`` when nothing
        was uploaded; ``status`` is the message shown in the UI.

    Raises:
        Exception: Anything raised by the pipeline or by file I/O is
            propagated, after the temporary directory has been removed and
            the GPU memory cache has been released.
    """
    import shutil  # local import: only needed to clean up after a failure

    if not audio_files:
        return None, "Hlaðið upp hljóðskrám"

    # Only the first 10 uploads are processed; the rest are ignored.
    audio_files = audio_files[:10]

    workdir = tempfile.mkdtemp()
    pipe = None
    try:
        outdir = os.path.join(workdir, "transcripts")
        os.makedirs(outdir, exist_ok=True)

        pipe = pipeline(
            "automatic-speech-recognition",
            model="palli23/whisper-small-sam_spjall",
            torch_dtype=torch.float16,
            device=0,
        )

        used_stems = set()
        for file in audio_files:
            # Accept both plain path strings and file objects exposing `.name`.
            audio_path = getattr(file, "name", file)
            base = os.path.splitext(os.path.basename(audio_path))[0]

            # Two uploads may share a stem (e.g. "a.wav" and "a.mp3"); add a
            # numeric suffix so a later transcript never overwrites an earlier
            # one.
            stem = base
            counter = 1
            while stem in used_stems:
                counter += 1
                stem = f"{base}_{counter}"
            used_stems.add(stem)
            txt_path = os.path.join(outdir, f"{stem}.txt")

            result = pipe(
                audio_path,
                chunk_length_s=30,
                batch_size=8,
                return_timestamps=False,
                generate_kwargs={
                    "num_beams": 5,
                    "repetition_penalty": 1.2,
                    "no_repeat_ngram_size": 3,
                    "temperature": 0.0,
                },
            )
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(result["text"].strip())

        # Zip outputs (sorted so the archive order is deterministic).
        zip_path = os.path.join(workdir, "transcripts.zip")
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
            for fname in sorted(os.listdir(outdir)):
                z.write(os.path.join(outdir, fname), arcname=fname)
    except Exception:
        # Do not leave a half-written work directory behind on failure.
        shutil.rmtree(workdir, ignore_errors=True)
        raise
    finally:
        # Always drop the model reference and release cached GPU memory,
        # including when transcription failed part-way through.
        pipe = None
        gc.collect()
        torch.cuda.empty_cache()

    return zip_path, "Lokið ✅"
# ——————————————————————————————
# UI
# ——————————————————————————————
# Page layout: heading, multi-file upload, trigger button, zip download and
# a read-only status box. Components render in the order they are created.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3")

    # Upload widget restricted to .wav / .mp3, multiple files allowed.
    audio_in = gr.File(
        file_count="multiple",
        file_types=[".wav", ".mp3"],
        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
    )

    btn = gr.Button("Transcribe", variant="primary", size="lg")

    # Outputs filled by transcribe_files: the zip archive and a status message.
    zip_out = gr.File(label="Niðurhal – transcripts.zip")
    status = gr.Textbox(label="Staða", interactive=False)

    # Clicking the button runs the batch transcription on the uploaded files.
    btn.click(transcribe_files, audio_in, [zip_out, status])
# ——————————————————————————————
# Launch
# ——————————————————————————————
# Start the Gradio server on port 7860, listening on 0.0.0.0, and request a
# share link.
# NOTE(review): confirm share=True is actually wanted in the hosting
# environment this app runs in.
demo.launch(share=True, server_name="0.0.0.0", server_port=7860)