File size: 3,095 Bytes
10659a4
2102ae8
6adf5a9
10659a4
 
 
6adf5a9
 
 
a0182fe
45c12a4
10659a4
 
 
 
a90df61
3b102fc
10659a4
3b102fc
2102ae8
10659a4
 
 
 
 
 
 
 
 
a90df61
9648db0
10659a4
a4070be
9648db0
10659a4
3b102fc
a90df61
10659a4
 
 
 
a90df61
10659a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9648db0
 
 
a90df61
10659a4
 
9b12ef0
9a5eb7a
10659a4
9a5eb7a
2102ae8
10659a4
 
 
 
 
 
 
 
 
 
a90df61
3b102fc
a90df61
10659a4
 
 
 
 
 
 
 
 
a90df61
9a5eb7a
10659a4
9a5eb7a
c675e00
696e56f
c675e00
a0182fe
696e56f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# app.py — Batch file transcription (up to 10 files)

import os
import gc
import zipfile
import tempfile

import gradio as gr
import spaces
from transformers import pipeline
import torch

os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"


# ——————————————————————————————
# ZeroGPU worker – model is loaded inside each call
# ——————————————————————————————
def _unique_stem(stem, used):
    """Return *stem*, suffixed with ``_2``, ``_3``, ... if it is already taken.

    *used* is a set of case-folded stems handed out so far; the chosen stem is
    recorded in it. Comparison is case-insensitive so the archive also extracts
    cleanly on case-insensitive filesystems.
    """
    candidate = stem
    counter = 2
    while candidate.casefold() in used:
        candidate = f"{stem}_{counter}"
        counter += 1
    used.add(candidate.casefold())
    return candidate


@spaces.GPU(duration=180)
def transcribe_files(audio_files):
    """Transcribe up to 10 uploaded audio files and bundle the text in a zip.

    Args:
        audio_files: Uploaded files from the Gradio file input. Each item may
            be an object exposing the path as ``.name`` or a plain path string.
            Only the first 10 items are processed.

    Returns:
        A ``(zip_path, status)`` tuple. ``zip_path`` is the path of a zip
        archive holding one ``<name>.txt`` per input file, or ``None`` when
        nothing was uploaded. ``status`` is the message shown to the user.
    """
    if not audio_files:
        return None, "Hlaðið upp hljóðskrám"

    audio_files = audio_files[:10]

    # The archive has to outlive this call so it can be served for download,
    # hence a temp directory that is not removed here.
    workdir = tempfile.mkdtemp()
    zip_path = os.path.join(workdir, "transcripts.zip")

    # Decoding settings are identical for every file; build them once.
    generate_kwargs = {
        "num_beams": 5,
        "repetition_penalty": 1.2,
        "no_repeat_ngram_size": 3,
        "temperature": 0.0,
    }

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,
    )

    try:
        used_stems = set()
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for file in audio_files:
                # Accept both file-like objects with ``.name`` and bare paths.
                audio_path = getattr(file, "name", file)
                stem = os.path.splitext(os.path.basename(audio_path))[0]
                # Two uploads may share a stem (e.g. a.wav and a.mp3); without
                # de-duplication the later transcript would replace the earlier.
                stem = _unique_stem(stem, used_stems)

                result = pipe(
                    audio_path,
                    chunk_length_s=30,
                    batch_size=8,
                    return_timestamps=False,
                    generate_kwargs=generate_kwargs,
                )

                # A str payload is stored UTF-8 encoded; writing straight into
                # the archive avoids leaving loose .txt files in the temp dir.
                archive.writestr(f"{stem}.txt", result["text"].strip())
    finally:
        # Release the model and cached GPU memory even if transcription failed.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return zip_path, "Lokið ✅"


# ——————————————————————————————
# UI
# ——————————————————————————————
with gr.Blocks() as demo:
    # Page header: title followed by a line naming the model and file formats.
    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3")

    # Input: multi-file upload limited to the two listed audio extensions.
    uploads = gr.File(
        file_count="multiple",
        file_types=[".wav", ".mp3"],
        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
    )
    run_button = gr.Button(value="Transcribe", size="lg", variant="primary")

    # Outputs: the downloadable archive and a read-only status line.
    archive_file = gr.File(label="Niðurhal – transcripts.zip")
    status_box = gr.Textbox(interactive=False, label="Staða")

    # Clicking the button runs the batch transcription on the uploaded files.
    run_button.click(transcribe_files, uploads, [archive_file, status_box])


# ——————————————————————————————
# Launch
# ——————————————————————————————
# Start the server on all network interfaces at port 7860 and request a
# public share link as well.
demo.launch(server_port=7860, server_name="0.0.0.0", share=True)