palli23 committed on
Commit
696e56f
·
verified ·
1 Parent(s): a8f1bf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -80
app.py CHANGED
@@ -1,115 +1,75 @@
1
- # app.py — Batch file transcription (up to 25 files, Icelandic forced, HF-safe)
2
 
3
  import os
4
- import gc
5
- import zipfile
6
- import tempfile
7
 
8
  import gradio as gr
9
  import spaces
10
  from transformers import pipeline
11
  import torch
12
-
13
- # Environment safety
14
- os.environ["OMP_NUM_THREADS"] = "1"
15
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
16
-
17
 
18
  # ——————————————————————————————
19
- # ZeroGPU worker – model loaded once
20
  # ——————————————————————————————
21
  @spaces.GPU(duration=180)
22
- def transcribe_files(audio_files):
23
- if not audio_files:
24
- return None, "Hlaðið upp hljóðskrám"
25
-
26
- audio_files = audio_files[:25]
27
 
28
- workdir = tempfile.mkdtemp()
29
- outdir = os.path.join(workdir, "transcripts")
30
- os.makedirs(outdir, exist_ok=True)
31
-
32
- # Create ASR pipeline
33
  pipe = pipeline(
34
- "automatic-speech-recognition",
 
 
 
 
 
35
  model="palli23/whisper-small-sam_spjall",
36
  torch_dtype=torch.float16,
37
- device=0,
38
  )
39
 
40
- # 🔧 PATCH generation config (critical fix)
41
- gen_cfg = pipe.model.generation_config
42
- gen_cfg.language = "is"
43
- gen_cfg.task = "transcribe"
44
- gen_cfg.forced_decoder_ids = None # prevent conflicts
45
- gen_cfg.suppress_tokens = None # avoid tokenizer mismatch
46
-
47
- for file in audio_files:
48
- audio_path = file.name
49
- base = os.path.splitext(os.path.basename(audio_path))[0]
50
- txt_path = os.path.join(outdir, f"{base}.txt")
51
-
52
- result = pipe(
53
- audio_path,
54
- chunk_length_s=30,
55
- batch_size=8,
56
- return_timestamps=False,
57
- generate_kwargs={
58
- "num_beams": 5,
59
- "repetition_penalty": 1.2,
60
- "no_repeat_ngram_size": 3,
61
- "temperature": 0.0,
62
- },
63
- )
64
-
65
- with open(txt_path, "w", encoding="utf-8") as f:
66
- f.write(result["text"].strip())
67
-
68
- # Zip outputs
69
- zip_path = os.path.join(workdir, "transcripts.zip")
70
- with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
71
- for fname in os.listdir(outdir):
72
- z.write(os.path.join(outdir, fname), arcname=fname)
73
 
74
- # Cleanup
75
  del pipe
76
  gc.collect()
77
  torch.cuda.empty_cache()
78
 
79
- return zip_path, f"Lokið ✅ ({len(audio_files)} skrár)"
80
-
81
 
82
  # ——————————————————————————————
83
- # UI
84
  # ——————————————————————————————
85
  with gr.Blocks() as demo:
86
- gr.Markdown("# Íslenskt ASR – Batch (allt að 25 skrár)")
87
- gr.Markdown(
88
- "**palli23/whisper-small-sam_spjall** · íslenska föst · .wav / .mp3"
89
- )
90
-
91
- audio_in = gr.File(
92
- label="Hlaðið upp allt að 25 .wav / .mp3 skrám",
93
- file_types=[".wav", ".mp3"],
94
- file_count="multiple",
95
- )
96
 
 
97
  btn = gr.Button("Transcribe", variant="primary", size="lg")
 
98
 
99
- zip_out = gr.File(label="Niðurhal transcripts.zip")
100
- status = gr.Textbox(label="Staða", interactive=False)
101
-
102
- btn.click(
103
- fn=transcribe_files,
104
- inputs=audio_in,
105
- outputs=[zip_out, status],
106
- )
107
-
108
 
109
  # ——————————————————————————————
110
- # Launch
111
  # ——————————————————————————————
112
  demo.launch(
 
113
  server_name="0.0.0.0",
114
  server_port=7860,
115
- )
 
 
1
# app.py — Icelandic ASR demo: Whisper-small fine-tune with repetition controls.

import gc
import os

# Set before the torch import so they take effect at initialization:
# cap OpenMP thread fan-out and limit the CUDA allocator's split size to
# reduce fragmentation on the shared ZeroGPU worker.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gradio as gr
import spaces
import torch
from transformers import pipeline
 
 
 
 
12
 
13
# ——————————————————————————————
# ZeroGPU worker – model loaded inside the GPU-scoped call
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe one Icelandic audio file and return the plain text.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the ``gr.Audio(type="filepath")``
        component; falsy when the user clicks without uploading a file.

    Returns
    -------
    str
        The transcription, or an Icelandic prompt asking for an upload.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # Build the ASR pipeline inside the @spaces.GPU call: the GPU is only
    # visible here, and the model occupies it just for this request.
    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU index inside the @spaces.GPU context
    )

    result = pipe(
        audio_path,
        chunk_length_s=30,       # Whisper's native 30 s window
        batch_size=8,
        return_timestamps=False,  # plain text only
        generate_kwargs={
            "num_beams": 5,               # beam search (was greedy, beam size 1)
            "repetition_penalty": 1.2,    # discourage looping output
            "no_repeat_ngram_size": 3,    # hard-block repeated trigrams
            "temperature": 0.0,           # deterministic decoding
        },
    )

    # Release GPU memory eagerly so the ZeroGPU worker stays healthy
    # across many requests.
    del pipe
    gc.collect()
    torch.cuda.empty_cache()

    return result["text"]
 
52
 
53
# ——————————————————————————————
# UI – clean and simple
# ——————————————————————————————
with gr.Blocks() as demo:
    # Header and model info (Icelandic UI text reproduced verbatim).
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # Layout is top-to-bottom: input, action button, result box.
    audio_input = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    transcribe_btn = gr.Button("Transcribe", variant="primary", size="lg")
    transcript_box = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the ZeroGPU worker defined above.
    transcribe_btn.click(fn=transcribe_3min, inputs=audio_input, outputs=transcript_box)
 
 
 
 
 
 
 
 
66
 
67
# ——————————————————————————————
# Public launch
# ——————————————————————————————
demo.launch(
    # NOTE(review): share=True is ignored (with a warning) when running on
    # Hugging Face Spaces; it only matters for local runs — confirm intent.
    share=True,
    server_name="0.0.0.0",  # bind all interfaces, required inside the Space container
    server_port=7860,       # the port HF Spaces expects
)