palli23 commited on
Commit
10659a4
·
verified ·
1 Parent(s): 696e56f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -38
app.py CHANGED
@@ -1,75 +1,107 @@
1
- # app.py — Your original working version + repetition_penalty=1.2 + ngram=3
2
 
3
  import os
4
- os.environ["OMP_NUM_THREADS"] = "1"
5
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
6
 
7
  import gradio as gr
8
  import spaces
9
  from transformers import pipeline
10
  import torch
11
- import gc
 
 
 
12
 
13
# ——————————————————————————————
# ZeroGPU worker – model loaded inside
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe one uploaded audio file with the fine-tuned Whisper model."""
    # Guard clause: nothing uploaded yet.
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # The pipeline is built inside the GPU worker so the model only
    # lives for the duration of this call.
    asr = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU inside @spaces.GPU
    )

    decoded = asr(
        audio_path,
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=False,
        generate_kwargs={
            "num_beams": 5,
            "repetition_penalty": 1.2,
            "no_repeat_ngram_size": 3,
            "temperature": 0.0,
        },
    )

    # Drop the model and flush CUDA caches so the ZeroGPU worker
    # keeps a small memory footprint between requests.
    del asr
    gc.collect()
    torch.cuda.empty_cache()

    return decoded["text"]
 
52
 
53
# ——————————————————————————————
# UI – clean and simple
# ——————————————————————————————
with gr.Blocks() as demo:
    # Header texts, rendered as three separate Markdown components.
    for heading in (
        "# Íslenskt ASR – 3 mínútur",
        "**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð",
        "**Hafa samband:** pallinr1@protonmail.com",
    ):
        gr.Markdown(heading)

    # Input / trigger / output widgets.
    audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the GPU worker.
    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 
 
 
 
 
 
 
 
66
 
67
# ——————————————————————————————
# Public launch
# ——————————————————————————————
launch_options = {
    "share": True,
    "server_name": "0.0.0.0",  # listen on all interfaces
    "server_port": 7860,
    "auth": None,  # no login required
}
demo.launch(**launch_options)
 
1
# app.py — Batch file transcription (up to 10 files)

import os

# These environment knobs must be set BEFORE importing torch/transformers:
# PYTORCH_CUDA_ALLOC_CONF is read when the CUDA caching allocator is
# initialized, and OMP_NUM_THREADS only takes effect if it is set before
# the OpenMP runtime is loaded by the numeric libraries. Setting them
# after `import torch` (as the previous revision did) has no effect.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gc
import tempfile
import zipfile

import gradio as gr
import spaces
import torch
from transformers import pipeline
15
+
16
 
17
# ——————————————————————————————
# ZeroGPU worker – model loaded once
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_files(audio_files):
    """Transcribe up to 10 uploaded audio files and bundle the results.

    Args:
        audio_files: uploads from ``gr.File(file_count="multiple")``.
            Depending on the Gradio version each item is either a plain
            filepath string (Gradio >= 4) or a tempfile-like wrapper
            exposing the path as ``.name`` (older Gradio).

    Returns:
        tuple: ``(zip_path, status_message)`` — path to ``transcripts.zip``
        containing one ``.txt`` per input, or ``(None, <message>)`` when
        nothing was uploaded.
    """
    if not audio_files:
        return None, "Hlaðið upp hljóðskrám"

    # Hard cap: only the first 10 uploads are processed.
    audio_files = audio_files[:10]

    workdir = tempfile.mkdtemp()
    outdir = os.path.join(workdir, "transcripts")
    os.makedirs(outdir, exist_ok=True)

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU provided by the @spaces.GPU worker
    )

    try:
        used_names = set()
        for file in audio_files:
            # Gradio >= 4 passes plain filepath strings; older versions
            # pass tempfile wrappers whose path lives in ``.name``.
            audio_path = getattr(file, "name", file)
            base = os.path.splitext(os.path.basename(audio_path))[0]

            # Avoid clobbering output when two uploads share a basename.
            candidate = base
            suffix = 1
            while candidate in used_names:
                suffix += 1
                candidate = f"{base}_{suffix}"
            used_names.add(candidate)
            txt_path = os.path.join(outdir, f"{candidate}.txt")

            result = pipe(
                audio_path,
                chunk_length_s=30,
                batch_size=8,
                return_timestamps=False,
                generate_kwargs={
                    "num_beams": 5,
                    "repetition_penalty": 1.2,
                    "no_repeat_ngram_size": 3,
                    "temperature": 0.0,
                },
            )

            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(result["text"].strip())

        # Bundle every transcript for a single download.
        zip_path = os.path.join(workdir, "transcripts.zip")
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
            for fname in os.listdir(outdir):
                z.write(os.path.join(outdir, fname), arcname=fname)
    finally:
        # Always release the model so the ZeroGPU worker's memory is
        # reclaimed, even when transcription fails part-way through.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return zip_path, "Lokið ✅"
71
+
72
 
73
# ——————————————————————————————
# UI
# ——————————————————————————————
with gr.Blocks() as demo:
    # Page headings.
    gr.Markdown("# Íslenskt ASR – Batch (allt að 10 skrár)")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · sama stillingar · .wav / .mp3")

    # Multi-file upload widget (audio extensions only).
    accepted_types = [".wav", ".mp3"]
    audio_in = gr.File(
        label="Hlaðið upp allt að 10 .wav / .mp3 skrám",
        file_types=accepted_types,
        file_count="multiple",
    )

    btn = gr.Button("Transcribe", variant="primary", size="lg")

    # Results: downloadable zip plus a read-only status line.
    zip_out = gr.File(label="Niðurhal transcripts.zip")
    status = gr.Textbox(label="Staða", interactive=False)

    # Button fans the uploads into the GPU worker and fills both outputs.
    btn.click(fn=transcribe_files, inputs=audio_in, outputs=[zip_out, status])
+ )
98
+
99
 
100
# ——————————————————————————————
# Launch
# ——————————————————————————————
# Bind on all interfaces so the hosting container can route traffic.
server_config = dict(
    share=True,
    server_name="0.0.0.0",
    server_port=7860,
)
demo.launch(**server_config)