palli23 committed on
Commit
33f0766
·
verified ·
1 Parent(s): 2102ae8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -21
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Íslenskt ASR ZeroGPU + repetition_penalty=1.2 (perfect for your model)
2
 
3
  import os
4
  os.environ["OMP_NUM_THREADS"] = "1"
@@ -11,10 +11,8 @@ import torch
11
  import gc
12
 
13
  # ——————————————————————————————
14
- # Model + generation settings (repetition_penalty = 1.2)
15
  # ——————————————————————————————
16
- MODEL_NAME = "palli23/whisper-small-sam_spjall"
17
-
18
  @spaces.GPU(duration=180)
19
  def transcribe_3min(audio_path):
20
  if not audio_path:
@@ -22,26 +20,24 @@ def transcribe_3min(audio_path):
22
 
23
  pipe = pipeline(
24
  "automatic-speech-recognition",
25
- model=MODEL_NAME,
26
  torch_dtype=torch.float16,
27
- device=0, # GPU inside @spaces.GPU
28
- token=os.getenv("HF_TOKEN"), # if you have private model
29
  )
30
 
31
  result = pipe(
32
  audio_path,
33
  chunk_length_s=30,
34
- stride_length_s=(6, 0),
35
  batch_size=8,
36
- return_timestamps=False,
37
  generate_kwargs={
38
- "repetition_penalty": 1.2, # ← exactly what you want
39
- "no_repeat_ngram_size": 3, # extra safety against loops
40
  "temperature": 0.0,
41
  }
42
  )
43
 
44
- # Clean up memory so ZeroGPU never dies
45
  del pipe
46
  gc.collect()
47
  torch.cuda.empty_cache()
@@ -49,30 +45,25 @@ def transcribe_3min(audio_path):
49
  return result["text"]
50
 
51
  # ——————————————————————————————
52
- # UI – clean and fast
53
  # ——————————————————————————————
54
  with gr.Blocks() as demo:
55
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
56
  gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
57
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
58
 
59
- audio_in = gr.Audio(
60
- type="filepath",
61
- label="Hlaðið upp .mp3 / .wav (max ~5 mín)"
62
- )
63
  btn = gr.Button("Transcribe", variant="primary", size="lg")
64
- output = gr.Textbox(lines=25, label="Útskrift", show_word_timestamps=False)
65
 
66
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
67
 
68
  # ——————————————————————————————
69
- # Public Space – no login
70
  # ——————————————————————————————
71
  demo.launch(
72
  share=True,
73
  server_name="0.0.0.0",
74
  server_port=7860,
75
- show_error=True,
76
- quiet=False,
77
  auth=None
78
  )
 
1
+ # app.py — Your original working version + repetition_penalty=1.2 + ngram=3
2
 
3
  import os
4
  os.environ["OMP_NUM_THREADS"] = "1"
 
11
  import gc
12
 
13
  # ——————————————————————————————
14
+ # ZeroGPU worker model loaded inside
15
  # ——————————————————————————————
 
 
16
  @spaces.GPU(duration=180)
17
  def transcribe_3min(audio_path):
18
  if not audio_path:
 
20
 
21
  pipe = pipeline(
22
  "automatic-speech-recognition",
23
+ model="palli23/whisper-small-sam_spjall",
24
  torch_dtype=torch.float16,
25
+ device=0, # GPU inside @spaces.GPU
 
26
  )
27
 
28
  result = pipe(
29
  audio_path,
30
  chunk_length_s=30,
 
31
  batch_size=8,
32
+ return_timestamps=False, # ← no timestamps, as you want
33
  generate_kwargs={
34
+ "repetition_penalty": 1.2, # ← exactly what you asked for
35
+ "no_repeat_ngram_size": 3, # exactly what you asked for
36
  "temperature": 0.0,
37
  }
38
  )
39
 
40
+ # Clean memory so ZeroGPU lives forever
41
  del pipe
42
  gc.collect()
43
  torch.cuda.empty_cache()
 
45
  return result["text"]
46
 
47
  # ——————————————————————————————
48
+ # UI – clean and simple
49
  # ——————————————————————————————
50
  with gr.Blocks() as demo:
51
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
52
  gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
53
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
54
 
55
+ audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
 
 
 
56
  btn = gr.Button("Transcribe", variant="primary", size="lg")
57
+ output = gr.Textbox(lines=25, label="Útskrift")
58
 
59
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
60
 
61
  # ——————————————————————————————
62
+ # Public launch
63
  # ——————————————————————————————
64
  demo.launch(
65
  share=True,
66
  server_name="0.0.0.0",
67
  server_port=7860,
 
 
68
  auth=None
69
  )