palli23 committed on
Commit
7756ae5
·
1 Parent(s): faa307f

fix transcribe bug

Browse files
Files changed (1) hide show
  1. app.py +22 -28
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – VIRKAR Á ÖLLUM Spaces (jafnvel gömlum Gradio)
2
  import os
3
  import gradio as gr
4
  import spaces
@@ -6,50 +6,44 @@ from transformers import pipeline
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
- print("Hleð Whisper módelinu einu sinni...")
10
-
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=MODEL_NAME,
14
  torch_dtype="auto",
15
- device="cuda",
16
- token=os.getenv("HF_TOKEN")
17
  )
18
 
19
- # Fix fyrir gamlar Whisper útgáfur
20
  if not hasattr(pipe.model.generation_config, "lang_to_id") or pipe.model.generation_config.lang_to_id is None:
21
  pipe.model.generation_config.lang_to_id = {"is": 50259}
22
  pipe.model.generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
23
  pipe.model.generation_config.forced_decoder_ids = None
24
 
25
- print("Módel tilbúið!")
26
 
27
- @spaces.GPU(duration=180)
28
- def transcribe_single(audio_path):
 
29
  if not audio_path:
30
- return None, "Hladdu upp hljóðskrá fyrst", "00:00"
31
-
 
32
  result = pipe(audio_path, chunk_length_s=30, batch_size=8)
33
- text = result["text"].strip()
34
- return audio_path, text, None # Slekkur á timer þegar búið
35
 
 
36
  with gr.Blocks() as demo:
37
- gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
38
- gr.Markdown("Hladdu upp einni skrá (allt að 5 mín) → Transcribe")
39
-
40
- audio_in = gr.Audio(label="Hljóðskrá", type="filepath") # Virkar á öllum Gradio útgáfum
41
- btn = gr.Button("Transcribe", variant="primary", size="lg")
42
-
43
- # Einfaldur timer án label/active/visible (virkar á Gradio 3.x)
44
- timer = gr.Timer(value=180)
45
 
46
- output = gr.Textbox(label="Útskrift", lines=20)
 
 
47
 
48
- btn.click(
49
- transcribe_single,
50
- inputs=audio_in,
51
- outputs=[audio_in, output, timer]
52
- )
53
 
54
- # Login: beta / beta2025
55
  demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – Icelandic Whisper transcription demo; reported working on ZeroGPU at the time of this commit
2
  import os
3
  import gradio as gr
4
  import spaces
 
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
+ # Load the model once at startup so every request reuses it instead of reloading
10
+ print("Hleð Whisper módelinu einu sinni (tekur ~25 sek)...")
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=MODEL_NAME,
14
  torch_dtype="auto",
15
+ device="cuda", # ZeroGPU attaches the real GPU only inside @spaces.GPU calls; the spaces package lets this startup placement succeed
16
+ token=os.getenv("HF_TOKEN", None)
17
  )
18
 
19
+ # Patch generation-config fields that older Whisper checkpoints lack (needed for this checkpoint)
20
  if not hasattr(pipe.model.generation_config, "lang_to_id") or pipe.model.generation_config.lang_to_id is None:
21
  pipe.model.generation_config.lang_to_id = {"is": 50259}
22
  pipe.model.generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
23
  pipe.model.generation_config.forced_decoder_ids = None
24
 
25
+ print("Módel tilbúið og lagfært!")
26
 
27
+ # A 60-second GPU allocation is enough because the model is already loaded at startup
28
+ @spaces.GPU(duration=60)
29
+ def transcribe(audio_path):
30
  if not audio_path:
31
+ return "Hladdu upp hljóðskrá fyrst"
32
+
33
+ # Long-form transcription: process the audio in 30-second chunks, 8 chunks per batch
34
  result = pipe(audio_path, chunk_length_s=30, batch_size=8)
35
+ return result["text"].strip()
 
36
 
37
+ # Minimal interface using only basic Gradio components: audio upload, button, transcript box
38
  with gr.Blocks() as demo:
39
+ gr.Markdown("# Íslenskt Whisper – Virkar núna")
40
+ gr.Markdown("Hladdu upp allt að 4–5 mín hljóðskrá → Transcribe (10–20 sek)")
 
 
 
 
 
 
41
 
42
+ audio_in = gr.Audio(type="filepath", label="Hljóðskrá")
43
+ btn = gr.Button("Transcribe", variant="primary", size="lg")
44
+ output = gr.Textbox(label="Útskrift", lines=25)
45
 
46
+ btn.click(transcribe, inputs=audio_in, outputs=output)
 
 
 
 
47
 
48
+ # Login – NOTE(review): credentials are hard-coded in source; consider reading them from Space secrets
49
  demo.launch(auth=("beta", "beta2025"))