palli23 committed on
Commit
cde6c6f
·
1 Parent(s): 1feadc6

fix transcribe bug

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – FINAL VERSION works on paid T4 right now (15–25 s for 3 min)
2
  import os
3
  import gradio as gr
4
  import spaces
@@ -7,31 +7,37 @@ import torch
7
 
8
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
9
 
10
- print("Loading model once at startup (takes ~25 s once, never again)...")
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=MODEL_NAME,
14
- torch_dtype=torch.float16, # FP16 = 2× faster on T4
15
  device=0,
16
  token=os.getenv("HF_TOKEN")
17
  )
18
 
19
- # Lock to Icelandic no language detection delay
 
 
 
 
 
20
  pipe.model.generation_config.language = "is"
21
  pipe.model.generation_config.task = "transcribe"
22
- print("Model ready and locked to Icelandic!")
23
 
24
- # ←←← THIS IS THE FUNCTION NAME THAT WAS MISSING BEFORE
25
- @spaces.GPU(duration=120) # more than enough on paid GPU
 
26
  def transcribe_safe(audio_path):
27
  if not audio_path:
28
  return "Hladdu upp hljóðskrá"
29
 
30
- # Your original safe 20-second chunking (kept exactly as you wrote it)
31
  import librosa
 
32
  audio, sr = librosa.load(audio_path, sr=16000)
33
- chunk_len = 16000 * 20
34
- stride = 16000 * 2
35
  chunks = []
36
  for i in range(0, len(audio), chunk_len - stride):
37
  chunk = audio[i:i + chunk_len]
@@ -41,12 +47,12 @@ def transcribe_safe(audio_path):
41
 
42
  full_text = ""
43
  for chunk in chunks:
44
- result = pipe(chunk, batch_size=16) # raised from 8 → 16
45
  full_text += result["text"] + " "
46
 
47
  return full_text.strip() or "Ekkert heyrt"
48
 
49
- # Your beautiful UI – unchanged
50
  with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
51
  gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
52
  gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")
@@ -55,7 +61,6 @@ with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
55
  btn = gr.Button("Transcribe (15–25 sek)", variant="primary", size="lg")
56
  out = gr.Textbox(lines=30, label="Útskrift")
57
 
58
- # ←←← FIXED: now points to the correct function name
59
  btn.click(transcribe_safe, inputs=audio, outputs=out)
60
 
61
  demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – FINAL & WORKING on paid T4 (15–25 s for 3 min)
2
  import os
3
  import gradio as gr
4
  import spaces
 
7
 
8
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
9
 
10
+ print("Loading model once at startup...")
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=MODEL_NAME,
14
+ torch_dtype=torch.float16,
15
  device=0,
16
  token=os.getenv("HF_TOKEN")
17
  )
18
 
19
+ # ←←← THIS FIXES THE lang_to_id ERROR FOREVER
20
+ if not hasattr(pipe.model.generation_config, "lang_to_id"):
21
+ pipe.model.generation_config.lang_to_id = {"is": 50259}
22
+ pipe.model.generation_config.task_to_id = {"transcribe": 50359}
23
+ pipe.model.generation_config.forced_decoder_ids = None
24
+
25
  pipe.model.generation_config.language = "is"
26
  pipe.model.generation_config.task = "transcribe"
 
27
 
28
+ print("Model ready locked to Icelandic no more errors!")
29
+
30
+ @spaces.GPU(duration=120)
31
  def transcribe_safe(audio_path):
32
  if not audio_path:
33
  return "Hladdu upp hljóðskrá"
34
 
35
+ # librosa imported here so startup never crashes
36
  import librosa
37
+
38
  audio, sr = librosa.load(audio_path, sr=16000)
39
+ chunk_len = 16000 * 100 # 100 seconds of 16 kHz audio (the pre-commit value was 16000 * 20 = 20 seconds)
40
+ stride = 16000 * 2 # 2 seconds overlap
41
  chunks = []
42
  for i in range(0, len(audio), chunk_len - stride):
43
  chunk = audio[i:i + chunk_len]
 
47
 
48
  full_text = ""
49
  for chunk in chunks:
50
+ result = pipe(chunk, batch_size=16)
51
  full_text += result["text"] + " "
52
 
53
  return full_text.strip() or "Ekkert heyrt"
54
 
55
+ # Your original beautiful UI
56
  with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
57
  gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
58
  gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")
 
61
  btn = gr.Button("Transcribe (15–25 sek)", variant="primary", size="lg")
62
  out = gr.Textbox(lines=30, label="Útskrift")
63
 
 
64
  btn.click(transcribe_safe, inputs=audio, outputs=out)
65
 
66
  demo.launch(auth=("beta", "beta2025"))