palli23 committed on
Commit
c3fbcde
·
1 Parent(s): cde6c6f

Fix transcribe bug: add no_timestamps_token_id fallback and correct chunk length to 20 s

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – FINAL & WORKING on paid T4 (15–25 s for 3 min)
2
  import os
3
  import gradio as gr
4
  import spaces
@@ -16,28 +16,31 @@ pipe = pipeline(
16
  token=os.getenv("HF_TOKEN")
17
  )
18
 
19
- # ←←← THIS FIXES THE lang_to_id ERROR FOREVER
20
  if not hasattr(pipe.model.generation_config, "lang_to_id"):
21
  pipe.model.generation_config.lang_to_id = {"is": 50259}
22
  pipe.model.generation_config.task_to_id = {"transcribe": 50359}
23
  pipe.model.generation_config.forced_decoder_ids = None
24
 
 
 
 
 
25
  pipe.model.generation_config.language = "is"
26
  pipe.model.generation_config.task = "transcribe"
27
 
28
- print("Model ready – locked to Icelandic – no more errors!")
29
 
30
  @spaces.GPU(duration=120)
31
  def transcribe_safe(audio_path):
32
  if not audio_path:
33
  return "Hladdu upp hljóðskrá"
34
 
35
- # ← librosa imported here so startup never crashes
36
  import librosa
37
 
38
  audio, sr = librosa.load(audio_path, sr=16000)
39
- chunk_len = 16000 * 100 # 20 seconds
40
- stride = 16000 * 2 # 2 seconds overlap
41
  chunks = []
42
  for i in range(0, len(audio), chunk_len - stride):
43
  chunk = audio[i:i + chunk_len]
@@ -52,7 +55,6 @@ def transcribe_safe(audio_path):
52
 
53
  return full_text.strip() or "Ekkert heyrt"
54
 
55
- # Your original beautiful UI
56
  with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
57
  gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
58
  gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")
 
1
+ # app.py – FIXED: no_timestamps_token_id added (no more ValueError)
2
  import os
3
  import gradio as gr
4
  import spaces
 
16
  token=os.getenv("HF_TOKEN")
17
  )
18
 
19
+ # Fix old Whisper config completely (including timestamps token)
20
  if not hasattr(pipe.model.generation_config, "lang_to_id"):
21
  pipe.model.generation_config.lang_to_id = {"is": 50259}
22
  pipe.model.generation_config.task_to_id = {"transcribe": 50359}
23
  pipe.model.generation_config.forced_decoder_ids = None
24
 
25
+ # ←←← THIS FIXES THE TIMESTAMP ERROR
26
+ if not hasattr(pipe.model.generation_config, "no_timestamps_token_id"):
27
+ pipe.model.generation_config.no_timestamps_token_id = 50363
28
+
29
  pipe.model.generation_config.language = "is"
30
  pipe.model.generation_config.task = "transcribe"
31
 
32
+ print("Model ready – fully fixed for timestamps!")
33
 
34
  @spaces.GPU(duration=120)
35
  def transcribe_safe(audio_path):
36
  if not audio_path:
37
  return "Hladdu upp hljóðskrá"
38
 
 
39
  import librosa
40
 
41
  audio, sr = librosa.load(audio_path, sr=16000)
42
+ chunk_len = 16000 * 20
43
+ stride = 16000 * 2
44
  chunks = []
45
  for i in range(0, len(audio), chunk_len - stride):
46
  chunk = audio[i:i + chunk_len]
 
55
 
56
  return full_text.strip() or "Ekkert heyrt"
57
 
 
58
  with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
59
  gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
60
  gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")