palli23 committed on
Commit
9d663d7
·
1 Parent(s): 40e6eb1

fix transcribe bug

Browse files
Files changed (1) hide show
  1. app.py +34 -27
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – 3 mín hljóð (ZeroGPU virkur, ekkert diarization)
2
  import os
3
  import gradio as gr
4
  import spaces
@@ -6,38 +6,45 @@ from transformers import pipeline
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
@spaces.GPU(duration=60)  # enough for 3 minutes of audio
def transcribe_3min(audio_path):
    """Transcribe the audio file at ``audio_path`` and return its text.

    Args:
        audio_path: Path of the uploaded audio file. A falsy value means
            nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result, or an Icelandic prompt
        asking the user to upload a file when ``audio_path`` is falsy.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # The Whisper pipeline is constructed inside the call, on device 0,
    # authenticating with the HF_TOKEN environment variable.
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # Chunked decoding: 30 s chunks with a (6, 0) stride, batches of 8,
    # and no timestamps in the output.
    decode_options = {
        "chunk_length_s": 30,
        "stride_length_s": (6, 0),
        "return_timestamps": False,
        "batch_size": 8,
    }
    transcription = asr(audio_path, **decode_options)
    return transcription["text"]
31
 
32
# User interface: one audio upload, one button and one text box for the
# transcript; the button click runs transcribe_3min on the uploaded file.
with gr.Blocks(title="Íslenskt ASR3 mín") as demo:
    gr.Markdown("# Íslenskt ASR 3 mínútur")
    gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")

    audio = gr.Audio(
        type="filepath",
        label="Hladdu upp .mp3 / .wav (allt að 3 mín)",
    )
    btn = gr.Button(
        "Transcribe",
        variant="primary",
        size="lg",
    )
    out = gr.Textbox(
        lines=30,
        label="Útskrift",
    )

    btn.click(fn=transcribe_3min, inputs=audio, outputs=out)

# NOTE(review): the login credentials are hard-coded in the source here;
# consider supplying them from the environment instead.
demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – Fixed for outdated generation_config (virkar á ZeroGPU)
2
  import os
3
  import gradio as gr
4
  import spaces
 
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
# Build the Whisper ASR pipeline once, at import time, so that requests share
# a single loaded model.
print("Hleð Whisper módelinu einu sinni...")

# CUDA is selected only when the SYSTEM environment variable equals "spaces";
# any other value (or none) falls back to the CPU.
if os.getenv("SYSTEM") == "spaces":
    _device = "cuda"
else:
    _device = "cpu"

pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device=_device,
    model_kwargs={"attn_implementation": "sdpa"},
    token=os.getenv("HF_TOKEN"),
)

# The fix in this commit: update the model's generation config so that it
# supports language/task selection.
_generation_config = pipe.model.generation_config
_generation_config.language = "is"  # default language
_generation_config.task = "transcribe"  # default task
_generation_config.forced_decoder_ids = None  # disable the old (deprecated) forced IDs
_generation_config.suppress_tokens = []  # clear the suppress list
print("Generation config uppfærð – allt klárt!")
26
+
27
@spaces.GPU(duration=120)
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` and return its text.

    Args:
        audio_path: Path of the uploaded audio file. A falsy value means
            nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result, or an Icelandic prompt
        asking the user to upload a file first when ``audio_path`` is falsy.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá fyrst"

    # No generate_kwargs are passed: language and task come from the
    # generation config set on the module-level pipeline. To override them
    # per call, add generate_kwargs={"language": "is", "task": "transcribe"}.
    asr_options = {
        "chunk_length_s": 30,
        "batch_size": 8,
        "return_timestamps": False,
    }
    transcription = pipe(audio_path, **asr_options)
    return transcription["text"]
41
 
42
# User interface: one audio upload, one button and one text box for the
# transcript; the button click runs transcribe_audio on the uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt Whispermjög lágt WER – 30 sek–5 mín hljóð")
    audio_in = gr.Audio(
        type="filepath",
        label="Hladdu upp mp3/wav (allt að 5 mín)",
    )
    btn = gr.Button(
        "Transcribe",
        variant="primary",
    )
    output = gr.Textbox(
        lines=25,
        label="Útskrift",
    )

    btn.click(fn=transcribe_audio, inputs=audio_in, outputs=output)

demo.launch()