palli23 committed on
Commit
86ce37e
·
1 Parent(s): ca5b750

fix transcribe bug

Browse files
Files changed (1) hide show
  1. app.py +31 -66
app.py CHANGED
@@ -1,6 +1,4 @@
1
- # app.py – Full working Icelandic Whisper with BATCH processing on ZeroGPU
2
- # Tested live 3 minutes ago – 8×3-min files in 32 seconds
3
-
4
  import os
5
  import gradio as gr
6
  import spaces
@@ -8,85 +6,52 @@ from transformers import pipeline
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
- print("Hleð Whisper módelinu einu sinni...")
12
 
13
- # Load model once at startup
14
  pipe = pipeline(
15
  "automatic-speech-recognition",
16
  model=MODEL_NAME,
17
  torch_dtype="auto",
18
- device="cuda", # ZeroGPU always gives you CUDA
19
  token=os.getenv("HF_TOKEN")
20
  )
21
 
22
- # Fix old Whisper checkpoints (this is the magic line)
23
- if not hasattr(pipe.model.generation_config, "lang_to_id") \
24
- or pipe.model.generation_config.lang_to_id is None:
25
- pipe.model.generation_config.lang_to_id = {"is": 50259} # Icelandic
26
  pipe.model.generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
27
  pipe.model.generation_config.forced_decoder_ids = None
28
- print("Gamall generation_config lagaður")
29
-
30
- print("Módel tilbúið – allt klárt!")
31
-
32
- @spaces.GPU(duration=180) # 3 minutes → enough for 10–15 files at once
33
- def transcribe_batch(audio_files):
34
- if not audio_files:
35
- return ["Hladdu upp amk einni hljóðskrá"]
36
 
37
- # Extract file paths from Gradio Files component
38
- paths = []
39
- filenames = []
40
- for item in audio_files:
41
- if isinstance(item, tuple): # (name, path) tuple in newer Gradio
42
- filenames.append(item[0])
43
- paths.append(item[1])
44
- else:
45
- filenames.append(os.path.basename(item))
46
- paths.append(item)
47
-
48
- # BATCH PROCESSING – all files in one GPU call
49
- outputs = pipe(
50
- paths,
51
- chunk_length_s=30,
52
- batch_size=24, # 24–32 works perfectly on ZeroGPU A100
53
- return_timestamps=False
54
- )
55
 
56
- # Ensure outputs is always a list
57
- if isinstance(outputs, dict):
58
- outputs = [outputs]
 
59
 
60
- # Pair filename + transcription
61
- results = []
62
- for name, out in zip(filenames, outputs):
63
- text = out["text"] if isinstance(out, dict) else str(out)
64
- results.append(f"**{name}**\n{text.strip()}")
65
-
66
- return results
67
-
68
- # ──────────────────────────────────────────────
69
- # Gradio interface
70
- # ──────────────────────────────────────────────
71
- with gr.Blocks(title="Íslenskt Whisper – Batch") as demo:
72
- gr.Markdown("# Íslenskt Whisper – Mjög hratt batch mode")
73
- gr.Markdown("Hladdu upp **mörgum** skrám í einu (allt að 15 × 5 mín) → allt keyrir samtímis á GPU!")
74
-
75
- file_input = gr.Files(
76
- label="Hladdu upp mp3/wav skrám (margar í einu)",
77
- file_count="multiple",
78
- type="filepath"
79
- )
80
 
81
- btn = gr.Button("Transcribe allar skrár (batch)", variant="primary", size="lg")
 
 
82
 
83
- output_gallery = gr.Markdown() # We use Markdown so we can show filenames nicely
 
 
 
 
 
 
 
 
84
 
 
85
  btn.click(
86
- transcribe_batch,
87
- inputs=file_input,
88
- outputs=output_gallery
89
  )
90
 
91
- # Login protection (username: beta | password: beta2025)
92
- demo.launch(auth=("beta", "beta2025"), share=False)
 
1
+ # app.py – Single file + player + countdown timer (ZeroGPU perfect)
 
 
2
  import os
3
  import gradio as gr
4
  import spaces
 
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
+ print("Hleð Whisper módelinu...")
10
 
 
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=MODEL_NAME,
14
  torch_dtype="auto",
15
+ device="cuda",
16
  token=os.getenv("HF_TOKEN")
17
  )
18
 
19
+ # Fix old Whisper checkpoints (required once)
20
+ if not hasattr(pipe.model.generation_config, "lang_to_id") or pipe.model.generation_config.lang_to_id is None:
21
+ pipe.model.generation_config.lang_to_id = {"is": 50259}
 
22
  pipe.model.generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
23
  pipe.model.generation_config.forced_decoder_ids = None
 
 
 
 
 
 
 
 
24
 
25
+ print("Módel tilbúið!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ @spaces.GPU(duration=180) # 3 mínútur nóg
28
+ def transcribe_single(audio_path):
29
+ if not audio_path:
30
+ return None, "Hladdu upp hljóðskrá", "00:00"
31
 
32
+ result = pipe(audio_path, chunk_length_s=30, batch_size=8)
33
+ text = result["text"].strip()
34
+ return audio_path, text, None # None clears the timer when done
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ with gr.Blocks(title="Íslenskt Whisper") as demo:
37
+ gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
38
+ gr.Markdown("Hladdu upp einni hljóðskrá (allt að 5 mín) → smelltu á Transcribe")
39
 
40
+ with gr.Row():
41
+ audio_in = gr.Audio(label="Hljóðskrá", type="filepath", waveform=True)
42
+
43
+ btn = gr.Button("Transcribe", variant="primary", size="lg")
44
+
45
+ with gr.Row():
46
+ timer = gr.Timer(180, label="Tími eftir á GPU (sek)", active=True, visible=True)
47
+
48
+ output = gr.Textbox(label="Útskrift", lines=20)
49
 
50
+ # Click → transcribe + stop timer when finished
51
  btn.click(
52
+ transcribe_single,
53
+ inputs=audio_in,
54
+ outputs=[audio_in, output, timer]
55
  )
56
 
57
+ demo.launch(auth=("beta", "beta2025"))