palli23 committed on
Commit
168cab1
·
1 Parent(s): 2179696

ZeroGPU SAFE – 3 mín hljóð án timeout

Browse files
Files changed (1) hide show
  1. app.py +26 -27
app.py CHANGED
@@ -1,17 +1,29 @@
1
- # app.py – Virkar á ZeroGPU allt 3 mínútur (180 sek)
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
6
  import numpy as np
 
7
 
8
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
9
 
10
- @spaces.GPU
11
- def transcribe_long(audio_path):
12
  if not audio_path:
13
  return "Hladdu upp hljóðskrá"
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Hlaða ASR á GPU (cached)
16
  pipe = pipeline(
17
  "automatic-speech-recognition",
@@ -20,35 +32,22 @@ def transcribe_long(audio_path):
20
  token=os.getenv("HF_TOKEN")
21
  )
22
 
23
- # Chunkar – 30 sek + 2 sek overlap (besta niðurstaðan fyrir Whisper-small)
24
- chunk_length_s = 30
25
- stride_length_s = 2
26
-
27
- result = pipe(
28
- audio_path,
29
- chunk_length_s=chunk_length_s,
30
- stride_length_s=(stride_length_s, stride_length_s),
31
- return_timestamps=False,
32
- batch_size=8
33
- )
34
 
35
- return result["text"]
36
 
37
- # Gradio – fallegt og tilbúið fyrir 3 mín klippur
38
- with gr.Blocks(title="Íslenskt ASR – 3 mínútur") as demo:
39
- gr.Markdown("# Íslenskt ASR – ZeroGPU (A100/T4)")
40
- gr.Markdown("**Styður allt að 3 mínútna hljóðklippur · ~4 % WER · 25–45 sek transcribe**")
41
-
42
- audio = gr.Audio(
43
- type="filepath",
44
- label="Hladdu upp .mp3 / .wav / .m4a (allt að 3 mín)"
45
- )
46
 
 
47
  btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
48
  out = gr.Textbox(lines=30, label="Útskrift")
49
 
50
- btn.click(transcribe_long, inputs=audio, outputs=out)
51
-
52
- gr.Markdown("© 2025 – Einkaeign")
53
 
54
  demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – ZeroGPU SAFE – 3 mín hljóð án "GPU task aborted"
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
6
  import numpy as np
7
+ import librosa
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
+ @spaces.GPU(duration=60) # ← MEST 60 sek – ZeroGPU leyfir
12
+ def transcribe_safe(audio_path):
13
  if not audio_path:
14
  return "Hladdu upp hljóðskrá"
15
 
16
+ # Hlaða hljóð og klippa í 20 sek chunkar (mjög öruggt)
17
+ audio, sr = librosa.load(audio_path, sr=16000)
18
+ chunk_len = 16000 * 20 # 20 sek
19
+ stride = 16000 * 2 # 2 sek overlap
20
+ chunks = []
21
+ for i in range(0, len(audio), chunk_len - stride):
22
+ chunk = audio[i:i + chunk_len]
23
+ if len(chunk) < 16000: # undir 1 sek → hætta
24
+ break
25
+ chunks.append(chunk)
26
+
27
  # Hlaða ASR á GPU (cached)
28
  pipe = pipeline(
29
  "automatic-speech-recognition",
 
32
  token=os.getenv("HF_TOKEN")
33
  )
34
 
35
+ full_text = ""
36
+ for idx, chunk in enumerate(chunks):
37
+ result = pipe(chunk, batch_size=8)
38
+ full_text += result["text"] + " "
 
 
 
 
 
 
 
39
 
40
+ return full_text.strip() or "Ekkert heyrt"
41
 
42
# Gradio UI: upload an audio clip (the label advertises up to 3 minutes),
# press the button, and read the transcription in the textbox.
with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")

    # type="filepath" hands the click handler a path rather than raw samples;
    # transcribe_safe loads the audio from that path itself.
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")

    btn.click(transcribe_safe, inputs=audio, outputs=out)

# Login credentials are read from the environment so they can be rotated
# without editing the source. The fallbacks keep the previous hard-coded
# values so existing deployments behave the same.
# NOTE(review): the fallback password is visible in the repository; set
# APP_AUTH_USER / APP_AUTH_PASSWORD and then remove the defaults.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)