palli23 committed on
Commit
0918b24
·
1 Parent(s): 1170a88

diarization1Mæló

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -1,40 +1,53 @@
1
- # app.py – Whisper-small on ZeroGPU (PRO) – 0.2 RTF
2
  import os
3
  import gradio as gr
4
- import spaces # Required for @spaces.GPU
5
  from transformers import pipeline
 
 
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
# Ask ZeroGPU for a GPU (A100/T4 per the original note) while transcribe runs
@spaces.GPU
def transcribe(audio):
    """Transcribe one uploaded audio file with the Whisper pipeline.

    Args:
        audio: Path of the uploaded file as delivered by the Gradio audio
            component, or a falsy value when nothing was uploaded.

    Returns:
        The recognised text, a prompt asking for an upload when ``audio`` is
        falsy, or a "Villa: ..." message when recognition raises.
    """
    if not audio:
        return "Hladdu upp hljóðskrá"

    # The pipeline object is constructed on every call, targeting GPU 0.
    asr = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # UI boundary: report any failure as text instead of raising into Gradio.
    try:
        transcript = asr(audio)["text"]
    except Exception as err:
        return f"Villa: {err}"
    return transcript
 
28
 
29
# Minimal interface: upload a file, press the button, read the transcript
with gr.Blocks(title="Íslenskt ASR – ZeroGPU GPU") as demo:
    gr.Markdown("# Íslenskt ASR – ZeroGPU (A100/T4)")
    # 0.2 RTF on 90 s of audio is about 18 s, i.e. the 15–25 s range below
    # (the range separator had been lost, leaving "1525 sek").
    gr.Markdown("**Whisper-small · ~4 % WER · 0.2 RTF (15–25 sek fyrir 90 sek hljóð)**")

    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    out = gr.Textbox(lines=25, label="Útskrift")

    # Button click sends the uploaded file path to transcribe and shows the text
    btn.click(transcribe, audio, out)

# NOTE(review): the login is hard-coded in source; consider reading it from a
# Space secret / environment variable instead.
demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – ZeroGPU SAFE – 3 mín hljóð án "GPU task aborted"
2
  import os
3
  import gradio as gr
4
+ import spaces
5
  from transformers import pipeline
6
+ import numpy as np
7
+ import librosa
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
@spaces.GPU(duration=60)  # 60 s GPU budget; the original note calls this the ZeroGPU maximum
def transcribe_safe(audio_path):
    """Transcribe an uploaded audio file in overlapping 20-second windows.

    The file is decoded and resampled to 16 kHz with librosa and then passed
    to the ASR pipeline with ``chunk_length_s``/``stride_length_s`` set, so
    the pipeline does the windowing and reconciles the overlapping regions
    itself. Concatenating the transcripts of manually overlapped chunks
    would repeat the words that fall inside every overlap.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            when nothing was uploaded.

    Returns:
        The transcript; "Hladdu upp hljóðskrá" when no file was given; or
        "Ekkert heyrt" when the clip is shorter than one second or the
        model produced no text.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    sample_rate = 16000
    audio, _ = librosa.load(audio_path, sr=sample_rate)

    # Clips under one second are not transcribed; report "nothing heard".
    if len(audio) < sample_rate:
        return "Ekkert heyrt"

    # Build the ASR pipeline on GPU 0.
    # NOTE(review): the pipeline object is rebuilt on every call; confirm
    # whether a module-level pipeline is viable on ZeroGPU before hoisting it.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # 20 s windows with 2 s of stride context; windows are batched 8 at a time.
    result = pipe(
        {"raw": audio, "sampling_rate": sample_rate},
        chunk_length_s=20,
        stride_length_s=2,
        batch_size=8,
    )

    return result["text"].strip() or "Ekkert heyrt"
41
 
42
# Gradio UI sized for clips of up to three minutes
with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    # Expected processing time, matching the button label below (the range
    # separator had been lost, leaving "2545 sek").
    gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")

    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")

    # Button click sends the uploaded file path to transcribe_safe and shows the text
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# NOTE(review): the login is hard-coded in source; consider reading it from a
# Space secret / environment variable instead.
demo.launch(auth=("beta", "beta2025"))