palli23 committed on
Commit
1b09acf
·
1 Parent(s): 0918b24

diarization1Mæló

Browse files
Files changed (1) hide show
  1. app.py +18 -28
app.py CHANGED
@@ -1,30 +1,17 @@
1
- # app.py – ZeroGPU SAFE – 3 mín hljóð án "GPU task aborted"
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
6
- import numpy as np
7
- import librosa
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
11
- @spaces.GPU(duration=60) # MEST 60 sek – ZeroGPU leyfir
12
- def transcribe_safe(audio_path):
13
  if not audio_path:
14
  return "Hladdu upp hljóðskrá"
15
 
16
- # Hlaða hljóð og klippa í 20 sek chunkar (mjög öruggt)
17
- audio, sr = librosa.load(audio_path, sr=16000)
18
- chunk_len = 16000 * 20 # 20 sek
19
- stride = 16000 * 2 # 2 sek overlap
20
- chunks = []
21
- for i in range(0, len(audio), chunk_len - stride):
22
- chunk = audio[i:i + chunk_len]
23
- if len(chunk) < 16000: # undir 1 sek → hætta
24
- break
25
- chunks.append(chunk)
26
-
27
- # Hlaða ASR á GPU (cached)
28
  pipe = pipeline(
29
  "automatic-speech-recognition",
30
  model=MODEL_NAME,
@@ -32,22 +19,25 @@ def transcribe_safe(audio_path):
32
  token=os.getenv("HF_TOKEN")
33
  )
34
 
35
- full_text = ""
36
- for idx, chunk in enumerate(chunks):
37
- result = pipe(chunk, batch_size=8)
38
- full_text += result["text"] + " "
 
 
 
39
 
40
- return full_text.strip() or "Ekkert heyrt"
41
 
42
- # Gradio – fallegt og tilbúið fyrir 3 mín
43
- with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
44
- gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
45
- gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")
46
 
47
  audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
48
- btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
49
  out = gr.Textbox(lines=30, label="Útskrift")
50
 
51
- btn.click(transcribe_safe, inputs=audio, outputs=out)
52
 
53
  demo.launch(auth=("beta", "beta2025"))
 
1
+ # app.py – 3 mín hljóð (ZeroGPU virkur, ekkert diarization)
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
 
 
6
 
7
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
8
 
9
+ @spaces.GPU(duration=60) # nóg fyrir 3 mín hljóð
10
+ def transcribe_3min(audio_path):
11
  if not audio_path:
12
  return "Hladdu upp hljóðskrá"
13
 
14
+ # Whisper pipeline með chunking ZeroGPU öruggt
 
 
 
 
 
 
 
 
 
 
 
15
  pipe = pipeline(
16
  "automatic-speech-recognition",
17
  model=MODEL_NAME,
 
19
  token=os.getenv("HF_TOKEN")
20
  )
21
 
22
+ result = pipe(
23
+ audio_path,
24
+ chunk_length_s=30, # 30 sek chunkar
25
+ stride_length_s=(6, 0), # 6 sek overlap
26
+ return_timestamps=False,
27
+ batch_size=8
28
+ )
29
 
30
+ return result["text"]
31
 
32
# Interface: an audio upload widget, a trigger button, and a textbox that
# receives the transcript returned by transcribe_3min.
with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**Whisper-small · ~4 % WER · 20–45 sek transcribe á ZeroGPU**")

    # type="filepath" hands the handler a path on disk rather than raw samples.
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")

    # Event bindings must be declared while the Blocks context is still open.
    btn.click(transcribe_3min, inputs=audio, outputs=out)

# Login credentials can be overridden through the environment (the same
# mechanism the file already uses for HF_TOKEN). The previous hard-coded
# values remain as defaults so existing deployments behave the same.
# NOTE(review): the fallback password is still visible in source; set
# APP_AUTH_USER / APP_AUTH_PASSWORD as secrets and drop the defaults.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)