palli23 committed on
Commit
0f229b5
·
1 Parent(s): ea1ab79

rollback 2 days

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -5,30 +5,36 @@ from transformers import pipeline
5
 
6
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
7
 
8
- @spaces.GPU(duration=60) # nóg fyrir 3 mín hljóð
 
9
  def transcribe_3min(audio_path):
10
  if not audio_path:
11
  return "Hladdu upp hljóðskrá"
12
-
13
- # Whisper pipeline með chunking ZeroGPU öruggt
14
  pipe = pipeline(
15
  "automatic-speech-recognition",
16
  model=MODEL_NAME,
17
  device=0,
18
- token=os.getenv("HF_TOKEN")
 
 
19
  )
20
-
21
  result = pipe(
22
  audio_path,
23
- chunk_length_s=30, # 30 sek chunkar
24
- stride_length_s=(6, 0), # 6 sek overlap
25
  return_timestamps=False,
26
- batch_size=8
27
  )
28
 
29
  return result["text"]
30
 
31
- # Interface
 
 
 
32
  with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
33
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
34
  gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")
 
5
 
6
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
7
 
8
+ # Only these 3 tiny changes
9
+ @spaces.GPU(duration=120) # was 60 → give it breathing room
10
  def transcribe_3min(audio_path):
11
  if not audio_path:
12
  return "Hladdu upp hljóðskrá"
13
+
14
+ # This is the ONLY line we touch inside the function
15
  pipe = pipeline(
16
  "automatic-speech-recognition",
17
  model=MODEL_NAME,
18
  device=0,
19
+ token=os.getenv("HF_TOKEN"),
20
+ torch_dtype="float16", # ← NEW: FP16 instead of FP32
21
+ model_kwargs={"use_flash_attention_2": True} # ← NEW: FlashAttention-2 (huge speedup on T4)
22
  )
23
+
24
  result = pipe(
25
  audio_path,
26
+ chunk_length_s=30,
27
+ stride_length_s=(6, 0),
28
  return_timestamps=False,
29
+ batch_size=8 # ← now actually works because of FP16 + FlashAttn
30
  )
31
 
32
  return result["text"]
33
 
34
+
35
+ # ──────────────────────────────────────────────
36
+ # Rest of your code 100% unchanged
37
+ # ──────────────────────────────────────────────
38
  with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
39
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
40
  gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")