palli23 committed on
Commit
38b1c82
·
1 Parent(s): 0f229b5

rollback 2 days

Browse files
Files changed (1) hide show
  1. app.py +10 -14
app.py CHANGED
@@ -5,36 +5,32 @@ from transformers import pipeline
5
 
6
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
7
 
8
- # Only these 3 tiny changes
9
- @spaces.GPU(duration=120) # was 60 → give it breathing room
10
  def transcribe_3min(audio_path):
11
  if not audio_path:
12
  return "Hladdu upp hljóðskrá"
13
-
14
- # This is the ONLY line we touch inside the function
15
  pipe = pipeline(
16
  "automatic-speech-recognition",
17
  model=MODEL_NAME,
18
  device=0,
19
  token=os.getenv("HF_TOKEN"),
20
- torch_dtype="float16", # ← NEW: FP16 instead of FP32
21
- model_kwargs={"use_flash_attention_2": True} # ← NEW: FlashAttention-2 (huge speedup on T4)
22
  )
23
-
24
  result = pipe(
25
  audio_path,
26
- chunk_length_s=30,
27
- stride_length_s=(6, 0),
28
  return_timestamps=False,
29
- batch_size=8 # ← now actually works because of FP16 + FlashAttn
30
  )
31
 
32
  return result["text"]
33
 
34
-
35
- # ──────────────────────────────────────────────
36
- # Rest of your code 100% unchanged
37
- # ──────────────────────────────────────────────
38
  with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
39
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
40
  gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")
 
5
 
6
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
7
 
8
+ @spaces.GPU(duration=120) # nóg fyrir 3 mín hljóð
 
9
  def transcribe_3min(audio_path):
10
  if not audio_path:
11
  return "Hladdu upp hljóðskrá"
12
+
13
+ # Whisper pipeline með chunking ZeroGPU öruggt
14
  pipe = pipeline(
15
  "automatic-speech-recognition",
16
  model=MODEL_NAME,
17
  device=0,
18
  token=os.getenv("HF_TOKEN"),
19
+ torch_dtype="float16", # ← NEW: FP16 instead of FP32
20
+ # ← REMOVED: use_flash_attention_2 (causes TypeError on this model)
21
  )
22
+
23
  result = pipe(
24
  audio_path,
25
+ chunk_length_s=30, # 30 sek chunkar
26
+ stride_length_s=(6, 0), # 6 sek overlap
27
  return_timestamps=False,
28
+ batch_size=8
29
  )
30
 
31
  return result["text"]
32
 
33
+ # Interface
 
 
 
34
  with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
35
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
36
  gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")