palli23 commited on
Commit
9a5eb7a
·
verified ·
1 Parent(s): 3b102fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -1,7 +1,9 @@
1
- # app.py — Íslenskt ASR – ZeroGPU ready (your original, just fixed for free tier)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
 
5
 
6
  import gradio as gr
7
  import spaces
@@ -10,35 +12,36 @@ import torch
10
  import gc
11
 
12
  # ——————————————————————————————
13
- # Model loaded ONLY when needed (ZeroGPU rule)
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
 
17
- @spaces.GPU(duration=180) # This keeps it alive + refreshes automatically
18
  def get_pipe():
19
- return pipeline(
 
20
  "automatic-speech-recognition",
21
  model=MODEL_NAME,
22
- torch_dtype="float16",
23
- device=0, # GPU inside the worker
24
  token=os.getenv("HF_TOKEN"),
25
  )
 
 
 
 
26
 
27
- # Global pipe — will be created on first use
28
- pipe = None
29
-
30
- # — Your original transcription function (unchanged except tiny safety) —
31
  def transcribe_3min(audio_path):
32
- global pipe
33
  if not audio_path:
34
  return "Hlaðið upp hljóðskrá"
35
-
36
- # Re-create pipe if something went wrong (OOM, crash, etc.)
37
- if pipe is None:
38
- print("Loading model (first use or refresh)...")
39
- pipe = get_pipe()
40
-
41
  try:
 
 
 
42
  result = pipe(
43
  audio_path,
44
  chunk_length_s=30,
@@ -46,26 +49,26 @@ def transcribe_3min(audio_path):
46
  batch_size=8,
47
  return_timestamps=False,
48
  )
49
- # Clean up memory aggressively (critical on ZeroGPU)
 
50
  if "chunks" in result:
51
  del result["chunks"]
52
  gc.collect()
53
  torch.cuda.empty_cache()
54
-
55
  return result["text"]
56
-
57
  except torch.cuda.OutOfMemoryError:
58
- print("OOM → reloading model next run")
59
- pipe = None
60
  gc.collect()
61
  torch.cuda.empty_cache()
62
- return "Of mikið minni bíddu 10 sek og prófaðu aftur"
63
-
64
  except Exception as e:
65
- pipe = None # Force reload next time
66
  return f"Villa: {str(e)}"
67
 
68
- # — Your original UI — 100% unchanged —
 
 
69
  with gr.Blocks() as demo:
70
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
71
  gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
@@ -80,7 +83,9 @@ with gr.Blocks() as demo:
80
 
81
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
82
 
83
- # — Public launch —
 
 
84
  demo.launch(
85
  auth=None,
86
  share=True,
 
1
+ # app.py — Íslenskt ASR – 3 mínútur (ZeroGPU ready, refreshes forever)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
5
+ # Block CUDA init in main process (ZeroGPU requirement)
6
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
7
 
8
  import gradio as gr
9
  import spaces
 
12
  import gc
13
 
14
  # ——————————————————————————————
15
+ # Model loaded INSIDE GPU worker only (no global init)
16
  # ——————————————————————————————
17
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
18
 
19
+ @spaces.GPU(duration=180) # Grants up to 180 s of GPU time per call (max allocation, not an idle-refresh timer)
20
  def get_pipe():
21
+ # Load on CPU first (safe in main), move to GPU in worker
22
+ pipe_cpu = pipeline(
23
  "automatic-speech-recognition",
24
  model=MODEL_NAME,
25
+ torch_dtype="float16", # Use dtype=torch.float16 if deprecated warning persists
26
+ device="cpu", # KEY: CPU init to avoid lazy CUDA in main
27
  token=os.getenv("HF_TOKEN"),
28
  )
29
+ # Now in GPU worker: move to CUDA — NOTE(review): transformers Pipeline may not expose .to(); confirm, else move pipe_cpu.model to "cuda" instead
30
+ pipe_gpu = pipe_cpu.to("cuda")
31
+ del pipe_cpu # Free CPU memory
32
+ return pipe_gpu
33
 
34
+ # ——————————————————————————————
35
+ # Transcription function (calls GPU only when needed)
36
+ # ——————————————————————————————
 
37
  def transcribe_3min(audio_path):
 
38
  if not audio_path:
39
  return "Hlaðið upp hljóðskrá"
40
+
 
 
 
 
 
41
  try:
42
+ # Get fresh pipe from GPU worker (loads/moves only here)
43
+ pipe = get_pipe()
44
+
45
  result = pipe(
46
  audio_path,
47
  chunk_length_s=30,
 
49
  batch_size=8,
50
  return_timestamps=False,
51
  )
52
+
53
+ # Memory cleanup (critical for ZeroGPU)
54
  if "chunks" in result:
55
  del result["chunks"]
56
  gc.collect()
57
  torch.cuda.empty_cache()
58
+
59
  return result["text"]
60
+
61
  except torch.cuda.OutOfMemoryError:
 
 
62
  gc.collect()
63
  torch.cuda.empty_cache()
64
+ return "Of mikið minni notað bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
65
+
66
  except Exception as e:
 
67
  return f"Villa: {str(e)}"
68
 
69
+ # ——————————————————————————————
70
+ # UI — your original, unchanged
71
+ # ——————————————————————————————
72
  with gr.Blocks() as demo:
73
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
74
  gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
 
83
 
84
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
85
 
86
+ # ——————————————————————————————
87
+ # PUBLIC — NO LOGIN, NO PASSWORD
88
+ # ——————————————————————————————
89
  demo.launch(
90
  auth=None,
91
  share=True,