palli23 commited on
Commit
1f8d8c7
·
verified ·
1 Parent(s): a7eba16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -40
app.py CHANGED
@@ -1,7 +1,9 @@
1
- # app.py — Íslenskt ASR – ZeroGPU Fixed (no CUDA init at startup, Dec 2025)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 
 
5
 
6
  import gradio as gr
7
  import spaces
@@ -10,47 +12,37 @@ import torch
10
  import gc
11
 
12
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
13
- pipe = None # Global pipeline – loaded ONLY inside @spaces.GPU
14
 
15
  @spaces.GPU(duration=180, max_batch_size=4)
16
- def get_or_refresh_pipeline():
17
- global pipe
 
 
 
 
 
18
 
19
- # Check if pipeline is broken (now safe inside GPU worker)
20
- if pipe is not None:
21
- try:
22
- _ = pipe.model.device # Quick health check
23
- except Exception:
24
- print("GPU context lost → rebuilding pipeline...")
25
- pipe = None
26
- gc.collect()
27
- if torch.cuda.is_available():
28
- torch.cuda.empty_cache()
29
-
30
- if pipe is None:
31
- print("Loading Whisper model (cold start ~15-25s)...")
32
  pipe = pipeline(
33
  "automatic-speech-recognition",
34
  model=MODEL_NAME,
35
  torch_dtype=torch.float16,
36
- device=0, # CUDA init happens HERE, inside GPU worker
37
  token=os.getenv("HF_TOKEN"),
38
  )
39
- if torch.cuda.is_available():
40
- torch.cuda.empty_cache()
41
-
42
- return pipe
43
-
44
- def transcribe_3min(audio_path):
45
- if not audio_path:
46
- return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
47
-
48
- global pipe # Safe now, since no CUDA at function level
49
-
50
- try:
51
- current_pipe = get_or_refresh_pipeline() # This triggers GPU context
52
 
53
- result = current_pipe(
 
 
 
 
 
 
 
 
 
54
  audio_path,
55
  chunk_length_s=30,
56
  stride_length_s=(6, 0),
@@ -61,10 +53,12 @@ def transcribe_3min(audio_path):
61
 
62
  text = result["text"].strip()
63
 
64
- # Clean up chunks if present
65
  if "chunks" in result:
66
  del result["chunks"]
67
 
 
 
68
  gc.collect()
69
  if torch.cuda.is_available():
70
  torch.cuda.empty_cache()
@@ -72,8 +66,6 @@ def transcribe_3min(audio_path):
72
  return text if text else "(ekkert tal greint)"
73
 
74
  except torch.cuda.OutOfMemoryError:
75
- print("OOM detected → forcing full pipeline reload")
76
- pipe = None
77
  gc.collect()
78
  if torch.cuda.is_available():
79
  torch.cuda.empty_cache()
@@ -87,7 +79,7 @@ with gr.Blocks(title="Íslenskt ASR") as demo:
87
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
88
  gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
89
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
90
- gr.Markdown("> Keyrt á **ZeroGPU** – fyrsta ræsing tekur 15–30 sek, síðan hröð")
91
 
92
  audio_in = gr.Audio(
93
  type="filepath",
@@ -97,13 +89,14 @@ with gr.Blocks(title="Íslenskt ASR") as demo:
97
  btn = gr.Button("Umrita", variant="primary", size="lg")
98
  output = gr.Textbox(lines=25, label="Texti")
99
 
100
- btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 
101
 
102
  gr.Markdown("""
103
  ### Leiðbeiningar
104
- - Fyrsta umritunin tekur lengur (model hleðst inn á GPU)
105
- Eftir það: 5–15 sek fyrir 3 mín hljóð
106
- - Ef þú færð minnisvillu → bíddu öðruhvolf og prófaðu aftur
107
  """)
108
 
109
  # ————————————————————— Launch —————————————————————
 
1
+ # app.py — Íslenskt ASR – ZeroGPU Fully Stateless Fix (Dec 2025)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
5
+ # Force CPU-only at import to prevent any lazy CUDA init
6
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
7
 
8
  import gradio as gr
9
  import spaces
 
12
  import gc
13
 
14
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
 
15
 
16
  @spaces.GPU(duration=180, max_batch_size=4)
17
+ def transcribe_3min_gpu(audio_path):
18
+ """
19
+ FULLY SELF-CONTAINED GPU FUNCTION – no globals, no prior CUDA touches.
20
+ Loads model fresh on CPU first, then moves to GPU INSIDE worker.
21
+ """
22
+ if not audio_path:
23
+ return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
24
 
25
+ try:
26
+ print("Loading Whisper model on CPU first (safe init)...")
27
+ # Load on CPU explicitly to avoid any CUDA during model download/init
 
 
 
 
 
 
 
 
 
 
28
  pipe = pipeline(
29
  "automatic-speech-recognition",
30
  model=MODEL_NAME,
31
  torch_dtype=torch.float16,
32
+ device="cpu", # KEY FIX: CPU first, no CUDA yet
33
  token=os.getenv("HF_TOKEN"),
34
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Now move to GPU – this happens INSIDE @spaces.GPU worker, safe!
37
+ print("Moving model to GPU...")
38
+ pipe.model = pipe.model.to("cuda")
39
+ pipe.device = "cuda"
40
+ if hasattr(pipe, 'model_decoder'):
41
+ pipe.model_decoder = pipe.model_decoder.to("cuda")
42
+
43
+ # Run inference
44
+ print("Running transcription...")
45
+ result = pipe(
46
  audio_path,
47
  chunk_length_s=30,
48
  stride_length_s=(6, 0),
 
53
 
54
  text = result["text"].strip()
55
 
56
+ # Cleanup chunks
57
  if "chunks" in result:
58
  del result["chunks"]
59
 
60
+ # Aggressive cleanup BEFORE returning
61
+ del pipe
62
  gc.collect()
63
  if torch.cuda.is_available():
64
  torch.cuda.empty_cache()
 
66
  return text if text else "(ekkert tal greint)"
67
 
68
  except torch.cuda.OutOfMemoryError:
 
 
69
  gc.collect()
70
  if torch.cuda.is_available():
71
  torch.cuda.empty_cache()
 
79
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
80
  gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
81
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
82
+ gr.Markdown("> Keyrt á **ZeroGPU** – hver umritun hleðst nýtt (15–30 sek), en örugg og stöðug")
83
 
84
  audio_in = gr.Audio(
85
  type="filepath",
 
89
  btn = gr.Button("Umrita", variant="primary", size="lg")
90
  output = gr.Textbox(lines=25, label="Texti")
91
 
92
+ # Use the GPU-decorated function directly
93
+ btn.click(fn=transcribe_3min_gpu, inputs=audio_in, outputs=output)
94
 
95
  gr.Markdown("""
96
  ### Leiðbeiningar
97
+ - Hver umritun hleðst módelinu nýtt á GPU (ZeroGPU regla)
98
+ - Tími: 15–30 sek (lengur en á venjulegu GPU, en lifir endalaust)
99
+ - Ef villa kemur → bíddu 10 sek og prófaðu aftur
100
  """)
101
 
102
  # ————————————————————— Launch —————————————————————