palli23 commited on
Commit
a7eba16
·
verified ·
1 Parent(s): df45fb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Íslenskt ASR – ZeroGPU 100% stable (Dec 2025 final fixed version)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
@@ -10,13 +10,13 @@ import torch
10
  import gc
11
 
12
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
13
- pipe = None # Global pipeline
14
 
15
  @spaces.GPU(duration=180, max_batch_size=4)
16
  def get_or_refresh_pipeline():
17
  global pipe
18
 
19
- # Check if pipeline is broken or GPU context died
20
  if pipe is not None:
21
  try:
22
  _ = pipe.model.device # Quick health check
@@ -24,7 +24,8 @@ def get_or_refresh_pipeline():
24
  print("GPU context lost → rebuilding pipeline...")
25
  pipe = None
26
  gc.collect()
27
- torch.cuda.empty_cache()
 
28
 
29
  if pipe is None:
30
  print("Loading Whisper model (cold start ~15-25s)...")
@@ -32,10 +33,11 @@ def get_or_refresh_pipeline():
32
  "automatic-speech-recognition",
33
  model=MODEL_NAME,
34
  torch_dtype=torch.float16,
35
- device=0,
36
  token=os.getenv("HF_TOKEN"),
37
  )
38
- torch.cuda.empty_cache()
 
39
 
40
  return pipe
41
 
@@ -43,10 +45,10 @@ def transcribe_3min(audio_path):
43
  if not audio_path:
44
  return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
45
 
46
- global pipe # Now declared at the very top of the function → no error!
47
 
48
  try:
49
- current_pipe = get_or_refresh_pipeline()
50
 
51
  result = current_pipe(
52
  audio_path,
@@ -64,15 +66,17 @@ def transcribe_3min(audio_path):
64
  del result["chunks"]
65
 
66
  gc.collect()
67
- torch.cuda.empty_cache()
 
68
 
69
  return text if text else "(ekkert tal greint)"
70
 
71
  except torch.cuda.OutOfMemoryError:
72
  print("OOM detected → forcing full pipeline reload")
73
- pipe = None # This is now allowed because global declared first
74
  gc.collect()
75
- torch.cuda.empty_cache()
 
76
  return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
77
 
78
  except Exception as e:
@@ -97,7 +101,7 @@ with gr.Blocks(title="Íslenskt ASR") as demo:
97
 
98
  gr.Markdown("""
99
  ### Leiðbeiningar
100
- - Fyrsta umritunin tekur lengur (model hleðst inn)
101
  - Eftir það: 5–15 sek fyrir 3 mín hljóð
102
  - Ef þú færð minnisvillu → bíddu smástund og prófaðu aftur
103
  """)
 
1
+ # app.py — Íslenskt ASR – ZeroGPU Fixed (no CUDA init at startup, Dec 2025)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 
10
  import gc
11
 
12
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
13
+ pipe = None # Global pipeline – loaded ONLY inside @spaces.GPU
14
 
15
  @spaces.GPU(duration=180, max_batch_size=4)
16
  def get_or_refresh_pipeline():
17
  global pipe
18
 
19
+ # Check if pipeline is broken (now safe inside GPU worker)
20
  if pipe is not None:
21
  try:
22
  _ = pipe.model.device # Quick health check
 
24
  print("GPU context lost → rebuilding pipeline...")
25
  pipe = None
26
  gc.collect()
27
+ if torch.cuda.is_available():
28
+ torch.cuda.empty_cache()
29
 
30
  if pipe is None:
31
  print("Loading Whisper model (cold start ~15-25s)...")
 
33
  "automatic-speech-recognition",
34
  model=MODEL_NAME,
35
  torch_dtype=torch.float16,
36
+ device=0, # CUDA init happens HERE, inside GPU worker
37
  token=os.getenv("HF_TOKEN"),
38
  )
39
+ if torch.cuda.is_available():
40
+ torch.cuda.empty_cache()
41
 
42
  return pipe
43
 
 
45
  if not audio_path:
46
  return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
47
 
48
+ global pipe # Safe now, since no CUDA at function level
49
 
50
  try:
51
+ current_pipe = get_or_refresh_pipeline() # This triggers GPU context
52
 
53
  result = current_pipe(
54
  audio_path,
 
66
  del result["chunks"]
67
 
68
  gc.collect()
69
+ if torch.cuda.is_available():
70
+ torch.cuda.empty_cache()
71
 
72
  return text if text else "(ekkert tal greint)"
73
 
74
  except torch.cuda.OutOfMemoryError:
75
  print("OOM detected → forcing full pipeline reload")
76
+ pipe = None
77
  gc.collect()
78
+ if torch.cuda.is_available():
79
+ torch.cuda.empty_cache()
80
  return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
81
 
82
  except Exception as e:
 
101
 
102
  gr.Markdown("""
103
  ### Leiðbeiningar
104
+ - Fyrsta umritunin tekur lengur (model hleðst inn á GPU)
105
  - Eftir það: 5–15 sek fyrir 3 mín hljóð
106
  - Ef þú færð minnisvillu → bíddu smástund og prófaðu aftur
107
  """)