TextGeneratorSmall

Running on Zero

App Files Community

palli23 committed on Dec 5, 2025

Commit

a7eba16

verified ·

1 Parent(s): df45fb8

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -12

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py — Íslenskt ASR – ZeroGPU 100% stable (Dec 2025 final fixed version)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
@@ -10,13 +10,13 @@ import torch
 import gc
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-pipe = None  # Global pipeline
 @spaces.GPU(duration=180, max_batch_size=4)
 def get_or_refresh_pipeline():
     global pipe
-    # Check if pipeline is broken or GPU context died
     if pipe is not None:
         try:
             _ = pipe.model.device  # Quick health check
@@ -24,7 +24,8 @@ def get_or_refresh_pipeline():
             print("GPU context lost → rebuilding pipeline...")
             pipe = None
             gc.collect()
-            torch.cuda.empty_cache()
     if pipe is None:
         print("Loading Whisper model (cold start ~15-25s)...")
@@ -32,10 +33,11 @@ def get_or_refresh_pipeline():
             "automatic-speech-recognition",
             model=MODEL_NAME,
             torch_dtype=torch.float16,
-            device=0,
             token=os.getenv("HF_TOKEN"),
         )
-        torch.cuda.empty_cache()
     return pipe
@@ -43,10 +45,10 @@ def transcribe_3min(audio_path):
     if not audio_path:
         return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
-    global pipe  # ← Now declared at the very top of the function → no error!
     try:
-        current_pipe = get_or_refresh_pipeline()
         result = current_pipe(
             audio_path,
@@ -64,15 +66,17 @@ def transcribe_3min(audio_path):
             del result["chunks"]
         gc.collect()
-        torch.cuda.empty_cache()
         return text if text else "(ekkert tal greint)"
     except torch.cuda.OutOfMemoryError:
         print("OOM detected → forcing full pipeline reload")
-        pipe = None  # This is now allowed because global declared first
         gc.collect()
-        torch.cuda.empty_cache()
         return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
     except Exception as e:
@@ -97,7 +101,7 @@ with gr.Blocks(title="Íslenskt ASR") as demo:
     gr.Markdown("""
     ### Leiðbeiningar
-    - Fyrsta umritunin tekur lengur (model hleðst inn)
     - Eftir það: 5–15 sek fyrir 3 mín hljóð
     - Ef þú færð minnisvillu → bíddu öðruhvolf og prófaðu aftur
     """)

+# app.py — Íslenskt ASR – ZeroGPU Fixed (no CUDA init at startup, Dec 2025)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 import gc
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
+pipe = None  # Global pipeline – loaded ONLY inside @spaces.GPU
 @spaces.GPU(duration=180, max_batch_size=4)
 def get_or_refresh_pipeline():
     global pipe
+    # Check if pipeline is broken (now safe inside GPU worker)
     if pipe is not None:
         try:
             _ = pipe.model.device  # Quick health check
             print("GPU context lost → rebuilding pipeline...")
             pipe = None
             gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
     if pipe is None:
         print("Loading Whisper model (cold start ~15-25s)...")
             "automatic-speech-recognition",
             model=MODEL_NAME,
             torch_dtype=torch.float16,
+            device=0,  # CUDA init happens HERE, inside GPU worker
             token=os.getenv("HF_TOKEN"),
         )
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
     return pipe
     if not audio_path:
         return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
+    global pipe  # Safe now, since no CUDA at function level
     try:
+        current_pipe = get_or_refresh_pipeline()  # This triggers GPU context
         result = current_pipe(
             audio_path,
             del result["chunks"]
         gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         return text if text else "(ekkert tal greint)"
     except torch.cuda.OutOfMemoryError:
         print("OOM detected → forcing full pipeline reload")
+        pipe = None
         gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
     except Exception as e:
     gr.Markdown("""
     ### Leiðbeiningar
+    - Fyrsta umritunin tekur lengur (model hleðst inn á GPU)
     - Eftir það: 5–15 sek fyrir 3 mín hljóð
     - Ef þú færð minnisvillu → bíddu öðruhvolf og prófaðu aftur
     """)