palli23 commited on
Commit
9a5eb7a
·
verified ·
1 Parent(s): 3b102fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -1,7 +1,9 @@
1
- # app.py — Íslenskt ASR – ZeroGPU ready (your original, just fixed for free tier)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 
 
5
 
6
  import gradio as gr
7
  import spaces
@@ -10,35 +12,36 @@ import torch
10
  import gc
11
 
12
  # ——————————————————————————————
13
- # Model loaded ONLY when needed (ZeroGPU rule)
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
 
17
- @spaces.GPU(duration=180) # This keeps it alive + refreshes automatically
18
  def get_pipe():
19
- return pipeline(
 
20
  "automatic-speech-recognition",
21
  model=MODEL_NAME,
22
- torch_dtype="float16",
23
- device=0, # GPU inside the worker
24
  token=os.getenv("HF_TOKEN"),
25
  )
 
 
 
 
26
 
27
- # Global pipe — will be created on first use
28
- pipe = None
29
-
30
- # — Your original transcription function (unchanged except tiny safety) —
31
  def transcribe_3min(audio_path):
32
- global pipe
33
  if not audio_path:
34
  return "Hlaðið upp hljóðskrá"
35
-
36
- # Re-create pipe if something went wrong (OOM, crash, etc.)
37
- if pipe is None:
38
- print("Loading model (first use or refresh)...")
39
- pipe = get_pipe()
40
-
41
  try:
 
 
 
42
  result = pipe(
43
  audio_path,
44
  chunk_length_s=30,
@@ -46,26 +49,26 @@ def transcribe_3min(audio_path):
46
  batch_size=8,
47
  return_timestamps=False,
48
  )
49
- # Clean up memory aggressively (critical on ZeroGPU)
 
50
  if "chunks" in result:
51
  del result["chunks"]
52
  gc.collect()
53
  torch.cuda.empty_cache()
54
-
55
  return result["text"]
56
-
57
  except torch.cuda.OutOfMemoryError:
58
- print("OOM → reloading model next run")
59
- pipe = None
60
  gc.collect()
61
  torch.cuda.empty_cache()
62
- return "Of mikið minni bíddu 10 sek og prófaðu aftur"
63
-
64
  except Exception as e:
65
- pipe = None # Force reload next time
66
  return f"Villa: {str(e)}"
67
 
68
- # — Your original UI — 100% unchanged —
 
 
69
  with gr.Blocks() as demo:
70
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
71
  gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
@@ -80,7 +83,9 @@ with gr.Blocks() as demo:
80
 
81
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
82
 
83
- # — Public launch —
 
 
84
  demo.launch(
85
  auth=None,
86
  share=True,
 
1
+ # app.py — Íslenskt ASR – 3 mínútur (ZeroGPU ready, refreshes forever)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
5
+ # Block CUDA init in main process (ZeroGPU requirement)
6
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
7
 
8
  import gradio as gr
9
  import spaces
 
12
  import gc
13
 
14
  # ——————————————————————————————
15
+ # Model loaded INSIDE GPU worker only (no global init)
16
  # ——————————————————————————————
17
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
18
 
19
+ @spaces.GPU(duration=180) # Grants up to 180 s of GPU time per call (max allocation, not an idle-refresh timer)
20
  def get_pipe():
21
+ # Load on CPU first (safe in main), move to GPU in worker
22
+ pipe_cpu = pipeline(
23
  "automatic-speech-recognition",
24
  model=MODEL_NAME,
25
+ torch_dtype="float16", # Use dtype=torch.float16 if deprecated warning persists
26
+ device="cpu", # KEY: CPU init to avoid lazy CUDA in main
27
  token=os.getenv("HF_TOKEN"),
28
  )
29
+ # Now in GPU worker: move to CUDA — NOTE(review): transformers Pipeline may not expose .to(); confirm, else move pipe_cpu.model to "cuda" instead
30
+ pipe_gpu = pipe_cpu.to("cuda")
31
+ del pipe_cpu # Free CPU memory
32
+ return pipe_gpu
33
 
34
+ # ——————————————————————————————
35
+ # Transcription function (calls GPU only when needed)
36
+ # ——————————————————————————————
 
37
  def transcribe_3min(audio_path):
 
38
  if not audio_path:
39
  return "Hlaðið upp hljóðskrá"
40
+
 
 
 
 
 
41
  try:
42
+ # Get fresh pipe from GPU worker (loads/moves only here)
43
+ pipe = get_pipe()
44
+
45
  result = pipe(
46
  audio_path,
47
  chunk_length_s=30,
 
49
  batch_size=8,
50
  return_timestamps=False,
51
  )
52
+
53
+ # Memory cleanup (critical for ZeroGPU)
54
  if "chunks" in result:
55
  del result["chunks"]
56
  gc.collect()
57
  torch.cuda.empty_cache()
58
+
59
  return result["text"]
60
+
61
  except torch.cuda.OutOfMemoryError:
 
 
62
  gc.collect()
63
  torch.cuda.empty_cache()
64
+ return "Of mikið minni notað bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
65
+
66
  except Exception as e:
 
67
  return f"Villa: {str(e)}"
68
 
69
+ # ——————————————————————————————
70
+ # UI — your original, unchanged
71
+ # ——————————————————————————————
72
  with gr.Blocks() as demo:
73
  gr.Markdown("# Íslenskt ASR – 3 mínútur")
74
  gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
 
83
 
84
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
85
 
86
+ # ——————————————————————————————
87
+ # PUBLIC — NO LOGIN, NO PASSWORD
88
+ # ——————————————————————————————
89
  demo.launch(
90
  auth=None,
91
  share=True,