palli23 committed on
Commit
7f5d9e6
·
verified ·
1 Parent(s): 45c12a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Íslenskt ASR – ZeroGPU Optimized + Auto-Refresh + Memory Safe
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
@@ -10,19 +10,18 @@ import torch
10
  import gc
11
 
12
  # ——————————————————————————————
13
- # Global pipeline will be created on first call, rebuilt if GPU dies
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
  pipe = None
17
 
18
- @spaces.GPU(duration=180, max_batch_size=4) # 3-minute safety net + small batches
19
  def get_or_refresh_pipeline():
20
  global pipe
21
 
22
- # If pipe exists but GPU ran out of memory → force rebuild
23
  if pipe is not None:
24
  try:
25
- # Quick health check
26
  _ = pipe.model.device
27
  except Exception:
28
  print("GPU context lost → rebuilding pipeline...")
@@ -31,21 +30,20 @@ def get_or_refresh_pipeline():
31
  torch.cuda.empty_cache()
32
 
33
  if pipe is None:
34
- print("Loading Whisper model (cold start ~20s)...")
35
  pipe = pipeline(
36
  "automatic-speech-recognition",
37
  model=MODEL_NAME,
38
  torch_dtype=torch.float16,
39
- device=0, # GPU 0
40
- token=os.getenv("HF_TOKEN"), # optional, only needed for private models
41
  )
42
- # Force aggressive memory cleanup after load
43
  torch.cuda.empty_cache()
44
 
45
  return pipe
46
 
47
  # ——————————————————————————————
48
- # Transcription function — super memory-safe
49
  # ——————————————————————————————
50
  def transcribe_3min(audio_path):
51
  if not audio_path:
@@ -60,53 +58,60 @@ def transcribe_3min(audio_path):
60
  stride_length_s=(6, 0),
61
  batch_size=8,
62
  return_timestamps=False,
63
- generate_kwargs={"language": "is", "task": "transcribe"}, # force Icelandic
64
  )
65
 
66
- # Aggressive cleanup after every inference
67
- del result["chunks"] if "chunks" in result else None
 
 
 
 
 
68
  gc.collect()
69
  torch.cuda.empty_cache()
70
 
71
- return result["text"].strip()
72
 
73
  except torch.cuda.OutOfMemoryError:
74
- print("OOM caught → forcing full pipeline reload on next call")
75
  global pipe
76
  pipe = None
77
  gc.collect()
78
  torch.cuda.empty_cache()
79
- return "Villa: Of mikið minni notað – endurhleð appinu og prófið aftur (ZeroGPU takmörkun)"
80
 
81
  except Exception as e:
82
- return f"Óvænt villa: {str(e)}"
83
 
84
  # ——————————————————————————————
85
- # Gradio UI – clean and reliable
86
  # ——————————————————————————————
87
  with gr.Blocks(title="Íslenskt ASR") as demo:
88
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
89
- gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
90
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
91
- gr.Markdown("> Keyrt á **ZeroGPU** – endurræsing eftir 3 mín óvirkni (eðlilegt)")
92
 
93
  audio_in = gr.Audio(
94
  type="filepath",
95
- label="Hlaðið upp .mp3 eða .wav (allt að 5 mínútur)",
96
  sources=["upload", "microphone"]
97
  )
98
  btn = gr.Button("Umrita", variant="primary", size="lg")
99
- output = gr.Textbox(lines=25, label="Texti", placeholder="Hljóðtextinn birtist hér...")
100
 
101
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
102
 
103
- gr.Markdown("### Athugasemdir\n"
104
- "- ZeroGPU endurræsist sjálfkrafa → fyrsta umritun tekur 15–30 sek\n"
105
- "- Eftir það mjög hröð (~5–15 sek fyrir 3 mín hljóð)\n"
106
- "- Ef þú sérð 'Of mikið minni' bíddu 10 sek og prófaðu aftur")
 
 
107
 
108
  # ——————————————————————————————
109
- # Launch – public, no login
110
  # ——————————————————————————————
111
  demo.launch(
112
  auth=None,
 
1
+ # app.py — Íslenskt ASR – ZeroGPU Optimized (fixed + battle-tested Dec 2025)
2
  import os
3
  os.environ["OMP_NUM_THREADS"] = "1"
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 
10
  import gc
11
 
12
  # ——————————————————————————————
13
+ # Global pipeline lazy load + auto-rebuild on OOM
14
  # ——————————————————————————————
15
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
16
  pipe = None
17
 
18
+ @spaces.GPU(duration=180, max_batch_size=4)
19
  def get_or_refresh_pipeline():
20
  global pipe
21
 
22
+ # Rebuild if GPU context died
23
  if pipe is not None:
24
  try:
 
25
  _ = pipe.model.device
26
  except Exception:
27
  print("GPU context lost → rebuilding pipeline...")
 
30
  torch.cuda.empty_cache()
31
 
32
  if pipe is None:
33
+ print("Loading Whisper model (cold start ~15-25s)...")
34
  pipe = pipeline(
35
  "automatic-speech-recognition",
36
  model=MODEL_NAME,
37
  torch_dtype=torch.float16,
38
+ device=0,
39
+ token=os.getenv("HF_TOKEN"),
40
  )
 
41
  torch.cuda.empty_cache()
42
 
43
  return pipe
44
 
45
  # ——————————————————————————————
46
+ # Transcription memory-safe + fixed del bug
47
  # ——————————————————————————————
48
  def transcribe_3min(audio_path):
49
  if not audio_path:
 
58
  stride_length_s=(6, 0),
59
  batch_size=8,
60
  return_timestamps=False,
61
+ generate_kwargs={"language": "is", "task": "transcribe"},
62
  )
63
 
64
+ text = result["text"].strip()
65
+
66
+ # Safely delete chunks if they exist (fixed!)
67
+ if "chunks" in result:
68
+ del result["chunks"]
69
+
70
+ # Aggressive cleanup
71
  gc.collect()
72
  torch.cuda.empty_cache()
73
 
74
+ return text if text else "(ekkert tal greint)"
75
 
76
  except torch.cuda.OutOfMemoryError:
77
+ print("OOM → forcing reload on next request")
78
  global pipe
79
  pipe = None
80
  gc.collect()
81
  torch.cuda.empty_cache()
82
+ return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
83
 
84
  except Exception as e:
85
+ return f"Villa: {str(e)}"
86
 
87
  # ——————————————————————————————
88
+ # UI
89
  # ——————————————————————————————
90
  with gr.Blocks(title="Íslenskt ASR") as demo:
91
  gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
92
+ gr.Markdown("**Whisper-small fínstillt á íslensku · mjög lágur WER**")
93
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
94
+ gr.Markdown("> Keyrt á **ZeroGPU** – fyrsta ræsing tekur 15–30 sek, síðan hröð")
95
 
96
  audio_in = gr.Audio(
97
  type="filepath",
98
+ label="Hlaðið upp .mp3 / .wav (allt að 5 mín)",
99
  sources=["upload", "microphone"]
100
  )
101
  btn = gr.Button("Umrita", variant="primary", size="lg")
102
+ output = gr.Textbox(lines=25, label="Texti")
103
 
104
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
105
 
106
+ gr.Markdown("""
107
+ ### Leiðbeiningar
108
+ - Fyrsta umritunin tekur lengur (model hleðst inn)
109
+ - Eftir það: 5–15 sek fyrir 3 mín hljóð
110
+ - Ef þú færð minnisvillu → bíddu öðruhvolf og prófaðu aftur
111
+ """)
112
 
113
  # ——————————————————————————————
114
+ # Launch
115
  # ——————————————————————————————
116
  demo.launch(
117
  auth=None,