Spaces:

palli23
/

ASR_API

Running on Zero

App Files Files Community

palli23 committed on 27 days ago

Commit

45c12a4

verified ·

1 Parent(s): 9f7f868

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -36

app.py CHANGED Viewed

@@ -1,68 +1,116 @@
-# app.py — Íslenskt ASR – 3 mínútur (public, no login, with contact)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gradio as gr
 import spaces
 from transformers import pipeline
 # ——————————————————————————————
-# Model loaded ONCE at startup (global)
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-@spaces.GPU(duration=180)
-def get_pipe():
-    return pipeline(
-        "automatic-speech-recognition",
-        model=MODEL_NAME,
-        torch_dtype="float16",
-        device=0,
-        token=os.getenv("HF_TOKEN"),
-    )
-pipe = get_pipe()
 # ——————————————————————————————
-# Transcription function
 # ——————————————————————————————
 def transcribe_3min(audio_path):
     if not audio_path:
-        return "Hladdu upp hljóðskrá"
-    result = pipe(
-        audio_path,
-        chunk_length_s=30,
-        stride_length_s=(6, 0),
-        batch_size=8,
-        return_timestamps=False,
-    )
-    return result["text"]
 # ——————————————————————————————
-# UI — only added your email, nothing else changed
 # ——————————————————————————————
-with gr.Blocks() as demo:  # ← removed 'theme=' (was causing error)
-    gr.Markdown("# Íslenskt ASR – 3 mínútur")
-    gr.Markdown("**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
     gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
     audio_in = gr.Audio(
         type="filepath",
-        label="Hladdu upp .mp3 / .wav (max 5 mín)"
     )
-    btn = gr.Button("Transcribe", variant="primary", size="lg")
-    output = gr.Textbox(lines=30, label="Útskrift")
     btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 # ——————————————————————————————
-# PUBLIC — NO LOGIN, NO PASSWORD
 # ——————————————————————————————
 demo.launch(
-    auth=None,                    # ← No login
-    share=True,                   # ← Public
     server_name="0.0.0.0",
     server_port=7860,
     show_error=True,

+# app.py — Íslenskt ASR – ZeroGPU Optimized + Auto-Refresh + Memory Safe
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
 import gradio as gr
 import spaces
 from transformers import pipeline
+import torch
+import gc
 # ——————————————————————————————
+# Global pipeline — will be created on first call, rebuilt if GPU dies
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
 # Module-level cache for the ASR pipeline; stays None until the first request.
 pipe = None
 # duration=180 is meant as a three-minute ceiling per call (per the original
 # author's note). NOTE(review): confirm `max_batch_size` is honoured by spaces.GPU.
 @spaces.GPU(duration=180, max_batch_size=4)
 def get_or_refresh_pipeline():
     """Return the shared Whisper pipeline, building or rebuilding it on demand.
 
     The pipeline is cached in the module-level ``pipe``. If a cached pipeline
     exists, reading ``pipe.model.device`` is used as a health probe; any
     exception from that probe discards the cached object and a new pipeline
     is loaded.
 
     NOTE(review): the probe is only an attribute read -- confirm it really
     fails when the GPU context is gone. Also confirm that, under ZeroGPU, the
     module-level cache written here is still visible on the next request.
 
     Returns:
         The pipeline object produced by ``transformers.pipeline``.
     """
     global pipe
     cached = pipe
     if cached is not None:
         try:
             # Health probe: any exception means the cached pipeline is unusable.
             getattr(cached.model, "device")
         except Exception:
             print("GPU context lost → rebuilding pipeline...")
             # Drop every reference before collecting so the old model can be freed.
             pipe = cached = None
             gc.collect()
             torch.cuda.empty_cache()
     if cached is not None:
         return cached
     print("Loading Whisper model (cold start ~20s)...")
     load_options = {
         "model": MODEL_NAME,
         "torch_dtype": torch.float16,
         "device": 0,  # first CUDA device
         "token": os.getenv("HF_TOKEN"),  # optional, only needed for private models
     }
     pipe = pipeline("automatic-speech-recognition", **load_options)
     # Release allocator blocks that were only needed while loading.
     torch.cuda.empty_cache()
     return pipe
 # ——————————————————————————————
+# Transcription function — super memory-safe
 # ——————————————————————————————
 def transcribe_3min(audio_path):
     """Transcribe an uploaded audio file to Icelandic text.
 
     Args:
         audio_path: Filesystem path of the audio file, or an empty/None value
             when nothing was uploaded.
 
     Returns:
         The stripped transcription text, or a user-facing (Icelandic) message
         when no file was given, the GPU ran out of memory, or any other
         error occurred. Exceptions are never propagated to the caller.
     """
     # The declaration must come before any use of the name in this function;
     # placing it inside the except-branch after an assignment is a SyntaxError.
     global pipe
     if not audio_path:
         return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
     try:
         # Local alias so the module-level cache is only rebound on purpose below.
         asr = get_or_refresh_pipeline()
         result = asr(
             audio_path,
             chunk_length_s=30,
             stride_length_s=(6, 0),
             batch_size=8,
             return_timestamps=False,
             generate_kwargs={"language": "is", "task": "transcribe"},  # force Icelandic
         )
         text = result["text"].strip()
         # Cleanup after every inference: drop per-chunk data if present, then
         # release Python garbage and cached CUDA blocks.
         result.pop("chunks", None)
         del result
         gc.collect()
         torch.cuda.empty_cache()
         return text
     except torch.cuda.OutOfMemoryError:
         print("OOM caught → forcing full pipeline reload on next call")
         # Clearing the cache makes get_or_refresh_pipeline() load a new pipeline.
         pipe = None
         gc.collect()
         torch.cuda.empty_cache()
         return "Villa: Of mikið minni notað – endurhleð appinu og prófið aftur (ZeroGPU takmörkun)"
     except Exception as e:
         # UI boundary: report the failure in the output box instead of raising.
         return f"Óvænt villa: {e}"
 # ——————————————————————————————
+# Gradio UI – clean and reliable
 # ——————————————————————————————
 # Page layout: header lines, audio input, trigger button, result box, usage notes.
 with gr.Blocks(title="Íslenskt ASR") as demo:
     # Header: title, model blurb, contact address and the ZeroGPU notice,
     # rendered in this order.
     for _intro_line in (
         "# Íslenskt ASR – 3–5 mín hljóð",
         "**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**",
         "**Hafa samband:** pallinr1@protonmail.com",
         "> Keyrt á **ZeroGPU** – endurræsing eftir 3 mín óvirkni (eðlilegt)",
     ):
         gr.Markdown(_intro_line)
     # The audio component hands the handler a file path; upload and microphone
     # are both enabled as sources.
     audio_in = gr.Audio(
         sources=["upload", "microphone"],
         type="filepath",
         label="Hlaðið upp .mp3 eða .wav (allt að 5 mínútur)",
     )
     btn = gr.Button(value="Umrita", variant="primary", size="lg")
     output = gr.Textbox(
         lines=25,
         label="Texti",
         placeholder="Hljóðtextinn birtist hér...",
     )
     # Clicking the button sends the audio path to the transcriber and shows its text.
     btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
     # Usage notes shown below the result box.
     gr.Markdown(
         "\n".join(
             (
                 "### Athugasemdir",
                 "- ZeroGPU endurræsist sjálfkrafa → fyrsta umritun tekur 15–30 sek",
                 "- Eftir það mjög hröð (~5–15 sek fyrir 3 mín hljóð)",
                 "- Ef þú sérð 'Of mikið minni' → bíddu 10 sek og prófaðu aftur",
             )
         )
     )
 # ——————————————————————————————
+# Launch – public, no login
 # ——————————————————————————————
 demo.launch(
+    auth=None,
+    share=True,
     server_name="0.0.0.0",
     server_port=7860,
     show_error=True,