Spaces:

palli23
/

ASR_API

Sleeping

App Files Files Community

palli23 committed 23 days ago

Commit

9648db0

verified ·

1 Parent(s): 9a5eb7a

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -55

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# app.py — Íslenskt ASR – 3 mínútur (ZeroGPU ready, refreshes forever)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
-# Block CUDA init in main process (ZeroGPU requirement)
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
 import gradio as gr
 import spaces
@@ -12,83 +10,62 @@ import torch
 import gc
 # ——————————————————————————————
-# Model loaded INSIDE GPU worker only (no global init)
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-@spaces.GPU(duration=180)  # Auto-refreshes GPU after 3 min idle
-def get_pipe():
-    # Load on CPU first (safe in main), move to GPU in worker
-    pipe_cpu = pipeline(
         "automatic-speech-recognition",
         model=MODEL_NAME,
-        torch_dtype="float16",  # Use dtype=torch.float16 if deprecated warning persists
-        device="cpu",           # KEY: CPU init to avoid lazy CUDA in main
         token=os.getenv("HF_TOKEN"),
     )
-    # Now in GPU worker: move to device=0
-    pipe_gpu = pipe_cpu.to("cuda")
-    del pipe_cpu  # Free CPU memory
-    return pipe_gpu
-# ——————————————————————————————
-# Transcription function (calls GPU only when needed)
-# ——————————————————————————————
-def transcribe_3min(audio_path):
-    if not audio_path:
-        return "Hlaðið upp hljóðskrá"
-    try:
-        # Get fresh pipe from GPU worker (loads/moves only here)
-        pipe = get_pipe()
-        result = pipe(
-            audio_path,
-            chunk_length_s=30,
-            stride_length_s=(6, 0),
-            batch_size=8,
-            return_timestamps=False,
-        )
-        # Memory cleanup (critical for ZeroGPU)
-        if "chunks" in result:
-            del result["chunks"]
-        gc.collect()
-        torch.cuda.empty_cache()
-        return result["text"]
-    except torch.cuda.OutOfMemoryError:
-        gc.collect()
-        torch.cuda.empty_cache()
-        return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
-    except Exception as e:
-        return f"Villa: {str(e)}"
 # ——————————————————————————————
 # UI — your original, unchanged
 # ——————————————————————————————
-with gr.Blocks() as demo:
     gr.Markdown("# Íslenskt ASR – 3 mínútur")
-    gr.Markdown("**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
     gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
     audio_in = gr.Audio(
         type="filepath",
         label="Hlaðið upp .mp3 / .wav (max 5 mín)"
     )
     btn = gr.Button("Transcribe", variant="primary", size="lg")
     output = gr.Textbox(lines=30, label="Útskrift")
     btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 # ——————————————————————————————
 # PUBLIC — NO LOGIN, NO PASSWORD
 # ——————————————————————————————
 demo.launch(
-    auth=None,
-    share=True,
     server_name="0.0.0.0",
     server_port=7860,
     show_error=True,

+# app.py — Íslenskt ASR – 3 mínútur (ZeroGPU: model is loaded inside the GPU worker)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gradio as gr
 import spaces
 import gc
 # ——————————————————————————————
+# Model loaded ONLY inside GPU worker (ZeroGPU safe)
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
+@spaces.GPU(duration=180)  # Ask ZeroGPU for a GPU slot of up to 180 s per call
+def transcribe_3min(audio_path):
+    """Transcribe one audio file with the fine-tuned Whisper model.
+
+    The pipeline is created inside this function, so the model is only
+    placed on the GPU while the ``@spaces.GPU``-decorated call is running.
+
+    Args:
+        audio_path: Path of the audio file to transcribe. A falsy value
+            (nothing uploaded) short-circuits with a prompt message.
+
+    Returns:
+        The transcribed text, or a short Icelandic message when no file
+        was supplied or the GPU ran out of memory.
+
+    Raises:
+        Exception: Any failure other than a CUDA out-of-memory error is
+            propagated unchanged to the caller, after cleanup has run.
+    """
+    if not audio_path:
+        return "Hlaðið upp hljóðskrá"
+    # Bound before the try so the finally clause can always delete it,
+    # even when pipeline() itself raises.
+    pipe = None
+    try:
+        pipe = pipeline(
+            "automatic-speech-recognition",
+            model=MODEL_NAME,
+            torch_dtype=torch.float16,
+            device=0,  # first CUDA device
+            token=os.getenv("HF_TOKEN"),
+        )
+        result = pipe(
+            audio_path,
+            chunk_length_s=30,
+            stride_length_s=(6, 0),
+            batch_size=8,
+            return_timestamps=False,
+        )
+        return result["text"]
+    except torch.cuda.OutOfMemoryError:
+        # Give the user an actionable message instead of a raw CUDA error.
+        return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
+    finally:
+        # Runs on success and on failure alike, so a failed call cannot
+        # leave the model or cached CUDA blocks behind.
+        del pipe
+        gc.collect()
+        torch.cuda.empty_cache()
 # ——————————————————————————————
 # UI — your original, unchanged
 # ——————————————————————————————
+# Build the single-page UI: heading, description, contact line, an audio
+# upload, a trigger button, and a textbox that receives the transcript.
+with gr.Blocks() as demo:  # removed 'theme=' (was causing error)
     gr.Markdown("# Íslenskt ASR – 3 mínútur")
+    gr.Markdown("**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
     gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
+    # type="filepath" makes the component pass a file path to the handler.
     audio_in = gr.Audio(
         type="filepath",
         label="Hlaðið upp .mp3 / .wav (max 5 mín)"
     )
     btn = gr.Button("Transcribe", variant="primary", size="lg")
     output = gr.Textbox(lines=30, label="Útskrift")
+    # Clicking the button calls transcribe_3min with the uploaded file and
+    # writes its return value into the output textbox.
     btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 # ——————————————————————————————
 # PUBLIC — NO LOGIN, NO PASSWORD
 # ——————————————————————————————
 demo.launch(
+    auth=None, # ← No login
+    share=True, # ← Public
     server_name="0.0.0.0",
     server_port=7860,
     show_error=True,