LargeSpjallSam

Sleeping

App Files Files Community

palli23 committed on Dec 5, 2025

Commit

3b102fc

verified ·

1 Parent(s): af5ceec

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -98

app.py CHANGED Viewed

@@ -1,132 +1,86 @@
-# app.py — Íslenskt ASR – ZeroGPU Fully Fixed (Dec 2025 – handles str audio + CUDA safe)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "garbage_collection_threshold:0.6,max_split_size_mb:128"
-# Disable CUDA visibility at startup to prevent main process init
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
 import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
 import gc
-import librosa  # For loading audio bytes if needed
-import io
-import soundfile as sf  # For writing temp files from bytes
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-@spaces.GPU(duration=180, max_batch_size=4)
-def transcribe_3min_gpu(audio_input):
-    """
-    Handles both filepath (str) and uploaded bytes/temp files.
-    Loads model on CPU first, moves to GPU inside worker.
-    """
-    if not audio_input:
-        return "Hlaðið upp hljóðskrá (mp3/wav, max 5 mín)"
-    # Handle str (filepath) vs bytes/tuple from Gradio upload
-    if isinstance(audio_input, str):
-        audio_path = audio_input
-    else:
-        # audio_input is tuple (filepath, tuple(bytes, sample_rate)) or just bytes
-        if isinstance(audio_input, tuple) and len(audio_input) > 0:
-            audio_path = audio_input[0]  # Temp filepath from upload
-        else:
-            # Fallback: write bytes to temp file if no path
-            if isinstance(audio_input, bytes):
-                audio_bytes = audio_input
-            else:
-                return "Ógild hljóðskrá – reyndu aftur"
-            # Assume 16kHz sample rate for Whisper (common fallback)
-            sample_rate = 16000
-            # Load with librosa if needed, but for simplicity write to temp
-            with io.BytesIO(audio_bytes) as audio_io:
-                # Use soundfile to write temp wav
-                with sf.SoundFile(audio_io, 'r') as f:
-                    data, sr = f.read(), f.samplerate
-                    if sr != 16000:
-                        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
-                # Write to temp file
-                audio_path = "/tmp/temp_audio.wav"
-                sf.write(audio_path, data, 16000)
     try:
-        print("Loading Whisper model on CPU first (safe for ZeroGPU)...")
-        # Load pipeline on CPU – no CUDA touch
-        pipe = pipeline(
-            "automatic-speech-recognition",
-            model=MODEL_NAME,
-            torch_dtype=torch.float16,
-            device="cpu",  # Critical: CPU init only
-            token=os.getenv("HF_TOKEN"),
-        )
-        # Now inside GPU worker: move entire pipeline to CUDA
-        print("Moving pipeline to GPU (ZeroGPU safe)...")
-        pipe = pipe.to("cuda")
-        # Run inference
-        print("Running transcription...")
         result = pipe(
             audio_path,
             chunk_length_s=30,
             stride_length_s=(6, 0),
-            batch_size=4,  # Smaller batch for ZeroGPU stability
             return_timestamps=False,
-            generate_kwargs={"language": "is", "task": "transcribe"},
         )
-        text = result["text"].strip()
-        # Cleanup
         if "chunks" in result:
             del result["chunks"]
-        # Delete pipe immediately to free memory
-        del pipe
         gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        return text if text else "(ekkert tal greint)"
     except torch.cuda.OutOfMemoryError:
         gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        return "Of mikið minni notað – bíddu 10 sek og prófaðu aftur (ZeroGPU takmörk)"
     except Exception as e:
-        return f"Óvænt villa: {str(e)}"
-# ————————————————————— UI —————————————————————
-with gr.Blocks(title="Íslenskt ASR") as demo:
-    gr.Markdown("# Íslenskt ASR – 3–5 mín hljóð")
-    gr.Markdown("**Whisper-small fínstillt á íslensku spjalli · mjög lágur WER**")
     gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
-    gr.Markdown("> Keyrt á **ZeroGPU** – hver umritun hleðst nýtt (15–30 sek), stöðug og örugg")
     audio_in = gr.Audio(
-        type="filepath",  # Ensures str output for pipeline
-        label="Hlaðið upp .mp3 / .wav (allt að 5 mín)",
-        sources=["upload", "microphone"]
     )
-    btn = gr.Button("Umrita", variant="primary", size="lg")
-    output = gr.Textbox(lines=25, label="Texti", placeholder="Hljóðtextinn birtist hér...")
-    # Click event uses GPU-decorated fn
-    btn.click(fn=transcribe_3min_gpu, inputs=audio_in, outputs=output)
-    gr.Markdown("""
-    ### Leiðbeiningar
-    - Hver umritun: 15–30 sek (módel hleðst á GPU)
-    - Styður upload og microphone – sjálfkrafa umbreytir í rétt format
-    - Ef villa: bíddu og prófaðu aftur (ZeroGPU endurræsir)
-    """)
-# ————————————————————— Launch —————————————————————
 demo.launch(
     auth=None,
     share=True,

+# app.py — Íslenskt ASR – ZeroGPU ready (your original, just fixed for free tier)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
 import gc
# ——————————————————————————————
# Model is built lazily, on the first transcription request
# ——————————————————————————————
MODEL_NAME = "palli23/whisper-small-sam_spjall"


def get_pipe():
    """Build the automatic-speech-recognition pipeline for MODEL_NAME.

    The pipeline is placed on CUDA device 0, so this must be called from
    code that already runs inside a ``@spaces.GPU`` context (see
    ``transcribe_3min``).  It is intentionally NOT decorated itself: on
    ZeroGPU a GPU is only attached while a decorated function is running,
    so building the model in one GPU call and using it outside of that call
    leaves inference without a device.

    Returns:
        The ``transformers`` pipeline object, ready to be called with an
        audio file path.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        torch_dtype="float16",
        device=0,  # first CUDA device, available inside the GPU context
        token=os.getenv("HF_TOKEN"),  # None when the variable is unset
    )


# Cached pipeline: None until the first request, and reset to None after any
# failure so that the next request rebuilds it from scratch.
# NOTE(review): whether this cache survives between requests depends on how
# the ZeroGPU runtime reuses its worker processes — confirm on the Space.
pipe = None


@spaces.GPU(duration=180)  # the GPU is requested for the inference call itself
def transcribe_3min(audio_path):
    """Transcribe one audio file and return the recognised text.

    Args:
        audio_path: Path to the audio file to transcribe.  A falsy value
            (``None`` or ``""``) means nothing was uploaded.

    Returns:
        The transcript on success.  Otherwise a user-facing message: a
        prompt to upload a file, an out-of-memory notice, or ``"Villa: ..."``
        carrying the text of the exception.  No ``Exception`` escapes this
        function; every failure is reported through the return value.
    """
    global pipe

    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    try:
        # Build the model inside the try block so that a failed download or
        # load is reported to the user like any other error.
        if pipe is None:
            print("Loading model (first use or refresh)...")
            pipe = get_pipe()

        result = pipe(
            audio_path,
            chunk_length_s=30,
            stride_length_s=(6, 0),
            batch_size=8,
            return_timestamps=False,
        )

        # Drop per-chunk data before returning; only the joined text is used.
        if "chunks" in result:
            del result["chunks"]
        gc.collect()
        torch.cuda.empty_cache()
        return result["text"]
    except torch.cuda.OutOfMemoryError:
        print("OOM → reloading model next run")
        pipe = None  # discard the pipeline so its GPU memory can be freed
        gc.collect()
        torch.cuda.empty_cache()
        return "Of mikið minni → bíddu 10 sek og prófaðu aftur"
    except Exception as e:
        pipe = None  # force a fresh pipeline on the next request
        return f"Villa: {e}"
# — UI: one audio input, one button, one transcript box —
with gr.Blocks() as demo:
    # Static header: title, model blurb, contact address.
    gr.Markdown(value="# Íslenskt ASR – 3 mínútur")
    gr.Markdown(value="**Whisper small · mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
    gr.Markdown(value="**Hafa samband:** pallinr1@protonmail.com")

    # type="filepath" makes the component hand the callback a path string.
    audio_in = gr.Audio(label="Hlaðið upp .mp3 / .wav (max 5 mín)", type="filepath")

    btn = gr.Button(value="Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    # Pressing the button sends the audio path to transcribe_3min and shows
    # whatever string it returns in the transcript box.
    btn.click(transcribe_3min, inputs=audio_in, outputs=output)
+# — Public launch —
 demo.launch(
     auth=None,
     share=True,