Spaces:

palli23
/

ASR_API

Sleeping

App Files Files Community

palli23 committed 18 days ago

Commit

2102ae8

verified ·

1 Parent(s): 9489fe7

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -23

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# app.py — Íslenskt ASR – 3 mínútur (ZeroGPU, works forever, your original code!)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
@@ -10,64 +11,68 @@ import torch
 import gc
 # ——————————————————————————————
-# Model loaded ONLY inside GPU worker (ZeroGPU safe)
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-#MODEL_NAME = "language-and-voice-lab/whisper-large-icelandic-62640-steps-967h"
# NOTE(review): duration=180 presumably caps the GPU allocation per call at
# 180 seconds — confirm against the `spaces` package documentation.
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file with the Whisper checkpoint MODEL_NAME.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or an empty string) means nothing was uploaded.

    Returns:
        The transcription text, or the Icelandic prompt
        "Hlaðið upp hljóðskrá" when no audio was supplied.

    Raises:
        Whatever ``pipeline(...)`` or the pipeline call raises; the memory
        cleanup below still runs in that case.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # The pipeline is created on every call, inside the GPU-decorated
    # function, and is released again before returning.
    pipe = None
    try:
        pipe = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device=0,  # first CUDA device
            token=os.getenv("HF_TOKEN"),
        )
        result = pipe(
            audio_path,
            chunk_length_s=30,
            stride_length_s=(6, 0),
            batch_size=8,
            return_timestamps=False,
        )
        # Only the text is returned; the rest of `result` is a local that is
        # discarded with this frame, so no explicit pruning is needed.
        return result["text"]
    finally:
        # Release the model and cached CUDA blocks even when loading or
        # inference raised, so a failed request does not keep memory pinned.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
 # ——————————————————————————————
-# UI — your original, unchanged
 # ——————————————————————————————
# Page layout: three Markdown banners, an audio upload widget, a trigger
# button and the transcript box. Clicking the button runs transcribe_3min on
# the uploaded file path and writes the returned text into the textbox.
with gr.Blocks() as demo:
    for banner in (
        "# Íslenskt ASR – 3 mínútur",
        "**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**",
        "**Hafa samband:** pallinr1@protonmail.com",
    ):
        gr.Markdown(banner)

    # type="filepath" hands the callback a path string rather than raw samples.
    audio_in = gr.Audio(label="Hlaðið upp .mp3 / .wav (max 5 mín)", type="filepath")
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    btn.click(inputs=audio_in, outputs=output, fn=transcribe_3min)
 # ——————————————————————————————
# Public app: no login, no password.
# ——————————————————————————————
# Serve on every interface at port 7860, surface errors in the UI, keep
# startup logging on, and leave authentication disabled.
# NOTE(review): share=True requests a public share link; Gradio's docs
# describe this as unsupported on Hugging Face Spaces — confirm it is needed.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    auth=None,
    show_error=True,
    quiet=False,
)

+# app.py — Íslenskt ASR – ZeroGPU + repetition_penalty=1.2 (perfect for your model)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gc
 # ——————————————————————————————
+# Model + generation settings (repetition_penalty = 1.2)
 # ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
# NOTE(review): duration=180 presumably caps the GPU allocation per call at
# 180 seconds — confirm against the `spaces` package documentation.
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file with the Whisper checkpoint MODEL_NAME.

    Decoding uses a repetition penalty and an n-gram repeat block to
    discourage looping output.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or an empty string) means nothing was uploaded.

    Returns:
        The transcription text, or the Icelandic prompt
        "Hlaðið upp hljóðskrá" when no audio was supplied.

    Raises:
        Whatever ``pipeline(...)`` or the pipeline call raises; the memory
        cleanup below still runs in that case.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # The pipeline is created on every call, inside the GPU-decorated
    # function, and is released again before returning.
    pipe = None
    try:
        pipe = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device=0,  # first CUDA device
            token=os.getenv("HF_TOKEN"),  # only needed when the model repo is private
        )
        result = pipe(
            audio_path,
            chunk_length_s=30,
            stride_length_s=(6, 0),
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                "repetition_penalty": 1.2,
                "no_repeat_ngram_size": 3,  # extra guard against repetition loops
                "temperature": 0.0,
            },
        )
        return result["text"]
    finally:
        # Release the model and cached CUDA blocks even when loading or
        # inference raised, so a failed request does not keep memory pinned.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
 # ——————————————————————————————
+# UI – clean and fast
 # ——————————————————————————————
# Page layout: three Markdown banners, an audio upload widget, a trigger
# button and the transcript box. Clicking the button runs transcribe_3min on
# the uploaded file path and writes the returned text into the textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
    audio_in = gr.Audio(
        type="filepath",  # the callback receives a path string
        label="Hlaðið upp .mp3 / .wav (max ~5 mín)",
    )
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    # `show_word_timestamps` is not a gr.Textbox parameter, so passing it
    # breaks construction of the UI; the box only ever shows plain text.
    output = gr.Textbox(lines=25, label="Útskrift")
    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
 # ——————————————————————————————
# Public Space: no login.
# ——————————————————————————————
# Launch settings gathered in one mapping: listen on every interface at port
# 7860, show errors in the UI, keep startup logging, no authentication.
# NOTE(review): share=True requests a public share link; Gradio's docs
# describe this as unsupported on Hugging Face Spaces — confirm it is needed.
launch_options = {
    "share": True,
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "show_error": True,
    "quiet": False,
    "auth": None,
}
demo.launch(**launch_options)