palli23 committed on
Commit
696e56f
·
verified ·
1 Parent(s): a8f1bf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -80
app.py CHANGED
@@ -1,115 +1,75 @@
1
- # app.py — Batch file transcription (up to 25 files, Icelandic forced, HF-safe)
2
 
3
  import os
4
- import gc
5
- import zipfile
6
- import tempfile
7
 
8
  import gradio as gr
9
  import spaces
10
  from transformers import pipeline
11
  import torch
12
-
13
- # Environment safety
14
- os.environ["OMP_NUM_THREADS"] = "1"
15
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
16
-
17
 
18
  # ——————————————————————————————
19
- # ZeroGPU worker – model loaded once
20
  # ——————————————————————————————
21
  @spaces.GPU(duration=180)
22
- def transcribe_files(audio_files):
23
- if not audio_files:
24
- return None, "Hlaðið upp hljóðskrám"
25
-
26
- audio_files = audio_files[:25]
27
 
28
- workdir = tempfile.mkdtemp()
29
- outdir = os.path.join(workdir, "transcripts")
30
- os.makedirs(outdir, exist_ok=True)
31
-
32
- # Create ASR pipeline
33
  pipe = pipeline(
34
- "automatic-speech-recognition",
 
 
 
 
 
35
  model="palli23/whisper-small-sam_spjall",
36
  torch_dtype=torch.float16,
37
- device=0,
38
  )
39
 
40
- # 🔧 PATCH generation config (critical fix)
41
- gen_cfg = pipe.model.generation_config
42
- gen_cfg.language = "is"
43
- gen_cfg.task = "transcribe"
44
- gen_cfg.forced_decoder_ids = None # prevent conflicts
45
- gen_cfg.suppress_tokens = None # avoid tokenizer mismatch
46
-
47
- for file in audio_files:
48
- audio_path = file.name
49
- base = os.path.splitext(os.path.basename(audio_path))[0]
50
- txt_path = os.path.join(outdir, f"{base}.txt")
51
-
52
- result = pipe(
53
- audio_path,
54
- chunk_length_s=30,
55
- batch_size=8,
56
- return_timestamps=False,
57
- generate_kwargs={
58
- "num_beams": 5,
59
- "repetition_penalty": 1.2,
60
- "no_repeat_ngram_size": 3,
61
- "temperature": 0.0,
62
- },
63
- )
64
-
65
- with open(txt_path, "w", encoding="utf-8") as f:
66
- f.write(result["text"].strip())
67
-
68
- # Zip outputs
69
- zip_path = os.path.join(workdir, "transcripts.zip")
70
- with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
71
- for fname in os.listdir(outdir):
72
- z.write(os.path.join(outdir, fname), arcname=fname)
73
 
74
- # Cleanup
75
  del pipe
76
  gc.collect()
77
  torch.cuda.empty_cache()
78
 
79
- return zip_path, f"Lokið ✅ ({len(audio_files)} skrár)"
80
-
81
 
82
  # ——————————————————————————————
83
- # UI
84
  # ——————————————————————————————
85
  with gr.Blocks() as demo:
86
- gr.Markdown("# Íslenskt ASR – Batch (allt að 25 skrár)")
87
- gr.Markdown(
88
- "**palli23/whisper-small-sam_spjall** · íslenska föst · .wav / .mp3"
89
- )
90
-
91
- audio_in = gr.File(
92
- label="Hlaðið upp allt að 25 .wav / .mp3 skrám",
93
- file_types=[".wav", ".mp3"],
94
- file_count="multiple",
95
- )
96
 
 
97
  btn = gr.Button("Transcribe", variant="primary", size="lg")
 
98
 
99
- zip_out = gr.File(label="Niðurhal transcripts.zip")
100
- status = gr.Textbox(label="Staða", interactive=False)
101
-
102
- btn.click(
103
- fn=transcribe_files,
104
- inputs=audio_in,
105
- outputs=[zip_out, status],
106
- )
107
-
108
 
109
  # ——————————————————————————————
110
- # Launch
111
  # ——————————————————————————————
112
  demo.launch(
 
113
  server_name="0.0.0.0",
114
  server_port=7860,
115
- )
 
 
1
# app.py — Icelandic ASR demo: Whisper-small fine-tune with repetition controls.

import gc
import os

# Set before the torch import so they take effect at initialization:
# cap OpenMP thread fan-out and limit the CUDA allocator's split size to
# reduce fragmentation on the shared ZeroGPU worker.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gradio as gr
import spaces
import torch
from transformers import pipeline
 
 
 
 
12
 
13
# ——————————————————————————————
# ZeroGPU worker – model loaded inside the GPU-scoped call
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe one Icelandic audio file and return the plain text.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the ``gr.Audio(type="filepath")``
        component; falsy when the user clicks without uploading a file.

    Returns
    -------
    str
        The transcription, or an Icelandic prompt asking for an upload.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # Build the ASR pipeline inside the @spaces.GPU call: the GPU is only
    # visible here, and the model occupies it just for this request.
    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU index inside the @spaces.GPU context
    )

    result = pipe(
        audio_path,
        chunk_length_s=30,       # Whisper's native 30 s window
        batch_size=8,
        return_timestamps=False,  # plain text only
        generate_kwargs={
            "num_beams": 5,               # beam search (was greedy, beam size 1)
            "repetition_penalty": 1.2,    # discourage looping output
            "no_repeat_ngram_size": 3,    # hard-block repeated trigrams
            "temperature": 0.0,           # deterministic decoding
        },
    )

    # Release GPU memory eagerly so the ZeroGPU worker stays healthy
    # across many requests.
    del pipe
    gc.collect()
    torch.cuda.empty_cache()

    return result["text"]
 
52
 
53
# ——————————————————————————————
# UI – clean and simple
# ——————————————————————————————
with gr.Blocks() as demo:
    # Header and model info (Icelandic UI text reproduced verbatim).
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # Layout is top-to-bottom: input, action button, result box.
    audio_input = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    transcribe_btn = gr.Button("Transcribe", variant="primary", size="lg")
    transcript_box = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the ZeroGPU worker defined above.
    transcribe_btn.click(fn=transcribe_3min, inputs=audio_input, outputs=transcript_box)
 
 
 
 
 
 
 
 
66
 
67
# ——————————————————————————————
# Public launch
# ——————————————————————————————
demo.launch(
    # NOTE(review): share=True is ignored (with a warning) when running on
    # Hugging Face Spaces; it only matters for local runs — confirm intent.
    share=True,
    server_name="0.0.0.0",  # bind all interfaces, required inside the Space container
    server_port=7860,       # the port HF Spaces expects
)