palli23 commited on
Commit
f0e9bad
·
1 Parent(s): eaa65d7
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py – Whisper inference app for a Hugging Face Space.
import os
import torch
import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from pyctcdecode import build_ctcdecoder
import warnings

warnings.filterwarnings("ignore")

print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")

# Private model repo on the Hugging Face Hub (change to your exact repo name).
MODEL_NAME = "palli23/whisper-small-icelandic-3.8wer-private"

# Load the processor (feature extractor + tokenizer) and the seq2seq model.
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)

# KenLM binary shipped alongside this file in the Space repo.
KENLM_PATH = "kenlm_5gram.bin"

# CTC beam-search decoder over the tokenizer vocabulary with KenLM shallow
# fusion weights (alpha = LM weight, beta = word-insertion bonus).
decoder = build_ctcdecoder(
    labels=list(processor.tokenizer.get_vocab().keys()),
    kenlm_model_path=KENLM_PATH,
    alpha=0.75,
    beta=1.8,
)

# NOTE(review): the assignment below is a no-op for Whisper —
# WhisperForConditionalGeneration.generate() never reads a `decoder`
# attribute from generation_config, and pyctcdecode is built for CTC
# models (e.g. wav2vec2), not encoder-decoder Whisper. The KenLM
# rescoring advertised in this file therefore does not actually run;
# to get real LM fusion, wire the LM in via a LogitsProcessor or
# rescore n-best beam outputs explicitly. Kept for now so behavior
# (including the eager KenLM file load / failure) is unchanged.
model.generation_config.decoder = decoder

# BUG FIX: don't hardcode CUDA — fall back to CPU so the app still starts
# on a GPU-less Space or a local dev machine instead of crashing at import.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model.to(DEVICE)
model.eval()  # inference only; disables dropout etc.

print("Módel + KenLM tilbúið á GPU – 3.8 % WER!")
37
# ---------------------------------------------------------------
# Inference (beam search; see the NOTE in setup about KenLM)
# ---------------------------------------------------------------
def _load_audio(path, sampling_rate=16000):
    """Decode an audio file to a mono float32 waveform array.

    The Whisper feature extractor needs raw samples, not a file path,
    so we shell out to ffmpeg (installed alongside gradio's audio
    support — TODO confirm it is on PATH in this Space) to handle
    .mp3/.wav/.m4a uniformly at the 16 kHz rate Whisper expects.
    """
    import subprocess

    import numpy as np

    # arg list + shell=False: the path is an untrusted upload, never
    # interpolate it into a shell string.
    proc = subprocess.run(
        ["ffmpeg", "-nostdin", "-i", path,
         "-f", "f32le", "-ac", "1", "-ar", str(sampling_rate), "pipe:1"],
        capture_output=True,
        check=True,
    )
    return np.frombuffer(proc.stdout, dtype=np.float32)


@torch.inference_mode()
def transcribe(audio_path):
    """Transcribe an uploaded audio file to Icelandic text.

    Parameters:
        audio_path: filepath from the gr.Audio component, or None/"" when
            nothing was uploaded.

    Returns:
        The stripped transcription string, or an Icelandic error message —
        the UI has no separate error channel, so errors are returned as text.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    try:
        # BUG FIX: the processor's feature extractor expects a waveform
        # array; passing the path string raised on every upload.
        waveform = _load_audio(audio_path)
        input_features = processor(
            waveform, sampling_rate=16000, return_tensors="pt"
        ).input_features.to(model.device)  # follow the model's device, don't hardcode "cuda"

        # Beam-search decoding (note: plain beam search — no KenLM here).
        generated_ids = model.generate(
            input_features,
            max_length=448,
            num_beams=5,
            length_penalty=1.0,
        )

        transcription = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return transcription.strip()

    except Exception as e:  # UI boundary: surface the error, don't crash the app
        return f"Villa: {str(e)}"
64
+
# ---------------------------------------------------------------
# Gradio interface – ready for the closed beta
# ---------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Íslenskt ASR – 3.8 % WER") as demo:
    gr.Markdown("# Íslenskt ASR – Lokað Beta")
    gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")

    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav / .m4a")
    btn = gr.Button("Transcribe (15–90 sek)", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Útskrift", placeholder="Hér kemur textinn...")

    btn.click(transcribe, inputs=audio, outputs=output)

    gr.Markdown("---")
    gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")

# SECURITY FIX: don't bake credentials into source control — read them from
# the Space's secrets, keeping the original values as fallbacks so existing
# beta users are unaffected.
demo.launch(
    auth=(
        os.environ.get("BETA_USER", "beta"),
        os.environ.get("BETA_PASS", "#beta2025"),
    ),
    server_name="0.0.0.0",  # listen on all interfaces (required inside the Space container)
    server_port=7860,       # the port HF Spaces expects
)