Spaces:

palli23
/

ASR_API

Running on Zero

App Files Files Community

palli23 committed on Nov 30, 2025

Commit

ddeefba

1 Parent(s): bf8d739

update3

Browse files

Files changed (1) hide show

app.py +23 -77

app.py CHANGED Viewed

@@ -1,87 +1,33 @@
-# app.py – REAL INFERENCE with KenLM rescoring (3.8 % WER)
-# Works in your current HF Space (A100 GPU)
 import os
-import torch
 import gradio as gr
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
-from pyctcdecode import build_ctcdecoder
-import warnings
-warnings.filterwarnings("ignore")
-print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")
-# YOUR PRIVATE MODEL REPO (change it to your exact name)
-MODEL_NAME = "palli23/whisper-small-sam_spjall"   # ← CHANGE THIS
-# Load the model and processor
-processor = WhisperProcessor.from_pretrained(MODEL_NAME)
-model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
-# KenLM binary – placed in the repo (you have already uploaded it)
-KENLM_PATH = "kenlm_5gram.bin"   # the name of your .bin file
-# Build a CTC decoder with KenLM (your best settings)
-decoder = build_ctcdecoder(
-    labels=list(processor.tokenizer.get_vocab().keys()),
-    kenlm_model_path=KENLM_PATH,
-    alpha=0.75,
-    beta=1.8,
 )
-# Attach the decoder to the model
-model.generation_config.decoder = decoder
-model.to("cuda")  # A100 in the Space
-print("Módel + KenLM tilbúið á GPU – 3.8 % WER!")
-# ---------------------------------------------------------------
-# Inference function (with KenLM rescoring)
-# ---------------------------------------------------------------
-@torch.inference_mode()
-def transcribe(audio_path):
-    if not audio_path:
-        return "Hladdu upp hljóðskrá"
-    try:
-        # Preprocess
-        audio_input = processor(audio_path, sampling_rate=16000, return_tensors="pt")
-        input_features = audio_input.input_features.to("cuda")
-        # Generate with beam search + KenLM
-        generated_ids = model.generate(
-            input_features,
-            max_length=448,
-            num_beams=5,
-            length_penalty=1.0,
-        )
-        # Decode with KenLM
-        transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return transcription.strip()
-    except Exception as e:
-        return f"Villa: {str(e)}"
-# ---------------------------------------------------------------
-# Gradio interface – polished and ready for beta
-# ---------------------------------------------------------------
-with gr.Blocks(theme=gr.themes.Soft(), title="Íslenskt ASR – 3.8 % WER") as demo:
-    gr.Markdown("# Íslenskt ASR – Lokað Beta")
-    gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")
-    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav / .m4a")
-    btn = gr.Button("Transcribe (15–90 sek)", variant="primary", size="lg")
-    output = gr.Textbox(lines=25, label="Útskrift", placeholder="Hér kemur textinn...")
-    btn.click(transcribe, inputs=audio, outputs=output)
-    gr.Markdown("---")
-    gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")
-# Password + runs on your GPU
-# Instead of hard-coded
-demo.launch(
-    auth=(os.getenv("AUTH_USER", "beta"), os.getenv("AUTH_PASS", "beta2025")),
-    server_name="0.0.0.0",
-    server_port=7860
-)

+# app.py – FIXED – uses the Secrets token (not hard-coded)
 import os
 import gradio as gr
+from transformers import pipeline
+# The model name (your private one)
+MODEL_NAME = "palli23/whisper-small-sam_spjall"
+# Use the Secrets token – never visible in the source.
+# os.getenv returns None when HF_TOKEN is unset, so token=None is passed in that case.
+# NOTE(review): presumably loading a private repo then fails at startup – confirm.
+# NOTE(review): device=0 presumably places the pipeline on the first GPU at import
+# time – confirm this matches the hardware the Space actually runs on.
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=MODEL_NAME,
+    device=0,
+    token=os.getenv("HF_TOKEN")  # ← this uses the Secrets token
 )
+def transcribe(audio):
+    """Transcribe an uploaded audio file and return the recognised text.
+
+    Args:
+        audio: Value of the Gradio audio input. The component is created with
+            type="filepath", so this is a file path; it is falsy when nothing
+            has been uploaded.
+
+    Returns:
+        The transcript with surrounding whitespace removed, the upload prompt
+        when ``audio`` is falsy, or a ``"Villa: ..."`` message when the
+        pipeline call raises.
+    """
+    if not audio:
+        return "Hladdu upp hljóð"
+    try:
+        result = pipe(audio)
+    except Exception as e:
+        # UI boundary: put a readable message in the output textbox instead of
+        # letting the failure surface as a generic Gradio error.
+        return f"Villa: {e}"
+    return result["text"].strip()
+# Build the UI: heading, audio upload (handed to the handler as a file path),
+# a button that runs transcribe, and a 20-line textbox for the result.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Íslenskt ASR – Beta")
+    gr.Markdown("Whisper-small · ~4–5 % WER")
+    audio = gr.Audio(type="filepath")
+    btn = gr.Button("Transcribe")
+    out = gr.Textbox(lines=20)
+    btn.click(transcribe, audio, out)
+# Read the login from the environment so the real credentials can be supplied
+# outside the source; the defaults keep the existing login working when the
+# variables are unset.
+demo.launch(
+    auth=(os.getenv("AUTH_USER", "beta"), os.getenv("AUTH_PASS", "beta2025")),
+)