# app.py – Whisper inference for the Icelandic ASR beta (advertised 3.8 % WER)
# Intended to run in the current HF Space (A100 GPU)
import logging
import os
import warnings

import gradio as gr
import torch
from pyctcdecode import build_ctcdecoder
from transformers import WhisperForConditionalGeneration, WhisperProcessor
from transformers.pipelines.audio_utils import ffmpeg_read

warnings.filterwarnings("ignore")

logger = logging.getLogger(__name__)

print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")

# Your private model repo (change to your exact repo name)
MODEL_NAME = "palli23/whisper-small-sam_spjall"  # <- CHANGE HERE

# KenLM binary – placed in the repo (already uploaded)
KENLM_PATH = "kenlm_5gram.bin"  # name of your .bin file

# Sampling rate passed to the processor and used when decoding audio files.
SAMPLE_RATE = 16000
# Whisper's feature extractor pads/truncates every input to a 30 s window,
# so longer recordings are transcribed window by window.
CHUNK_SECONDS = 30
CHUNK_SAMPLES = CHUNK_SECONDS * SAMPLE_RATE

# Prefer the GPU, but do not crash at import time when none is available.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and processor
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)

# Build the pyctcdecode decoder with KenLM (tuned alpha/beta values)
# NOTE(review): dict key order of get_vocab() is presumably not guaranteed to
# follow token ids – confirm before relying on label positions.
decoder = build_ctcdecoder(
    labels=list(processor.tokenizer.get_vocab().keys()),
    kenlm_model_path=KENLM_PATH,
    alpha=0.75,
    beta=1.8,
)

# Attach decoder to the model.
# NOTE(review): pyctcdecode consumes frame-level CTC logits, while Whisper is
# an encoder-decoder model whose `generate` never reads
# `generation_config.decoder`. As written, this assignment has no effect on
# decoding, i.e. no KenLM rescoring is applied. Kept only to preserve the
# existing module state; replace with real n-best rescoring or remove.
model.generation_config.decoder = decoder

model.to(DEVICE)
device_label = "GPU" if DEVICE == "cuda" else "CPU"
print(f"Módel + KenLM tilbúið á {device_label} – 3.8 % WER!")


# ---------------------------------------------------------------
# Inference function
# ---------------------------------------------------------------
@torch.inference_mode()
def transcribe(audio_path):
    """Transcribe an audio file with the fine-tuned Whisper model.

    The file is decoded to 16 kHz mono float samples via ffmpeg, split into
    consecutive 30-second windows, and each window is decoded with beam
    search (5 beams). The window transcripts are joined with single spaces.

    Args:
        audio_path: Path to the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        The transcript text; a prompt asking for an upload when
        ``audio_path`` is falsy; or ``"Villa: <message>"`` when decoding or
        inference fails.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    try:
        # The processor expects raw samples, not a path, so decode the file
        # first. ffmpeg_read resamples to SAMPLE_RATE and downmixes to mono.
        with open(audio_path, "rb") as audio_file:
            waveform = ffmpeg_read(audio_file.read(), SAMPLE_RATE)

        transcripts = []
        # Hard window boundaries may split a word; acceptable here because
        # the alternative is silently dropping everything after 30 seconds.
        for start in range(0, len(waveform), CHUNK_SAMPLES):
            window = waveform[start:start + CHUNK_SAMPLES]

            # Preprocess
            audio_input = processor(
                window, sampling_rate=SAMPLE_RATE, return_tensors="pt"
            )
            input_features = audio_input.input_features.to(DEVICE)

            # Generate with beam search
            generated_ids = model.generate(
                input_features,
                max_length=448,
                num_beams=5,
                length_penalty=1.0,
            )

            # Token ids -> text
            text = processor.batch_decode(
                generated_ids, skip_special_tokens=True
            )[0].strip()
            if text:
                transcripts.append(text)

        return " ".join(transcripts)
    except Exception as e:
        # UI boundary: show the message to the user, keep the traceback in
        # the server log so failures remain debuggable.
        logger.exception("Transcription failed for %s", audio_path)
        return f"Villa: {e}"


# ---------------------------------------------------------------
# Gradio interface – polished and ready for beta
# ---------------------------------------------------------------
# Single-page UI: audio upload -> transcribe button -> transcript textbox.
# NOTE(review): the banner below advertises KenLM rescoring; confirm that the
# inference path actually applies it before the beta goes out.
with gr.Blocks(theme=gr.themes.Soft(), title="Íslenskt ASR – 3.8 % WER") as demo:
    gr.Markdown("# Íslenskt ASR – Lokað Beta")
    gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")

    # type="filepath" makes Gradio pass the uploaded file's path to the handler.
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav / .m4a")
    btn = gr.Button("Transcribe (15–90 sek)", variant="primary", size="lg")
    output = gr.Textbox(
        lines=25,
        label="Útskrift",
        placeholder="Hér kemur textinn...",
    )

    btn.click(transcribe, inputs=audio, outputs=output)

    gr.Markdown("---")
    gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")

# Password-protected launch, listening on all interfaces at port 7860.
# Credentials can be overridden with the BETA_USERNAME / BETA_PASSWORD
# environment variables (e.g. Space secrets) so they need not live in source
# control; the previous hard-coded values remain as defaults for backward
# compatibility and should be replaced with something stronger.
demo.launch(
    auth=(
        os.environ.get("BETA_USERNAME", "beta"),
        os.environ.get("BETA_PASSWORD", "#beta2025"),
    ),
    server_name="0.0.0.0",
    server_port=7860,
)