Spaces:
Sleeping
Sleeping
| # app.py – ALVÖRU INFERENCE með KenLM rescoring (3.8 % WER) | |
| # Virkar í þínu núverandi HF Space (A100 GPU) | |
| import os | |
| import torch | |
| import gradio as gr | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
| from pyctcdecode import build_ctcdecoder | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# Startup: load the fine-tuned Whisper checkpoint, its processor and the
# KenLM-backed pyctcdecode decoder, then move the model to the GPU.
print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")

# Private model repository on the Hub (change to your exact repo name).
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Load the model and the matching processor (feature extractor + tokenizer).
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)

# KenLM binary; a relative path, so it is resolved against the working
# directory of the process.
KENLM_PATH = "kenlm_5gram.bin"

# pyctcdecode expects label i to correspond to logit column i, so order the
# vocabulary explicitly by token id instead of relying on dict order.
_vocab = processor.tokenizer.get_vocab()
decoder = build_ctcdecoder(
    labels=sorted(_vocab, key=_vocab.get),
    kenlm_model_path=KENLM_PATH,
    alpha=0.75,
    beta=1.8,
)

# NOTE(review): the decoder is intentionally NOT attached to
# model.generation_config. Whisper is an encoder-decoder model and
# generate() does not consult a `decoder` attribute there, so the previous
# assignment had no effect on the transcription; it only made the config
# non-serialisable and added a large object to every config copy.
# Real KenLM rescoring would need n-best rescoring or a custom
# LogitsProcessor -- TODO confirm the intended approach.
model.to("cuda")  # the Space is expected to provide a CUDA GPU
print("Módel + KenLM tilbúið á GPU – 3.8 % WER!")
| # --------------------------------------------------------------- | |
| # Inference fallið (með KenLM rescoring) | |
| # --------------------------------------------------------------- | |
def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the loaded Whisper model.

    Args:
        audio_path: Path to an audio file on disk (Gradio passes a file path
            because the Audio component uses ``type="filepath"``). A falsy
            value means nothing was uploaded.

    Returns:
        The stripped transcription, a prompt asking for an upload when no
        file was given, or a ``"Villa: ..."`` message if anything fails.
        The function never raises, so the UI always receives a string.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"
    try:
        # Read the raw bytes first so that a missing or unreadable file is
        # reported before any heavier work is attempted.
        with open(audio_path, "rb") as audio_file:
            payload = audio_file.read()

        # The feature extractor needs a waveform, not a path: decode the file
        # to 16 kHz mono float32 via ffmpeg (helper shipped with transformers).
        from transformers.pipelines.audio_utils import ffmpeg_read

        waveform = ffmpeg_read(payload, 16000)

        # NOTE(review): the Whisper feature extractor pads/truncates to a
        # fixed window (30 s by default per the Transformers docs), so longer
        # uploads are presumably cut off -- confirm this is acceptable.
        audio_input = processor(waveform, sampling_rate=16000, return_tensors="pt")

        # Follow the model's actual device instead of assuming "cuda".
        input_features = audio_input.input_features.to(model.device)

        # Beam search decoding.
        generated_ids = model.generate(
            input_features,
            max_length=448,
            num_beams=5,
            length_penalty=1.0,
        )

        # Token ids -> text; a single input yields a single hypothesis.
        transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return transcription.strip()
    except Exception as e:
        # Top-level UI boundary: surface the error text to the user.
        return f"Villa: {str(e)}"
| # --------------------------------------------------------------- | |
| # Gradio interface – fallegt og tilbúið fyrir beta | |
| # --------------------------------------------------------------- | |
# Gradio UI: header, audio upload, transcribe button, result box and footer.
# Components render in the order they are created inside the Blocks context.
with gr.Blocks(
    title="Íslenskt ASR – 3.8 % WER",
    theme=gr.themes.Soft(),
) as demo:
    # Header
    gr.Markdown("# Íslenskt ASR – Lokað Beta")
    gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")

    # Input: the uploaded file is handed to the callback as a path on disk.
    audio = gr.Audio(
        type="filepath",
        label="Hladdu upp .mp3 / .wav / .m4a",
    )
    btn = gr.Button(
        "Transcribe (15–90 sek)",
        variant="primary",
        size="lg",
    )

    # Output
    output = gr.Textbox(
        lines=25,
        label="Útskrift",
        placeholder="Hér kemur textinn...",
    )

    # Clicking the button runs transcribe(audio) and shows the result.
    btn.click(fn=transcribe, inputs=audio, outputs=output)

    # Footer
    gr.Markdown("---")
    gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")
| # Lykilorð + keyrir á þínum GPU | |
# Start the password-protected server on all interfaces, port 7860.
# Credentials can be overridden through the BETA_USERNAME / BETA_PASSWORD
# environment variables (e.g. Space secrets) so the real password does not
# have to live in the source; the previous hard-coded values remain the
# fallback for backward compatibility and should be replaced.
demo.launch(
    auth=(
        os.environ.get("BETA_USERNAME", "beta"),
        os.environ.get("BETA_PASSWORD", "#beta2025"),
    ),
    server_name="0.0.0.0",
    server_port=7860,
)