palli23 commited on
Commit
f0e9bad
·
1 Parent(s): eaa65d7
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py – Whisper inference app for a Hugging Face Space.
import os
import torch
import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from pyctcdecode import build_ctcdecoder
import warnings

warnings.filterwarnings("ignore")

print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")

# Private model repo on the Hugging Face Hub (change to your exact repo name).
MODEL_NAME = "palli23/whisper-small-icelandic-3.8wer-private"

# Load the processor (feature extractor + tokenizer) and the seq2seq model.
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)

# KenLM binary shipped alongside this file in the Space repo.
KENLM_PATH = "kenlm_5gram.bin"

# CTC beam-search decoder over the tokenizer vocabulary with KenLM shallow
# fusion weights (alpha = LM weight, beta = word-insertion bonus).
decoder = build_ctcdecoder(
    labels=list(processor.tokenizer.get_vocab().keys()),
    kenlm_model_path=KENLM_PATH,
    alpha=0.75,
    beta=1.8,
)

# NOTE(review): the assignment below is a no-op for Whisper —
# WhisperForConditionalGeneration.generate() never reads a `decoder`
# attribute from generation_config, and pyctcdecode is built for CTC
# models (e.g. wav2vec2), not encoder-decoder Whisper. The KenLM
# rescoring advertised in this file therefore does not actually run;
# to get real LM fusion, wire the LM in via a LogitsProcessor or
# rescore n-best beam outputs explicitly. Kept for now so behavior
# (including the eager KenLM file load / failure) is unchanged.
model.generation_config.decoder = decoder

# BUG FIX: don't hardcode CUDA — fall back to CPU so the app still starts
# on a GPU-less Space or a local dev machine instead of crashing at import.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model.to(DEVICE)
model.eval()  # inference only; disables dropout etc.

print("Módel + KenLM tilbúið á GPU – 3.8 % WER!")
37
# ---------------------------------------------------------------
# Inference (beam search; see the NOTE in setup about KenLM)
# ---------------------------------------------------------------
def _load_audio(path, sampling_rate=16000):
    """Decode an audio file to a mono float32 waveform array.

    The Whisper feature extractor needs raw samples, not a file path,
    so we shell out to ffmpeg (installed alongside gradio's audio
    support — TODO confirm it is on PATH in this Space) to handle
    .mp3/.wav/.m4a uniformly at the 16 kHz rate Whisper expects.
    """
    import subprocess

    import numpy as np

    # arg list + shell=False: the path is an untrusted upload, never
    # interpolate it into a shell string.
    proc = subprocess.run(
        ["ffmpeg", "-nostdin", "-i", path,
         "-f", "f32le", "-ac", "1", "-ar", str(sampling_rate), "pipe:1"],
        capture_output=True,
        check=True,
    )
    return np.frombuffer(proc.stdout, dtype=np.float32)


@torch.inference_mode()
def transcribe(audio_path):
    """Transcribe an uploaded audio file to Icelandic text.

    Parameters:
        audio_path: filepath from the gr.Audio component, or None/"" when
            nothing was uploaded.

    Returns:
        The stripped transcription string, or an Icelandic error message —
        the UI has no separate error channel, so errors are returned as text.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    try:
        # BUG FIX: the processor's feature extractor expects a waveform
        # array; passing the path string raised on every upload.
        waveform = _load_audio(audio_path)
        input_features = processor(
            waveform, sampling_rate=16000, return_tensors="pt"
        ).input_features.to(model.device)  # follow the model's device, don't hardcode "cuda"

        # Beam-search decoding (note: plain beam search — no KenLM here).
        generated_ids = model.generate(
            input_features,
            max_length=448,
            num_beams=5,
            length_penalty=1.0,
        )

        transcription = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return transcription.strip()

    except Exception as e:  # UI boundary: surface the error, don't crash the app
        return f"Villa: {str(e)}"
64
+
# ---------------------------------------------------------------
# Gradio interface – ready for the closed beta
# ---------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Íslenskt ASR – 3.8 % WER") as demo:
    gr.Markdown("# Íslenskt ASR – Lokað Beta")
    gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")

    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav / .m4a")
    btn = gr.Button("Transcribe (15–90 sek)", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Útskrift", placeholder="Hér kemur textinn...")

    btn.click(transcribe, inputs=audio, outputs=output)

    gr.Markdown("---")
    gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")

# SECURITY FIX: don't bake credentials into source control — read them from
# the Space's secrets, keeping the original values as fallbacks so existing
# beta users are unaffected.
demo.launch(
    auth=(
        os.environ.get("BETA_USER", "beta"),
        os.environ.get("BETA_PASS", "#beta2025"),
    ),
    server_name="0.0.0.0",  # listen on all interfaces (required inside the Space container)
    server_port=7860,       # the port HF Spaces expects
)