palli23 committed on
Commit
ddeefba
·
1 Parent(s): bf8d739
Files changed (1) hide show
  1. app.py +23 -77
app.py CHANGED
@@ -1,87 +1,33 @@
1
- # app.py – ALVÖRU INFERENCE með KenLM rescoring (3.8 % WER)
2
- # Virkar í þínu núverandi HF Space (A100 GPU)
3
  import os
4
- import torch
5
  import gradio as gr
6
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
7
- from pyctcdecode import build_ctcdecoder
8
- import warnings
9
- warnings.filterwarnings("ignore")
10
 
11
- print("Hleð módel og KenLM... (tekur 20–40 sek í fyrsta skipti)")
 
12
 
13
- # ÞINN PRIVATE MODEL REPO (breyttu í þitt nákvæma nafn)
14
- MODEL_NAME = "palli23/whisper-small-sam_spjall" # ← BREYTTU HÉR
15
-
16
- # Hladdu módel og processor
17
- processor = WhisperProcessor.from_pretrained(MODEL_NAME)
18
- model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
19
-
20
- # KenLM binary – sett í repo-ið (þú hefur þegar upload-að henni)
21
- KENLM_PATH = "kenlm_5gram.bin" # nafnið á þinni .bin skrá
22
-
23
- # Byggja CTC decoder með KenLM (þín bestu stillingar)
24
- decoder = build_ctcdecoder(
25
- labels=list(processor.tokenizer.get_vocab().keys()),
26
- kenlm_model_path=KENLM_PATH,
27
- alpha=0.75,
28
- beta=1.8,
29
  )
30
 
31
- # Tengja decoder við módel
32
- model.generation_config.decoder = decoder
33
- model.to("cuda") # A100 í Space-inu
34
-
35
- print("Módel + KenLM tilbúið á GPU – 3.8 % WER!")
36
 
37
- # ---------------------------------------------------------------
38
- # Inference fallið (með KenLM rescoring)
39
- # ---------------------------------------------------------------
40
- @torch.inference_mode()
41
- def transcribe(audio_path):
42
- if not audio_path:
43
- return "Hladdu upp hljóðskrá"
44
-
45
- try:
46
- # Preprocess
47
- audio_input = processor(audio_path, sampling_rate=16000, return_tensors="pt")
48
- input_features = audio_input.input_features.to("cuda")
49
-
50
- # Generate með beam search + KenLM
51
- generated_ids = model.generate(
52
- input_features,
53
- max_length=448,
54
- num_beams=5,
55
- length_penalty=1.0,
56
- )
57
-
58
- # Decode með KenLM
59
- transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
60
- return transcription.strip()
61
-
62
- except Exception as e:
63
- return f"Villa: {str(e)}"
64
-
65
- # ---------------------------------------------------------------
66
- # Gradio interface – fallegt og tilbúið fyrir beta
67
- # ---------------------------------------------------------------
68
- with gr.Blocks(theme=gr.themes.Soft(), title="Íslenskt ASR – 3.8 % WER") as demo:
69
- gr.Markdown("# Íslenskt ASR – Lokað Beta")
70
- gr.Markdown("**3.8 % WER á RÚV fréttum · Full KenLM rescoring · Einkaeign**")
71
-
72
- audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav / .m4a")
73
- btn = gr.Button("Transcribe (15–90 sek)", variant="primary", size="lg")
74
- output = gr.Textbox(lines=25, label="Útskrift", placeholder="Hér kemur textinn...")
75
 
76
- btn.click(transcribe, inputs=audio, outputs=output)
 
 
77
 
78
- gr.Markdown("---")
79
- gr.Markdown("© 2025 – Einkaeign · Engin gögn vistuð")
80
 
81
- # Lykilorð + keyrir á þínum GPU
82
- # Í staðinn fyrir harðkóðað
83
- demo.launch(
84
- auth=(os.getenv("AUTH_USER", "beta"), os.getenv("AUTH_PASS", "beta2025")),
85
- server_name="0.0.0.0",
86
- server_port=7860
87
- )
 
1
+ # app.py – FIXED: reads the Hugging Face token from Secrets (not hard-coded)
 
2
  import os
 
3
  import gradio as gr
4
+ from transformers import pipeline
 
 
 
5
 
6
# Name of the (private) fine-tuned Whisper checkpoint on the Hugging Face Hub.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Build the speech-recognition pipeline once, at import time. The Hub access
# token is read from the HF_TOKEN environment variable (configured as a
# secret), so it never appears in the source.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    token=os.environ.get("HF_TOKEN"),
    device=0,  # GPU index 0
)
16
 
17
def transcribe(audio):
    """Transcribe an uploaded audio file with the module-level ``pipe``.

    Args:
        audio: Path of the audio file to transcribe. May be ``None`` or
            empty when nothing has been uploaded yet.

    Returns:
        The recognised text with surrounding whitespace removed, or a short
        Icelandic prompt asking the user to upload audio when ``audio`` is
        empty.
    """
    # Guard clause: nothing uploaded, so do not call the model at all.
    if not audio:
        return "Hladdu upp hljóð"

    result = pipe(audio)
    # Whisper transcripts commonly start with a space; trim it for display.
    return result["text"].strip()
22
 
23
# Gradio UI: one audio upload, one button and one textbox for the transcript.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Íslenskt ASR Beta")
    gr.Markdown("Whisper-small · ~4–5 % WER")

    # type="filepath" makes Gradio hand `transcribe` a path to the upload.
    audio = gr.Audio(type="filepath")
    btn = gr.Button("Transcribe")
    out = gr.Textbox(lines=20)

    # Event listeners must be registered inside the Blocks context.
    btn.click(transcribe, audio, out)

# Login credentials are read from the AUTH_USER / AUTH_PASS environment
# variables so they can be managed as secrets instead of living in the source.
# NOTE(review): the fallbacks keep the previous login working when the
# variables are unset, but they are publicly known values. Set both secrets
# and then remove the defaults.
demo.launch(
    auth=(os.getenv("AUTH_USER", "beta"), os.getenv("AUTH_PASS", "beta2025")),
)