"""Gradio app that transcribes an uploaded audio file with a Whisper model."""

import os

import gradio as gr
import spaces
from transformers import pipeline

# Hugging Face Hub id of the Whisper checkpoint loaded below.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

print("Hleð Whisper módelinu...")

# The pipeline is built once at import time and shared by every request.
# The Hub access token is read from the HF_TOKEN environment variable
# (os.getenv returns None when it is unset).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",
    token=os.getenv("HF_TOKEN"),
)

# Fill in the language/task token maps only when the checkpoint's generation
# config lacks `lang_to_id` (attribute missing or set to None).
# NOTE(review): the token ids below are hard-coded; confirm against the
# checkpoint's tokenizer that 50259 is the id this model expects for "is".
_generation_config = pipe.model.generation_config
if getattr(_generation_config, "lang_to_id", None) is None:
    _generation_config.lang_to_id = {"is": 50259}
    _generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
    _generation_config.forced_decoder_ids = None

print("Módel tilbúið!")


# NOTE(review): duration=180 is presumably the per-call GPU time budget in
# seconds -- confirm against the `spaces` package documentation.
@spaces.GPU(duration=180)
def transcribe_single(audio_path):
    """Transcribe one audio file with the module-level Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or an empty string) means nothing was provided.

    Returns:
        A 3-tuple ``(audio, text, timer_update)``, one item per output
        component wired to this handler:

        * ``audio`` -- ``audio_path`` echoed back, or ``None`` when no file
          was given.
        * ``text`` -- the stripped transcription, or a prompt asking the
          user to upload a file.
        * ``timer_update`` -- a no-op ``gr.update()`` so the timer component
          keeps its configured interval.
    """
    # The timer's value is a numeric interval. The string "00:00" or None
    # would overwrite it with something that is not a number, so leave the
    # component untouched on both paths instead.
    timer_unchanged = gr.update()

    if not audio_path:
        return None, "Hladdu upp hljóðskrá", timer_unchanged

    # Long recordings are processed in 30-second chunks, batched 8 at a time.
    result = pipe(audio_path, chunk_length_s=30, batch_size=8)
    return audio_path, result["text"].strip(), timer_unchanged


# Page layout: heading, audio upload, transcribe button, timer, and the
# transcript text box.
with gr.Blocks(title="Íslenskt Whisper") as demo:
    gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
    gr.Markdown("Hladdu upp einni hljóðskrá (allt að 5 mín) → smelltu á Transcribe")

    with gr.Row():
        # `gr.Audio` does not accept a `waveform` keyword (waveform rendering
        # is configured through `waveform_options`), so the argument is
        # dropped rather than raising TypeError at startup.
        audio_in = gr.Audio(label="Hljóðskrá", type="filepath")

    btn = gr.Button("Transcribe", variant="primary", size="lg")

    with gr.Row():
        # `gr.Timer` only takes the tick interval plus `active`/`render`;
        # it has no `label` or `visible` parameters. It stays in the layout
        # because the click handler still returns a value for it.
        # NOTE(review): a Timer fires tick events and is presumably not a
        # visible countdown; a "GPU time remaining" display would need a
        # different component -- confirm the intended UX.
        timer = gr.Timer(180, active=True)

    output = gr.Textbox(label="Útskrift", lines=20)

    # The handler returns one value per output: audio, transcript, timer.
    btn.click(
        transcribe_single,
        inputs=audio_in,
        outputs=[audio_in, output, timer],
    )

# NOTE(review): the login credentials are hard-coded in source; consider
# loading them from environment variables or Space secrets instead.
demo.launch(auth=("beta", "beta2025"))