|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
import spaces |
|
|
from transformers import pipeline |
|
|
|
|
|
# Hub identifier of the Whisper checkpoint this app serves.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

print("Hleð Whisper módelinu einu sinni...")

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same loaded model. The access token is taken from the
# HF_TOKEN environment variable (os.getenv yields None when it is unset).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",
    token=os.getenv("HF_TOKEN"),
)
|
|
|
|
|
|
|
|
# Fill in the language/task id tables on the model's generation config, but
# only when `lang_to_id` is absent or None; an existing table is left alone.
# NOTE(review): in the stock multilingual Whisper vocabulary 50259 appears to
# be the English language token, and lang_to_id keys are normally written as
# "<|is|>" rather than "is" — confirm this mapping is what the checkpoint
# expects before relying on it.
_gen_cfg = pipe.model.generation_config
if getattr(_gen_cfg, "lang_to_id", None) is None:
    _gen_cfg.lang_to_id = {"is": 50259}
    _gen_cfg.task_to_id = {"transcribe": 50359, "translate": 50358}
    # Clear any forced decoder prompt stored in the config.
    _gen_cfg.forced_decoder_ids = None

print("Módel tilbúið!")
|
|
|
|
|
@spaces.GPU(duration=180)
def transcribe_single(audio_path):
    """Transcribe one uploaded audio file with the module-level pipeline.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            (``None`` / empty string) when nothing has been uploaded.

    Returns:
        A 3-tuple in the order the click handler's outputs are wired: the
        value for the audio component, the text for the transcript box, and
        the value for the timer component (always ``None``).
    """
    if not audio_path:
        # The third slot feeds a gr.Timer, whose value is a numeric interval;
        # return None, as the success path does, rather than the string
        # "00:00", which is not a valid timer value.
        return None, "Hladdu upp hljóðskrá fyrst", None

    # Run the pipeline with 30-second chunks and a batch size of 8.
    result = pipe(audio_path, chunk_length_s=30, batch_size=8)
    text = result["text"].strip()

    # Echo the same path back to the audio component alongside the transcript.
    return audio_path, text, None
|
|
|
|
|
# Single-page UI: upload one audio file, press the button, read the transcript.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
    gr.Markdown("Hladdu upp einni skrá (allt að 5 mín) → Transcribe")

    # type="filepath" hands the handler a path on disk rather than raw samples.
    audio_in = gr.Audio(label="Hljóðskrá", type="filepath")
    btn = gr.Button("Transcribe", variant="primary", size="lg")

    # NOTE(review): no tick listener is attached to this timer in the visible
    # code; it only appears as a click output — confirm it is still needed.
    timer = gr.Timer(value=180)

    output = gr.Textbox(label="Útskrift", lines=20)

    # The handler's 3-tuple maps positionally onto these three outputs.
    btn.click(fn=transcribe_single, inputs=audio_in, outputs=[audio_in, output, timer])
|
|
|
|
|
|
|
|
# Basic-auth credentials for the app. They can be supplied through the
# APP_AUTH_USER / APP_AUTH_PASSWORD environment variables so the real values
# need not live in source control; the previous built-in values remain the
# fallback (also used when a variable is set but empty) to stay compatible.
# NOTE(review): the fallback password is still visible in source — set the
# environment variables in any deployment that needs real access control.
_auth_user = os.getenv("APP_AUTH_USER") or "beta"
_auth_password = os.getenv("APP_AUTH_PASSWORD") or "beta2025"
demo.launch(auth=(_auth_user, _auth_password))