|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
import spaces |
|
|
from transformers import pipeline |
|
|
import torch |
|
|
|
|
|
# Hugging Face Hub id of the Whisper checkpoint this app serves.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

print("Loading model once at startup...")

# Build the ASR pipeline a single time at import so every request reuses it.
# Weights are loaded in half precision onto device index 0; the Hub token is
# read from the HF_TOKEN environment variable (None when it is unset).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype=torch.float16,
    device=0,
    token=os.environ.get("HF_TOKEN"),
)

# Every patch below targets the same generation config object.
generation_config = pipe.model.generation_config

# Supply the language/task lookup tables only when the checkpoint's config
# does not define them, and clear any forced decoder prompt in that case.
# NOTE(review): in the stock multilingual Whisper vocabulary 50259 is the
# `<|en|>` token and `<|is|>` is 50311 — confirm this id matches how the
# checkpoint was fine-tuned before changing it.
if not hasattr(generation_config, "lang_to_id"):
    generation_config.lang_to_id = {"is": 50259}
    generation_config.task_to_id = {"transcribe": 50359}
    generation_config.forced_decoder_ids = None

# Same idea for the "no timestamps" token id: fill it in only if missing.
if not hasattr(generation_config, "no_timestamps_token_id"):
    generation_config.no_timestamps_token_id = 50363

# Always pin the language and task used for generation.
generation_config.language = "is"
generation_config.task = "transcribe"

print("Model ready – fully fixed for timestamps!")
|
|
|
|
|
# Sample rate the audio is resampled to before it is handed to the pipeline.
_SAMPLE_RATE = 16000
# Window length, overlap between consecutive windows, and the shortest window
# that is still transcribed — all expressed in samples.
_CHUNK_SAMPLES = _SAMPLE_RATE * 20
_OVERLAP_SAMPLES = _SAMPLE_RATE * 2
_MIN_CHUNK_SAMPLES = _SAMPLE_RATE


def _split_audio(samples):
    """Cut a 1-D sample sequence into overlapping fixed-length windows.

    Windows start every ``_CHUNK_SAMPLES - _OVERLAP_SAMPLES`` samples. The walk
    stops as soon as a window reaches the end of the audio, so the final
    overlap is never emitted again as a separate window. A window shorter
    than ``_MIN_CHUNK_SAMPLES`` is dropped.
    """
    total = len(samples)
    step = _CHUNK_SAMPLES - _OVERLAP_SAMPLES
    windows = []
    for start in range(0, total, step):
        window = samples[start:start + _CHUNK_SAMPLES]
        if len(window) < _MIN_CHUNK_SAMPLES:
            break
        windows.append(window)
        if start + _CHUNK_SAMPLES >= total:
            # This window already covers the tail; another one would only
            # repeat audio that has just been transcribed.
            break
    return windows


def _match_key(word):
    """Reduce a word to casefolded alphanumerics for seam comparison."""
    return "".join(ch for ch in word if ch.isalnum()).casefold()


def _join_transcripts(texts, max_overlap_words=12):
    """Join per-window transcripts, dropping words repeated across a seam.

    Consecutive windows share audio, so the start of one transcript tends to
    repeat the end of the previous one. For each new transcript the longest
    run of leading words (at most ``max_overlap_words``) that equals the
    current tail of the merged text — ignoring case and punctuation — is
    removed before appending. This is a heuristic: when the two windows
    transcribe the shared audio differently nothing is removed.
    """
    merged = []
    for text in texts:
        words = text.split()
        limit = min(len(merged), len(words), max_overlap_words)
        tail = [_match_key(w) for w in merged[len(merged) - limit:]]
        head = [_match_key(w) for w in words[:limit]]
        repeated = 0
        for size in range(limit, 0, -1):
            # Require at least one real word so bare punctuation tokens
            # (which normalise to "") cannot count as a match on their own.
            if tail[limit - size:] == head[:size] and any(head[:size]):
                repeated = size
                break
        merged.extend(words[repeated:])
    return " ".join(merged)


@spaces.GPU(duration=120)
def transcribe_safe(audio_path):
    """Transcribe an audio file with the module-level ``pipe``.

    The file is loaded with librosa at 16 kHz, split into 20-second windows
    that overlap by 2 seconds, and all windows are sent to the pipeline in a
    single batched call. The per-window texts are then merged with the
    repeated words at each seam removed.

    Args:
        audio_path: Path of the audio file to transcribe; a falsy value means
            nothing was provided.

    Returns:
        The merged transcript, ``"Hladdu upp hljóðskrá"`` when no path was
        given, or ``"Ekkert heyrt"`` when there is no window to transcribe or
        the transcript comes back empty.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Imported lazily, as in the original, so librosa is only loaded when a
    # transcription is actually requested.
    import librosa

    samples, _ = librosa.load(audio_path, sr=_SAMPLE_RATE)
    windows = _split_audio(samples)
    if not windows:
        return "Ekkert heyrt"

    # Passing the whole list lets the pipeline batch the windows; calling it
    # once per window would make batch_size a no-op.
    results = pipe(windows, batch_size=16)
    transcript = _join_transcripts(result["text"] for result in results)
    return transcript or "Ekkert heyrt"
|
|
|
|
|
# Single-page UI: an audio upload, a button, and a textbox for the transcript.
with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")

    # type="filepath" hands the handler a path on disk rather than raw samples.
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (15–25 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")

    # Clicking the button runs the transcription and shows its return value.
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# Basic-auth login for the app. The values can be overridden through the
# environment so real credentials need not live in the source; the fallbacks
# keep the previously hard-coded login working when the variables are unset
# or empty.
_auth_username = os.getenv("ASR_AUTH_USERNAME") or "beta"
_auth_password = os.getenv("ASR_AUTH_PASSWORD") or "beta2025"

demo.launch(auth=(_auth_username, _auth_password))