ASR_API / app.py
palli23's picture
fix transcribe bug
faa307f
raw
history blame
1.77 kB
# app.py – works on all Spaces (even ones running older Gradio versions)
import os
import gradio as gr
import spaces
from transformers import pipeline
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model. The Hub access token is read from HF_TOKEN.
print("Hleð Whisper módelinu einu sinni...")
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",
    token=os.getenv("HF_TOKEN"),
)

# Compatibility patch for older Whisper generation configs that carry no
# language/task id tables: fill them in and clear any forced decoder ids.
gen_cfg = pipe.model.generation_config
if getattr(gen_cfg, "lang_to_id", None) is None:
    # NOTE(review): in the stock multilingual Whisper vocabulary 50259 is the
    # <|en|> token and <|is|> is 50311 - confirm this id is intentional.
    gen_cfg.lang_to_id = {"is": 50259}
    gen_cfg.task_to_id = {"transcribe": 50359, "translate": 50358}
    # NOTE(review): confirm this reset is meant to happen only when the
    # tables above were missing, rather than unconditionally.
    gen_cfg.forced_decoder_ids = None

print("Módel tilbúið!")
@spaces.GPU(duration=180)
def transcribe_single(audio_path):
    """Transcribe one uploaded audio file with the module-level Whisper pipeline.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            (``None`` / empty string) when nothing has been uploaded.

    Returns:
        A 3-tuple in the order of the click handler's outputs
        (audio component, transcript textbox, timer): the audio value to
        display, the transcript text (or a prompt asking for an upload), and
        an update that deactivates the timer.
    """
    # The third output is a gr.Timer. Its value is an interval in seconds, so
    # a string such as "00:00" or a bare None is not a meaningful update;
    # an explicit inactive Timer is the documented way to switch it off.
    timer_off = gr.Timer(active=False)

    if not audio_path:
        return None, "Hladdu upp hljóðskrá fyrst", timer_off

    # Long recordings are processed in 30-second chunks, 8 chunks per batch.
    result = pipe(audio_path, chunk_length_s=30, batch_size=8)
    return audio_path, result["text"].strip(), timer_off
# Single-page UI: upload one audio file, press the button, read the transcript.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
    gr.Markdown("Hladdu upp einni skrá (allt að 5 mín) → Transcribe")

    # type="filepath" hands the handler a path on disk instead of raw samples.
    audio_in = gr.Audio(label="Hljóðskrá", type="filepath")
    btn = gr.Button("Transcribe", variant="primary", size="lg")

    # Plain timer created without label/active/visible arguments.
    # NOTE(review): the original comment claimed Gradio 3.x compatibility;
    # confirm gr.Timer exists in the Gradio version this Space pins.
    timer = gr.Timer(value=180)
    output = gr.Textbox(label="Útskrift", lines=20)

    # The handler returns one value per output component, in this order.
    btn.click(
        fn=transcribe_single,
        inputs=audio_in,
        outputs=[audio_in, output, timer],
    )
# Basic-auth login for the demo. Credentials are taken from the APP_USERNAME /
# APP_PASSWORD environment variables (e.g. Space secrets) so they do not have
# to live in the repository.
# TODO(review): the fallbacks below keep the previous hard-coded login working;
# set both variables and remove the defaults once the secrets are configured.
demo.launch(
    auth=(
        os.getenv("APP_USERNAME", "beta"),
        os.getenv("APP_PASSWORD", "beta2025"),
    )
)