ASR_API2 / app.py
palli23's picture
Update app.py
2055ff2 verified
raw
history blame
2.41 kB
# app.py
import os
# Runtime tuning, set ahead of the ML imports that follow:
#   * OMP_NUM_THREADS=1 keeps OpenMP-backed libraries to a single thread.
#   * PYTORCH_CUDA_ALLOC_CONF caps CUDA allocator block splitting at 128 MB.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
import gradio as gr
import spaces
from transformers import pipeline
# ——————————————————————————————
# Model loaded ONCE at startup (global)
# ——————————————————————————————
MODEL_NAME = "palli23/whisper-large-v3-is-samromur-20-40s-3x-final"


def get_pipe():
    """Build the speech-recognition pipeline for ``MODEL_NAME``.

    This loader is intentionally not wrapped in ``@spaces.GPU``. On ZeroGPU
    hardware that decorator attaches a GPU only for the duration of the
    decorated call, so it belongs on the per-request inference function
    (``transcribe_3min``), while the model itself is created once at import
    time. On dedicated-GPU hardware the decorator is documented as having no
    effect, so this arrangement works in both environments.

    Returns:
        A transformers ``automatic-speech-recognition`` pipeline configured
        for float16 weights on CUDA device 0.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        torch_dtype="float16",
        device=0,  # first CUDA device (original note: T4 GPU)
        # Reads the access token from the environment; the original note says
        # this argument can be removed if the model repository is public.
        token=os.getenv("HF_TOKEN"),
    )


# Created once when the module is imported and shared by every request.
pipe = get_pipe()


# ——————————————————————————————
# Transcription function
# ——————————————————————————————
@spaces.GPU(duration=180)  # request the GPU for up to 180 s per call
def transcribe_3min(audio_path) -> str:
    """Transcribe an uploaded audio file with the shared ``pipe``.

    Args:
        audio_path: Filesystem path of the audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input. Falsy (``None`` or ``""``)
            when nothing has been uploaded.

    Returns:
        The transcribed text, or the prompt ``"Hladdu upp hljóðskrá"`` when no
        audio was supplied.
    """
    # Guard clause: nothing uploaded yet, so ask the user for a file instead
    # of handing an empty path to the pipeline.
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Long-form decoding: 30 s chunks with a 6 s left-hand overlap, eight
    # chunks per batch, and plain text only (no timestamps).
    result = pipe(
        audio_path,
        chunk_length_s=30,
        stride_length_s=(6, 0),
        batch_size=8,
        return_timestamps=False,
    )
    return result["text"]
# ——————————————————————————————
# Gradio UI
# ——————————————————————————————
# Single-page layout: heading, audio upload, a transcribe button and a text
# box that receives the transcription.
# NOTE(review): the page title and heading say 3 minutes, while the
# description and the upload label say 5 minutes — confirm the intended limit.
with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**Whisper · mjög lágur WER · allt að 5 mín hljóð**")

    # The uploaded file is passed to the callback as a filesystem path.
    audio_in = gr.Audio(
        label="Hladdu upp .mp3 / .wav (max 5 mín)",
        type="filepath",
    )
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    # Button click: run the transcription on the upload and show the text.
    btn.click(transcribe_3min, inputs=audio_in, outputs=output)
# ——————————————————————————————
# Stable launch (no more infinite Building)
# ——————————————————————————————
# Login credentials for the app. They are read from the environment so they
# can be rotated (e.g. via Space secrets) without editing the source; the
# fallbacks keep the previous hard-coded values so existing deployments behave
# the same.
# NOTE(review): the fallback password is visible in the repository — set
# APP_AUTH_USER / APP_AUTH_PASSWORD for anything beyond a throwaway beta.
_auth_user = os.getenv("APP_AUTH_USER", "beta")
_auth_password = os.getenv("APP_AUTH_PASSWORD", "beta2025")

demo.launch(
    auth=(_auth_user, _auth_password),
    ssr_mode=False,         # server-side rendering explicitly disabled
    show_error=True,        # surface callback exceptions in the UI
    server_name="0.0.0.0",  # listen on all interfaces
    server_port=7860,
    quiet=False,
)