Spaces:

palli23
/

ASR_API

Running on Zero

App Files Files Community

palli23 committed on Dec 5, 2025

Commit

a0182fe

verified ·

1 Parent(s): cf36ab0

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -85

app.py CHANGED Viewed

@@ -1,110 +1,70 @@
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gradio as gr
 import spaces
-import whisperx
-# -----------------------------
-# MODEL SETTINGS
-# -----------------------------
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
-HF_TOKEN = os.getenv("HF_TOKEN")
-# -----------------------------
-# LOAD MODELS ONCE (GPU)
-# -----------------------------
 @spaces.GPU(duration=180)
-def load_all_models():
-    device = "cuda"
-    # 1. Whisper-small model
-    asr_model = whisperx.load_model(
-        MODEL_NAME,
-        device=device,
-        compute_type="float16"
-    )
-    # 2. Alignment model
-    align_model, metadata = whisperx.load_align_model(
-        language_code="is",
-        device=device
-    )
-    # 3. Diarization model (pyannote)
-    diar_model = whisperx.DiarizationPipeline(
-        model_name="pyannote/speaker-diarization-3.1",
-        device=device,
-        use_auth_token=HF_TOKEN
     )
-    return asr_model, align_model, metadata, diar_model
-asr_model, align_model, align_metadata, diar_model = load_all_models()
-# -----------------------------
-# TRANSCRIPTION + DIARIZATION
-# -----------------------------
-def transcribe_is_with_diar(audio_path):
     if not audio_path:
         return "Hladdu upp hljóðskrá"
-    # Load audio
-    audio = whisperx.load_audio(audio_path)
-    # --- 1. ASR with Whisper-small
-    asr_result = asr_model.transcribe(
-        audio,
-        batch_size=8
-    )
-    # --- 2. Alignment (word timestamps)
-    aligned = whisperx.align(
-        asr_result["segments"],
-        align_model,
-        align_metadata,
-        audio,
-        device="cuda"
     )
-    # --- 3. Diarization
-    diarization = diar_model(audio)
-    # --- 4. Merge diarization + words
-    final = whisperx.assign_word_speakers(diarization, aligned)
-    # Format output text
-    output_lines = []
-    for seg in final["segments"]:
-        speaker = seg.get("speaker", "SPEAKER_00")
-        text = seg.get("text", "")
-        output_lines.append(f"[{speaker}] {text}")
-    return "\n".join(output_lines)
-# -----------------------------
-# BUILD GRADIO UI
-# -----------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("# 🇮🇸 Íslenskt ASR + Raddgreining (Diarization)")
-    gr.Markdown("**Whisper-small + WhisperX** — Hljóð allt að 5 mínútur")
     audio_in = gr.Audio(
         type="filepath",
-        label="Hladdu upp hljóði (.mp3 / .wav)"
     )
-    btn = gr.Button("Transcribe", variant="primary")
-    output = gr.Textbox(lines=30, label="Útskrift með raddgreiningu")
-    btn.click(fn=transcribe_is_with_diar, inputs=audio_in, outputs=output)
 demo.launch(
-    auth=None,
-    share=True,
     server_name="0.0.0.0",
-    server_port=7860
-)

+# app.py — Íslenskt ASR – 3 mínútur (public, no login, with contact)
 import os
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 import gradio as gr
 import spaces
+from transformers import pipeline
+# ——————————————————————————————
+# Model loaded ONCE at startup (global)
+# ——————————————————————————————
 MODEL_NAME = "palli23/whisper-small-sam_spjall"
 @spaces.GPU(duration=180)
+def get_pipe():
+    return pipeline(
+        "automatic-speech-recognition",
+        model=MODEL_NAME,
+        torch_dtype="float16",
+        device=0,
+        token=os.getenv("HF_TOKEN"),
     )
+pipe = get_pipe()
+# ——————————————————————————————
+# Transcription function
+# ——————————————————————————————
+def transcribe_3min(audio_path):
     if not audio_path:
         return "Hladdu upp hljóðskrá"
+    result = pipe(
+        audio_path,
+        chunk_length_s=30,
+        stride_length_s=(6, 0),
+        batch_size=8,
+        return_timestamps=False,
     )
+    return result["text"]
+# ——————————————————————————————
+# UI — Gradio Blocks layout: title, description, contact e-mail, audio input, transcript output
+# ——————————————————————————————
+with gr.Blocks() as demo:  # no 'theme=' argument here (it was causing an error)
+    gr.Markdown("# Íslenskt ASR – 3 mínútur")
+    gr.Markdown("**Whisper small· mjög lágur WER á prófunarupptökum · allt að 5 mín hljóð**")
+    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
     audio_in = gr.Audio(
         type="filepath",
+        label="Hladdu upp .mp3 / .wav (max 5 mín)"
     )
+    btn = gr.Button("Transcribe", variant="primary", size="lg")
+    output = gr.Textbox(lines=30, label="Útskrift")
+    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
+# ——————————————————————————————
+# PUBLIC — NO LOGIN, NO PASSWORD
+# ——————————————————————————————
 demo.launch(
+    auth=None,                    # ← No login
+    share=True,                   # ← Public
     server_name="0.0.0.0",
+    server_port=7860,
+    show_error=True,
+    quiet=False
+)