Create app.py

app.py ADDED (+98 lines)
import gradio as gr
from transformers import pipeline
import torch
import numpy as np

# List of the 4 HF Whisper-style models to compare
# All are Arabic-focused ASR models compatible with `WhisperTokenizer` / `WhisperFeatureExtractor`
model_ids = [
    "IJyad/whisper-large-v3-Tarteel",
    "deepdml/whisper-medium-ar-quran-mix-norm",
    "naazimsnh02/whisper-large-v3-turbo-ar-quran",
    "Habib-HF/tarbiyah-ai-whisper-medium-merged",
]

# Cache pipelines so each model is loaded from the Hub only once per process
_registry = {}

def _get_pipeline(model_id):
    if model_id not in _registry:
        # The ASR pipeline wires up the Whisper tokenizer + feature extractor automatically
        pipe = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            device=0 if torch.cuda.is_available() else -1,
        )
        _registry[model_id] = pipe
    return _registry[model_id]

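# Note (not in the original file): four Whisper checkpoints, two of them
# large-v3 variants, are heavy to keep resident at once. If the Space has a
# GPU, one possible tweak is loading each pipeline in half precision, e.g.:
#
#     pipe = pipeline(
#         "automatic-speech-recognition",
#         model=model_id,
#         torch_dtype=torch.float16,
#         device=0,
#     )
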
# Single transcription function that runs all 4 models on the same sample
def compare_on_mic(audio):
    """
    audio: (sample_rate, numpy array) from the Gradio mic component.
    Returns one transcription per model, plus a merged side-by-side view.
    """
    if audio is None:
        return ["No audio input"] * 5  # 4 transcriptions + one merged cell

    sr, y = audio

    # Gradio's numpy mic output is int16 PCM (possibly stereo); the ASR
    # pipeline expects mono float32, so downmix and normalise first
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.abs(y).max()
    if peak > 0:
        y /= peak

    outputs = []
    all_texts = []

    for model_id in model_ids:
        try:
            pipe = _get_pipeline(model_id)
            # Run ASR on the same mic sample
            result = pipe({"sampling_rate": sr, "raw": y})
            text = result["text"].strip()
        except Exception as e:
            text = f"[Error on {model_id.split('/')[-1]}: {str(e)[:80]}]"
        outputs.append(text)
        all_texts.append(f"**{model_id.split('/')[-1]}**: {text}")

    # One merged view for quick comparison
    merged_text = "\n\n".join(all_texts)
    return outputs + [merged_text]

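# A quick local smoke-test sketch (assumption: run on a dev machine, not left
# active in the Space) could feed a synthetic one-second int16 buffer through
# the same path:
#
#     sr = 16000
#     t = np.linspace(0, 1, sr, endpoint=False)
#     tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
#     print(compare_on_mic((sr, tone)))
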
# Build Gradio layout
with gr.Blocks(title="Compare 4 Arabic Quran Whisper Models") as demo:
    gr.Markdown("""
    # Compare Whisper-style ASR models on mic samples
    Click **Record** and speak (preferably Arabic Qur’ān / tajweed content).
    All 4 models will transcribe the **same** mic buffer side by side.
    """)

    with gr.Row():
        mic_input = gr.Microphone(
            label="🎙️ Mic Input",
            type="numpy",
            interactive=True,
        )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. `IJyad/whisper-large-v3-Tarteel`")
            out1 = gr.Textbox(label="Transcription", lines=4)
        with gr.Column():
            gr.Markdown("### 2. `deepdml/whisper-medium-ar-quran-mix-norm`")
            out2 = gr.Textbox(label="Transcription", lines=4)
        with gr.Column():
            gr.Markdown("### 3. `naazimsnh02/whisper-large-v3-turbo-ar-quran`")
            out3 = gr.Textbox(label="Transcription", lines=4)
        with gr.Column():
            gr.Markdown("### 4. `Habib-HF/tarbiyah-ai-whisper-medium-merged`")
            out4 = gr.Textbox(label="Transcription", lines=4)

    # One big comparison box (optional, helps see differences at a glance)
    with gr.Row():
        gr.Markdown("### Side-by-side comparison")
        out_all = gr.Textbox(label="All models together", lines=8)

    # Connect the mic to the inference function (one output component per textbox)
    mic_input.change(
        fn=compare_on_mic,
        inputs=[mic_input],
        outputs=[out1, out2, out3, out4, out_all],
    )

demo.launch(debug=False)  # Hugging Face Spaces will override host/port
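
# Note (not part of the committed file): a Space running this app also needs a
# requirements.txt. A minimal sketch based only on the imports above, with
# versions left unpinned as an assumption:
#
#     gradio
#     transformers
#     torch
#     torchaudio  # assumption: used by the pipeline to resample mic audio
#     numpy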