sae8d commited on
Commit
00efa0b
·
verified ·
1 Parent(s): a7f35d3

Update app.py

Browse files
Files changed (1):
  1. app.py (+18 -15)
app.py CHANGED
@@ -1,6 +1,7 @@
import gradio as gr
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoFeatureExtractor
 
4
 
5
  # List of your 4 HF Whisper‑style models
6
  # All are Arabic‑focused ASR models; they must be `WhisperTokenizer` / `WhisperFeatureExtractor` compatible
@@ -27,32 +28,34 @@ def _get_pipeline(model_id):
27
 
28
# Single transcription function that runs all 4 models
def compare_on_mic(audio):
    """
    Run the same mic recording through every model in `model_ids`.

    audio: (sample_rate, numpy array) pair from the Gradio mic component,
           or None when nothing was recorded.
    Returns a list of 4 per-model transcriptions plus one merged
    side-by-side cell (5 strings total).
    """
    if audio is None:
        return ["No audio input"] * 5  # 4 transcriptions + one merged cell

    sr, y = audio  # Gradio mic delivers raw int16 PCM samples

    # FIX: Whisper feature extractors expect float32 in [-1.0, 1.0];
    # feeding raw int16 produces garbage transcriptions.
    if y.dtype == np.int16:
        y = y.astype(np.float32) / 32768.0

    # FIX: downmix stereo to mono. Gradio shape is (samples, channels),
    # so the channel axis is the last one.
    if y.ndim > 1:
        y = y.mean(axis=-1)

    outputs = []
    all_texts = []
    for model_id in model_ids:
        try:
            pipe = _get_pipeline(model_id)
            # Run ASR on the same mic sample
            result = pipe({"sampling_rate": sr, "raw": y})
            text = result["text"].strip()
        except Exception as e:
            text = f"[Error on {model_id.split('/')[-1]}: {str(e)[:80]}]"
        outputs.append(text)
        all_texts.append(f"**{model_id.split('/')[-1]}**: {text}")

    # One merged view for quick comparison
    merged_text = "\n\n".join(all_texts)
    return outputs + [merged_text]
 
56
 
57
  # Build Gradio layout
58
  with gr.Blocks(title="Compare 4 Arabic Quran Whisper Models") as demo:
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer, AutoFeatureExtractor
3
  import torch
4
+ import numpy as np # Add this import at top
5
 
6
  # List of your 4 HF Whisper‑style models
7
  # All are Arabic‑focused ASR models; they must be `WhisperTokenizer` / `WhisperFeatureExtractor` compatible
 
28
 
29
# Single transcription function that runs all 4 models
def compare_on_mic(audio):
    """
    Transcribe one mic recording with every model in `model_ids`.

    audio: (sample_rate, numpy array) pair from the Gradio mic component,
           or None when nothing was recorded.
    Returns 4 markdown-labelled per-model transcriptions plus one merged
    side-by-side view (5 strings total).
    """
    if audio is None:
        return ["No audio input"] * 5

    sr, y = audio  # y is numpy.int16 from the Gradio mic

    # Convert int16 -> float32 and normalize (Whisper expects [-1.0, 1.0]).
    if y.dtype == np.int16:
        y = y.astype(np.float32) / 32768.0

    # FIX: ensure mono. Gradio audio is shaped (samples, channels), so the
    # channel axis is the LAST one; np.mean(y, axis=0) would average away
    # the time axis and destroy the recording.
    if y.ndim > 1:
        y = np.mean(y, axis=-1)

    all_texts = []
    for model_id in model_ids:
        try:
            pipe = _get_pipeline(model_id)
            # Pass the normalized float32 numpy array to the ASR pipeline.
            result = pipe({"sampling_rate": sr, "raw": y})
            text = result["text"].strip()
        except Exception as e:
            text = f"[Error: {str(e)[:80]}]"
        all_texts.append(f"**{model_id.split('/')[-1]}**: {text}")

    merged_text = "\n\n".join(all_texts)
    return all_texts + [merged_text]  # 4 individual + 1 merged
59
 
60
  # Build Gradio layout
61
  with gr.Blocks(title="Compare 4 Arabic Quran Whisper Models") as demo: