sae8d commited on
Commit
be877ec
·
verified ·
1 Parent(s): 80f3a2c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoFeatureExtractor
4
+
5
+ # List of your 4 HF Whisper‑style models
6
+ # All are Arabic‑focused ASR models; they must be `WhisperTokenizer` / `WhisperFeatureExtractor` compatible
7
+ model_ids = [
8
+ "IJyad/whisper-large-v3-Tarteel",
9
+ "deepdml/whisper-medium-ar-quran-mix-norm",
10
+ "naazimsnh02/whisper-large-v3-turbo-ar-quran",
11
+ "Habib-HF/tarbiyah-ai-whisper-medium-merged",
12
+ ]
13
+
14
+ # Caching pipelines to save GPU VRAM (they share tokenizer/feature_extractor if compatible)
15
+ _registry = {}
16
+
17
def _get_pipeline(model_id):
    """Return the ASR pipeline for *model_id*, creating and caching it on first use.

    The "automatic-speech-recognition" task wires up the Whisper tokenizer and
    feature extractor automatically, so only the model id is needed.
    """
    cached = _registry.get(model_id)
    if cached is None:
        # Prefer the first CUDA device when available; -1 selects CPU.
        cached = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            device=0 if torch.cuda.is_available() else -1,
        )
        _registry[model_id] = cached
    return cached
27
+
28
# Single transcription function that runs all 4 models on the same mic buffer.
def compare_on_mic(audio):
    """Transcribe one mic recording with every model in ``model_ids``.

    Parameters
    ----------
    audio : tuple[int, numpy.ndarray] | None
        ``(sample_rate, samples)`` as produced by a Gradio mic component with
        ``type="numpy"``, or ``None`` when the input was cleared.

    Returns
    -------
    list[str]
        Four per-model transcriptions followed by one merged side-by-side
        string (5 items total). Per-model failures are reported inline as
        ``[Error on ...]`` cells instead of raising.
    """
    if audio is None:
        return ["No audio input"] * 5  # 4 transcriptions + one "merged" cell

    sr, y = audio

    # Gradio's numpy mic delivers int16 PCM (and possibly stereo), but the HF
    # ASR pipeline's {"raw": ...} input expects mono float32 in [-1, 1].
    # Normalize first (while the integer dtype is still known), then mix down.
    y = np.asarray(y)
    if np.issubdtype(y.dtype, np.integer):
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    else:
        y = y.astype(np.float32)
    if y.ndim > 1:
        # Stereo/multichannel: average channels (assumed on the last axis).
        y = y.mean(axis=-1)

    outputs = []
    all_texts = []

    for model_id in model_ids:
        try:
            pipe = _get_pipeline(model_id)
            # Run ASR on the same mic sample for every model.
            result = pipe({"sampling_rate": sr, "raw": y})
            text = result["text"].strip()
        except Exception as e:
            # Keep the comparison grid usable even if one checkpoint fails.
            text = f"[Error on {model_id.split('/')[-1]}: {str(e)[:80]}]"
        outputs.append(text)
        all_texts.append(f"**{model_id.split('/')[-1]}**: {text}")

    # One merged view for quick visual comparison.
    merged_text = "\n\n".join(all_texts)
    return outputs + [merged_text]
56
+
57
# Build the Gradio UI: one mic input feeding four transcription boxes plus a
# combined comparison box.
with gr.Blocks(title="Compare 4 Arabic Quran Whisper Models") as demo:
    gr.Markdown("""
# Compare Whisper‑style ASR models on mic samples
Click **Record** and speak (preferably Arabic Qur’ān / tajweed content).
All 4 models will transcribe the **same** mic buffer side‑by‑side.
""")

    with gr.Row():
        mic_input = gr.Microphone(
            label="🎙️ Mic Input",
            type="numpy",
            interactive=True,
        )

    # One column per model, generated from the shared model list so the
    # headings always match what actually runs.
    model_boxes = []
    with gr.Row():
        for idx, model_id in enumerate(model_ids, start=1):
            with gr.Column():
                gr.Markdown(f"### {idx}. `{model_id}`")
                model_boxes.append(gr.Textbox(label="Transcription", lines=4))

    # One big comparison box — helps spot differences at a glance.
    with gr.Row():
        gr.Markdown("### Side‑by‑side comparison")
        out_all = gr.Textbox(label="All models together", lines=8)

    # Any change to the mic buffer re-runs inference across all models.
    mic_input.change(
        fn=compare_on_mic,
        inputs=[mic_input],
        outputs=model_boxes + [out_all],
    )

demo.launch(debug=False)  # Hugging Face Spaces will override host/port