sae8d committed on
Commit
63ef013
·
verified ·
1 Parent(s): adc6a85

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
+
5
# Pick the execution device and a matching tensor precision in one step:
# half precision on the first CUDA GPU (for efficiency/memory), full
# precision on CPU.
if torch.cuda.is_available():
    device, dtype = "cuda:0", torch.float16
else:
    device, dtype = "cpu", torch.float32
8
+
9
# Hub checkpoints to compare; pipe1..pipe4 are built from these in order.
_MODEL_IDS = (
    "IJyad/whisper-large-v3-Tarteel",
    "deepdml/whisper-medium-ar-quran-mix-norm",
    "naazimsnh02/whisper-large-v3-turbo-ar-quran",
    "Habib-HF/tarbiyah-ai-whisper-medium-merged",
)


def _load_asr(model_id):
    """Return a speech-recognition pipeline for *model_id*.

    Uses the module-level ``device`` and ``dtype`` and enables 30-second
    chunking so that long audio can be processed.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=model_id,
        torch_dtype=dtype,
        device=device,
        chunk_length_s=30,
    )


# Load all 4 models eagerly, one after another, at import time.
pipe1, pipe2, pipe3, pipe4 = map(_load_asr, _MODEL_IDS)
38
+
39
def transcribe(audio):
    """Transcribe *audio* with all four models.

    Args:
        audio: The audio input forwarded unchanged to each pipeline, or
            ``None`` when nothing was recorded or uploaded.

    Returns:
        A 4-tuple of strings, one per model in the order
        ``pipe1, pipe2, pipe3, pipe4``. When *audio* is ``None`` every
        entry is the placeholder ``"No audio"``.
    """
    if audio is None:
        return ("No audio",) * 4

    # Force Arabic language for consistency (these models are Arabic/Quran
    # specialized).
    generation_options = {"language": "arabic", "task": "transcribe"}

    return tuple(
        asr(audio, generate_kwargs=generation_options)["text"]
        for asr in (pipe1, pipe2, pipe3, pipe4)
    )
52
+
53
def _result_box(model_id):
    """Add a column headed by *model_id* and return its transcription box."""
    with gr.Column():
        gr.Markdown(f"### {model_id}")
        box = gr.Textbox(label="Transcription", lines=6, rtl=True)
    return box


# Build the comparison UI: one audio input, one button, and a 2x2 grid of
# right-to-left text boxes, one per model.
with gr.Blocks(title="Quran Whisper Models Comparison") as demo:
    gr.Markdown("""
    # Quran ASR Models Comparison

    Upload or record a short Quranic recitation and compare transcriptions side-by-side.

    Models:
    - IJyad/whisper-large-v3-Tarteel (large-v3, high accuracy)
    - deepdml/whisper-medium-ar-quran-mix-norm (medium)
    - naazimsnh02/whisper-large-v3-turbo-ar-quran (turbo, fast & accurate)
    - Habib-HF/tarbiyah-ai-whisper-medium-merged (medium, merged general + Quran)
    """)

    # type="filepath" makes the callback receive a path to the audio file.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Record from mic or upload audio file (WAV/MP3, preferably Quran recitation)",
    )

    btn = gr.Button("Transcribe with all 4 models")

    with gr.Row():
        out1 = _result_box("IJyad/whisper-large-v3-Tarteel")
        out2 = _result_box("deepdml/whisper-medium-ar-quran-mix-norm")

    with gr.Row():
        out3 = _result_box("naazimsnh02/whisper-large-v3-turbo-ar-quran")
        out4 = _result_box("Habib-HF/tarbiyah-ai-whisper-medium-merged")

    # The output order must match the tuple order returned by transcribe().
    btn.click(transcribe, inputs=audio_input, outputs=[out1, out2, out3, out4])

    gr.Markdown("""
    **Notes:**
    - Best for short Quran recitations (mic recordings are usually <30s).
    - Transcriptions are plain Arabic text (no tashkeel/diacritics in most cases).
    - GPU highly recommended — CPU will be slow.
    - These models are Quran-specialized; general Arabic speech may not work well.
    """)
99
+
100
# Enable the request queue (helps with concurrent users), then start the app.
demo.queue()
demo.launch()