1morecupofhottea committed on
Commit
ec35154
Β·
1 Parent(s): 962a236

Update design

Browse files
Files changed (1) hide show
  1. app.py +249 -16
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import re
 
4
 
5
  # Load both ASR models
6
  whisper_asr = pipeline("automatic-speech-recognition", model="1morecupofhottea/Whisper-Code-Switching-Kh-En")
@@ -11,28 +12,260 @@ def clean_transcript(text: str) -> str:
11
  return re.sub(r"</?[^>]+>", "", text).strip()
12
 
13
def transcribe(audio, model_choice):
    """Transcribe *audio* with the ASR pipeline selected by *model_choice*.

    audio: filepath string from the gr.Audio component (type="filepath").
    model_choice: "Whisper" routes to whisper_asr; any other value
    (the UI offers "Wav2Vec2") routes to wav2vec_asr.
    Returns the raw transcription text; Wav2Vec2 output is first stripped
    of <tag>-style tokens by clean_transcript.
    """
    if model_choice == "Whisper":
        result = whisper_asr(audio)
        return result["text"]
    else:
        result = wav2vec_asr(audio)
        # Wav2Vec2 output may contain angle-bracket tokens; strip them.
        return clean_transcript(result["text"])
 
 
 
 
 
 
 
20
 
21
# Minimal two-column Gradio UI: audio in + model picker, transcription out.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ ASR Demo: Choose Your Model")
    gr.Markdown("Upload or record audio, then select which ASR model to use.")

    with gr.Row():
        # type="filepath" means transcribe() receives a path string, not raw audio.
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input Audio")
        # Choices must match the exact strings transcribe() compares against.
        model_selector = gr.Dropdown(choices=["Whisper", "Wav2Vec2"], value="Whisper", label="Select Model")

    output_text = gr.Textbox(label="Transcription")

    transcribe_button = gr.Button("Transcribe")

    # Wire the button to the ASR function; runs on every click.
    transcribe_button.click(
        fn=transcribe,
        inputs=[audio_input, model_selector],
        outputs=output_text
    )

demo.launch()
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import re
4
+ import time
5
 
6
# Load both ASR models eagerly at import time (the first run downloads the
# model weights from the Hugging Face Hub, so startup can be slow).
# NOTE(review): the matching wav2vec_asr pipeline load is presumably the next
# line, hidden by the diff hunk — verify against the full file.
whisper_asr = pipeline("automatic-speech-recognition", model="1morecupofhottea/Whisper-Code-Switching-Kh-En")
 
12
  return re.sub(r"</?[^>]+>", "", text).strip()
13
 
14
def transcribe(audio, model_choice):
    """Transcribe *audio* with the selected ASR pipeline and format the result.

    Parameters
    ----------
    audio : str | None
        Filepath from the gr.Audio component (type="filepath"); ``None``
        when the user has not recorded/uploaded anything yet.
    model_choice : str
        Dropdown label. Any label containing "Whisper" routes to the
        Whisper pipeline; everything else routes to Wav2Vec2.

    Returns
    -------
    str
        The formatted transcription, or a message starting with "❌"
        describing why transcription did not happen.
    """
    if audio is None:
        return "❌ Please upload or record an audio file first!"

    try:
        # Match on the substring "Whisper" instead of the exact emoji-laden
        # label "🎯 Whisper (Recommended)", so routing survives cosmetic
        # label changes (and the older plain "Whisper" label still works).
        if "Whisper" in model_choice:
            text = whisper_asr(audio)["text"]
        else:
            # Wav2Vec2 output may contain <tag>-style tokens; strip them.
            text = clean_transcript(wav2vec_asr(audio)["text"])
        return f"✅ **Transcription Complete**\n\n{text}"
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Gradio event handler.
        return f"❌ Error during transcription: {str(e)}"
28
 
29
def clear_all():
    """Reset the UI: drop the audio, restore the default model, wipe the output."""
    default_model = "🎯 Whisper (Recommended)"
    return (None, default_model, "")
 
31
 
32
# Custom CSS for modern styling.
# Injected into gr.Blocks(css=...); class names below must match the
# elem_classes values used when building the interface.
custom_css = """
/* Global Styles */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}

/* Header Styling */
.header-section {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 20px;
    padding: 30px;
    margin-bottom: 25px;
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.header-section h1 {
    background: linear-gradient(45deg, #667eea, #764ba2);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em !important;
    font-weight: 700 !important;
    text-align: center;
    margin-bottom: 15px;
}

.header-section p {
    color: #555;
    font-size: 1.1em;
    text-align: center;
    margin: 0;
    line-height: 1.6;
}

/* Main Content Cards */
.input-card, .output-card {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 25px;
    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
    margin-bottom: 20px;
}

/* Button Styling */
.primary-button {
    background: linear-gradient(45deg, #667eea, #764ba2) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 15px 30px !important;
    font-size: 1.1em !important;
    font-weight: 600 !important;
    color: white !important;
    box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.primary-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
}

.secondary-button {
    background: linear-gradient(45deg, #ff6b6b, #ee5a24) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 12px 25px !important;
    font-size: 1em !important;
    font-weight: 600 !important;
    color: white !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.4) !important;
}

/* Audio Component Styling */
.audio-component {
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
    background: #f8faff !important;
}

/* Dropdown Styling */
.dropdown-component {
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
}

/* Output Text Styling */
.output-text {
    background: #f8faff !important;
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
    padding: 20px !important;
    font-size: 1.05em !important;
    line-height: 1.6 !important;
}

/* Features Section */
.features-section {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 25px;
    margin-top: 25px;
    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.feature-item {
    margin-bottom: 10px;
    color: #555;
    font-size: 1.05em;
}

/* Responsive Design */
@media (max-width: 768px) {
    .header-section h1 {
        font-size: 2em !important;
    }

    .input-card, .output-card {
        padding: 20px;
        margin-bottom: 15px;
    }
}
"""
161
+
162
# Create the main interface: header, input/output cards, feature list, and
# event wiring. Class names in elem_classes must match selectors in custom_css.
with gr.Blocks(css=custom_css, title="🎙️ CS-ASR | Code-Switching Speech Recognition") as demo:
    # Header Section
    with gr.Column(elem_classes="header-section"):
        gr.HTML("""
            <h1>🎙️ Code-Switching ASR Studio</h1>
            <p>Advanced Speech Recognition for Khmer-English Code-Switching</p>
            <p>✨ Powered by state-of-the-art Whisper and Wav2Vec2 models ✨</p>
        """)

    # Main Content: two equal-width cards side by side.
    with gr.Row():
        # Input Section
        with gr.Column(scale=1, elem_classes="input-card"):
            gr.HTML("<h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600;'>🎡 Audio Input</h3>")

            # type="filepath": transcribe() receives a path string, not raw samples.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio",
                elem_classes="audio-component"
            )

            # NOTE(review): transcribe() branches on this label text, so the
            # choice strings and the routing logic must stay in sync.
            model_selector = gr.Dropdown(
                choices=[
                    "🎯 Whisper (Recommended)",
                    "⚡ Wav2Vec2 (Fast)"
                ],
                value="🎯 Whisper (Recommended)",
                label="🤖 Select AI Model",
                elem_classes="dropdown-component",
                info="Choose the model that best fits your needs"
            )

            # Action Buttons
            with gr.Row():
                transcribe_button = gr.Button(
                    "🚀 Start Transcription",
                    variant="primary",
                    elem_classes="primary-button",
                    scale=2
                )
                clear_button = gr.Button(
                    "🗑️ Clear All",
                    elem_classes="secondary-button",
                    scale=1
                )

        # Output Section
        with gr.Column(scale=1, elem_classes="output-card"):
            gr.HTML("<h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600;'>📝 Transcription Result</h3>")

            # interactive=False: read-only display for transcribe() results.
            output_text = gr.Textbox(
                label="Your Transcription Will Appear Here",
                placeholder="🎀 Upload an audio file and click 'Start Transcription' to see the magic happen!",
                lines=12,
                elem_classes="output-text",
                interactive=False
            )

    # Features Section (static marketing copy).
    with gr.Column(elem_classes="features-section"):
        gr.HTML("""
            <h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600; text-align: center;'>🌟 Key Features</h3>
            <div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px; margin-top: 20px;'>
                <div class='feature-item'>
                    <strong>🎯 Dual Model Support:</strong> Choose between Whisper and Wav2Vec2 for optimal results
                </div>
                <div class='feature-item'>
                    <strong>🌍 Code-Switching Ready:</strong> Seamlessly handles Khmer-English mixed speech
                </div>
                <div class='feature-item'>
                    <strong>🎀 Flexible Input:</strong> Record live or upload existing audio files
                </div>
                <div class='feature-item'>
                    <strong>⚡ Real-time Processing:</strong> Fast and accurate transcription results
                </div>
                <div class='feature-item'>
                    <strong>🎨 Modern Interface:</strong> Beautiful, responsive design for all devices
                </div>
                <div class='feature-item'>
                    <strong>🔧 Easy to Use:</strong> No technical knowledge required - just click and transcribe!
                </div>
            </div>
        """)

    # Event Handlers
    transcribe_button.click(
        fn=transcribe,
        inputs=[audio_input, model_selector],
        outputs=output_text,
        show_progress=True
    )

    # clear_all returns (audio, model, text) in the same order as outputs.
    clear_button.click(
        fn=clear_all,
        outputs=[audio_input, model_selector, output_text]
    )

    # Auto-transcribe when audio is uploaded (optional).
    # NOTE(review): uploading triggers transcription here AND again if the
    # user then clicks the button — the audio is transcribed twice; confirm
    # this duplication is intended.
    audio_input.change(
        fn=lambda audio, model: transcribe(audio, model) if audio is not None else "",
        inputs=[audio_input, model_selector],
        outputs=output_text,
        show_progress=True
    )
268
 
269
# Start the Gradio server only when run as a script (not on import, e.g.
# when a Space runner imports this module).
if __name__ == "__main__":
    demo.launch()