gnumanth committed on
Commit
da8393f
·
verified ·
1 Parent(s): 306c2bc

Simplified working Gradio UI with standard components

Browse files
Files changed (1) hide show
  1. app.py +73 -197
app.py CHANGED
@@ -62,6 +62,7 @@ def transcribe(audio, state):
62
  if ASR_MODEL:
63
  with torch.no_grad():
64
  context = state['buffer'][-32000:]
 
65
  results = ASR_MODEL.transcribe([context])
66
  print(f"[INFER] Context: {len(context)} | Raw: {results}", flush=True)
67
 
@@ -92,254 +93,129 @@ def transcribe(audio, state):
92
 
93
  def clear_session():
94
  print("[SESSION RESET]", flush=True)
95
- return {'transcript': [], 'buffer': None, 'counter': 0}, "Tap the mic to start..."
96
 
97
  def log_connection():
98
  print(">>> CLIENT CONNECTED <<<", flush=True)
99
 
100
- # --- SIMPLE CLEAN CSS ---
101
- CSS = """
102
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap');
103
-
104
- :root {
105
- --bg-dark: #0a0a0f;
106
- --bg-gradient: linear-gradient(180deg, #0a0a0f 0%, #151520 50%, #0a0a0f 100%);
107
- }
108
-
109
- body {
110
- background: var(--bg-dark) !important;
111
- }
112
-
113
  .gradio-container {
114
- background: var(--bg-gradient) !important;
115
- min-height: 100vh !important;
116
- font-family: 'Inter', sans-serif !important;
117
- max-width: 100% !important;
118
- padding: 0 !important;
119
- margin: 0 !important;
120
- }
121
-
122
- #main-container {
123
  min-height: 100vh;
124
- display: flex;
125
- flex-direction: column;
126
- background: var(--bg-gradient);
127
  }
128
 
129
- /* Header */
130
- #header-row {
131
- padding: 20px;
132
- display: flex;
133
- justify-content: center;
134
- background: transparent !important;
135
  }
136
 
137
- #session-badge {
138
- background: rgba(255, 255, 255, 0.05) !important;
139
- border: 1px solid rgba(255, 255, 255, 0.1) !important;
140
- border-radius: 20px !important;
141
- padding: 8px 16px !important;
142
- color: rgba(255, 255, 255, 0.7) !important;
143
- font-size: 14px !important;
144
  }
145
 
146
- /* Transcript area */
147
- #transcript-row {
148
- flex: 1;
149
- display: flex;
150
- align-items: center;
151
- justify-content: center;
152
- min-height: 50vh;
153
- background: transparent !important;
154
- padding: 40px 20px;
155
  }
156
 
157
- #transcript-display {
158
- background: transparent !important;
159
- border: none !important;
160
- box-shadow: none !important;
161
  text-align: center;
 
 
 
 
162
  }
163
 
164
- #transcript-display textarea {
165
  background: transparent !important;
166
- border: none !important;
167
  color: #ffffff !important;
168
- font-size: 36px !important;
169
- font-weight: 400 !important;
170
  text-align: center !important;
171
- line-height: 1.4 !important;
172
- text-shadow: 0 4px 20px rgba(0, 0, 0, 0.5);
173
- min-height: 150px !important;
174
- resize: none !important;
175
- }
176
-
177
- #transcript-display label {
178
- display: none !important;
179
- }
180
-
181
- #transcript-display .wrap {
182
- background: transparent !important;
183
- }
184
-
185
- /* Controls */
186
- #controls-row {
187
- padding: 30px 20px 60px;
188
- display: flex;
189
- justify-content: center;
190
- align-items: center;
191
- gap: 30px;
192
- background: transparent !important;
193
- }
194
-
195
- /* Mic button styling */
196
- #mic-input {
197
- background: transparent !important;
198
- border: none !important;
199
- box-shadow: none !important;
200
- max-width: 120px;
201
- }
202
-
203
- #mic-input > div {
204
- background: transparent !important;
205
  border: none !important;
206
- box-shadow: none !important;
207
- padding: 0 !important;
208
  }
209
 
210
- #mic-input label,
211
- #mic-input .wrap > div:first-child {
212
- display: none !important;
213
  }
214
 
215
- #mic-input audio {
216
- display: none !important;
217
- }
218
-
219
- #mic-input button {
220
- width: 80px !important;
221
- height: 80px !important;
222
- border-radius: 50% !important;
223
- background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%) !important;
224
- border: none !important;
225
- cursor: pointer !important;
226
- box-shadow: 0 8px 30px rgba(99, 102, 241, 0.4) !important;
227
- transition: all 0.2s ease !important;
228
- }
229
-
230
- #mic-input button:hover {
231
- transform: scale(1.05) !important;
232
- box-shadow: 0 12px 40px rgba(99, 102, 241, 0.5) !important;
233
- }
234
-
235
- #mic-input button svg {
236
- width: 32px !important;
237
- height: 32px !important;
238
- }
239
-
240
- /* Reset button */
241
- #reset-btn {
242
- background: rgba(255, 255, 255, 0.05) !important;
243
- border: 1px solid rgba(255, 255, 255, 0.15) !important;
244
- color: rgba(255, 255, 255, 0.7) !important;
245
- padding: 14px 28px !important;
246
- border-radius: 12px !important;
247
- font-size: 14px !important;
248
- font-weight: 500 !important;
249
- cursor: pointer !important;
250
- transition: all 0.2s ease !important;
251
- }
252
-
253
- #reset-btn:hover {
254
  background: rgba(255, 255, 255, 0.1) !important;
255
- color: #ffffff !important;
256
  }
257
 
258
- /* Hide Gradio footer and other elements */
259
  footer {
260
  display: none !important;
261
  }
262
-
263
- .contain {
264
- background: transparent !important;
265
- }
266
-
267
- .block {
268
- background: transparent !important;
269
- border: none !important;
270
- box-shadow: none !important;
271
- }
272
-
273
- /* Ambient glow at bottom */
274
- #controls-row::before {
275
- content: '';
276
- position: fixed;
277
- bottom: 0;
278
- left: 50%;
279
- transform: translateX(-50%);
280
- width: 200%;
281
- height: 40vh;
282
- background: radial-gradient(ellipse at center bottom, rgba(99, 102, 241, 0.12) 0%, transparent 70%);
283
- pointer-events: none;
284
- z-index: 0;
285
- }
286
  """
287
 
288
- # --- GRADIO APP ---
289
- with gr.Blocks(css=CSS, title="Nemotron Speech Streaming", theme=gr.themes.Base()) as demo:
290
  state = gr.State({'transcript': [], 'buffer': None, 'counter': 0})
291
 
292
- with gr.Column(elem_id="main-container"):
293
- # Header
294
- with gr.Row(elem_id="header-row"):
295
- gr.HTML(f"""
296
- <div id="session-badge" style="display: inline-flex; align-items: center; gap: 10px;
297
- background: rgba(255,255,255,0.05); border: 1px solid rgba(255,255,255,0.1);
298
- border-radius: 20px; padding: 10px 20px;">
299
- <span style="width: 10px; height: 10px; background: #22c55e; border-radius: 50%;
300
- animation: pulse 2s infinite;"></span>
301
- <span style="color: rgba(255,255,255,0.8); font-size: 14px;">Live Session • {SESSION_ID}</span>
302
- </div>
303
- <style>
304
- @keyframes pulse {{
305
- 0%, 100% {{ opacity: 1; }}
306
- 50% {{ opacity: 0.5; }}
307
- }}
308
- </style>
309
- """)
310
-
311
- # Transcript display
312
- with gr.Row(elem_id="transcript-row"):
313
- transcript = gr.Textbox(
314
- value="Tap the mic to start...",
315
- elem_id="transcript-display",
316
- show_label=False,
317
- lines=4,
318
- max_lines=6,
319
- interactive=False
320
  )
321
 
322
- # Controls
323
- with gr.Row(elem_id="controls-row"):
324
  audio = gr.Audio(
325
  sources=["microphone"],
326
  streaming=True,
327
  type="numpy",
328
- elem_id="mic-input",
329
- show_label=False
330
  )
331
- reset_btn = gr.Button("Reset", elem_id="reset-btn")
 
 
 
 
 
 
 
 
332
 
333
  # Events
334
  demo.load(fn=log_connection)
335
  audio.stream(
336
  fn=transcribe,
337
  inputs=[audio, state],
338
- outputs=[state, transcript],
339
  show_progress="hidden",
340
  trigger_mode="always_last"
341
  )
342
- reset_btn.click(fn=clear_session, outputs=[state, transcript])
343
 
344
  if __name__ == "__main__":
345
  demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)
 
62
  if ASR_MODEL:
63
  with torch.no_grad():
64
  context = state['buffer'][-32000:]
65
+
66
  results = ASR_MODEL.transcribe([context])
67
  print(f"[INFER] Context: {len(context)} | Raw: {results}", flush=True)
68
 
 
93
 
94
  def clear_session():
95
  print("[SESSION RESET]", flush=True)
96
+ return {'transcript': [], 'buffer': None, 'counter': 0}, "Listening..."
97
 
98
  def log_connection():
99
  print(">>> CLIENT CONNECTED <<<", flush=True)
100
 
101
+ # --- CUSTOM THEME CSS ---
102
+ custom_css = """
 
 
 
 
 
 
 
 
 
 
 
103
  .gradio-container {
104
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f0f23 100%) !important;
 
 
 
 
 
 
 
 
105
  min-height: 100vh;
 
 
 
106
  }
107
 
108
+ #title-text {
109
+ text-align: center;
110
+ color: #76b900;
111
+ font-size: 2em;
112
+ font-weight: bold;
113
+ margin-bottom: 10px;
114
  }
115
 
116
+ #subtitle-text {
117
+ text-align: center;
118
+ color: #888;
119
+ font-size: 1em;
120
+ margin-bottom: 30px;
 
 
121
  }
122
 
123
+ #session-info {
124
+ text-align: center;
125
+ color: #76b900;
126
+ font-size: 0.9em;
127
+ padding: 10px;
128
+ background: rgba(118, 185, 0, 0.1);
129
+ border-radius: 20px;
130
+ display: inline-block;
 
131
  }
132
 
133
+ #transcript-box {
134
+ min-height: 200px;
135
+ font-size: 1.5em;
 
136
  text-align: center;
137
+ padding: 40px 20px;
138
+ background: rgba(255, 255, 255, 0.05);
139
+ border-radius: 15px;
140
+ border: 1px solid rgba(255, 255, 255, 0.1);
141
  }
142
 
143
+ #transcript-box textarea {
144
  background: transparent !important;
 
145
  color: #ffffff !important;
146
+ font-size: 1.5em !important;
 
147
  text-align: center !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  border: none !important;
 
 
149
  }
150
 
151
+ #mic-button {
152
+ margin: 20px auto;
153
+ display: block;
154
  }
155
 
156
+ #reset-button {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  background: rgba(255, 255, 255, 0.1) !important;
158
+ border: 1px solid rgba(255, 255, 255, 0.2) !important;
159
  }
160
 
 
161
  footer {
162
  display: none !important;
163
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  """
165
 
166
+ # --- GRADIO UI ---
167
+ with gr.Blocks(css=custom_css, title="Nemotron Speech Streaming", theme=gr.themes.Soft(primary_hue="green")) as demo:
168
  state = gr.State({'transcript': [], 'buffer': None, 'counter': 0})
169
 
170
+ gr.HTML(f"""
171
+ <div id="title-text">Nemotron Speech Streaming</div>
172
+ <div id="subtitle-text">Real-time speech recognition powered by NVIDIA NeMo</div>
173
+ <div style="text-align: center; margin-bottom: 20px;">
174
+ <span id="session-info">Session: {SESSION_ID}</span>
175
+ </div>
176
+ """)
177
+
178
+ with gr.Row():
179
+ with gr.Column():
180
+ transcript_display = gr.Textbox(
181
+ value="Listening...",
182
+ label="Transcript",
183
+ elem_id="transcript-box",
184
+ lines=6,
185
+ max_lines=10,
186
+ interactive=False,
187
+ show_copy_button=True
 
 
 
 
 
 
 
 
 
 
188
  )
189
 
190
+ with gr.Row():
191
+ with gr.Column(scale=2):
192
  audio = gr.Audio(
193
  sources=["microphone"],
194
  streaming=True,
195
  type="numpy",
196
+ label="Click to Start Recording",
197
+ elem_id="mic-button"
198
  )
199
+ with gr.Column(scale=1):
200
+ reset_btn = gr.Button("Reset", elem_id="reset-button", variant="secondary")
201
+
202
+ gr.HTML("""
203
+ <div style="text-align: center; margin-top: 30px; color: #666; font-size: 0.85em;">
204
+ <p>Click the microphone to start speaking. Your speech will be transcribed in real-time.</p>
205
+ <p>Model: <strong>nvidia/nemotron-speech-streaming-en-0.6b</strong></p>
206
+ </div>
207
+ """)
208
 
209
  # Events
210
  demo.load(fn=log_connection)
211
  audio.stream(
212
  fn=transcribe,
213
  inputs=[audio, state],
214
+ outputs=[state, transcript_display],
215
  show_progress="hidden",
216
  trigger_mode="always_last"
217
  )
218
+ reset_btn.click(fn=clear_session, outputs=[state, transcript_display])
219
 
220
  if __name__ == "__main__":
221
  demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)