harsh2ai committed on
Commit
3c50bb0
·
verified ·
1 Parent(s): 279472c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -142
app.py CHANGED
@@ -1,5 +1,4 @@
1
  #!/usr/bin/env python3
2
- #updated
3
  """
4
  Ringg STT V0 - Hugging Face Space (Frontend)
5
  Makes API calls to private inference endpoint via ngrok
@@ -27,114 +26,126 @@ LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
27
  # Custom CSS for Ringg branding
28
  custom_css = """
29
  .gradio-container {
30
- font-family: 'Inter', sans-serif;
31
- max-width: 950px;
32
- margin: 0 auto;
33
  }
34
-
35
  .main-header {
36
- display: flex;
37
- align-items: center;
38
- justify-content: center;
39
- gap: 20px;
40
- flex-wrap: nowrap;
41
- padding: 20px;
42
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
43
- color: white;
44
- border-radius: 10px;
45
- margin-bottom: 20px;
46
- max-width: 900px;
47
- margin-left: auto;
48
- margin-right: auto;
49
  }
50
-
51
  .main-header .main-logo {
52
- height: 60px;
53
- width: 60px;
54
- flex-shrink: 0;
55
- display: flex;
56
- align-items: center;
57
- justify-content: center;
58
  }
59
-
60
  .main-header .main-logo img {
61
- max-height: 100%;
62
- max-width: 100%;
63
- object-fit: contain;
64
  }
65
-
66
  .main-header .main-logo.main-logo--placeholder {
67
- background-color: rgba(255, 255, 255, 0.2);
68
- border-radius: 12px;
69
  }
70
-
71
  .main-header .main-text {
72
- text-align: left;
73
- display: flex;
74
- flex-direction: column;
75
- justify-content: center;
76
- min-width: 0;
77
  }
78
-
79
  .main-header .main-text h1 {
80
- margin: 0 0 6px;
81
  }
82
-
83
  .main-header .main-text p {
84
- margin: 0;
85
  }
86
-
87
  @media (max-width: 640px) {
88
- .main-header {
89
- flex-wrap: wrap;
90
- }
91
-
92
- .main-header .main-text {
93
- text-align: center;
94
- width: 100%;
95
- }
96
  }
97
-
98
  .status-dot {
99
- display: inline-block;
100
- width: 8px;
101
- height: 8px;
102
- border-radius: 50%;
103
- margin-left: 8px;
104
  }
105
-
106
  .status-dot.healthy {
107
- background-color: #22c55e;
108
- animation: pulse-green 2s ease-in-out infinite;
109
  }
110
-
111
  .status-dot.error {
112
- background-color: #ef4444;
113
- animation: pulse-red 2s ease-in-out infinite;
114
  }
115
-
116
  @keyframes pulse-green {
117
- 0% {
118
- box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
119
- }
120
- 70% {
121
- box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
122
- }
123
- 100% {
124
- box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
125
- }
126
  }
127
-
128
  @keyframes pulse-red {
129
- 0% {
130
- box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
131
- }
132
- 70% {
133
- box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
134
- }
135
- 100% {
136
- box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
137
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
  """
140
 
@@ -214,16 +225,9 @@ def create_interface():
214
  def transcribe_audio(audio_file):
215
  """Transcribe uploaded audio"""
216
  if audio_file is None:
217
- return "⚠️ Please upload an audio file to transcribe."
218
-
219
- transcription = stt_client.transcribe_audio(audio_file)
220
- text = (transcription or "").strip()
221
-
222
- if not text or text.startswith("❌") or text.startswith("⏱"):
223
- return text or "⚠️ No speech detected—try a clearer recording."
224
 
225
- footer = "(Served via API • Remote backend)"
226
- return f"{text}\n\n{footer}"
227
 
228
  def check_api_status():
229
  """Check API health status"""
@@ -255,29 +259,35 @@ def create_interface():
255
  """)
256
 
257
  gr.Markdown(
258
- """
259
- # 🎯 Performance Benchmarks
260
- #### **Ringg STT V0** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
261
-
262
- | Model | Median WER ↓ | Mean WER ↓ |
263
- |-------|--------------|------------|
264
- | **Elaichi STT (Ringg AI)** | **15.00%** | **15.92%** |
265
- | IndicWav2Vec | 19.35% | 20.91% |
266
- | VakyanSh Wav2Vec2 | 22.73% | 24.78% |
267
- """
268
  )
269
 
270
- gr.Markdown(
271
- """
272
- -----------------
273
- # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
274
- """
275
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
  with gr.Row():
278
  audio_input = gr.Audio(
279
  label="📁 Upload Audio File",
280
  type="filepath",
 
281
  scale=3,
282
  )
283
 
@@ -287,46 +297,26 @@ def create_interface():
287
 
288
  file_output = gr.Textbox(
289
  label="Transcription Result",
290
- lines=6,
291
  interactive=True,
292
  placeholder="Upload a file and click Transcribe...",
293
  )
294
 
295
- transcribe_btn.click(
296
- transcribe_audio,
297
- inputs=audio_input,
298
- outputs=file_output,
299
- )
300
 
301
- gr.Markdown(
302
- """
303
- ### ✨ Features
304
- - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
305
- - 🎯 **High Accuracy**: Competitive with leading ASR models
306
- - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
307
- - ⚡ **Fast Processing**: Optimized for quick transcription
308
- """
309
- )
310
 
311
- gr.Markdown(
312
- """
313
- ### ⚠️ Benchmark Disclaimer
314
- - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
315
- - Dataset issues include inaudible segments and repeated sentences caused by interruptions
316
- - Background noise is prominent across many clips, impacting recognition quality
317
- - Mixed Hindi-English speech often provides Hindi-only transcripts
318
- - Currency, time, and year normalization is inconsistent with spoken forms
319
- - Original transcripts lack punctuation, increasing WER for models that predict it
320
- """
321
- )
322
-
323
- gr.Markdown(
324
- """
325
- # 🙏 Acknowledgements
326
- - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
327
- - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
328
- """
329
- )
330
 
331
  return demo
332
 
 
1
  #!/usr/bin/env python3
 
2
  """
3
  Ringg STT V0 - Hugging Face Space (Frontend)
4
  Makes API calls to private inference endpoint via ngrok
 
26
  # Custom CSS for Ringg branding
27
  custom_css = """
28
  .gradio-container {
29
+ font-family: 'Inter', sans-serif;
 
 
30
  }
 
31
  .main-header {
32
+ display: flex;
33
+ align-items: center;
34
+ justify-content: center;
35
+ gap: 20px;
36
+ flex-wrap: nowrap;
37
+ padding: 20px;
38
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
39
+ color: white;
40
+ border-radius: 10px;
41
+ margin-bottom: 20px;
42
+ max-width: 900px;
43
+ margin-left: auto;
44
+ margin-right: auto;
45
  }
 
46
  .main-header .main-logo {
47
+ height: 60px;
48
+ width: 60px;
49
+ flex-shrink: 0;
50
+ display: flex;
51
+ align-items: center;
52
+ justify-content: center;
53
  }
 
54
  .main-header .main-logo img {
55
+ max-height: 100%;
56
+ max-width: 100%;
57
+ object-fit: contain;
58
  }
 
59
  .main-header .main-logo.main-logo--placeholder {
60
+ background-color: rgba(255, 255, 255, 0.2);
61
+ border-radius: 12px;
62
  }
 
63
  .main-header .main-text {
64
+ text-align: left;
65
+ display: flex;
66
+ flex-direction: column;
67
+ justify-content: center;
68
+ min-width: 0;
69
  }
 
70
  .main-header .main-text h1 {
71
+ margin: 0 0 6px;
72
  }
 
73
  .main-header .main-text p {
74
+ margin: 0;
75
  }
 
76
  @media (max-width: 640px) {
77
+ .main-header {
78
+ flex-wrap: wrap;
79
+ }
80
+ .main-header .main-text {
81
+ text-align: center;
82
+ width: 100%;
83
+ }
 
84
  }
 
85
  .status-dot {
86
+ display: inline-block;
87
+ width: 8px;
88
+ height: 8px;
89
+ border-radius: 50%;
90
+ margin-left: 8px;
91
  }
 
92
  .status-dot.healthy {
93
+ background-color: #22c55e;
94
+ animation: pulse-green 2s ease-in-out infinite;
95
  }
 
96
  .status-dot.error {
97
+ background-color: #ef4444;
98
+ animation: pulse-red 2s ease-in-out infinite;
99
  }
 
100
  @keyframes pulse-green {
101
+ 0% {
102
+ box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
103
+ }
104
+ 70% {
105
+ box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
106
+ }
107
+ 100% {
108
+ box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
109
+ }
110
  }
 
111
  @keyframes pulse-red {
112
+ 0% {
113
+ box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
114
+ }
115
+ 70% {
116
+ box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
117
+ }
118
+ 100% {
119
+ box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
120
+ }
121
+ }
122
+ div[data-testid="audio"] {
123
+ min-height: 60px !important;
124
+ max-height: 80px !important;
125
+ }
126
+ div[data-testid="audio"] > div {
127
+ height: auto !important;
128
+ min-height: auto !important;
129
+ }
130
+ .wrap.wrap.wrap.svelte-1w6y6zl {
131
+ height: auto !important;
132
+ min-height: auto !important;
133
+ }
134
+ .gradio-row {
135
+ min-height: auto !important;
136
+ }
137
+ footer {
138
+ visibility: hidden !important;
139
+ height: 50px !important;
140
+ }
141
+ footer:after {
142
+ content: "Made with ❤️ by RinggAI Team" !important;
143
+ visibility: visible !important;
144
+ display: block !important;
145
+ text-align: center !important;
146
+ margin-top: 15px !important;
147
+ color: #666 !important;
148
+ font-size: 14px !important;
149
  }
150
  """
151
 
 
225
  def transcribe_audio(audio_file):
226
  """Transcribe uploaded audio"""
227
  if audio_file is None:
228
+ return "Please upload an audio file!"
 
 
 
 
 
 
229
 
230
+ return stt_client.transcribe_audio(audio_file)
 
231
 
232
  def check_api_status():
233
  """Check API health status"""
 
259
  """)
260
 
261
  gr.Markdown(
262
+ """ # 🎯 Performance Benchmarks \n #### **Ringg STT V0** Ranks **2nd** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other leading Solutions."""
 
 
 
 
 
 
 
 
 
263
  )
264
 
265
+ with gr.Row():
266
+ gr.DataFrame(
267
+ value=[
268
+ ["IndicWav2Vec (Winner)", "18.55%", "63.31%"],
269
+ ["Ringg STT V0", "21.03%", "66.27%"],
270
+ ["VakyanSh Wav2Vec2", "24.06%", "66.34%"],
271
+ ["Whisper Large-v3", "29.17%", "63.31%"],
272
+ ["Whisper Large-v2", "37.50%", "66.27%"],
273
+ ],
274
+ headers=["Model", "Indic Norm WER ↓", "Whisper Norm WER ↓"],
275
+ datatype=["str", "str", "str"],
276
+ row_count=5,
277
+ col_count=(3, "fixed"),
278
+ )
279
+
280
+ gr.Markdown("""
281
+ -----------------
282
+ # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
283
+
284
+ """)
285
 
286
  with gr.Row():
287
  audio_input = gr.Audio(
288
  label="📁 Upload Audio File",
289
  type="filepath",
290
+ sources=["upload"],
291
  scale=3,
292
  )
293
 
 
297
 
298
  file_output = gr.Textbox(
299
  label="Transcription Result",
300
+ lines=3,
301
  interactive=True,
302
  placeholder="Upload a file and click Transcribe...",
303
  )
304
 
305
+ transcribe_btn.click(transcribe_audio, inputs=audio_input, outputs=file_output)
 
 
 
 
306
 
307
+ # gr.Markdown("""
308
+ # ### ✨ Features
309
+ # - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
310
+ # - 🎯 **High Accuracy**: Competitive with leading ASR models
311
+ # - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
312
+ # - ⚡ **Fast Processing**: Optimized for quick transcription
313
+ # """)
 
 
314
 
315
+ gr.Markdown("""
316
+ # 🙏 Acknowledgements
317
+ - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
318
+ - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
319
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  return demo
322