Spaces:

RinggAI
/

STT

Running

App Files Community

harsh2ai committed on Nov 7, 2025

Commit

c70257c

1 Parent(s): 7d46a3c

updated ui for new model

Browse files

Files changed (1) hide show

app.py +141 -123

app.py CHANGED Viewed

@@ -26,126 +26,114 @@ LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
 # Custom CSS for Ringg branding
 custom_css = """
 .gradio-container {
-    font-family: 'Inter', sans-serif;
 }
 .main-header {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    gap: 20px;
-    flex-wrap: nowrap;
-    padding: 20px;
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    color: white;
-    border-radius: 10px;
-    margin-bottom: 20px;
-    max-width: 900px;
-    margin-left: auto;
-    margin-right: auto;
 }
 .main-header .main-logo {
-    height: 60px;
-    width: 60px;
-    flex-shrink: 0;
-    display: flex;
-    align-items: center;
-    justify-content: center;
 }
 .main-header .main-logo img {
-    max-height: 100%;
-    max-width: 100%;
-    object-fit: contain;
 }
 .main-header .main-logo.main-logo--placeholder {
-    background-color: rgba(255, 255, 255, 0.2);
-    border-radius: 12px;
 }
 .main-header .main-text {
-    text-align: left;
-    display: flex;
-    flex-direction: column;
-    justify-content: center;
-    min-width: 0;
 }
 .main-header .main-text h1 {
-    margin: 0 0 6px;
 }
 .main-header .main-text p {
-    margin: 0;
 }
 @media (max-width: 640px) {
-    .main-header {
-        flex-wrap: wrap;
-    }
-    .main-header .main-text {
-        text-align: center;
-        width: 100%;
-    }
 }
 .status-dot {
-    display: inline-block;
-    width: 8px;
-    height: 8px;
-    border-radius: 50%;
-    margin-left: 8px;
 }
 .status-dot.healthy {
-    background-color: #22c55e;
-    animation: pulse-green 2s ease-in-out infinite;
 }
 .status-dot.error {
-    background-color: #ef4444;
-    animation: pulse-red 2s ease-in-out infinite;
 }
 @keyframes pulse-green {
-    0% {
-        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
-    }
-    70% {
-        box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
-    }
-    100% {
-        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
-    }
 }
 @keyframes pulse-red {
-    0% {
-        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
-    }
-    70% {
-        box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
-    }
-    100% {
-        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
-    }
-}
-div[data-testid="audio"] {
-    min-height: 60px !important;
-    max-height: 80px !important;
-}
-div[data-testid="audio"] > div {
-    height: auto !important;
-    min-height: auto !important;
-}
-.wrap.wrap.wrap.svelte-1w6y6zl {
-    height: auto !important;
-    min-height: auto !important;
-}
-.gradio-row {
-    min-height: auto !important;
-}
-footer {
-    visibility: hidden !important;
-    height: 50px !important;
-}
-footer:after {
-    content: "Made with ❤️ by RinggAI Team" !important;
-    visibility: visible !important;
-    display: block !important;
-    text-align: center !important;
-    margin-top: 15px !important;
-    color: #666 !important;
-    font-size: 14px !important;
 }
 """
@@ -225,9 +213,16 @@ def create_interface():
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
-            return "Please upload an audio file!"
-        return stt_client.transcribe_audio(audio_file)
     def check_api_status():
         """Check API health status"""
@@ -259,29 +254,32 @@ def create_interface():
         """)
         gr.Markdown(
-            """ # 🎯 Performance Benchmarks \n #### **Ringg STT V0** Ranks **2nd** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other leading Solutions."""
         )
         with gr.Row():
             gr.DataFrame(
                 value=[
-                    ["IndicWav2Vec (Winner)", "18.55%", "63.31%"],
-                    ["Ringg STT V0", "21.03%", "66.27%"],
-                    ["VakyanSh Wav2Vec2", "24.06%", "66.34%"],
-                    ["Whisper Large-v3", "29.17%", "63.31%"],
-                    ["Whisper Large-v2", "37.50%", "66.27%"],
                 ],
-                headers=["Model", "Indic Norm WER ↓", "Whisper Norm WER ↓"],
                 datatype=["str", "str", "str"],
-                row_count=5,
                 col_count=(3, "fixed"),
             )
-        gr.Markdown("""
-        -----------------
-        # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
-        """)
         with gr.Row():
             audio_input = gr.Audio(
@@ -297,26 +295,46 @@ def create_interface():
         file_output = gr.Textbox(
             label="Transcription Result",
-            lines=3,
             interactive=True,
             placeholder="Upload a file and click Transcribe...",
         )
-        transcribe_btn.click(transcribe_audio, inputs=audio_input, outputs=file_output)
-        # gr.Markdown("""
-        # ### ✨ Features
-        # - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
-        # - 🎯 **High Accuracy**: Competitive with leading ASR models
-        # - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
-        # - ⚡ **Fast Processing**: Optimized for quick transcription
-        # """)
-        gr.Markdown("""
-        # 🙏 Acknowledgements
-        - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
-        - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
-        """)
     return demo

 # Custom CSS for Ringg branding
 custom_css = """
 .gradio-container {
+  font-family: 'Inter', sans-serif;
+  max-width: 950px;
+  margin: 0 auto;
 }
 .main-header {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  gap: 20px;
+  flex-wrap: nowrap;
+  padding: 20px;
+  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+  color: white;
+  border-radius: 10px;
+  margin-bottom: 20px;
+  max-width: 900px;
+  margin-left: auto;
+  margin-right: auto;
 }
 .main-header .main-logo {
+  height: 60px;
+  width: 60px;
+  flex-shrink: 0;
+  display: flex;
+  align-items: center;
+  justify-content: center;
 }
 .main-header .main-logo img {
+  max-height: 100%;
+  max-width: 100%;
+  object-fit: contain;
 }
 .main-header .main-logo.main-logo--placeholder {
+  background-color: rgba(255, 255, 255, 0.2);
+  border-radius: 12px;
 }
 .main-header .main-text {
+  text-align: left;
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  min-width: 0;
 }
 .main-header .main-text h1 {
+  margin: 0 0 6px;
 }
 .main-header .main-text p {
+  margin: 0;
 }
 @media (max-width: 640px) {
+  .main-header {
+    flex-wrap: wrap;
+  }
+  .main-header .main-text {
+    text-align: center;
+    width: 100%;
+  }
 }
 .status-dot {
+  display: inline-block;
+  width: 8px;
+  height: 8px;
+  border-radius: 50%;
+  margin-left: 8px;
 }
 .status-dot.healthy {
+  background-color: #22c55e;
+  animation: pulse-green 2s ease-in-out infinite;
 }
 .status-dot.error {
+  background-color: #ef4444;
+  animation: pulse-red 2s ease-in-out infinite;
 }
 @keyframes pulse-green {
+  0% {
+    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
+  }
+  70% {
+    box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
+  }
+  100% {
+    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
+  }
 }
 @keyframes pulse-red {
+  0% {
+    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
+  }
+  70% {
+    box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
+  }
+  100% {
+    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
+  }
 }
 """
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
+            return "⚠️ Please upload an audio file to transcribe."
+        transcription = stt_client.transcribe_audio(audio_file)
+        text = (transcription or "").strip()
+        if not text or text.startswith("❌") or text.startswith("⏱"):
+            return text or "⚠️ No speech detected—try a clearer recording."
+        footer = "(Served via API • Remote backend)"
+        return f"{text}\n\n{footer}"
     def check_api_status():
         """Check API health status"""
         """)
         gr.Markdown(
+            """
+            # 🎯 Performance Benchmarks
+            #### **Ringg STT V0** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
+            """
         )
         with gr.Row():
             gr.DataFrame(
                 value=[
+                    ["Elaichi STT (Ringg AI)", "15.00%", "15.92%"],
+                    ["IndicWav2Vec ", "19.35%", "20.91%"],
+                    ["VakyanSh Wav2Vec2", "22.73%", "24.78%"],
                 ],
+                headers=["Model", "Median WER ↓", "Mean WER ↓"],
                 datatype=["str", "str", "str"],
+                row_count=3,
                 col_count=(3, "fixed"),
+                interactive=False,
             )
+        gr.Markdown(
+            """
+            -----------------
+            # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
+            """
+        )
         with gr.Row():
             audio_input = gr.Audio(
         file_output = gr.Textbox(
             label="Transcription Result",
+            lines=6,
             interactive=True,
             placeholder="Upload a file and click Transcribe...",
         )
+        transcribe_btn.click(
+            transcribe_audio,
+            inputs=audio_input,
+            outputs=file_output,
+        )
+        gr.Markdown(
+            """
+            ### ✨ Features
+            - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
+            - 🎯 **High Accuracy**: Competitive with leading ASR models
+            - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
+            - ⚡ **Fast Processing**: Optimized for quick transcription
+            """
+        )
+        gr.Markdown(
+            """
+            ### ⚠️ Benchmark Disclaimer
+            - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
+            - Dataset issues include inaudible segments and repeated sentences caused by interruptions
+            - Background noise is prominent across many clips, impacting recognition quality
+            - Mixed Hindi-English speech often provides Hindi-only transcripts
+            - Currency, time, and year normalization is inconsistent with spoken forms
+            - Original transcripts lack punctuation, increasing WER for models that predict it
+            """
+        )
+        gr.Markdown(
+            """
+            # 🙏 Acknowledgements
+            - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
+            - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
+            """
+        )
     return demo