Spaces:

RinggAI
/

STT

Running

App Files Community

harsh2ai committed on Nov 7

Commit

3c50bb0

verified ·

1 Parent(s): 279472c

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -142

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-#updated
 """
 Ringg STT V0 - Hugging Face Space (Frontend)
 Makes API calls to private inference endpoint via ngrok
@@ -27,114 +26,126 @@ LOGO_URL = os.environ.get("STT_LOGO_URL", DEFAULT_LOGO_URL).strip()
 # Custom CSS for Ringg branding
 custom_css = """
 .gradio-container {
-  font-family: 'Inter', sans-serif;
-  max-width: 950px;
-  margin: 0 auto;
 }
 .main-header {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: 20px;
-  flex-wrap: nowrap;
-  padding: 20px;
-  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-  color: white;
-  border-radius: 10px;
-  margin-bottom: 20px;
-  max-width: 900px;
-  margin-left: auto;
-  margin-right: auto;
 }
 .main-header .main-logo {
-  height: 60px;
-  width: 60px;
-  flex-shrink: 0;
-  display: flex;
-  align-items: center;
-  justify-content: center;
 }
 .main-header .main-logo img {
-  max-height: 100%;
-  max-width: 100%;
-  object-fit: contain;
 }
 .main-header .main-logo.main-logo--placeholder {
-  background-color: rgba(255, 255, 255, 0.2);
-  border-radius: 12px;
 }
 .main-header .main-text {
-  text-align: left;
-  display: flex;
-  flex-direction: column;
-  justify-content: center;
-  min-width: 0;
 }
 .main-header .main-text h1 {
-  margin: 0 0 6px;
 }
 .main-header .main-text p {
-  margin: 0;
 }
 @media (max-width: 640px) {
-  .main-header {
-    flex-wrap: wrap;
-  }
-  .main-header .main-text {
-    text-align: center;
-    width: 100%;
-  }
 }
 .status-dot {
-  display: inline-block;
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  margin-left: 8px;
 }
 .status-dot.healthy {
-  background-color: #22c55e;
-  animation: pulse-green 2s ease-in-out infinite;
 }
 .status-dot.error {
-  background-color: #ef4444;
-  animation: pulse-red 2s ease-in-out infinite;
 }
 @keyframes pulse-green {
-  0% {
-    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
-  }
-  70% {
-    box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
-  }
-  100% {
-    box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
-  }
 }
 @keyframes pulse-red {
-  0% {
-    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
-  }
-  70% {
-    box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
-  }
-  100% {
-    box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
-  }
 }
 """
@@ -214,16 +225,9 @@ def create_interface():
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
-            return "⚠️ Please upload an audio file to transcribe."
-        transcription = stt_client.transcribe_audio(audio_file)
-        text = (transcription or "").strip()
-        if not text or text.startswith("❌") or text.startswith("⏱"):
-            return text or "⚠️ No speech detected—try a clearer recording."
-        footer = "(Served via API • Remote backend)"
-        return f"{text}\n\n{footer}"
     def check_api_status():
         """Check API health status"""
@@ -255,29 +259,35 @@ def create_interface():
         """)
         gr.Markdown(
-            """
-            # 🎯 Performance Benchmarks
-            #### **Ringg STT V0** Ranks **1st** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other Leading Solutions.
-            | Model | Median WER ↓ | Mean WER ↓ |
-            |-------|--------------|------------|
-            | **Elaichi STT (Ringg AI)** | **15.00%** | **15.92%** |
-            | IndicWav2Vec | 19.35% | 20.91% |
-            | VakyanSh Wav2Vec2 | 22.73% | 24.78% |
-            """
         )
-        gr.Markdown(
-            """
-            -----------------
-            # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
-            """
-        )
         with gr.Row():
             audio_input = gr.Audio(
                 label="📁 Upload Audio File",
                 type="filepath",
                 scale=3,
             )
@@ -287,46 +297,26 @@ def create_interface():
         file_output = gr.Textbox(
             label="Transcription Result",
-            lines=6,
             interactive=True,
             placeholder="Upload a file and click Transcribe...",
         )
-        transcribe_btn.click(
-            transcribe_audio,
-            inputs=audio_input,
-            outputs=file_output,
-        )
-        gr.Markdown(
-            """
-            ### ✨ Features
-            - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
-            - 🎯 **High Accuracy**: Competitive with leading ASR models
-            - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
-            - ⚡ **Fast Processing**: Optimized for quick transcription
-            """
-        )
-        gr.Markdown(
-            """
-            ### ⚠️ Benchmark Disclaimer
-            - Evaluated on a modified FLEURS subset to ensure consistent Hindi coverage
-            - Dataset issues include inaudible segments and repeated sentences caused by interruptions
-            - Background noise is prominent across many clips, impacting recognition quality
-            - Mixed Hindi-English speech often provides Hindi-only transcripts
-            - Currency, time, and year normalization is inconsistent with spoken forms
-            - Original transcripts lack punctuation, increasing WER for models that predict it
-            """
-        )
-        gr.Markdown(
-            """
-            # 🙏 Acknowledgements
-            - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
-            - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
-            """
-        )
     return demo

 #!/usr/bin/env python3
 """
 Ringg STT V0 - Hugging Face Space (Frontend)
 Makes API calls to private inference endpoint via ngrok
 # Custom CSS for Ringg branding
 custom_css = """
 .gradio-container {
+    font-family: 'Inter', sans-serif;
 }
 .main-header {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 20px;
+    flex-wrap: nowrap;
+    padding: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    border-radius: 10px;
+    margin-bottom: 20px;
+    max-width: 900px;
+    margin-left: auto;
+    margin-right: auto;
 }
 .main-header .main-logo {
+    height: 60px;
+    width: 60px;
+    flex-shrink: 0;
+    display: flex;
+    align-items: center;
+    justify-content: center;
 }
 .main-header .main-logo img {
+    max-height: 100%;
+    max-width: 100%;
+    object-fit: contain;
 }
 .main-header .main-logo.main-logo--placeholder {
+    background-color: rgba(255, 255, 255, 0.2);
+    border-radius: 12px;
 }
 .main-header .main-text {
+    text-align: left;
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    min-width: 0;
 }
 .main-header .main-text h1 {
+    margin: 0 0 6px;
 }
 .main-header .main-text p {
+    margin: 0;
 }
 @media (max-width: 640px) {
+    .main-header {
+        flex-wrap: wrap;
+    }
+    .main-header .main-text {
+        text-align: center;
+        width: 100%;
+    }
 }
 .status-dot {
+    display: inline-block;
+    width: 8px;
+    height: 8px;
+    border-radius: 50%;
+    margin-left: 8px;
 }
 .status-dot.healthy {
+    background-color: #22c55e;
+    animation: pulse-green 2s ease-in-out infinite;
 }
 .status-dot.error {
+    background-color: #ef4444;
+    animation: pulse-red 2s ease-in-out infinite;
 }
 @keyframes pulse-green {
+    0% {
+        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
+    }
 }
 @keyframes pulse-red {
+    0% {
+        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
+    }
+}
+div[data-testid="audio"] {
+    min-height: 60px !important;
+    max-height: 80px !important;
+}
+div[data-testid="audio"] > div {
+    height: auto !important;
+    min-height: auto !important;
+}
+.wrap.wrap.wrap.svelte-1w6y6zl {
+    height: auto !important;
+    min-height: auto !important;
+}
+.gradio-row {
+    min-height: auto !important;
+}
+footer {
+    visibility: hidden !important;
+    height: 50px !important;
+}
+footer:after {
+    content: "Made with ❤️ by RinggAI Team" !important;
+    visibility: visible !important;
+    display: block !important;
+    text-align: center !important;
+    margin-top: 15px !important;
+    color: #666 !important;
+    font-size: 14px !important;
 }
 """
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
+            return "Please upload an audio file!"
+        return stt_client.transcribe_audio(audio_file)
     def check_api_status():
         """Check API health status"""
         """)
         gr.Markdown(
+            """ # 🎯 Performance Benchmarks \n #### **Ringg STT V0** Ranks **2nd** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other leading Solutions."""
         )
+        with gr.Row():
+            gr.DataFrame(
+                value=[
+                    ["IndicWav2Vec (Winner)", "18.55%", "63.31%"],
+                    ["Ringg STT V0", "21.03%", "66.27%"],
+                    ["VakyanSh Wav2Vec2", "24.06%", "66.34%"],
+                    ["Whisper Large-v3", "29.17%", "63.31%"],
+                    ["Whisper Large-v2", "37.50%", "66.27%"],
+                ],
+                headers=["Model", "Indic Norm WER ↓", "Whisper Norm WER ↓"],
+                datatype=["str", "str", "str"],
+                row_count=5,
+                col_count=(3, "fixed"),
+            )
+        gr.Markdown("""
+        -----------------
+        # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
+        """)
         with gr.Row():
             audio_input = gr.Audio(
                 label="📁 Upload Audio File",
                 type="filepath",
+                sources=["upload"],
                 scale=3,
             )
         file_output = gr.Textbox(
             label="Transcription Result",
+            lines=3,
             interactive=True,
             placeholder="Upload a file and click Transcribe...",
         )
+        transcribe_btn.click(transcribe_audio, inputs=audio_input, outputs=file_output)
+        # gr.Markdown("""
+        # ### ✨ Features
+        # - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
+        # - 🎯 **High Accuracy**: Competitive with leading ASR models
+        # - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
+        # - ⚡ **Fast Processing**: Optimized for quick transcription
+        # """)
+        gr.Markdown("""
+        # 🙏 Acknowledgements
+        - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
+        - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
+        """)
     return demo