Integrated Sarvam speech-to-text API
Browse files- app.py +7 -7
- pages/scoreboard.py +6 -4
app.py
CHANGED
|
@@ -92,10 +92,11 @@ class ResultWriter:
|
|
| 92 |
'path',
|
| 93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
| 94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
| 95 |
-
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
|
|
|
| 96 |
]
|
| 97 |
|
| 98 |
-
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 99 |
|
| 100 |
if not fs.exists(save_path):
|
| 101 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
|
@@ -216,7 +217,6 @@ def call_function(model_name):
|
|
| 216 |
"audio_b64":True
|
| 217 |
}}
|
| 218 |
elif st.session_state.current_audio_type == "uploaded":
|
| 219 |
-
# For uploaded files, use the processed audio data
|
| 220 |
array = st.session_state.audio['data']
|
| 221 |
sr = st.session_state.audio['sample_rate']
|
| 222 |
if sr != 22050:
|
|
@@ -247,7 +247,7 @@ def call_function(model_name):
|
|
| 247 |
return transcript
|
| 248 |
|
| 249 |
def transcribe_audio():
|
| 250 |
-
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
|
| 251 |
|
| 252 |
if st.session_state.model_1_selection == "Random":
|
| 253 |
model1_name = random.choice(models_list)
|
|
@@ -375,7 +375,7 @@ def on_option_none_click():
|
|
| 375 |
|
| 376 |
def on_click_transcribe():
|
| 377 |
if st.session_state.has_audio:
|
| 378 |
-
with st.spinner("Transcribing audio... this may take
|
| 379 |
option_1_text, option_2_text = transcribe_audio(
|
| 380 |
)
|
| 381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
|
@@ -516,7 +516,7 @@ def main():
|
|
| 516 |
st.markdown("### Model Selection")
|
| 517 |
col_model1, col_model2 = st.columns(2)
|
| 518 |
|
| 519 |
-
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure"]
|
| 520 |
|
| 521 |
with col_model1:
|
| 522 |
st.selectbox(
|
|
@@ -582,7 +582,7 @@ def main():
|
|
| 582 |
* Model names are revealed after the vote is cast.
|
| 583 |
* Currently Hindi and English are supported, and
|
| 584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
| 585 |
-
* It may take up to 30 seconds for speech recognition in some cases.
|
| 586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
| 587 |
""".strip()
|
| 588 |
|
|
|
|
| 92 |
'path',
|
| 93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
| 94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
| 95 |
+
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration',
|
| 96 |
+
'sarvam_score','sarvam_appearance','sarvam_duration',
|
| 97 |
]
|
| 98 |
|
| 99 |
+
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 100 |
|
| 101 |
if not fs.exists(save_path):
|
| 102 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
|
|
|
| 217 |
"audio_b64":True
|
| 218 |
}}
|
| 219 |
elif st.session_state.current_audio_type == "uploaded":
|
|
|
|
| 220 |
array = st.session_state.audio['data']
|
| 221 |
sr = st.session_state.audio['sample_rate']
|
| 222 |
if sr != 22050:
|
|
|
|
| 247 |
return transcript
|
| 248 |
|
| 249 |
def transcribe_audio():
|
| 250 |
+
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure",'sarvam']
|
| 251 |
|
| 252 |
if st.session_state.model_1_selection == "Random":
|
| 253 |
model1_name = random.choice(models_list)
|
|
|
|
| 375 |
|
| 376 |
def on_click_transcribe():
|
| 377 |
if st.session_state.has_audio:
|
| 378 |
+
with st.spinner("Transcribing audio... this may take some time"):
|
| 379 |
option_1_text, option_2_text = transcribe_audio(
|
| 380 |
)
|
| 381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
|
|
|
| 516 |
st.markdown("### Model Selection")
|
| 517 |
col_model1, col_model2 = st.columns(2)
|
| 518 |
|
| 519 |
+
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure","sarvam"]
|
| 520 |
|
| 521 |
with col_model1:
|
| 522 |
st.selectbox(
|
|
|
|
| 582 |
* Model names are revealed after the vote is cast.
|
| 583 |
* Currently Hindi and English are supported, and
|
| 584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
| 585 |
+
* It may take up to 30-60 seconds for speech recognition in some cases.
|
| 586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
| 587 |
""".strip()
|
| 588 |
|
pages/scoreboard.py
CHANGED
|
@@ -30,13 +30,14 @@ def get_model_abbreviation(model_name):
|
|
| 30 |
'deepgram': 'Deepgram',
|
| 31 |
'Ori Swift': 'Ori Swift',
|
| 32 |
'Ori Prime': 'Ori Prime',
|
| 33 |
-
'azure' : 'Azure'
|
|
|
|
| 34 |
}
|
| 35 |
return abbrev_map.get(model_name, model_name)
|
| 36 |
|
| 37 |
|
| 38 |
def calculate_metrics(df):
|
| 39 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 40 |
metrics = {}
|
| 41 |
|
| 42 |
for model in models:
|
|
@@ -100,7 +101,7 @@ def create_appearance_chart(metrics):
|
|
| 100 |
return fig
|
| 101 |
|
| 102 |
def create_head_to_head_matrix(df):
|
| 103 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 104 |
matrix = np.zeros((len(models), len(models)))
|
| 105 |
|
| 106 |
for i, model1 in enumerate(models):
|
|
@@ -213,7 +214,8 @@ def dashboard():
|
|
| 213 |
"Ori Apex": "The top-performing model, fast and stable.",
|
| 214 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
| 215 |
"Deepgram" : "Deepgram Nova-2 API",
|
| 216 |
-
"Azure" : "Azure Speech Services API"
|
|
|
|
| 217 |
}
|
| 218 |
|
| 219 |
st.header('Model Descriptions')
|
|
|
|
| 30 |
'deepgram': 'Deepgram',
|
| 31 |
'Ori Swift': 'Ori Swift',
|
| 32 |
'Ori Prime': 'Ori Prime',
|
| 33 |
+
'azure' : 'Azure',
|
| 34 |
+
'sarvam':'Sarvam'
|
| 35 |
}
|
| 36 |
return abbrev_map.get(model_name, model_name)
|
| 37 |
|
| 38 |
|
| 39 |
def calculate_metrics(df):
|
| 40 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 41 |
metrics = {}
|
| 42 |
|
| 43 |
for model in models:
|
|
|
|
| 101 |
return fig
|
| 102 |
|
| 103 |
def create_head_to_head_matrix(df):
|
| 104 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 105 |
matrix = np.zeros((len(models), len(models)))
|
| 106 |
|
| 107 |
for i, model1 in enumerate(models):
|
|
|
|
| 214 |
"Ori Apex": "The top-performing model, fast and stable.",
|
| 215 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
| 216 |
"Deepgram" : "Deepgram Nova-2 API",
|
| 217 |
+
"Azure" : "Azure Speech Services API",
|
| 218 |
+
"Sarvam": "Sarvam AI saarika:v2 API"
|
| 219 |
}
|
| 220 |
|
| 221 |
st.header('Model Descriptions')
|