Integrated Sarvam speech-to-text API
Browse files- app.py +7 -7
- pages/scoreboard.py +6 -4
app.py
CHANGED
|
@@ -92,10 +92,11 @@ class ResultWriter:
|
|
| 92 |
'path',
|
| 93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
| 94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
| 95 |
-
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
|
|
|
| 96 |
]
|
| 97 |
|
| 98 |
-
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 99 |
|
| 100 |
if not fs.exists(save_path):
|
| 101 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
|
@@ -216,7 +217,6 @@ def call_function(model_name):
|
|
| 216 |
"audio_b64":True
|
| 217 |
}}
|
| 218 |
elif st.session_state.current_audio_type == "uploaded":
|
| 219 |
-
# For uploaded files, use the processed audio data
|
| 220 |
array = st.session_state.audio['data']
|
| 221 |
sr = st.session_state.audio['sample_rate']
|
| 222 |
if sr != 22050:
|
|
@@ -247,7 +247,7 @@ def call_function(model_name):
|
|
| 247 |
return transcript
|
| 248 |
|
| 249 |
def transcribe_audio():
|
| 250 |
-
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
|
| 251 |
|
| 252 |
if st.session_state.model_1_selection == "Random":
|
| 253 |
model1_name = random.choice(models_list)
|
|
@@ -375,7 +375,7 @@ def on_option_none_click():
|
|
| 375 |
|
| 376 |
def on_click_transcribe():
|
| 377 |
if st.session_state.has_audio:
|
| 378 |
-
with st.spinner("Transcribing audio... this may take
|
| 379 |
option_1_text, option_2_text = transcribe_audio(
|
| 380 |
)
|
| 381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
|
@@ -516,7 +516,7 @@ def main():
|
|
| 516 |
st.markdown("### Model Selection")
|
| 517 |
col_model1, col_model2 = st.columns(2)
|
| 518 |
|
| 519 |
-
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure"]
|
| 520 |
|
| 521 |
with col_model1:
|
| 522 |
st.selectbox(
|
|
@@ -582,7 +582,7 @@ def main():
|
|
| 582 |
* Model names are revealed after the vote is cast.
|
| 583 |
* Currently Hindi and English are supported, and
|
| 584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
| 585 |
-
* It may take up to 30 seconds for speech recognition in some cases.
|
| 586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
| 587 |
""".strip()
|
| 588 |
|
|
|
|
| 92 |
'path',
|
| 93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
| 94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
| 95 |
+
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration',
|
| 96 |
+
'sarvam_score','sarvam_appearance','sarvam_duration',
|
| 97 |
]
|
| 98 |
|
| 99 |
+
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 100 |
|
| 101 |
if not fs.exists(save_path):
|
| 102 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
|
|
|
| 217 |
"audio_b64":True
|
| 218 |
}}
|
| 219 |
elif st.session_state.current_audio_type == "uploaded":
|
|
|
|
| 220 |
array = st.session_state.audio['data']
|
| 221 |
sr = st.session_state.audio['sample_rate']
|
| 222 |
if sr != 22050:
|
|
|
|
| 247 |
return transcript
|
| 248 |
|
| 249 |
def transcribe_audio():
|
| 250 |
+
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure",'sarvam']
|
| 251 |
|
| 252 |
if st.session_state.model_1_selection == "Random":
|
| 253 |
model1_name = random.choice(models_list)
|
|
|
|
| 375 |
|
| 376 |
def on_click_transcribe():
|
| 377 |
if st.session_state.has_audio:
|
| 378 |
+
with st.spinner("Transcribing audio... this may take some time"):
|
| 379 |
option_1_text, option_2_text = transcribe_audio(
|
| 380 |
)
|
| 381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
|
|
|
| 516 |
st.markdown("### Model Selection")
|
| 517 |
col_model1, col_model2 = st.columns(2)
|
| 518 |
|
| 519 |
+
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure","sarvam"]
|
| 520 |
|
| 521 |
with col_model1:
|
| 522 |
st.selectbox(
|
|
|
|
| 582 |
* Model names are revealed after the vote is cast.
|
| 583 |
* Currently Hindi and English are supported, and
|
| 584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
| 585 |
+
* It may take up to 30-60 seconds for speech recognition in some cases.
|
| 586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
| 587 |
""".strip()
|
| 588 |
|
pages/scoreboard.py
CHANGED
|
@@ -30,13 +30,14 @@ def get_model_abbreviation(model_name):
|
|
| 30 |
'deepgram': 'Deepgram',
|
| 31 |
'Ori Swift': 'Ori Swift',
|
| 32 |
'Ori Prime': 'Ori Prime',
|
| 33 |
-
'azure' : 'Azure'
|
|
|
|
| 34 |
}
|
| 35 |
return abbrev_map.get(model_name, model_name)
|
| 36 |
|
| 37 |
|
| 38 |
def calculate_metrics(df):
|
| 39 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 40 |
metrics = {}
|
| 41 |
|
| 42 |
for model in models:
|
|
@@ -100,7 +101,7 @@ def create_appearance_chart(metrics):
|
|
| 100 |
return fig
|
| 101 |
|
| 102 |
def create_head_to_head_matrix(df):
|
| 103 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
| 104 |
matrix = np.zeros((len(models), len(models)))
|
| 105 |
|
| 106 |
for i, model1 in enumerate(models):
|
|
@@ -213,7 +214,8 @@ def dashboard():
|
|
| 213 |
"Ori Apex": "The top-performing model, fast and stable.",
|
| 214 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
| 215 |
"Deepgram" : "Deepgram Nova-2 API",
|
| 216 |
-
"Azure" : "Azure Speech Services API"
|
|
|
|
| 217 |
}
|
| 218 |
|
| 219 |
st.header('Model Descriptions')
|
|
|
|
| 30 |
'deepgram': 'Deepgram',
|
| 31 |
'Ori Swift': 'Ori Swift',
|
| 32 |
'Ori Prime': 'Ori Prime',
|
| 33 |
+
'azure' : 'Azure',
|
| 34 |
+
'sarvam':'Sarvam'
|
| 35 |
}
|
| 36 |
return abbrev_map.get(model_name, model_name)
|
| 37 |
|
| 38 |
|
| 39 |
def calculate_metrics(df):
|
| 40 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 41 |
metrics = {}
|
| 42 |
|
| 43 |
for model in models:
|
|
|
|
| 101 |
return fig
|
| 102 |
|
| 103 |
def create_head_to_head_matrix(df):
|
| 104 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
| 105 |
matrix = np.zeros((len(models), len(models)))
|
| 106 |
|
| 107 |
for i, model1 in enumerate(models):
|
|
|
|
| 214 |
"Ori Apex": "The top-performing model, fast and stable.",
|
| 215 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
| 216 |
"Deepgram" : "Deepgram Nova-2 API",
|
| 217 |
+
"Azure" : "Azure Speech Services API",
|
| 218 |
+
"Sarvam": "Sarvam AI saarika:v2 API"
|
| 219 |
}
|
| 220 |
|
| 221 |
st.header('Model Descriptions')
|