Update src/streamlit_app.py
Browse files- src/streamlit_app.py +62 -0
src/streamlit_app.py
CHANGED
|
@@ -84,3 +84,65 @@ with tab3:
|
|
| 84 |
"Models fine-tuned from Wav2Vec2 XLS-R, Whisper, MMS-1B, and W2V2-BERT "
|
| 85 |
"to support high-quality speech recognition in this language."
|
| 86 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"Models fine-tuned from Wav2Vec2 XLS-R, Whisper, MMS-1B, and W2V2-BERT "
|
| 85 |
"to support high-quality speech recognition in this language."
|
| 86 |
)
|
| 87 |
+
# --- Tab 4: Evaluation Scenarios ---
|
| 88 |
+
with tab4:
|
| 89 |
+
st.header("Evaluation Scenarios")
|
| 90 |
+
st.write(
|
| 91 |
+
"To benchmark ASR models for African languages, we design evaluation scenarios "
|
| 92 |
+
"that mimic real-world challenges such as limited training data, domain shift, "
|
| 93 |
+
"and variation in speech style."
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
# Summary Table
|
| 97 |
+
st.subheader("Scenario Overview")
|
| 98 |
+
scenarios = pd.DataFrame([
|
| 99 |
+
{
|
| 100 |
+
"Scenario": "Data Efficiency Benchmark",
|
| 101 |
+
"Focus": "Low-resource training (1 hour per language)",
|
| 102 |
+
"Languages": "Multiple African languages",
|
| 103 |
+
"Dataset": "[ASR Africa Data Efficiency Benchmark](https://huggingface.co/datasets/asr-africa/ASRAfricaDataEfficiencyBenchmark)"
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"Scenario": "Domain Adaptation Benchmark",
|
| 107 |
+
"Focus": "Performance shift across domains",
|
| 108 |
+
"Languages": "Akan (General → Finance), Wolof (General → Agriculture)",
|
| 109 |
+
"Dataset": "[African ASR Domain Adaptation Benchmark](https://huggingface.co/datasets/asr-africa/African-ASR-Domain-Adaptation-Evaluation)"
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"Scenario": "Speech Type Adaptation",
|
| 113 |
+
"Focus": "Different speech types (read, conversation, etc.)",
|
| 114 |
+
"Languages": "Ongoing (various African languages)",
|
| 115 |
+
"Dataset": "[African ASR Speech Type Adaptation](https://huggingface.co/datasets/asr-africa/African-ASR-Speech-Type-Adaptation)"
|
| 116 |
+
}
|
| 117 |
+
])
|
| 118 |
+
|
| 119 |
+
st.dataframe(scenarios, use_container_width=True)
|
| 120 |
+
|
| 121 |
+
st.subheader("Explore Scenarios")
|
| 122 |
+
|
| 123 |
+
with st.expander("Data Efficiency Benchmark"):
|
| 124 |
+
st.markdown("""
|
| 125 |
+
- **Goal:** Evaluate ASR performance in low-resource conditions.
|
| 126 |
+
- **Design:** 1 hour of transcribed audio per language.
|
| 127 |
+
- **Includes:** MP3 audio + metadata (speaker age, gender, environment).
|
| 128 |
+
- **Use case:** Encourage data-efficient ASR systems.
|
| 129 |
+
👉 [View dataset](https://huggingface.co/datasets/asr-africa/ASRAfricaDataEfficiencyBenchmark)
|
| 130 |
+
""")
|
| 131 |
+
|
| 132 |
+
with st.expander("Domain Adaptation Benchmark"):
|
| 133 |
+
st.markdown("""
|
| 134 |
+
- **Goal:** Test ASR generalization across domains.
|
| 135 |
+
- **Languages:**
|
| 136 |
+
- Akan → General purpose training, Financial domain testing.
|
| 137 |
+
- Wolof → General purpose training, Agricultural domain testing.
|
| 138 |
+
- **Challenge:** Many ASR systems degrade when moved to new domains.
|
| 139 |
+
👉 [View dataset](https://huggingface.co/datasets/asr-africa/African-ASR-Domain-Adaptation-Evaluation)
|
| 140 |
+
""")
|
| 141 |
+
|
| 142 |
+
with st.expander("Speech Type Adaptation"):
|
| 143 |
+
st.markdown("""
|
| 144 |
+
- **Goal:** Measure ASR performance on different speech styles.
|
| 145 |
+
- **Types of Speech:** Read speech, conversational, spontaneous speech.
|
| 146 |
+
- **Work in progress** — expanding to multiple African languages.
|
| 147 |
+
👉 [View dataset](https://huggingface.co/datasets/asr-africa/African-ASR-Speech-Type-Adaptation)
|
| 148 |
+
""")
|