Beijuka committed on
Commit
ba4ffeb
·
verified ·
1 Parent(s): a1923a3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +62 -0
src/streamlit_app.py CHANGED
@@ -84,3 +84,65 @@ with tab3:
84
  "Models fine-tuned from Wav2Vec2 XLS-R, Whisper, MMS-1B, and W2V2-BERT "
85
  "to support high-quality speech recognition in this language."
86
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "Models fine-tuned from Wav2Vec2 XLS-R, Whisper, MMS-1B, and W2V2-BERT "
85
  "to support high-quality speech recognition in this language."
86
  )
87
+ # --- Tab 4: Evaluation Scenarios ---
88
+ with tab4:
89
+ st.header("Evaluation Scenarios")
90
+ st.write(
91
+ "To benchmark ASR models for African languages, we design evaluation scenarios "
92
+ "that mimic real-world challenges such as limited training data, domain shift, "
93
+ "and variation in speech style."
94
+ )
95
+
96
+ # Summary Table
97
+ st.subheader("Scenario Overview")
98
+ scenarios = pd.DataFrame([
99
+ {
100
+ "Scenario": "Data Efficiency Benchmark",
101
+ "Focus": "Low-resource training (1 hour per language)",
102
+ "Languages": "Multiple African languages",
103
+ "Dataset": "[ASR Africa Data Efficiency Benchmark](https://huggingface.co/datasets/asr-africa/ASRAfricaDataEfficiencyBenchmark)"
104
+ },
105
+ {
106
+ "Scenario": "Domain Adaptation Benchmark",
107
+ "Focus": "Performance shift across domains",
108
+ "Languages": "Akan (General → Finance), Wolof (General → Agriculture)",
109
+ "Dataset": "[African ASR Domain Adaptation Benchmark](https://huggingface.co/datasets/asr-africa/African-ASR-Domain-Adaptation-Evaluation)"
110
+ },
111
+ {
112
+ "Scenario": "Speech Type Adaptation",
113
+ "Focus": "Different speech types (read, conversation, etc.)",
114
+ "Languages": "Ongoing (various African languages)",
115
+ "Dataset": "[African ASR Speech Type Adaptation](https://huggingface.co/datasets/asr-africa/African-ASR-Speech-Type-Adaptation)"
116
+ }
117
+ ])
118
+
119
+ st.dataframe(scenarios, use_container_width=True)
120
+
121
+ st.subheader("Explore Scenarios")
122
+
123
+ with st.expander("Data Efficiency Benchmark"):
124
+ st.markdown("""
125
+ - **Goal:** Evaluate ASR performance in low-resource conditions.
126
+ - **Design:** 1 hour of transcribed audio per language.
127
+ - **Includes:** MP3 audio + metadata (speaker age, gender, environment).
128
+ - **Use case:** Encourage data-efficient ASR systems.
129
+ 🔗 [View dataset](https://huggingface.co/datasets/asr-africa/ASRAfricaDataEfficiencyBenchmark)
130
+ """)
131
+
132
+ with st.expander("Domain Adaptation Benchmark"):
133
+ st.markdown("""
134
+ - **Goal:** Test ASR generalization across domains.
135
+ - **Languages:**
136
+ - Akan → General purpose training, Financial domain testing.
137
+ - Wolof → General purpose training, Agricultural domain testing.
138
+ - **Challenge:** Many ASR systems degrade when moved to new domains.
139
+ 🔗 [View dataset](https://huggingface.co/datasets/asr-africa/African-ASR-Domain-Adaptation-Evaluation)
140
+ """)
141
+
142
+ with st.expander("Speech Type Adaptation"):
143
+ st.markdown("""
144
+ - **Goal:** Measure ASR performance on different speech styles.
145
+ - **Types of Speech:** Read speech, conversational, spontaneous speech.
146
+ - **Work in progress** – expanding to multiple African languages.
147
+ 🔗 [View dataset](https://huggingface.co/datasets/asr-africa/African-ASR-Speech-Type-Adaptation)
148
+ """)