Update src/streamlit_app.py
Browse files- src/streamlit_app.py +43 -71
src/streamlit_app.py
CHANGED
|
@@ -2,82 +2,54 @@ import altair as alt
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
import streamlit as st
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
-
from
|
| 8 |
-
from streamlit_audiorec import st_audiorec
|
| 9 |
-
import tempfile
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
-
model_map = {
|
| 15 |
-
"afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
|
| 16 |
-
"akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
|
| 17 |
-
"amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
|
| 18 |
-
"bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
|
| 19 |
-
"bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
|
| 20 |
-
"ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
|
| 21 |
-
"hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
|
| 22 |
-
"igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
|
| 23 |
-
"kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
|
| 24 |
-
"lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
|
| 25 |
-
"luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
|
| 26 |
-
"oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
|
| 27 |
-
"shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
|
| 28 |
-
"swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1-nolm",
|
| 29 |
-
"wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
|
| 30 |
-
"xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
|
| 31 |
-
"yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
|
| 32 |
-
"zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
|
| 33 |
-
}
|
| 34 |
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
#
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
model=model_map[language],
|
| 45 |
-
device=inference_device,
|
| 46 |
-
token=HF_TOKEN
|
| 47 |
)
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
# Choose input method
|
| 55 |
-
input_method = st.radio("Choose input method", ["Upload Audio", "Record Microphone"])
|
| 56 |
-
|
| 57 |
-
audio_bytes = None
|
| 58 |
-
|
| 59 |
-
if input_method == "Upload Audio":
|
| 60 |
-
audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "flac", "ogg"])
|
| 61 |
-
if audio_file is not None:
|
| 62 |
-
audio_bytes = audio_file.read()
|
| 63 |
-
st.audio(audio_bytes, format="audio/wav")
|
| 64 |
-
|
| 65 |
-
elif input_method == "Record Microphone":
|
| 66 |
-
wav_audio_data = st_audiorec()
|
| 67 |
-
if wav_audio_data is not None:
|
| 68 |
-
audio_bytes = wav_audio_data
|
| 69 |
-
st.audio(audio_bytes, format="audio/wav")
|
| 70 |
-
|
| 71 |
-
# Run transcription
|
| 72 |
-
if audio_bytes and st.button("Transcribe"):
|
| 73 |
-
st.write("π Running transcription...")
|
| 74 |
-
asr = load_asr(language)
|
| 75 |
-
|
| 76 |
-
# Write audio to a temporary file (transformers expects a file or array)
|
| 77 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
| 78 |
-
tmp.write(audio_bytes)
|
| 79 |
-
tmp_path = tmp.name
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
import streamlit as st
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import streamlit.components.v1 as components
|
| 7 |
+
from pathlib import Path
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
st.set_page_config(page_title="ASR Africa Hub", layout="wide")
|
| 10 |
|
| 11 |
+
st.title("🌍 ASR Africa Hub")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
# Create tabs
|
| 14 |
+
tab1, tab2, tab3 = st.tabs(["🎤 Demo", "👤 Owner Avatar", "📦 Model Collections"])
|
| 15 |
|
| 16 |
+
# --- Tab 1: Demo ---
|
| 17 |
+
with tab1:
|
| 18 |
+
st.header("Demo")
|
| 19 |
+
st.write("Try the ASR Africa demo directly:")
|
| 20 |
|
| 21 |
+
# Option 1: Embed Hugging Face Space with iframe
|
| 22 |
+
components.iframe(
|
| 23 |
+
"https://asr-africa-asr-african-languages.hf.space/",
|
| 24 |
+
height=800,
|
| 25 |
+
scrolling=True
|
|
|
|
|
|
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
+
# Option 2: Just provide a link
|
| 29 |
+
st.markdown(
|
| 30 |
+
"[Open full demo in a new tab](https://asr-africa-asr-african-languages.hf.space/)"
|
| 31 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
# --- Tab 2: Owner Avatar / Dataset ---
|
| 34 |
+
with tab2:
|
| 35 |
+
st.header("ASR-Africa-Benchmark-Dataset")
|
| 36 |
+
st.write("Project Owner Information and Dataset details:")
|
| 37 |
+
|
| 38 |
+
# Load Markdown file if provided
|
| 39 |
+
md_file = Path("dataset_info.md") # you can rename it
|
| 40 |
+
if md_file.exists():
|
| 41 |
+
st.markdown(md_file.read_text())
|
| 42 |
+
else:
|
| 43 |
+
st.info("Upload your `dataset_info.md` file to display content here.")
|
| 44 |
+
|
| 45 |
+
# --- Tab 3: Model Collections ---
|
| 46 |
+
with tab3:
|
| 47 |
+
st.header("Model Collections")
|
| 48 |
+
st.write("Explore available ASR models for African languages:")
|
| 49 |
+
|
| 50 |
+
st.markdown("""
|
| 51 |
+
- [Afrikaans Model](https://huggingface.co/asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1)
|
| 52 |
+
- [Hausa Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0)
|
| 53 |
+
- [Yoruba Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0)
|
| 54 |
+
- *(Add more models as needed)*
|
| 55 |
+
""")
|