Spaces:

asr-africa
/

Automatic_Speech_Recognition_for_African_Languages

Sleeping

App Files Community

Beijuka committed on Sep 27

Commit

bb039ee

verified ·

1 Parent(s): d7f19fd

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +43 -71

src/streamlit_app.py CHANGED Viewed

@@ -2,82 +2,54 @@ import altair as alt
 import numpy as np
 import pandas as pd
 import streamlit as st
-import os
-import torch
-from transformers import pipeline
-from streamlit_audiorec import st_audiorec
-import tempfile
-HF_TOKEN = os.getenv("HF_TOKEN")
-# Model map
-model_map = {
-    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
-    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
-    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
-    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
-    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
-    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
-    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
-    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
-    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
-    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
-    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
-    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
-    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
-    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1-nolm",
-    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
-    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
-    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
-    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
-}
-# Device selection
-inference_device = 0 if torch.cuda.is_available() else -1
-# Cached model loader
-@st.cache_resource
-def load_asr(language):
-    return pipeline(
-        "automatic-speech-recognition",
-        model=model_map[language],
-        device=inference_device,
-        token=HF_TOKEN
     )
-st.title("ASR for African Languages Demo")
-# Language selector
-language = st.selectbox("Select Language", list(model_map.keys()))
-# Choose input method
-input_method = st.radio("Choose input method", ["Upload Audio", "Record Microphone"])
-audio_bytes = None
-if input_method == "Upload Audio":
-    audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "flac", "ogg"])
-    if audio_file is not None:
-        audio_bytes = audio_file.read()
-        st.audio(audio_bytes, format="audio/wav")
-elif input_method == "Record Microphone":
-    wav_audio_data = st_audiorec()
-    if wav_audio_data is not None:
-        audio_bytes = wav_audio_data
-        st.audio(audio_bytes, format="audio/wav")
-# Run transcription
-if audio_bytes and st.button("Transcribe"):
-    st.write("🔄 Running transcription...")
-    asr = load_asr(language)
-    # Write audio to a temporary file (transformers expects a file or array)
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-        tmp.write(audio_bytes)
-        tmp_path = tmp.name
-    text = asr(tmp_path)["text"]
-    st.success(" Transcription complete!")
-    st.text_area("Transcribed Text", value=text, height=200)

 import numpy as np
 import pandas as pd
 import streamlit as st
+import streamlit as st
+import streamlit.components.v1 as components
+from pathlib import Path
+st.set_page_config(page_title="ASR Africa Hub", layout="wide")
+st.title("🌍 ASR Africa Hub")
+# Create tabs
+tab1, tab2, tab3 = st.tabs(["🎤 Demo", "👤 Owner Avatar", "📦 Model Collections"])
+# --- Tab 1: Demo ---
+with tab1:
+    st.header("Demo")
+    st.write("Try the ASR Africa demo directly:")
+    # Option 1: Embed Hugging Face Space with iframe
+    components.iframe(
+        "https://asr-africa-asr-african-languages.hf.space/",
+        height=800,
+        scrolling=True
     )
+    # Option 2: Just provide a link
+    st.markdown(
+        "[Open full demo in a new tab](https://asr-africa-asr-african-languages.hf.space/)"
+    )
+# --- Tab 2: Owner Avatar / Dataset ---
+with tab2:
+    st.header("ASR-Africa-Benchmark-Dataset")
+    st.write("Project Owner Information and Dataset details:")
+    # Load Markdown file if provided
+    md_file = Path("dataset_info.md")  # you can rename it
+    if md_file.exists():
+        st.markdown(md_file.read_text())
+    else:
+        st.info("Upload your `dataset_info.md` file to display content here.")
+# --- Tab 3: Model Collections ---
+with tab3:
+    st.header("Model Collections")
+    st.write("Explore available ASR models for African languages:")
+    st.markdown("""
+    - [Afrikaans Model](https://huggingface.co/asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1)
+    - [Hausa Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0)
+    - [Yoruba Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0)
+    - *(Add more models as needed)*
+    """)