Beijuka committed on
Commit
bb039ee
Β·
verified Β·
1 Parent(s): d7f19fd

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +43 -71
src/streamlit_app.py CHANGED
"""Streamlit ASR demo for African languages.

Pick a language, then upload or record audio; the matching Hugging Face
model transcribes it. (This is the pre-change version of src/streamlit_app.py.)
"""
import os
import tempfile

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import pipeline
from streamlit_audiorec import st_audiorec

# Hugging Face access token for gated/private models; may be None for public ones.
HF_TOKEN = os.getenv("HF_TOKEN")

# Language name -> Hugging Face model repository id.
model_map = {
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1-nolm",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
}

# Device selection: first GPU when available, else CPU (-1 per transformers convention).
inference_device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def load_asr(language):
    """Load and cache the ASR pipeline for *language* (a key of model_map).

    Raises KeyError if *language* is not in model_map.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=model_map[language],
        device=inference_device,
        token=HF_TOKEN,
    )


st.title("ASR for African Languages Demo")

# Language selector
language = st.selectbox("Select Language", list(model_map.keys()))

# Choose input method
input_method = st.radio("Choose input method", ["Upload Audio", "Record Microphone"])

audio_bytes = None

if input_method == "Upload Audio":
    audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "flac", "ogg"])
    if audio_file is not None:
        audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/wav")

elif input_method == "Record Microphone":
    wav_audio_data = st_audiorec()
    if wav_audio_data is not None:
        audio_bytes = wav_audio_data
        st.audio(audio_bytes, format="audio/wav")

# Run transcription
if audio_bytes and st.button("Transcribe"):
    st.write("πŸ”„ Running transcription...")
    asr = load_asr(language)

    # Write audio to a temporary file (transformers expects a file or array).
    # delete=False is required so the pipeline can reopen the path on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name

    try:
        text = asr(tmp_path)["text"]
    finally:
        # Fix: the original never removed the temp file, leaking one per run.
        os.remove(tmp_path)

    st.success(" Transcription complete!")
    st.text_area("Transcribed Text", value=text, height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""ASR Africa Hub: a tabbed Streamlit landing page.

Embeds the live demo Space, renders owner/dataset info from a local
markdown file, and links the model collection. (This is the post-change
version of src/streamlit_app.py.)
"""
from pathlib import Path

import numpy as np
import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
# Fix: the original imported `streamlit as st` twice; the duplicate is removed.

st.set_page_config(page_title="ASR Africa Hub", layout="wide")

st.title("🌍 ASR Africa Hub")

# Create tabs
tab1, tab2, tab3 = st.tabs(["🎀 Demo", "πŸ‘€ Owner Avatar", "πŸ“¦ Model Collections"])

# --- Tab 1: Demo ---
with tab1:
    st.header("Demo")
    st.write("Try the ASR Africa demo directly:")

    # Option 1: Embed Hugging Face Space with iframe
    components.iframe(
        "https://asr-africa-asr-african-languages.hf.space/",
        height=800,
        scrolling=True,
    )

    # Option 2: Just provide a link
    st.markdown(
        "[Open full demo in a new tab](https://asr-africa-asr-african-languages.hf.space/)"
    )

# --- Tab 2: Owner Avatar / Dataset ---
with tab2:
    st.header("ASR-Africa-Benchmark-Dataset")
    st.write("Project Owner Information and Dataset details:")

    # Load Markdown file if provided
    md_file = Path("dataset_info.md")  # you can rename it
    if md_file.exists():
        st.markdown(md_file.read_text())
    else:
        st.info("Upload your `dataset_info.md` file to display content here.")

# --- Tab 3: Model Collections ---
with tab3:
    st.header("Model Collections")
    st.write("Explore available ASR models for African languages:")

    st.markdown("""
    - [Afrikaans Model](https://huggingface.co/asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1)
    - [Hausa Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0)
    - [Yoruba Model](https://huggingface.co/asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0)
    - *(Add more models as needed)*
    """)