Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,25 +5,26 @@ from transformers import pipeline
|
|
| 5 |
|
| 6 |
# Map of models (already defined in your app)
|
| 7 |
model_map = {
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
# Create storage directory
|
| 28 |
os.makedirs("responses", exist_ok=True)
|
| 29 |
|
|
@@ -31,30 +32,36 @@ os.makedirs("responses", exist_ok=True)
|
|
| 31 |
def transcribe(audio, language):
|
| 32 |
asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0)
|
| 33 |
text = asr(audio)["text"]
|
| 34 |
-
return text, audio
|
| 35 |
-
|
| 36 |
# Save feedback
|
| 37 |
-
def save_feedback(audio_file, transcription,
|
| 38 |
data = {
|
| 39 |
"audio_file": audio_file,
|
| 40 |
"transcription": transcription,
|
| 41 |
-
"
|
| 42 |
"gender": gender,
|
| 43 |
-
"
|
| 44 |
-
"native_speaker": native,
|
| 45 |
"speak_level": speak_level,
|
| 46 |
"write_level": write_level,
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
|
|
|
| 50 |
"domain": domain,
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
"errors": errors,
|
| 55 |
-
"
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
|
|
|
|
|
|
| 58 |
with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
|
| 59 |
writer = csv.DictWriter(f, fieldnames=data.keys())
|
| 60 |
if f.tell() == 0:
|
|
@@ -66,40 +73,71 @@ def save_feedback(audio_file, transcription, age, gender, model_used, native, sp
|
|
| 66 |
with gr.Blocks() as demo:
|
| 67 |
gr.Markdown("## African ASR + Feedback")
|
| 68 |
|
|
|
|
| 69 |
with gr.Row():
|
| 70 |
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
|
| 71 |
lang = gr.Dropdown(list(model_map.keys()), label="Select Language")
|
| 72 |
-
|
|
|
|
| 73 |
transcribed_text = gr.Textbox(label="Transcribed Text")
|
| 74 |
|
|
|
|
| 75 |
submit_btn = gr.Button("Transcribe")
|
| 76 |
submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])
|
| 77 |
|
|
|
|
| 78 |
gr.Markdown("---\n## Feedback Form")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
| 96 |
email = gr.Textbox(label="Email (optional)")
|
| 97 |
|
|
|
|
| 98 |
save_btn = gr.Button("Submit Feedback")
|
| 99 |
output_msg = gr.Textbox(interactive=False)
|
| 100 |
|
|
|
|
| 101 |
save_btn.click(fn=save_feedback,
|
| 102 |
-
inputs=[audio_input, transcribed_text,
|
| 103 |
outputs=[output_msg])
|
| 104 |
|
|
|
|
| 105 |
demo.launch()
|
|
|
|
| 5 |
|
| 6 |
# Map of models (already defined in your app)
|
| 7 |
model_map = {
|
| 8 |
+
"hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
|
| 9 |
+
"igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
|
| 10 |
+
"yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
|
| 11 |
+
"zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
|
| 12 |
+
"xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
|
| 13 |
+
"afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
|
| 14 |
+
"bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
|
| 15 |
+
"shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
|
| 16 |
+
"luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
|
| 17 |
+
"swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
|
| 18 |
+
"lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
|
| 19 |
+
"amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
|
| 20 |
+
"kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
|
| 21 |
+
"oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
|
| 22 |
+
"akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
|
| 23 |
+
"ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
|
| 24 |
+
"wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
|
| 25 |
+
"bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
# Create storage directory
|
| 29 |
os.makedirs("responses", exist_ok=True)
|
| 30 |
|
|
|
|
def transcribe(audio, language):
    """Transcribe an audio clip with the ASR model for the selected language.

    Parameters:
        audio: filesystem path to the uploaded/recorded clip
            (Gradio Audio with type="filepath").
        language: key into ``model_map`` selecting the checkpoint.

    Returns:
        (text, audio): the transcription string and the unchanged audio
        path, so the UI can refresh both outputs.
    """
    # Cache one pipeline per language on the function object: the original
    # rebuilt the pipeline (a full model load) on every button click.
    cache = getattr(transcribe, "_asr_cache", None)
    if cache is None:
        cache = transcribe._asr_cache = {}
    asr = cache.get(language)
    if asr is None:
        # device=0 pins the first GPU, as in the original code.
        # NOTE(review): this raises when no CUDA device is available — confirm
        # the Space always runs on GPU hardware.
        asr = cache[language] = pipeline(
            "automatic-speech-recognition", model=model_map[language], device=0
        )
    text = asr(audio)["text"]
    return text, audio
| 36 |
+
|
| 37 |
# Save feedback
|
| 38 |
+
def save_feedback(audio_file, transcription, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email):
|
| 39 |
data = {
|
| 40 |
"audio_file": audio_file,
|
| 41 |
"transcription": transcription,
|
| 42 |
+
"age_group": age_group,
|
| 43 |
"gender": gender,
|
| 44 |
+
"evaluated_language": evaluated_language,
|
|
|
|
| 45 |
"speak_level": speak_level,
|
| 46 |
"write_level": write_level,
|
| 47 |
+
"native": native,
|
| 48 |
+
"native_language": native_language,
|
| 49 |
+
"environment": env,
|
| 50 |
+
"device": device,
|
| 51 |
"domain": domain,
|
| 52 |
+
"accuracy": accuracy,
|
| 53 |
+
"orthography": orthography,
|
| 54 |
+
"meaning": meaning,
|
| 55 |
"errors": errors,
|
| 56 |
+
"performance": performance,
|
| 57 |
+
"improvement": improvement,
|
| 58 |
+
"usability": usability,
|
| 59 |
+
"technical_issues": technical_issues,
|
| 60 |
+
"final_comments": final_comments,
|
| 61 |
+
"email": email
|
| 62 |
}
|
| 63 |
+
|
| 64 |
+
# Write feedback to a CSV file
|
| 65 |
with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
|
| 66 |
writer = csv.DictWriter(f, fieldnames=data.keys())
|
| 67 |
if f.tell() == 0:
|
|
|
|
# Gradio UI: transcription demo on top, feedback questionnaire below.
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")

    # First row: audio source and language selection.
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")

    # Transcription output textbox.
    transcribed_text = gr.Textbox(label="Transcribed Text")

    # Button that runs the ASR model; also echoes the audio back.
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    # Feedback form section.
    gr.Markdown("---\n## Feedback Form")

    # Respondent demographics.
    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say", "Other"], label="Gender")

    # Which language the respondent evaluated.
    evaluated_language = gr.Dropdown(list(model_map.keys()), label="Which language did you evaluate for?")

    # Language proficiency (self-reported, 1-10).
    speak_level = gr.Slider(1, 10, label="How well do you speak this language?")
    write_level = gr.Slider(1, 10, label="How well do you write the language?")

    # Native-speaker status.
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
    native_language = gr.Textbox(label="If not, what is your native language?")

    # Recording conditions.
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room", "Noisy Background", "Multiple Environments", "Unsure", "Other"], label="What was the type of recording environment?")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone", "Unsure", "Other"], label="What type of recording device was used?")
    domain = gr.Textbox(label="Was the speech related to a specific domain or topic? (Optional)")

    # Model performance evaluation.
    accuracy = gr.Slider(1, 10, label="How accurate was the model’s transcription?")
    orthography = gr.Dropdown(["Yes, mostly correct", "No, major issues", "Partially", "Not Applicable"], label="Did the transcription use standard orthography?")
    meaning = gr.Slider(1, 10, label="Did the transcription preserve the original meaning?")

    # Prominent error categories.
    errors = gr.CheckboxGroup(["Substitutions", "Omissions", "Insertions", "Pronunciation-related", "Diacritic Errors", "Code-switching Errors", "Named Entity Errors", "Punctuation Errors", "No significant errors"], label="Which errors were prominent?")

    # Free-text performance feedback.
    performance = gr.Textbox(label="What did the model do well? What did it struggle with?")
    improvement = gr.Textbox(label="How could this ASR model be improved?")

    # Usability and final comments.
    usability = gr.Slider(1, 5, label="How easy was it to use the tool?")
    technical_issues = gr.Textbox(label="Did you encounter any technical issues?")
    final_comments = gr.Textbox(label="Any other comments or suggestions?")

    # Optional contact email.
    email = gr.Textbox(label="Email (optional)")

    # Submit button and status message.
    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)

    # Persist the whole form via save_feedback when submitted; the input
    # order must match save_feedback's parameter order.
    save_btn.click(fn=save_feedback,
                   inputs=[audio_input, transcribed_text, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email],
                   outputs=[output_msg])

# Launch the interface
demo.launch()
|