"""Gradio app for evaluating African-language ASR models.

Transcribes uploaded or recorded audio with a per-language speech-recognition
model and submits the evaluator's feedback form to a FastAPI backend.
"""

# imports
import gradio as gr
import os
import requests
from transformers import pipeline

# FastAPI backend endpoint that receives feedback submissions.
# It can be overridden with the ASR_FEEDBACK_BACKEND_URL environment variable
# (e.g. to point at a staging server); an unset or empty variable falls back
# to the production endpoint below.
BACKEND_URL = (
    os.environ.get("ASR_FEEDBACK_BACKEND_URL")
    or "https://asr-evaluation-backend.emergentai.ug/submit-feedback"
)

# Maps each selectable language to the Hugging Face model id used to transcribe it.
# Every checkpoint is published under the same "asr-africa" organisation, so only
# the repository names are listed and the prefix is added once.
model_map = {
    language: "asr-africa/" + repo_name
    for language, repo_name in (
        ("hausa", "wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0"),
        ("igbo", "wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0"),
        ("yoruba", "wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0"),
        ("zulu", "W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1"),
        ("xhosa", "wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1"),
        ("afrikaans", "mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1"),
        ("bemba", "whisper_BIG-C_BEMBA_189hr_v1"),
        ("shona", "W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1"),
        ("luganda", "whisper-small-CV-Fleurs-lg-313hrs-v1"),
        ("swahili", "wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1"),
        ("lingala", "wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2"),
        ("amharic", "facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1"),
        ("kinyarwanda", "facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1"),
        ("oromo", "mms-1b-all-Sagalee-orm-85hrs-4"),
        ("akan", "wav2vec2-xls-r-akan-100-hours"),
        ("ewe", "wav2vec2-xls-r-ewe-100-hours"),
        ("wolof", "w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset"),
        ("bambara", "mms-bambara-50-hours-mixed-bambara-dataset"),
    )
}

# Ensure the local "responses" folder exists at import time; if it is already
# there, exist_ok=True makes this a no-op instead of an error.
os.makedirs(name="responses", exist_ok=True)

# Transcription

# Single-slot cache holding the most recently used (language, pipeline) pair.
# Only one model is kept so that memory use never exceeds one loaded checkpoint,
# while repeated transcriptions in the same language skip the costly reload.
_last_asr = [None]


def _get_asr_pipeline(language):
    """Return the ASR pipeline for *language*, reusing the last one when possible."""
    entry = _last_asr[0]  # read once so a concurrent update cannot mix up models
    if entry is not None and entry[0] == language:
        return entry[1]

    # Release the previous model before loading the next one so that two
    # checkpoints are not held by the cache at the same time.
    _last_asr[0] = None
    # NOTE(review): device=0 is kept from the original and selects the first
    # accelerator — confirm the deployment always provides one.
    asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0)
    _last_asr[0] = (language, asr)
    return asr


def transcribe(audio, language):
    """Transcribe an audio file with the model registered for *language*.

    Args:
        audio: Path of the audio file to transcribe (the Audio component is
            configured with ``type="filepath"``).
        language: A key of ``model_map`` selecting the model.

    Returns:
        A ``(text, audio)`` tuple: the transcription and the unchanged audio
        path, matching the two Gradio outputs wired to this handler.

    Raises:
        gr.Error: If no audio was provided or no supported language was
            selected, so the user sees an actionable message instead of a
            generic failure.
    """
    if not audio:
        raise gr.Error("Please upload or record audio before transcribing.")
    if language not in model_map:
        raise gr.Error("Please select a language before transcribing.")

    text = _get_asr_pipeline(language)(audio)["text"]
    return text, audio

# Save feedback by sending it to FastAPI backend
def save_feedback(audio_file, transcription, lang, age_group, gender, speak_level, write_level,
                  native, native_language, education_level, multilingual, other_languages,
                  regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance, improvement,
                  usability, technical_issues_bool, technical_issues_desc, final_comments, email):
    """Submit the evaluated audio and the feedback-form answers to the backend.

    The audio file is read from disk and posted, together with all form
    answers, as a multipart request to ``BACKEND_URL``.

    Args:
        audio_file: Path of the audio file that was evaluated.
        transcription: Text shown in the transcription box.
        lang: Evaluated language (sent as ``evaluated_language``).
        errors: Selected error types; joined into one comma-separated string.
        accent_desc, env, technical_issues_bool: Sent under the keys
            ``accent_description``, ``environment`` and ``technical_issues``.
        All remaining parameters are form answers forwarded unchanged under
        the key of the same name.

    Returns:
        A human-readable status message for display in the UI. This function
        reports missing audio, unreadable files, network failures and non-201
        responses as messages rather than raising.
    """
    # Without this guard open(None) would fail and be reported as a backend
    # connection problem, which is misleading for the user.
    if not audio_file:
        return "⚠️ Please upload or record audio before submitting feedback."

    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
    except OSError as e:
        return f"❌ Could not read the audio file: {e}"

    metadata = {
        "transcription": transcription,
        "age_group": age_group,
        "gender": gender,
        "evaluated_language": lang,
        "speak_level": speak_level,
        "write_level": write_level,
        "native": native,
        "native_language": native_language,
        "education_level": education_level,
        "multilingual": multilingual,
        "other_languages": other_languages,
        "regional_accent": regional_accent,
        "accent_description": accent_desc,
        "environment": env,
        "device": device,
        "domain": domain,
        "accuracy": accuracy,
        "orthography": orthography,
        "orthography_issues": orthography_issues,
        "meaning": meaning,
        "meaning_loss": meaning_loss,
        # The checkbox group yields a list; the backend receives a flat string.
        "errors": ",".join(errors) if errors else "",
        "error_examples": error_examples,
        "performance": performance,
        "improvement": improvement,
        "usability": usability,
        "technical_issues": technical_issues_bool,
        "technical_issues_desc": technical_issues_desc,
        "final_comments": final_comments,
        "email": email,
    }

    files = {
        "audio_file": ("audio.wav", audio_content, "audio/wav"),
    }

    # Kept separate from the file-reading step: requests' exceptions derive
    # from OSError, so one shared handler could not tell the two failures apart.
    try:
        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)
    except requests.RequestException as e:
        return f"❌ Could not connect to the backend: {e}"

    if response.status_code == 201:
        return "✅ Feedback submitted successfully. Thank you!"
    return f"⚠️ Submission failed: {response.status_code} — {response.text}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")

    # --- Transcription: provide audio, pick a language, run the model ---
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map), label="Select Language")

    transcribed_text = gr.Textbox(label="Transcribed Text")
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    # --- Feedback form ---
    gr.Markdown("---\n## Feedback Form")

    def _show_when_yes(answer):
        """Make a follow-up field visible only when its gating answer is "Yes"."""
        return gr.update(visible=answer == "Yes")

    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group *")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say"], label="Gender *")
    speak_level = gr.Slider(1, 10, step=1, label="How well do you speak this language? *")
    write_level = gr.Slider(1, 10, step=1, label="How well do you write the language? *")
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language? *")
    native_language = gr.Textbox(label="If you are not a native speaker, what is your native language?")

    # Education level
    education_level = gr.Dropdown(
        ["Primary", "Secondary", "Tertiary", "None", "Prefer not to say"],
        label="What is your highest level of education? *",
    )

    # Multilingual question with a gated follow-up. The follow-up starts hidden
    # so it is consistent with the state reached after answering "No".
    multilingual = gr.Radio(["Yes", "No"], label="Are you multilingual (i.e., speak more than one language)? *")
    other_languages = gr.Textbox(label="What other languages do you speak?", visible=False)
    multilingual.change(fn=_show_when_yes, inputs=multilingual, outputs=other_languages)

    # Regional accent question with a gated follow-up (hidden until "Yes").
    regional_accent = gr.Radio(["Yes", "No", "Unsure"], label="Did the speaker in the audio have a regional accent? *")
    accent_desc = gr.Textbox(label="If yes, please describe the accent or region.", visible=False)
    regional_accent.change(fn=_show_when_yes, inputs=regional_accent, outputs=accent_desc)

    env = gr.Dropdown(
        [
            "Studio/Professional Recording",
            "Quiet Room (minimal noise)",
            "Noisy Background (e.g., street, cafe, market)",
            "Multiple Environments",
            "Unsure",
        ],
        label="What was the type of recording environment for the speech you evaluated? *",
    )
    # NOTE(review): "Tablet" appears both on its own and inside
    # "Mobile Phone/Tablet"; left unchanged because the backend may depend on
    # these exact values — confirm before deduplicating.
    device = gr.Dropdown(
        [
            "Mobile Phone/Tablet",
            "Tablet",
            "Laptop/Computer Microphone",
            "Dedicated Microphone (e.g., headset, studio mic)",
            "Unsure",
        ],
        label="What type of recording device was used? *",
    )
    # NOTE(review): this label says "If yes" but no yes/no question precedes
    # it in the form — the gating question appears to be missing.
    domain = gr.Textbox(label="If yes, please specify the domain/topic (e.g., news broadcast, casual conversation, lecture, medical, parliamentary, religious).")
    accuracy = gr.Slider(1, 10, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    orthography = gr.Radio(
        [
            "Yes, mostly correct",
            "No, major issues",
            "Partially (some correct, some incorrect)",
            "Not Applicable / Unsure",
        ],
        label="Did the transcription correctly use the standard orthography?",
    )
    orthography_issues = gr.Textbox(label="If you selected 'No' or 'Partially', please describe the issues.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved, please explain how.")
    errors = gr.CheckboxGroup(
        [
            "Substitutions (wrong words used)",
            "Omissions (words missing)",
            "Insertions (extra words added)",
            "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
            "Diacritic/Tone/Special Character errors",
            "Code-switching errors (mixing languages incorrectly)",
            "Named Entity errors (names of people/places wrong)",
            "Punctuation errors",
            "No significant errors observed",
        ],
        label="Which types of errors were most prominent or impactful in the transcriptions? *",
    )
    error_examples = gr.Textbox(label="(Optional) Can you provide 1–2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")
    improvement = gr.Textbox(label="How could this ASR model be improved? What features would be most helpful? *")
    usability = gr.Slider(1, 5, step=1, label="How easy was it to use the Hugging Face evaluation tool/interface? *")
    technical_issues_bool = gr.Radio(["Yes", "No"], label="Did you encounter any technical issues using the tool? *")
    technical_issues_desc = gr.Textbox(label="If yes, please describe the technical issues you encountered.")
    final_comments = gr.Textbox(label="Any other comments or suggestions regarding the evaluation process or ASR model?")
    email = gr.Textbox(label="Email")

    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)

    # The order of inputs must match the parameter order of save_feedback.
    save_btn.click(
        fn=save_feedback,
        inputs=[
            audio_input, transcribed_text, lang, age_group, gender, speak_level, write_level,
            native, native_language, education_level, multilingual, other_languages,
            regional_accent, accent_desc, env, device, domain, accuracy, orthography, orthography_issues,
            meaning, meaning_loss, errors, error_examples, performance, improvement,
            usability, technical_issues_bool, technical_issues_desc, final_comments, email
        ],
        outputs=[output_msg]
    )

# Launch the app only when this file is executed as a script, so that importing
# the module (e.g. by tooling or a reloader) does not start a server.
if __name__ == "__main__":
    demo.launch()