Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,25 +5,26 @@ from transformers import pipeline
|
|
| 5 |
|
| 6 |
# Map of models (already defined in your app)
|
| 7 |
model_map = {
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
# Create storage directory
|
| 28 |
os.makedirs("responses", exist_ok=True)
|
| 29 |
|
|
@@ -31,30 +32,36 @@ os.makedirs("responses", exist_ok=True)
|
|
| 31 |
def transcribe(audio, language):
|
| 32 |
asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0)
|
| 33 |
text = asr(audio)["text"]
|
| 34 |
-
return text, audio
|
| 35 |
-
|
| 36 |
# Save feedback
|
| 37 |
-
def save_feedback(audio_file, transcription,
|
| 38 |
data = {
|
| 39 |
"audio_file": audio_file,
|
| 40 |
"transcription": transcription,
|
| 41 |
-
"
|
| 42 |
"gender": gender,
|
| 43 |
-
"
|
| 44 |
-
"native_speaker": native,
|
| 45 |
"speak_level": speak_level,
|
| 46 |
"write_level": write_level,
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
|
|
|
| 50 |
"domain": domain,
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
"errors": errors,
|
| 55 |
-
"
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
|
|
|
|
|
|
| 58 |
with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
|
| 59 |
writer = csv.DictWriter(f, fieldnames=data.keys())
|
| 60 |
if f.tell() == 0:
|
|
@@ -66,40 +73,71 @@ def save_feedback(audio_file, transcription, age, gender, model_used, native, sp
|
|
| 66 |
with gr.Blocks() as demo:
|
| 67 |
gr.Markdown("## African ASR + Feedback")
|
| 68 |
|
|
|
|
| 69 |
with gr.Row():
|
| 70 |
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
|
| 71 |
lang = gr.Dropdown(list(model_map.keys()), label="Select Language")
|
| 72 |
-
|
|
|
|
| 73 |
transcribed_text = gr.Textbox(label="Transcribed Text")
|
| 74 |
|
|
|
|
| 75 |
submit_btn = gr.Button("Transcribe")
|
| 76 |
submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])
|
| 77 |
|
|
|
|
| 78 |
gr.Markdown("---\n## Feedback Form")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
| 96 |
email = gr.Textbox(label="Email (optional)")
|
| 97 |
|
|
|
|
| 98 |
save_btn = gr.Button("Submit Feedback")
|
| 99 |
output_msg = gr.Textbox(interactive=False)
|
| 100 |
|
|
|
|
| 101 |
save_btn.click(fn=save_feedback,
|
| 102 |
-
inputs=[audio_input, transcribed_text,
|
| 103 |
outputs=[output_msg])
|
| 104 |
|
|
|
|
| 105 |
demo.launch()
|
|
|
|
| 5 |
|
| 6 |
# Map of models (already defined in your app)
|
| 7 |
model_map = {
|
| 8 |
+
"hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
|
| 9 |
+
"igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
|
| 10 |
+
"yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
|
| 11 |
+
"zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
|
| 12 |
+
"xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
|
| 13 |
+
"afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
|
| 14 |
+
"bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
|
| 15 |
+
"shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
|
| 16 |
+
"luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
|
| 17 |
+
"swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
|
| 18 |
+
"lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
|
| 19 |
+
"amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
|
| 20 |
+
"kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
|
| 21 |
+
"oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
|
| 22 |
+
"akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
|
| 23 |
+
"ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
|
| 24 |
+
"wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
|
| 25 |
+
"bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
# Create storage directory
|
| 29 |
os.makedirs("responses", exist_ok=True)
|
| 30 |
|
|
|
|
def transcribe(audio, language):
    """Transcribe an audio clip with the ASR model for the selected language.

    Parameters:
        audio: filesystem path to the uploaded/recorded clip
            (Gradio Audio with type="filepath").
        language: key into ``model_map`` selecting the checkpoint.

    Returns:
        (text, audio): the transcription string and the unchanged audio
        path, so the UI can refresh both outputs.
    """
    # Cache one pipeline per language on the function object: the original
    # rebuilt the pipeline (a full model load) on every button click.
    cache = getattr(transcribe, "_asr_cache", None)
    if cache is None:
        cache = transcribe._asr_cache = {}
    asr = cache.get(language)
    if asr is None:
        # device=0 pins the first GPU, as in the original code.
        # NOTE(review): this raises when no CUDA device is available — confirm
        # the Space always runs on GPU hardware.
        asr = cache[language] = pipeline(
            "automatic-speech-recognition", model=model_map[language], device=0
        )
    text = asr(audio)["text"]
    return text, audio
| 36 |
+
|
| 37 |
# Save feedback
|
| 38 |
+
def save_feedback(audio_file, transcription, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email):
|
| 39 |
data = {
|
| 40 |
"audio_file": audio_file,
|
| 41 |
"transcription": transcription,
|
| 42 |
+
"age_group": age_group,
|
| 43 |
"gender": gender,
|
| 44 |
+
"evaluated_language": evaluated_language,
|
|
|
|
| 45 |
"speak_level": speak_level,
|
| 46 |
"write_level": write_level,
|
| 47 |
+
"native": native,
|
| 48 |
+
"native_language": native_language,
|
| 49 |
+
"environment": env,
|
| 50 |
+
"device": device,
|
| 51 |
"domain": domain,
|
| 52 |
+
"accuracy": accuracy,
|
| 53 |
+
"orthography": orthography,
|
| 54 |
+
"meaning": meaning,
|
| 55 |
"errors": errors,
|
| 56 |
+
"performance": performance,
|
| 57 |
+
"improvement": improvement,
|
| 58 |
+
"usability": usability,
|
| 59 |
+
"technical_issues": technical_issues,
|
| 60 |
+
"final_comments": final_comments,
|
| 61 |
+
"email": email
|
| 62 |
}
|
| 63 |
+
|
| 64 |
+
# Write feedback to a CSV file
|
| 65 |
with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
|
| 66 |
writer = csv.DictWriter(f, fieldnames=data.keys())
|
| 67 |
if f.tell() == 0:
|
|
|
|
# Gradio UI: transcription demo on top, feedback questionnaire below.
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")

    # First row: audio source and language selection.
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")

    # Transcription output textbox.
    transcribed_text = gr.Textbox(label="Transcribed Text")

    # Button that runs the ASR model; also echoes the audio back.
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    # Feedback form section.
    gr.Markdown("---\n## Feedback Form")

    # Respondent demographics.
    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say", "Other"], label="Gender")

    # Which language the respondent evaluated.
    evaluated_language = gr.Dropdown(list(model_map.keys()), label="Which language did you evaluate for?")

    # Language proficiency (self-reported, 1-10).
    speak_level = gr.Slider(1, 10, label="How well do you speak this language?")
    write_level = gr.Slider(1, 10, label="How well do you write the language?")

    # Native-speaker status.
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
    native_language = gr.Textbox(label="If not, what is your native language?")

    # Recording conditions.
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room", "Noisy Background", "Multiple Environments", "Unsure", "Other"], label="What was the type of recording environment?")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone", "Unsure", "Other"], label="What type of recording device was used?")
    domain = gr.Textbox(label="Was the speech related to a specific domain or topic? (Optional)")

    # Model performance evaluation.
    accuracy = gr.Slider(1, 10, label="How accurate was the model’s transcription?")
    orthography = gr.Dropdown(["Yes, mostly correct", "No, major issues", "Partially", "Not Applicable"], label="Did the transcription use standard orthography?")
    meaning = gr.Slider(1, 10, label="Did the transcription preserve the original meaning?")

    # Prominent error categories.
    errors = gr.CheckboxGroup(["Substitutions", "Omissions", "Insertions", "Pronunciation-related", "Diacritic Errors", "Code-switching Errors", "Named Entity Errors", "Punctuation Errors", "No significant errors"], label="Which errors were prominent?")

    # Free-text performance feedback.
    performance = gr.Textbox(label="What did the model do well? What did it struggle with?")
    improvement = gr.Textbox(label="How could this ASR model be improved?")

    # Usability and final comments.
    usability = gr.Slider(1, 5, label="How easy was it to use the tool?")
    technical_issues = gr.Textbox(label="Did you encounter any technical issues?")
    final_comments = gr.Textbox(label="Any other comments or suggestions?")

    # Optional contact email.
    email = gr.Textbox(label="Email (optional)")

    # Submit button and status message.
    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)

    # Persist the whole form via save_feedback when submitted; the input
    # order must match save_feedback's parameter order.
    save_btn.click(fn=save_feedback,
                   inputs=[audio_input, transcribed_text, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email],
                   outputs=[output_msg])

# Launch the interface
demo.launch()
|