Beijuka committed on
Commit
25bd6c5
·
verified ·
1 Parent(s): 9513fa0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -51
app.py CHANGED
@@ -5,25 +5,26 @@ from transformers import pipeline
5
 
6
  # Map of models (already defined in your app)
7
  model_map = {
8
- "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
9
- "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
10
- "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
11
- "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
12
- "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
13
- "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
14
- "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
15
- "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
16
- "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
17
- "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
18
- "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
19
- "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
20
- "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
21
- "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
22
- "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
23
- "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
24
- "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
25
- "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
26
- }
 
27
  # Create storage directory
28
  os.makedirs("responses", exist_ok=True)
29
 
@@ -31,30 +32,36 @@ os.makedirs("responses", exist_ok=True)
31
  def transcribe(audio, language):
32
  asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0)
33
  text = asr(audio)["text"]
34
- return text, audio
35
-
36
  # Save feedback
37
- def save_feedback(audio_file, transcription, age, gender, model_used, native, speak_level, write_level, accuracy, meaning, env, domain, dialect, code_switching, comparison, errors, strengths, email):
38
  data = {
39
  "audio_file": audio_file,
40
  "transcription": transcription,
41
- "age": age,
42
  "gender": gender,
43
- "model": model_used,
44
- "native_speaker": native,
45
  "speak_level": speak_level,
46
  "write_level": write_level,
47
- "accuracy_rating": accuracy,
48
- "meaning_preserved": meaning,
49
- "recording_env": env,
 
50
  "domain": domain,
51
- "dialect": dialect,
52
- "code_switching": code_switching,
53
- "model_comparison": comparison,
54
  "errors": errors,
55
- "strengths": strengths,
56
- "email": email,
 
 
 
 
57
  }
 
 
58
  with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
59
  writer = csv.DictWriter(f, fieldnames=data.keys())
60
  if f.tell() == 0:
@@ -66,40 +73,71 @@ def save_feedback(audio_file, transcription, age, gender, model_used, native, sp
66
  with gr.Blocks() as demo:
67
  gr.Markdown("## African ASR + Feedback")
68
 
 
69
  with gr.Row():
70
  audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
71
  lang = gr.Dropdown(list(model_map.keys()), label="Select Language")
72
-
 
73
  transcribed_text = gr.Textbox(label="Transcribed Text")
74
 
 
75
  submit_btn = gr.Button("Transcribe")
76
  submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])
77
 
 
78
  gr.Markdown("---\n## Feedback Form")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- # Feedback fields
81
- age = gr.Number(label="Age")
82
- gender = gr.Dropdown(["Male", "Female", "Prefer not to say"], label="Gender")
83
- model_used = lang
84
- native = gr.Radio(["Yes", "No"], label="Are you a native speaker?")
85
- speak_level = gr.Slider(1, 10, label="Speaking Fluency")
86
- write_level = gr.Slider(1, 10, label="Writing Fluency")
87
- accuracy = gr.Slider(1, 10, label="Model Accuracy")
88
- meaning = gr.Radio(["Yes", "No", "Partially"], label="Meaning preserved?")
89
- env = gr.Dropdown(["Studio-quality", "Noisy", "Live broadcast", "Phone mic", "Other"], label="Environment")
90
- domain = gr.Textbox(label="Speech Domain")
91
- dialect = gr.Textbox(label="Dialect and performance")
92
- code_switching = gr.Textbox(label="Code-switching details")
93
- comparison = gr.Textbox(label="Model comparison")
94
- errors = gr.Textbox(label="Errors observed")
95
- strengths = gr.Textbox(label="Strengths observed")
 
 
96
  email = gr.Textbox(label="Email (optional)")
97
 
 
98
  save_btn = gr.Button("Submit Feedback")
99
  output_msg = gr.Textbox(interactive=False)
100
 
 
101
  save_btn.click(fn=save_feedback,
102
- inputs=[audio_input, transcribed_text, age, gender, model_used, native, speak_level, write_level, accuracy, meaning, env, domain, dialect, code_switching, comparison, errors, strengths, email],
103
  outputs=[output_msg])
104
 
 
105
  demo.launch()
 
5
 
6
# Map of supported language -> Hugging Face model id used for transcription.
# NOTE: insertion order matters — the UI dropdowns are built from
# list(model_map.keys()), so this order is the order shown to users.
model_map = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}

# Create storage directory for the feedback CSV written by save_feedback().
os.makedirs("responses", exist_ok=True)
30
 
 
32
def transcribe(audio, language):
    """Transcribe an audio file with the ASR model selected for *language*.

    Args:
        audio: Filesystem path to the uploaded/recorded audio clip
            (Gradio ``type="filepath"``).
        language: Key into ``model_map`` choosing which model to use.
            Raises ``KeyError`` for an unknown language.

    Returns:
        A ``(text, audio)`` tuple: the transcription plus the original
        audio path, so the Audio component keeps its value in the UI.
    """
    # Cache one pipeline per language on the function object itself:
    # the original rebuilt the pipeline on every call, reloading the
    # (large) model weights for each transcription request.
    cache = transcribe.__dict__.setdefault("_asr_cache", {})
    asr = cache.get(language)
    if asr is None:
        # device=0 targets the first GPU, as in the original code.
        # NOTE(review): assumes a CUDA device is available — confirm.
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_map[language],
            device=0,
        )
        cache[language] = asr
    text = asr(audio)["text"]
    return text, audio
36
+
37
  # Save feedback
38
+ def save_feedback(audio_file, transcription, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email):
39
  data = {
40
  "audio_file": audio_file,
41
  "transcription": transcription,
42
+ "age_group": age_group,
43
  "gender": gender,
44
+ "evaluated_language": evaluated_language,
 
45
  "speak_level": speak_level,
46
  "write_level": write_level,
47
+ "native": native,
48
+ "native_language": native_language,
49
+ "environment": env,
50
+ "device": device,
51
  "domain": domain,
52
+ "accuracy": accuracy,
53
+ "orthography": orthography,
54
+ "meaning": meaning,
55
  "errors": errors,
56
+ "performance": performance,
57
+ "improvement": improvement,
58
+ "usability": usability,
59
+ "technical_issues": technical_issues,
60
+ "final_comments": final_comments,
61
+ "email": email
62
  }
63
+
64
+ # Write feedback to a CSV file
65
  with open("responses/feedback.csv", "a", newline="", encoding="utf-8") as f:
66
  writer = csv.DictWriter(f, fieldnames=data.keys())
67
  if f.tell() == 0:
 
73
# Build the Gradio UI. Component definition order below is the order the
# widgets are rendered on the page, and the `inputs=[...]` list passed to
# save_feedback must stay positionally aligned with that function's
# parameter order.
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")

    # First row: audio input and language selection.
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")

    # Transcription output textbox.
    transcribed_text = gr.Textbox(label="Transcribed Text")

    # Transcribe button: runs transcribe() and echoes the audio back so the
    # Audio component keeps its value (transcribe returns (text, audio)).
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    # Feedback form section.
    gr.Markdown("---\n## Feedback Form")

    # Respondent demographics.
    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say", "Other"], label="Gender")

    # Which language the respondent evaluated (same options as the ASR dropdown).
    evaluated_language = gr.Dropdown(list(model_map.keys()), label="Which language did you evaluate for?")

    # Self-reported language proficiency (1–10 scales).
    speak_level = gr.Slider(1, 10, label="How well do you speak this language?")
    write_level = gr.Slider(1, 10, label="How well do you write the language?")

    # Native-speaker status and, if not native, the respondent's own language.
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
    native_language = gr.Textbox(label="If not, what is your native language?")

    # Recording conditions.
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room", "Noisy Background", "Multiple Environments", "Unsure", "Other"], label="What was the type of recording environment?")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone", "Unsure", "Other"], label="What type of recording device was used?")
    domain = gr.Textbox(label="Was the speech related to a specific domain or topic? (Optional)")

    # Model performance evaluation.
    accuracy = gr.Slider(1, 10, label="How accurate was the model’s transcription?")
    orthography = gr.Dropdown(["Yes, mostly correct", "No, major issues", "Partially", "Not Applicable"], label="Did the transcription use standard orthography?")
    meaning = gr.Slider(1, 10, label="Did the transcription preserve the original meaning?")

    # Error categories (multi-select).
    errors = gr.CheckboxGroup(["Substitutions", "Omissions", "Insertions", "Pronunciation-related", "Diacritic Errors", "Code-switching Errors", "Named Entity Errors", "Punctuation Errors", "No significant errors"], label="Which errors were prominent?")

    # Free-text performance feedback.
    performance = gr.Textbox(label="What did the model do well? What did it struggle with?")
    improvement = gr.Textbox(label="How could this ASR model be improved?")

    # Usability and final comments.
    usability = gr.Slider(1, 5, label="How easy was it to use the tool?")
    technical_issues = gr.Textbox(label="Did you encounter any technical issues?")
    final_comments = gr.Textbox(label="Any other comments or suggestions?")

    # Optional contact email.
    email = gr.Textbox(label="Email (optional)")

    # Submit button and (read-only) status message from save_feedback.
    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)

    # When submit is clicked, persist all answers via save_feedback.
    # The list order below must match save_feedback's parameter order.
    save_btn.click(fn=save_feedback,
                   inputs=[audio_input, transcribed_text, age_group, gender, evaluated_language, speak_level, write_level, native, native_language, env, device, domain, accuracy, orthography, meaning, errors, performance, improvement, usability, technical_issues, final_comments, email],
                   outputs=[output_msg])

# Launch the interface.
demo.launch()