Beijuka committed on
Commit
4c47a96
·
verified ·
1 Parent(s): 522582e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -38
app.py CHANGED
@@ -1,8 +1,41 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import os
 
 
4
 
5
- def transcribe(audio, language):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  model_map = {
7
  "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
8
  "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
@@ -23,44 +56,78 @@ def transcribe(audio, language):
23
  "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
24
  "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
25
  }
26
-
27
- # load eval pipeline
28
  asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0, token=os.getenv('HF_TOKEN'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- text = asr(audio)["text"]
31
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- asr_app = gr.Interface(
34
- fn=transcribe,
35
- inputs=[
36
- gr.Audio(sources=["upload", "microphone"], type="filepath"),
37
- gr.Dropdown(
38
- [
39
- "hausa",
40
- "igbo",
41
- "yoruba",
42
- "zulu",
43
- "xhosa",
44
- "afrikaans",
45
- "bemba",
46
- "shona",
47
- "luganda",
48
- "swahili",
49
- "lingala",
50
- "amharic",
51
- "kinyarwanda",
52
- "oromo",
53
- "akan",
54
- "ewe",
55
- "wolof",
56
- "bambara",
57
- ]
58
- ),
59
- ],
60
- outputs="text",
61
- title="ASR Africa",
62
- description="This space serves as a realtime demo for automatic speech recognition models developed by Mak-CAD under the auspicies of Gates Foundation for 18 African languages using open source data.\
63
- \nWe would appreciate your feedback on these models, you can share your feedback via this form https://forms.gle/RbzpwBFbC6Lcx5V78 :)"
64
- )
65
 
66
- asr_app.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import os
4
+ import sqlite3
5
+ from datetime import datetime
6
 
7
+ # Initialize SQLite database
8
+ conn = sqlite3.connect("asr_feedback.db")
9
+ cursor = conn.cursor()
10
+ cursor.execute("""
11
+ CREATE TABLE IF NOT EXISTS feedback (
12
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
13
+ timestamp TEXT,
14
+ model TEXT,
15
+ audio_language TEXT,
16
+ native_speaker TEXT,
17
+ speak_proficiency INTEGER,
18
+ write_proficiency INTEGER,
19
+ audio_description TEXT,
20
+ environment TEXT,
21
+ transcription_rating INTEGER,
22
+ code_switching TEXT,
23
+ dialect TEXT,
24
+ negatives TEXT,
25
+ positives TEXT,
26
+ intelligibility TEXT,
27
+ user_role TEXT,
28
+ collaboration TEXT,
29
+ audio_path TEXT,
30
+ transcription TEXT
31
+ )
32
+ """)
33
+ conn.commit()
34
+
35
+ def transcribe_and_evaluate(audio, language, native_speaker, speak_proficiency, write_proficiency,
36
+ audio_description, environment, transcription_rating, code_switching,
37
+ dialect, negatives, positives, intelligibility, user_role, collaboration):
38
+ # ASR transcription
39
  model_map = {
40
  "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
41
  "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
 
56
  "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
57
  "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
58
  }
 
 
59
  asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0, token=os.getenv('HF_TOKEN'))
60
+ transcription = asr(audio)["text"]
61
+
62
+ # Save audio file
63
+ audio_path = f"uploads/audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
64
+ os.makedirs("uploads", exist_ok=True)
65
+ os.rename(audio, audio_path)
66
+
67
+ # Store feedback in database
68
+ cursor.execute("""
69
+ INSERT INTO feedback (timestamp, model, audio_language, native_speaker, speak_proficiency,
70
+ write_proficiency, audio_description, environment, transcription_rating,
71
+ code_switching, dialect, negatives, positives, intelligibility, user_role,
72
+ collaboration, audio_path, transcription)
73
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
74
+ """, (
75
+ datetime.now().isoformat(), language, language, native_speaker, speak_proficiency,
76
+ write_proficiency, audio_description, environment, transcription_rating, code_switching,
77
+ dialect, negatives, positives, intelligibility, user_role, collaboration, audio_path, transcription
78
+ ))
79
+ conn.commit()
80
+
81
+ return transcription, "Feedback submitted successfully!"
82
 
83
+ # Gradio Blocks interface
84
+ with gr.Blocks(title="ASR Africa Qualitative Evaluation") as asr_app:
85
+ gr.Markdown("## ASR Africa\nTest our 18 African language ASR models and provide feedback.")
86
+ with gr.Row():
87
+ audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
88
+ language = gr.Dropdown(
89
+ ["hausa", "igbo", "yoruba", "zulu", "xhosa", "afrikaans", "bemba", "shona", "luganda",
90
+ "swahili", "lingala", "amharic", "kinyarwanda", "oromo", "akan", "ewe", "wolof", "bambara"],
91
+ label="Select Language"
92
+ )
93
+ transcription = gr.Textbox(label="Transcription Output")
94
+ with gr.Group():
95
+ gr.Markdown("### Qualitative Feedback")
96
+ audio_language = gr.Dropdown(
97
+ ["hausa", "igbo", "yoruba", "zulu", "xhosa", "afrikaans", "bemba", "shona", "luganda",
98
+ "swahili", "lingala", "amharic", "kinyarwanda", "oromo", "akan", "ewe", "wolof", "bambara"],
99
+ label="Primary Language of Audio"
100
+ )
101
+ native_speaker = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
102
+ speak_proficiency = gr.Slider(1, 10, step=1, label="Speaking Proficiency (1=Beginner, 10=Fluent)")
103
+ write_proficiency = gr.Slider(1, 10, step=1, label="Writing Proficiency (1=Beginner, 10=Fluent)")
104
+ audio_description = gr.Textbox(label="Describe the audio (e.g., monologue, radio segment, duration, quality, accents/dialects)")
105
+ environment = gr.Dropdown(
106
+ ["Studio-quality", "Noisy background", "Live broadcast", "Phone call-in", "Other"],
107
+ label="Recording Environment"
108
+ )
109
+ transcription_rating = gr.Slider(1, 10, step=1, label="Transcription Accuracy (1=Inaccurate, 10=Perfect)")
110
+ code_switching = gr.Textbox(label="Did audio include code-switching (e.g., Swahili-English)? If yes, how well was it handled?")
111
+ dialect = gr.Textbox(label="Did speech include a specific dialect/accent? If so, which one, and how well was it handled?")
112
+ negatives = gr.Textbox(label="Issues with performance (e.g., tone errors, morphological mistakes, noise issues)")
113
+ positives = gr.Textbox(label="Strengths of performance (e.g., accurate tones, robust to noise)")
114
+ intelligibility = gr.Textbox(label="Was the transcript understandable and useful for your purpose (e.g., radio subtitling)?")
115
+ user_role = gr.Dropdown(
116
+ ["Radio producer", "DJ", "Listener", "Linguist", "Developer", "Other"],
117
+ label="Your Role"
118
+ )
119
+ collaboration = gr.Textbox(label="Collaboration interest (email and brief description)")
120
+ submit = gr.Button("Submit Feedback")
121
+ output = gr.Textbox(label="Submission Status")
122
 
123
+ submit.click(
124
+ fn=transcribe_and_evaluate,
125
+ inputs=[
126
+ audio_input, language, native_speaker, speak_proficiency, write_proficiency,
127
+ audio_description, environment, transcription_rating, code_switching, dialect,
128
+ negatives, positives, intelligibility, user_role, collaboration
129
+ ],
130
+ outputs=[transcription, output]
131
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ asr_app.launch()