Spaces:

Beijuka
/

ASR_AFRICA

Sleeping

App Files Files Community

Beijuka committed on Apr 28, 2025

Commit

4c47a96

verified ·

1 Parent(s): 522582e

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -38

app.py CHANGED Viewed

@@ -1,8 +1,41 @@
 import gradio as gr
 from transformers import pipeline
 import os
-def transcribe(audio, language):
     model_map = {
         "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
         "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
@@ -23,44 +56,78 @@ def transcribe(audio, language):
         "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
         "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
     }
-    # load eval pipeline
     asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0, token=os.getenv('HF_TOKEN'))
-    text = asr(audio)["text"]
-    return text
-asr_app = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(sources=["upload", "microphone"], type="filepath"),
-        gr.Dropdown(
-            [
-                "hausa",
-                "igbo",
-                "yoruba",
-                "zulu",
-                "xhosa",
-                "afrikaans",
-                "bemba",
-                "shona",
-                "luganda",
-                "swahili",
-                "lingala",
-                "amharic",
-                "kinyarwanda",
-                "oromo",
-                "akan",
-                "ewe",
-                "wolof",
-                "bambara",
-            ]
-        ),
-    ],
-    outputs="text",
-    title="ASR Africa",
-    description="This space serves as a realtime demo for automatic speech recognition models developed by Mak-CAD under the auspicies of Gates Foundation for 18 African languages using open source data.\
-    \nWe would appreciate your feedback on these models, you can share your feedback via this form https://forms.gle/RbzpwBFbC6Lcx5V78 :)"
-)
-asr_app.launch()

 import gradio as gr
 from transformers import pipeline
 import os
+import sqlite3
+from datetime import datetime
+# Initialize the SQLite database that stores one row per feedback submission.
+# The connection is created here, at import time, but it is used later from
+# the Gradio click handler. sqlite3 connections refuse use from any thread
+# other than the one that created them unless check_same_thread=False is
+# given, so the flag is required for handlers that run on worker threads.
+# NOTE(review): with a shared connection, concurrent writers should be
+# serialized by the caller - confirm the app's concurrency settings.
+conn = sqlite3.connect("asr_feedback.db", check_same_thread=False)
+cursor = conn.cursor()
+# Idempotent schema creation: safe to run on every start-up.
+cursor.execute("""
+    CREATE TABLE IF NOT EXISTS feedback (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        timestamp TEXT,
+        model TEXT,
+        audio_language TEXT,
+        native_speaker TEXT,
+        speak_proficiency INTEGER,
+        write_proficiency INTEGER,
+        audio_description TEXT,
+        environment TEXT,
+        transcription_rating INTEGER,
+        code_switching TEXT,
+        dialect TEXT,
+        negatives TEXT,
+        positives TEXT,
+        intelligibility TEXT,
+        user_role TEXT,
+        collaboration TEXT,
+        audio_path TEXT,
+        transcription TEXT
+    )
+""")
+conn.commit()
+def transcribe_and_evaluate(audio, language, native_speaker, speak_proficiency, write_proficiency,
+                           audio_description, environment, transcription_rating, code_switching,
+                           dialect, negatives, positives, intelligibility, user_role, collaboration):
+    """Transcribe an audio clip and persist the accompanying feedback form.
+
+    Args:
+        audio: Filesystem path of the uploaded/recorded clip (the Audio
+            component is configured with ``type="filepath"``); may be
+            ``None`` when nothing was provided.
+        language: Key into ``model_map`` selecting the ASR model.
+        native_speaker ... collaboration: Raw form values, stored verbatim
+            in the ``feedback`` table.
+
+    Returns:
+        A ``(transcription, status_message)`` tuple matching the two output
+        components of the click handler. On invalid input the transcription
+        is empty and the status explains what is missing.
+    """
+    # Function-scope imports keep this handler self-contained.
+    import shutil
+    from contextlib import closing
+
     model_map = {
         "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
         "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
         "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
         "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
     }
+    # Validate before loading a model: a missing clip or an unselected /
+    # unsupported language previously surfaced as an unhandled exception.
+    if not audio:
+        return "", "Please upload or record an audio clip before submitting."
+    if language not in model_map:
+        return "", "Please select a supported language before submitting."
+    model_id = model_map[language]
     asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0, token=os.getenv('HF_TOKEN'))
+    transcription = asr(audio)["text"]
+    # Save audio file. One timestamp is shared by the filename and the row so
+    # the two always agree; microseconds avoid same-second name collisions.
+    submitted_at = datetime.now()
+    os.makedirs("uploads", exist_ok=True)
+    # Keep the clip's real extension instead of labelling everything ".wav".
+    extension = os.path.splitext(audio)[1] or ".wav"
+    audio_path = f"uploads/audio_{submitted_at.strftime('%Y%m%d_%H%M%S_%f')}{extension}"
+    # Copy rather than rename: the source file stays available to the UI (and
+    # to a repeated submit), and copying also works across filesystems.
+    shutil.copy(audio, audio_path)
+    # Store feedback in database. A short-lived connection is opened per call
+    # because sqlite3 connections are bound to their creating thread by
+    # default; the inner ``with`` commits on success and rolls back on error.
+    with closing(sqlite3.connect("asr_feedback.db")) as db:
+        with db:
+            db.execute("""
+                INSERT INTO feedback (timestamp, model, audio_language, native_speaker, speak_proficiency,
+                                     write_proficiency, audio_description, environment, transcription_rating,
+                                     code_switching, dialect, negatives, positives, intelligibility, user_role,
+                                     collaboration, audio_path, transcription)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """, (
+                # ``model`` records the model id; ``audio_language`` records the
+                # language selected for transcription.
+                submitted_at.isoformat(), model_id, language, native_speaker, speak_proficiency,
+                write_proficiency, audio_description, environment, transcription_rating, code_switching,
+                dialect, negatives, positives, intelligibility, user_role, collaboration, audio_path, transcription
+            ))
+    return transcription, "Feedback submitted successfully!"
+# Gradio Blocks interface: audio + language pickers, a transcription box, the
+# qualitative feedback form, and a submit button wired to
+# transcribe_and_evaluate.
+_LANGUAGE_CHOICES = [
+    "hausa", "igbo", "yoruba", "zulu", "xhosa", "afrikaans", "bemba", "shona", "luganda",
+    "swahili", "lingala", "amharic", "kinyarwanda", "oromo", "akan", "ewe", "wolof", "bambara",
+]
+with gr.Blocks(title="ASR Africa Qualitative Evaluation") as asr_app:
+    gr.Markdown("## ASR Africa\nTest our 18 African language ASR models and provide feedback.")
+    with gr.Row():
+        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
+        language = gr.Dropdown(choices=list(_LANGUAGE_CHOICES), label="Select Language")
+    transcription = gr.Textbox(label="Transcription Output")
+    with gr.Group():
+        gr.Markdown("### Qualitative Feedback")
+        # NOTE(review): this dropdown is rendered but is not part of the
+        # inputs passed to the click handler below, so its value is never
+        # stored.
+        audio_language = gr.Dropdown(choices=list(_LANGUAGE_CHOICES), label="Primary Language of Audio")
+        native_speaker = gr.Radio(choices=["Yes", "No"], label="Are you a native speaker of this language?")
+        speak_proficiency = gr.Slider(minimum=1, maximum=10, step=1, label="Speaking Proficiency (1=Beginner, 10=Fluent)")
+        write_proficiency = gr.Slider(minimum=1, maximum=10, step=1, label="Writing Proficiency (1=Beginner, 10=Fluent)")
+        audio_description = gr.Textbox(label="Describe the audio (e.g., monologue, radio segment, duration, quality, accents/dialects)")
+        environment = gr.Dropdown(
+            choices=["Studio-quality", "Noisy background", "Live broadcast", "Phone call-in", "Other"],
+            label="Recording Environment",
+        )
+        transcription_rating = gr.Slider(minimum=1, maximum=10, step=1, label="Transcription Accuracy (1=Inaccurate, 10=Perfect)")
+        code_switching = gr.Textbox(label="Did audio include code-switching (e.g., Swahili-English)? If yes, how well was it handled?")
+        dialect = gr.Textbox(label="Did speech include a specific dialect/accent? If so, which one, and how well was it handled?")
+        negatives = gr.Textbox(label="Issues with performance (e.g., tone errors, morphological mistakes, noise issues)")
+        positives = gr.Textbox(label="Strengths of performance (e.g., accurate tones, robust to noise)")
+        intelligibility = gr.Textbox(label="Was the transcript understandable and useful for your purpose (e.g., radio subtitling)?")
+        user_role = gr.Dropdown(
+            choices=["Radio producer", "DJ", "Listener", "Linguist", "Developer", "Other"],
+            label="Your Role",
+        )
+        collaboration = gr.Textbox(label="Collaboration interest (email and brief description)")
+    submit = gr.Button("Submit Feedback")
+    output = gr.Textbox(label="Submission Status")
+    # Order must match the positional parameters of transcribe_and_evaluate.
+    feedback_inputs = [
+        audio_input, language, native_speaker, speak_proficiency, write_proficiency,
+        audio_description, environment, transcription_rating, code_switching, dialect,
+        negatives, positives, intelligibility, user_role, collaboration,
+    ]
+    submit.click(fn=transcribe_and_evaluate, inputs=feedback_inputs, outputs=[transcription, output])
+asr_app.launch()