Spaces:

audeering
/

speech-analysis

Running

App Files Files Community

hagenw committed on Aug 29, 2024

Commit

d1ab157

1 Parent(s): 0aad17a

Try to split age and gender output

Browse files

Files changed (1) hide show

app.py +43 -18

app.py CHANGED Viewed

@@ -10,6 +10,10 @@ from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedMod
 import audiofile
 class ModelHead(nn.Module):
     r"""Classification head."""
@@ -63,7 +67,6 @@ class AgeGenderModel(Wav2Vec2PreTrainedModel):
 # load model from hub
 device = 0 if torch.cuda.is_available() else "cpu"
-model_name = "audeering/wav2vec2-large-robust-24-ft-age-gender"
 processor = Wav2Vec2Processor.from_pretrained(model_name)
 model = AgeGenderModel.from_pretrained(model_name)
@@ -98,23 +101,26 @@ def process_func(x: np.ndarray, sampling_rate: int) -> dict:
 @spaces.GPU
-def recognize(file):
     if file is None:
         raise gr.Error(
             "No audio file submitted! "
             "Please upload or record an audio file "
             "before submitting your request."
         )
-    signal, sampling_rate = audiofile.read(file)
     age_gender = process_func(signal, sampling_rate)
-    return  age_gender
 outputs = gr.Label()
 title = "audEERING age and gender recognition"
 description = (
     "Recognize age and gender of a microphone recording or audio file. "
-    "Demo uses the checkpoint [{model_name}](https://huggingface.co/{model_name})."
 )
 allow_flagging = "never"
@@ -127,16 +133,35 @@ allow_flagging = "never"
 #     allow_flagging=allow_flagging,
 # )
-file = gr.Interface(
-    fn=recognize,
-    inputs=gr.Audio(sources="upload", type="filepath", label="Audio file"),
-    outputs=outputs,
-    title=title,
-    description=description,
-    allow_flagging=allow_flagging,
-)
-# demo = gr.TabbedInterface([microphone, file], ["Microphone", "Audio file"])
-# demo.queue().launch()
-# demo.launch()
-file.launch()

 import audiofile
+model_name = "audeering/wav2vec2-large-robust-24-ft-age-gender"
+duration = 1  # limit processing to the first `duration` seconds of audio
 class ModelHead(nn.Module):
     r"""Classification head."""
 # load model from hub
 device = 0 if torch.cuda.is_available() else "cpu"
 processor = Wav2Vec2Processor.from_pretrained(model_name)
 model = AgeGenderModel.from_pretrained(model_name)
 @spaces.GPU
+def recognize(file, output_selector):
     if file is None:
         raise gr.Error(
             "No audio file submitted! "
             "Please upload or record an audio file "
             "before submitting your request."
         )
+    signal, sampling_rate = audiofile.read(file, duration=duration)
     age_gender = process_func(signal, sampling_rate)
+    if output_selector == "age":
+        return age_gender["age"]
+    else:
+        return {k: v for k, v in age_gender.items() if k != "age"}
 outputs = gr.Label()
 title = "audEERING age and gender recognition"
 description = (
     "Recognize age and gender of a microphone recording or audio file. "
+    f"Demo uses the checkpoint [{model_name}](https://huggingface.co/{model_name})."
 )
 allow_flagging = "never"
 #     allow_flagging=allow_flagging,
 # )
+# file = gr.Interface(
+#     fn=recognize,
+#     inputs=gr.Audio(sources="upload", type="filepath", label="Audio file"),
+#     outputs=outputs,
+#     title=title,
+#     description=description,
+#     allow_flagging=allow_flagging,
+# )
+#
+# # demo = gr.TabbedInterface([microphone, file], ["Microphone", "Audio file"])
+# # demo.queue().launch()
+# # demo.launch()
+# file.launch()
+with gr.Blocks() as demo:
+    gr.Markdown(description)
+    with gr.Tab(label="Input"):
+        with gr.Row():
+            with gr.Column():
+                audio = gr.Audio(sources="upload", type="filepath", label="Audio file")
+                output_selector = gr.Dropdown(
+                    choices=["age", "gender"],
+                    label="Output",
+                    value="age",
+                )
+                submit_btn = gr.Button(value="Submit")
+            with gr.Column():
+                output_text = gr.Textbox(label="Output Text")
+        submit_btn.click(recognize, [audio, output_selector], [output_text])
+demo.launch(debug=True)