Spaces:

audeering
/

speech-analysis

Running

hagenw committed on Aug 30, 2024

Commit

0182e84

1 Parent(s): dd32c3d

Clean up code

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
@@ -119,7 +121,7 @@ expression_processor = Wav2Vec2Processor.from_pretrained(expression_model_name)
 expression_model = ExpressionModel.from_pretrained(expression_model_name)
-def process_func(x: np.ndarray, sampling_rate: int) -> dict:
     r"""Predict age and gender or extract embeddings from raw audio signal."""
     # run through processor to normalize signal
     # always returns a batch, so we just get the first entry
@@ -160,16 +162,11 @@ def process_func(x: np.ndarray, sampling_rate: int) -> dict:
             "child": results[0][3],
         },
         expression_file,
-        # {
-        #     "arousal": results[1][0],
-        #     "dominance": results[1][1],
-        #     "valence": results[1][2],
-        # }
     )
 @spaces.GPU
-def recognize(input_file):
     # sampling_rate, signal = input_microphone
     # signal = signal.astype(np.float32, order="C") / 32768.0
     if input_file is None:
@@ -257,7 +254,6 @@ with gr.Blocks() as demo:
             with gr.Column():
                 output_age = gr.Textbox(label="Age")
                 output_gender = gr.Label(label="Gender")
-                # output_expression = gr.Label(label="Expression")
                 output_expression = gr.Image(label="Expression")
         outputs = [output_age, output_gender, output_expression]

+import typing
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
 expression_model = ExpressionModel.from_pretrained(expression_model_name)
+def process_func(x: np.ndarray, sampling_rate: int) -> typing.Tuple[str, dict, str]:
     r"""Predict age and gender or extract embeddings from raw audio signal."""
     # run through processor to normalize signal
     # always returns a batch, so we just get the first entry
             "child": results[0][3],
         },
         expression_file,
     )
 @spaces.GPU
+def recognize(input_file: str) -> typing.Tuple[str, dict, str]:
     # sampling_rate, signal = input_microphone
     # signal = signal.astype(np.float32, order="C") / 32768.0
     if input_file is None:
             with gr.Column():
                 output_age = gr.Textbox(label="Age")
                 output_gender = gr.Label(label="Gender")
                 output_expression = gr.Image(label="Expression")
         outputs = [output_age, output_gender, output_expression]