Spaces:
Running
Running
Clean up code
Browse files
app.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
import numpy as np
|
|
@@ -119,7 +121,7 @@ expression_processor = Wav2Vec2Processor.from_pretrained(expression_model_name)
|
|
| 119 |
expression_model = ExpressionModel.from_pretrained(expression_model_name)
|
| 120 |
|
| 121 |
|
| 122 |
-
def process_func(x: np.ndarray, sampling_rate: int) -> dict:
|
| 123 |
r"""Predict age and gender or extract embeddings from raw audio signal."""
|
| 124 |
# run through processor to normalize signal
|
| 125 |
# always returns a batch, so we just get the first entry
|
|
@@ -160,16 +162,11 @@ def process_func(x: np.ndarray, sampling_rate: int) -> dict:
|
|
| 160 |
"child": results[0][3],
|
| 161 |
},
|
| 162 |
expression_file,
|
| 163 |
-
# {
|
| 164 |
-
# "arousal": results[1][0],
|
| 165 |
-
# "dominance": results[1][1],
|
| 166 |
-
# "valence": results[1][2],
|
| 167 |
-
# }
|
| 168 |
)
|
| 169 |
|
| 170 |
|
| 171 |
@spaces.GPU
|
| 172 |
-
def recognize(input_file):
|
| 173 |
# sampling_rate, signal = input_microphone
|
| 174 |
# signal = signal.astype(np.float32, order="C") / 32768.0
|
| 175 |
if input_file is None:
|
|
@@ -257,7 +254,6 @@ with gr.Blocks() as demo:
|
|
| 257 |
with gr.Column():
|
| 258 |
output_age = gr.Textbox(label="Age")
|
| 259 |
output_gender = gr.Label(label="Gender")
|
| 260 |
-
# output_expression = gr.Label(label="Expression")
|
| 261 |
output_expression = gr.Image(label="Expression")
|
| 262 |
|
| 263 |
outputs = [output_age, output_gender, output_expression]
|
|
|
|
| 1 |
+
import typing
|
| 2 |
+
|
| 3 |
import gradio as gr
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import numpy as np
|
|
|
|
| 121 |
expression_model = ExpressionModel.from_pretrained(expression_model_name)
|
| 122 |
|
| 123 |
|
| 124 |
+
def process_func(x: np.ndarray, sampling_rate: int) -> typing.Tuple[str, dict, str]:
|
| 125 |
r"""Predict age and gender or extract embeddings from raw audio signal."""
|
| 126 |
# run through processor to normalize signal
|
| 127 |
# always returns a batch, so we just get the first entry
|
|
|
|
| 162 |
"child": results[0][3],
|
| 163 |
},
|
| 164 |
expression_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
)
|
| 166 |
|
| 167 |
|
| 168 |
@spaces.GPU
|
| 169 |
+
def recognize(input_file: str) -> typing.Tuple[str, dict, str]:
|
| 170 |
# sampling_rate, signal = input_microphone
|
| 171 |
# signal = signal.astype(np.float32, order="C") / 32768.0
|
| 172 |
if input_file is None:
|
|
|
|
| 254 |
with gr.Column():
|
| 255 |
output_age = gr.Textbox(label="Age")
|
| 256 |
output_gender = gr.Label(label="Gender")
|
|
|
|
| 257 |
output_expression = gr.Image(label="Expression")
|
| 258 |
|
| 259 |
outputs = [output_age, output_gender, output_expression]
|