Spaces:
Sleeping
Sleeping
Commit
·
6bd65db
1
Parent(s):
6fb8ca1
update app.py
Browse files
app.py
CHANGED
|
@@ -47,7 +47,7 @@ Try to play around with the sliders and buttons and see what you can come up wit
|
|
| 47 |
> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
|
| 48 |
"""
|
| 49 |
|
| 50 |
-
DEVICE = "
|
| 51 |
SLIDER_MAX = 3
|
| 52 |
SLIDER_MIN = -3
|
| 53 |
NUMBER_OF_PCS = 4
|
|
@@ -180,77 +180,82 @@ def convert2float(sr: int, x: np.ndarray) -> np.ndarray:
|
|
| 180 |
def inference(
|
| 181 |
input_audio,
|
| 182 |
ref_audio,
|
| 183 |
-
ratio,
|
| 184 |
method,
|
| 185 |
dataset,
|
| 186 |
embedding,
|
| 187 |
-
remove_approx,
|
| 188 |
mid_side,
|
| 189 |
steps,
|
| 190 |
prior_weight,
|
| 191 |
optimiser,
|
| 192 |
lr,
|
| 193 |
):
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
ref = convert2float(*ref_audio)
|
|
|
|
|
|
|
|
|
|
| 196 |
|
|
|
|
| 197 |
loudness = meter.integrated_loudness(y)
|
| 198 |
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 199 |
y = torch.from_numpy(y).float().T.unsqueeze(0).to(DEVICE)
|
| 200 |
|
| 201 |
-
ref_loudness = meter.integrated_loudness(ref)
|
| 202 |
-
ref = pyln.normalize.loudness(ref, ref_loudness, -18.0)
|
| 203 |
-
ref = torch.from_numpy(ref).float().T.unsqueeze(0).to(DEVICE)
|
| 204 |
-
|
| 205 |
if y.shape[1] != 1:
|
| 206 |
y = y.mean(dim=1, keepdim=True)
|
| 207 |
|
| 208 |
fx = deepcopy(global_fx).to(DEVICE)
|
| 209 |
fx.train()
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
match method:
|
| 212 |
-
case "
|
| 213 |
-
vec =
|
| 214 |
-
|
| 215 |
-
two_chs_emb_fn = chain_functions(
|
| 216 |
-
hadamard if mid_side else lambda x: x,
|
| 217 |
-
get_embedding_model(embedding),
|
| 218 |
)
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
y,
|
| 234 |
-
optimiser_type=optimiser,
|
| 235 |
-
lr=lr,
|
| 236 |
-
steps=steps,
|
| 237 |
-
weight=prior_weight,
|
| 238 |
-
)
|
| 239 |
)
|
|
|
|
| 240 |
case _:
|
| 241 |
raise ValueError(f"Unknown method: {method}")
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
if remove_approx:
|
| 244 |
infer_fx = instantiate(rt_config).to(DEVICE)
|
| 245 |
else:
|
| 246 |
-
infer_fx =
|
| 247 |
|
| 248 |
infer_fx.load_state_dict(vec2dict(vec), strict=False)
|
| 249 |
# fx.apply(partial(clip_delay_eq_Q, Q=0.707))
|
| 250 |
infer_fx.eval()
|
| 251 |
|
| 252 |
with torch.no_grad():
|
| 253 |
-
direct, wet =
|
| 254 |
direct = direct.squeeze(0).T.cpu().numpy()
|
| 255 |
wet = wet.squeeze(0).T.cpu().numpy()
|
| 256 |
angle = ratio * math.pi * 0.5
|
|
@@ -424,7 +429,7 @@ def vec2fx(x):
|
|
| 424 |
|
| 425 |
|
| 426 |
with gr.Blocks() as demo:
|
| 427 |
-
|
| 428 |
# fx = vec2fx(fx_params.value)
|
| 429 |
# sr, y = read(EXAMPLE_PATH)
|
| 430 |
|
|
@@ -514,15 +519,16 @@ with gr.Blocks() as demo:
|
|
| 514 |
with gr.Row():
|
| 515 |
optimisation_steps = gr.Slider(
|
| 516 |
minimum=1,
|
| 517 |
-
maximum=
|
| 518 |
-
value=
|
|
|
|
| 519 |
label="Number of Optimisation Steps",
|
| 520 |
interactive=True,
|
| 521 |
)
|
| 522 |
-
prior_weight = gr.
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
value=0.
|
| 526 |
label="Prior Weight",
|
| 527 |
interactive=True,
|
| 528 |
)
|
|
@@ -544,10 +550,9 @@ with gr.Blocks() as demo:
|
|
| 544 |
label="Optimiser",
|
| 545 |
interactive=True,
|
| 546 |
)
|
| 547 |
-
lr_slider = gr.
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
value=1e-3,
|
| 551 |
label="Learning Rate",
|
| 552 |
interactive=True,
|
| 553 |
)
|
|
@@ -559,16 +564,22 @@ with gr.Blocks() as demo:
|
|
| 559 |
# ratio,
|
| 560 |
# # assign_fx_params(vec2fx(x), *all_s),
|
| 561 |
# ),
|
| 562 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
),
|
| 564 |
inputs=[
|
| 565 |
audio_input,
|
| 566 |
-
|
| 567 |
dry_wet_ratio,
|
|
|
|
| 568 |
method_dropdown,
|
| 569 |
dataset_dropdown,
|
| 570 |
embedding_dropdown,
|
| 571 |
-
remove_approx_checkbox,
|
| 572 |
mid_side_checkbox,
|
| 573 |
optimisation_steps,
|
| 574 |
prior_weight,
|
|
@@ -580,6 +591,7 @@ with gr.Blocks() as demo:
|
|
| 580 |
audio_output,
|
| 581 |
direct_output,
|
| 582 |
wet_output,
|
|
|
|
| 583 |
],
|
| 584 |
)
|
| 585 |
|
|
|
|
| 47 |
> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
|
| 48 |
"""
|
| 49 |
|
| 50 |
+
DEVICE = "cpu"
|
| 51 |
SLIDER_MAX = 3
|
| 52 |
SLIDER_MIN = -3
|
| 53 |
NUMBER_OF_PCS = 4
|
|
|
|
| 180 |
def inference(
    input_audio,
    ref_audio,
    method,
    dataset,
    embedding,
    mid_side,
    steps,
    prior_weight,
    optimiser,
    lr,
):
    """Estimate an effect-parameter vector for ``input_audio`` given ``ref_audio``.

    Args:
        input_audio: Pair unpacked into ``convert2float`` (sample rate, samples).
        ref_audio: Reference pair, unpacked the same way.
        method: ``"Mean"``, ``"Nearest Neighbour"`` or ``"ST-ITO"``.
        dataset: Key into ``gaussian_params_dict`` / ``preset_dict``.
        embedding: Name handed to ``get_embedding_model``.
        mid_side: When truthy, ``hadamard`` is applied before the embedding model.
        steps: Forwarded to ``one_evaluation`` as ``steps`` (ST-ITO only).
        prior_weight: Forwarded to ``one_evaluation`` as ``weight`` (ST-ITO only).
        optimiser: Forwarded to ``one_evaluation`` as ``optimiser_type`` (ST-ITO only).
        lr: Forwarded to ``one_evaluation`` as ``lr`` (ST-ITO only).

    Returns:
        For ``"Mean"``, the first entry of ``gaussian_params_dict[dataset]`` moved
        to ``DEVICE`` (presumably the prior mean -- confirm against where the dict
        is built). Otherwise whatever the selected search routine returns.

    Raises:
        ValueError: If ``method`` is not one of the three names above. This is
            raised only after both audio inputs have been preprocessed.
    """
    # "Mean" needs neither audio input, so it is answered before any processing.
    if method == "Mean":
        return gaussian_params_dict[dataset][0].to(DEVICE)

    target_loudness = -18.0

    def _to_normalised_tensor(audio):
        # Convert, loudness-normalise, then transpose and add a leading axis.
        samples = convert2float(*audio)
        measured = meter.integrated_loudness(samples)
        samples = pyln.normalize.loudness(samples, measured, target_loudness)
        return torch.from_numpy(samples).float().T.unsqueeze(0).to(DEVICE)

    # The reference is prepared first, then the input, as in the original ordering.
    ref = _to_normalised_tensor(ref_audio)
    y = _to_normalised_tensor(input_audio)

    # Only the input is averaged over dim 1 (presumably a mono downmix); the
    # reference is left as is.
    if y.shape[1] != 1:
        y = y.mean(dim=1, keepdim=True)

    fx = deepcopy(global_fx).to(DEVICE)
    fx.train()

    def _identity(x):
        return x

    pre_transform = hadamard if mid_side else _identity
    two_chs_emb_fn = chain_functions(pre_transform, get_embedding_model(embedding))

    if method == "Nearest Neighbour":
        return find_closest_training_sample(
            fx, two_chs_emb_fn, to_fx_state_dict, preset_dict[dataset], ref, y
        )

    if method == "ST-ITO":
        prior_params = [p.to(DEVICE) for p in gaussian_params_dict[dataset]]
        return one_evaluation(
            fx,
            two_chs_emb_fn,
            to_fx_state_dict,
            partial(logp_x, *prior_params),
            internal_mean.to(DEVICE),
            ref,
            y,
            optimiser_type=optimiser,
            lr=lr,
            steps=steps,
            weight=prior_weight,
        )

    raise ValueError(f"Unknown method: {method}")
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def render(y, remove_approx, ratio, vec):
|
| 244 |
+
y = convert2float(*y)
|
| 245 |
+
loudness = meter.integrated_loudness(y)
|
| 246 |
+
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 247 |
+
y = torch.from_numpy(y).float().T.unsqueeze(0).to(DEVICE)
|
| 248 |
if remove_approx:
|
| 249 |
infer_fx = instantiate(rt_config).to(DEVICE)
|
| 250 |
else:
|
| 251 |
+
infer_fx = instantiate(fx_config).to(DEVICE)
|
| 252 |
|
| 253 |
infer_fx.load_state_dict(vec2dict(vec), strict=False)
|
| 254 |
# fx.apply(partial(clip_delay_eq_Q, Q=0.707))
|
| 255 |
infer_fx.eval()
|
| 256 |
|
| 257 |
with torch.no_grad():
|
| 258 |
+
direct, wet = infer_fx(y)
|
| 259 |
direct = direct.squeeze(0).T.cpu().numpy()
|
| 260 |
wet = wet.squeeze(0).T.cpu().numpy()
|
| 261 |
angle = ratio * math.pi * 0.5
|
|
|
|
| 429 |
|
| 430 |
|
| 431 |
with gr.Blocks() as demo:
|
| 432 |
+
fx_params = gr.State(internal_mean)
|
| 433 |
# fx = vec2fx(fx_params.value)
|
| 434 |
# sr, y = read(EXAMPLE_PATH)
|
| 435 |
|
|
|
|
| 519 |
with gr.Row():
|
| 520 |
optimisation_steps = gr.Slider(
|
| 521 |
minimum=1,
|
| 522 |
+
maximum=100,
|
| 523 |
+
value=100,
|
| 524 |
+
step=1,
|
| 525 |
label="Number of Optimisation Steps",
|
| 526 |
interactive=True,
|
| 527 |
)
|
| 528 |
+
prior_weight = gr.Dropdown(
|
| 529 |
+
[("0", 0.0), ("0.001", 0.001), ("0.01", 0.01), ("0.1", 0.1), ("1", 1.0)],
|
| 530 |
+
info="Weight of the prior distribution in the loss function. A higher value means the model will try to stay closer to the prior distribution.",
|
| 531 |
+
value=0.01,
|
| 532 |
label="Prior Weight",
|
| 533 |
interactive=True,
|
| 534 |
)
|
|
|
|
| 550 |
label="Optimiser",
|
| 551 |
interactive=True,
|
| 552 |
)
|
| 553 |
+
lr_slider = gr.Dropdown(
|
| 554 |
+
[("0.0001", 1e-4), ("0.001", 1e-3), ("0.01", 1e-2), ("0.1", 1e-1)],
|
| 555 |
+
value=1e-2,
|
|
|
|
| 556 |
label="Learning Rate",
|
| 557 |
interactive=True,
|
| 558 |
)
|
|
|
|
| 564 |
# ratio,
|
| 565 |
# # assign_fx_params(vec2fx(x), *all_s),
|
| 566 |
# ),
|
| 567 |
+
lambda audio, approx, ratio, *args: (
|
| 568 |
+
audio,
|
| 569 |
+
approx,
|
| 570 |
+
ratio,
|
| 571 |
+
inference(audio, *args),
|
| 572 |
+
),
|
| 573 |
+
lambda audio, approx, ratio, vec: (*render(audio, approx, ratio, vec), vec),
|
| 574 |
),
|
| 575 |
inputs=[
|
| 576 |
audio_input,
|
| 577 |
+
remove_approx_checkbox,
|
| 578 |
dry_wet_ratio,
|
| 579 |
+
audio_reference,
|
| 580 |
method_dropdown,
|
| 581 |
dataset_dropdown,
|
| 582 |
embedding_dropdown,
|
|
|
|
| 583 |
mid_side_checkbox,
|
| 584 |
optimisation_steps,
|
| 585 |
prior_weight,
|
|
|
|
| 591 |
audio_output,
|
| 592 |
direct_output,
|
| 593 |
wet_output,
|
| 594 |
+
fx_params,
|
| 595 |
],
|
| 596 |
)
|
| 597 |
|