Spaces:
Running on Zero
Running on Zero
Commit ·
9b25b66
1
Parent(s): df0ae2d
update app.py
Browse files
app.py
CHANGED
|
@@ -73,6 +73,9 @@ EXAMPLE_PATH = "eleanor_erased.wav"
|
|
| 73 |
with open(CONFIG_PATH["approx"]) as fp:
|
| 74 |
fx_config = yaml.safe_load(fp)["model"]
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def load_presets(preset_folder: Path) -> Tensor:
|
| 78 |
raw_params = torch.from_numpy(np.load(preset_folder / PARAMS_PATH))
|
|
@@ -136,8 +139,18 @@ global_fx.load_state_dict(vec2dict(internal_mean), strict=False)
|
|
| 136 |
meter = pyln.Meter(44100)
|
| 137 |
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
sr, y = audio
|
| 142 |
if sr != 44100:
|
| 143 |
y = resample(y, sr, 44100)
|
|
@@ -153,7 +166,26 @@ def inference(audio, ratio, fx):
|
|
| 153 |
if y.shape[1] != 1:
|
| 154 |
y = y.mean(dim=1, keepdim=True)
|
| 155 |
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
direct = direct.squeeze(0).T.numpy()
|
| 158 |
wet = wet.squeeze(0).T.numpy()
|
| 159 |
angle = ratio * math.pi * 0.5
|
|
@@ -327,8 +359,8 @@ def vec2fx(x):
|
|
| 327 |
|
| 328 |
|
| 329 |
with gr.Blocks() as demo:
|
| 330 |
-
fx_params = gr.State(internal_mean)
|
| 331 |
-
fx = vec2fx(fx_params.value)
|
| 332 |
# sr, y = read(EXAMPLE_PATH)
|
| 333 |
|
| 334 |
default_pc_slider = partial(
|
|
@@ -357,14 +389,10 @@ with gr.Blocks() as demo:
|
|
| 357 |
label="Input Audio",
|
| 358 |
# value=(sr, y)
|
| 359 |
)
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
)
|
| 365 |
-
render_button = gr.Button(
|
| 366 |
-
"Run", elem_id="render-button", variant="primary"
|
| 367 |
-
)
|
| 368 |
|
| 369 |
with gr.Column():
|
| 370 |
audio_output = default_audio_block(label="Output Audio", interactive=False)
|
|
@@ -378,6 +406,10 @@ with gr.Blocks() as demo:
|
|
| 378 |
direct_output = default_audio_block(label="Direct Audio", interactive=False)
|
| 379 |
wet_output = default_audio_block(label="Wet Audio", interactive=False)
|
| 380 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
_ = gr.Markdown("## Common Parameters")
|
| 382 |
with gr.Row():
|
| 383 |
method_dropdown = gr.Dropdown(
|
|
@@ -387,10 +419,10 @@ with gr.Blocks() as demo:
|
|
| 387 |
interactive=True,
|
| 388 |
)
|
| 389 |
dataset_dropdown = gr.Dropdown(
|
| 390 |
-
["Internal", "MedleyDB"],
|
| 391 |
label="Prior Distribution",
|
| 392 |
info="When using the Regression method, this parameter has no effect as the model is trained on the internal dataset.",
|
| 393 |
-
value="
|
| 394 |
interactive=True,
|
| 395 |
)
|
| 396 |
embedding_dropdown = gr.Dropdown(
|
|
@@ -400,6 +432,12 @@ with gr.Blocks() as demo:
|
|
| 400 |
value="AFx-Rep",
|
| 401 |
interactive=True,
|
| 402 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
|
| 404 |
_ = gr.Markdown("## Parameters for ST-ITO Method")
|
| 405 |
with gr.Row():
|
|
@@ -435,5 +473,42 @@ with gr.Blocks() as demo:
|
|
| 435 |
label="Optimiser",
|
| 436 |
interactive=True,
|
| 437 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
demo.launch()
|
|
|
|
| 73 |
with open(CONFIG_PATH["approx"]) as fp:
|
| 74 |
fx_config = yaml.safe_load(fp)["model"]
|
| 75 |
|
| 76 |
+
with open(CONFIG_PATH["realtime"]) as fp:
|
| 77 |
+
rt_config = yaml.safe_load(fp)["model"]
|
| 78 |
+
|
| 79 |
|
| 80 |
def load_presets(preset_folder: Path) -> Tensor:
|
| 81 |
raw_params = torch.from_numpy(np.load(preset_folder / PARAMS_PATH))
|
|
|
|
| 139 |
meter = pyln.Meter(44100)
|
| 140 |
|
| 141 |
|
| 142 |
+
def inference(
|
| 143 |
+
audio,
|
| 144 |
+
ratio,
|
| 145 |
+
method,
|
| 146 |
+
dataset,
|
| 147 |
+
embedding,
|
| 148 |
+
remove_approx,
|
| 149 |
+
steps,
|
| 150 |
+
prior_weight,
|
| 151 |
+
optimiser,
|
| 152 |
+
lr,
|
| 153 |
+
):
|
| 154 |
sr, y = audio
|
| 155 |
if sr != 44100:
|
| 156 |
y = resample(y, sr, 44100)
|
|
|
|
| 166 |
if y.shape[1] != 1:
|
| 167 |
y = y.mean(dim=1, keepdim=True)
|
| 168 |
|
| 169 |
+
fx = deepcopy(global_fx)
|
| 170 |
+
fx.train()
|
| 171 |
+
|
| 172 |
+
match method:
|
| 173 |
+
case "Mean":
|
| 174 |
+
vec = gaussian_params_dict[dataset][0]
|
| 175 |
+
case _:
|
| 176 |
+
vec = internal_mean.clone()
|
| 177 |
+
|
| 178 |
+
if remove_approx:
|
| 179 |
+
infer_fx = instantiate(rt_config)
|
| 180 |
+
else:
|
| 181 |
+
infer_fx = fx
|
| 182 |
+
|
| 183 |
+
infer_fx.load_state_dict(vec2dict(vec), strict=False)
|
| 184 |
+
# fx.apply(partial(clip_delay_eq_Q, Q=0.707))
|
| 185 |
+
infer_fx.eval()
|
| 186 |
+
|
| 187 |
+
with torch.no_grad():
|
| 188 |
+
# Run the chain that was just configured: `infer_fx` is the module that had
# `vec` loaded and was switched to eval mode above, and it is the real-time
# model when `remove_approx` is set. Calling `fx` here would ignore both.
direct, wet = infer_fx(y)
|
| 189 |
direct = direct.squeeze(0).T.numpy()
|
| 190 |
wet = wet.squeeze(0).T.numpy()
|
| 191 |
angle = ratio * math.pi * 0.5
|
|
|
|
| 359 |
|
| 360 |
|
| 361 |
with gr.Blocks() as demo:
|
| 362 |
+
# fx_params = gr.State(internal_mean)
|
| 363 |
+
# fx = vec2fx(fx_params.value)
|
| 364 |
# sr, y = read(EXAMPLE_PATH)
|
| 365 |
|
| 366 |
default_pc_slider = partial(
|
|
|
|
| 389 |
label="Input Audio",
|
| 390 |
# value=(sr, y)
|
| 391 |
)
|
| 392 |
+
audio_reference = default_audio_block(
|
| 393 |
+
sources="upload",
|
| 394 |
+
label="Reference Audio",
|
| 395 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
with gr.Column():
|
| 398 |
audio_output = default_audio_block(label="Output Audio", interactive=False)
|
|
|
|
| 406 |
direct_output = default_audio_block(label="Direct Audio", interactive=False)
|
| 407 |
wet_output = default_audio_block(label="Wet Audio", interactive=False)
|
| 408 |
|
| 409 |
+
with gr.Row():
|
| 410 |
+
reset_button = gr.Button("Reset", elem_id="reset-button")
|
| 411 |
+
render_button = gr.Button("Run", elem_id="render-button", variant="primary")
|
| 412 |
+
|
| 413 |
_ = gr.Markdown("## Common Parameters")
|
| 414 |
with gr.Row():
|
| 415 |
method_dropdown = gr.Dropdown(
|
|
|
|
| 419 |
interactive=True,
|
| 420 |
)
|
| 421 |
dataset_dropdown = gr.Dropdown(
|
| 422 |
+
[("Internal", "internal"), ("MedleyDB", "medleydb")],
|
| 423 |
label="Prior Distribution",
|
| 424 |
info="When using the Regression method, this parameter has no effect as the model is trained on the internal dataset.",
|
| 425 |
+
value="internal",
|
| 426 |
interactive=True,
|
| 427 |
)
|
| 428 |
embedding_dropdown = gr.Dropdown(
|
|
|
|
| 432 |
value="AFx-Rep",
|
| 433 |
interactive=True,
|
| 434 |
)
|
| 435 |
+
remove_approx_checkbox = gr.Checkbox(
|
| 436 |
+
label="Use Real-time Effects",
|
| 437 |
+
info="Use real-time delay and reverb effects instead of approximated ones.",
|
| 438 |
+
value=False,
|
| 439 |
+
interactive=True,
|
| 440 |
+
)
|
| 441 |
|
| 442 |
_ = gr.Markdown("## Parameters for ST-ITO Method")
|
| 443 |
with gr.Row():
|
|
|
|
| 473 |
label="Optimiser",
|
| 474 |
interactive=True,
|
| 475 |
)
|
| 476 |
+
lr_slider = gr.Slider(
|
| 477 |
+
minimum=1e-6,
|
| 478 |
+
maximum=1.0,
|
| 479 |
+
value=1e-3,
|
| 480 |
+
label="Learning Rate",
|
| 481 |
+
interactive=True,
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
render_button.click(
|
| 485 |
+
chain_functions(
|
| 486 |
+
# lambda audio, ratio, x, *all_s: (
|
| 487 |
+
# audio,
|
| 488 |
+
# ratio,
|
| 489 |
+
# # assign_fx_params(vec2fx(x), *all_s),
|
| 490 |
+
# ),
|
| 491 |
+
inference,
|
| 492 |
+
),
|
| 493 |
+
inputs=[
|
| 494 |
+
audio_input,
|
| 495 |
+
dry_wet_ratio,
|
| 496 |
+
method_dropdown,
|
| 497 |
+
dataset_dropdown,
|
| 498 |
+
embedding_dropdown,
|
| 499 |
+
remove_approx_checkbox,
|
| 500 |
+
optimisation_steps,
|
| 501 |
+
prior_weight,
|
| 502 |
+
optimiser_dropdown,
|
| 503 |
+
lr_slider,
|
| 504 |
+
# fx_params,
|
| 505 |
+
],
|
| 506 |
+
outputs=[
|
| 507 |
+
audio_output,
|
| 508 |
+
direct_output,
|
| 509 |
+
wet_output,
|
| 510 |
+
],
|
| 511 |
+
)
|
| 512 |
+
|
| 513 |
|
| 514 |
demo.launch()
|