yoyolicoris committed on
Commit
9b25b66
·
1 Parent(s): df0ae2d

update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -15
app.py CHANGED
@@ -73,6 +73,9 @@ EXAMPLE_PATH = "eleanor_erased.wav"
73
  with open(CONFIG_PATH["approx"]) as fp:
74
  fx_config = yaml.safe_load(fp)["model"]
75
 
 
 
 
76
 
77
  def load_presets(preset_folder: Path) -> Tensor:
78
  raw_params = torch.from_numpy(np.load(preset_folder / PARAMS_PATH))
@@ -136,8 +139,18 @@ global_fx.load_state_dict(vec2dict(internal_mean), strict=False)
136
  meter = pyln.Meter(44100)
137
 
138
 
139
- @torch.no_grad()
140
- def inference(audio, ratio, fx):
 
 
 
 
 
 
 
 
 
 
141
  sr, y = audio
142
  if sr != 44100:
143
  y = resample(y, sr, 44100)
@@ -153,7 +166,26 @@ def inference(audio, ratio, fx):
153
  if y.shape[1] != 1:
154
  y = y.mean(dim=1, keepdim=True)
155
 
156
- direct, wet = fx(y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  direct = direct.squeeze(0).T.numpy()
158
  wet = wet.squeeze(0).T.numpy()
159
  angle = ratio * math.pi * 0.5
@@ -327,8 +359,8 @@ def vec2fx(x):
327
 
328
 
329
  with gr.Blocks() as demo:
330
- fx_params = gr.State(internal_mean)
331
- fx = vec2fx(fx_params.value)
332
  # sr, y = read(EXAMPLE_PATH)
333
 
334
  default_pc_slider = partial(
@@ -357,14 +389,10 @@ with gr.Blocks() as demo:
357
  label="Input Audio",
358
  # value=(sr, y)
359
  )
360
- with gr.Row():
361
- reset_button = gr.Button(
362
- "Reset",
363
- elem_id="reset-button",
364
- )
365
- render_button = gr.Button(
366
- "Run", elem_id="render-button", variant="primary"
367
- )
368
 
369
  with gr.Column():
370
  audio_output = default_audio_block(label="Output Audio", interactive=False)
@@ -378,6 +406,10 @@ with gr.Blocks() as demo:
378
  direct_output = default_audio_block(label="Direct Audio", interactive=False)
379
  wet_output = default_audio_block(label="Wet Audio", interactive=False)
380
 
 
 
 
 
381
  _ = gr.Markdown("## Common Parameters")
382
  with gr.Row():
383
  method_dropdown = gr.Dropdown(
@@ -387,10 +419,10 @@ with gr.Blocks() as demo:
387
  interactive=True,
388
  )
389
  dataset_dropdown = gr.Dropdown(
390
- ["Internal", "MedleyDB"],
391
  label="Prior Distribution",
392
  info="When using the Regression method, this parameter has no effect as the model is trained on the internal dataset.",
393
- value="Internal",
394
  interactive=True,
395
  )
396
  embedding_dropdown = gr.Dropdown(
@@ -400,6 +432,12 @@ with gr.Blocks() as demo:
400
  value="AFx-Rep",
401
  interactive=True,
402
  )
 
 
 
 
 
 
403
 
404
  _ = gr.Markdown("## Parameters for ST-ITO Method")
405
  with gr.Row():
@@ -435,5 +473,42 @@ with gr.Blocks() as demo:
435
  label="Optimiser",
436
  interactive=True,
437
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  demo.launch()
 
73
  with open(CONFIG_PATH["approx"]) as fp:
74
  fx_config = yaml.safe_load(fp)["model"]
75
 
76
+ with open(CONFIG_PATH["realtime"]) as fp:
77
+ rt_config = yaml.safe_load(fp)["model"]
78
+
79
 
80
  def load_presets(preset_folder: Path) -> Tensor:
81
  raw_params = torch.from_numpy(np.load(preset_folder / PARAMS_PATH))
 
139
  meter = pyln.Meter(44100)
140
 
141
 
142
+ def inference(
143
+ audio,
144
+ ratio,
145
+ method,
146
+ dataset,
147
+ embedding,
148
+ remove_approx,
149
+ steps,
150
+ prior_weight,
151
+ optimiser,
152
+ lr,
153
+ ):
154
  sr, y = audio
155
  if sr != 44100:
156
  y = resample(y, sr, 44100)
 
166
  if y.shape[1] != 1:
167
  y = y.mean(dim=1, keepdim=True)
168
 
169
+ fx = deepcopy(global_fx)
170
+ fx.train()
171
+
172
+ match method:
173
+ case "Mean":
174
+ vec = gaussian_params_dict[dataset][0]
175
+ case _:
176
+ vec = internal_mean.clone()
177
+
178
+ if remove_approx:
179
+ infer_fx = instantiate(rt_config)
180
+ else:
181
+ infer_fx = fx
182
+
183
+ infer_fx.load_state_dict(vec2dict(vec), strict=False)
184
+ # fx.apply(partial(clip_delay_eq_Q, Q=0.707))
185
+ infer_fx.eval()
186
+
187
+ with torch.no_grad():
188
+ direct, wet = fx(y)
189
  direct = direct.squeeze(0).T.numpy()
190
  wet = wet.squeeze(0).T.numpy()
191
  angle = ratio * math.pi * 0.5
 
359
 
360
 
361
  with gr.Blocks() as demo:
362
+ # fx_params = gr.State(internal_mean)
363
+ # fx = vec2fx(fx_params.value)
364
  # sr, y = read(EXAMPLE_PATH)
365
 
366
  default_pc_slider = partial(
 
389
  label="Input Audio",
390
  # value=(sr, y)
391
  )
392
+ audio_reference = default_audio_block(
393
+ sources="upload",
394
+ label="Reference Audio",
395
+ )
 
 
 
 
396
 
397
  with gr.Column():
398
  audio_output = default_audio_block(label="Output Audio", interactive=False)
 
406
  direct_output = default_audio_block(label="Direct Audio", interactive=False)
407
  wet_output = default_audio_block(label="Wet Audio", interactive=False)
408
 
409
+ with gr.Row():
410
+ reset_button = gr.Button("Reset", elem_id="reset-button")
411
+ render_button = gr.Button("Run", elem_id="render-button", variant="primary")
412
+
413
  _ = gr.Markdown("## Common Parameters")
414
  with gr.Row():
415
  method_dropdown = gr.Dropdown(
 
419
  interactive=True,
420
  )
421
  dataset_dropdown = gr.Dropdown(
422
+ [("Internal", "internal"), ("MedleyDB", "medleydb")],
423
  label="Prior Distribution",
424
  info="When using the Regression method, this parameter has no effect as the model is trained on the internal dataset.",
425
+ value="internal",
426
  interactive=True,
427
  )
428
  embedding_dropdown = gr.Dropdown(
 
432
  value="AFx-Rep",
433
  interactive=True,
434
  )
435
+ remove_approx_checkbox = gr.Checkbox(
436
+ label="Use Real-time Effects",
437
+ info="Use real-time delay and reverb effects instead of approximated ones.",
438
+ value=False,
439
+ interactive=True,
440
+ )
441
 
442
  _ = gr.Markdown("## Parameters for ST-ITO Method")
443
  with gr.Row():
 
473
  label="Optimiser",
474
  interactive=True,
475
  )
476
+ lr_slider = gr.Slider(
477
+ minimum=1e-6,
478
+ maximum=1.0,
479
+ value=1e-3,
480
+ label="Learning Rate",
481
+ interactive=True,
482
+ )
483
+
484
+ render_button.click(
485
+ chain_functions(
486
+ # lambda audio, ratio, x, *all_s: (
487
+ # audio,
488
+ # ratio,
489
+ # # assign_fx_params(vec2fx(x), *all_s),
490
+ # ),
491
+ inference,
492
+ ),
493
+ inputs=[
494
+ audio_input,
495
+ dry_wet_ratio,
496
+ method_dropdown,
497
+ dataset_dropdown,
498
+ embedding_dropdown,
499
+ remove_approx_checkbox,
500
+ optimisation_steps,
501
+ prior_weight,
502
+ optimiser_dropdown,
503
+ lr_slider,
504
+ # fx_params,
505
+ ],
506
+ outputs=[
507
+ audio_output,
508
+ direct_output,
509
+ wet_output,
510
+ ],
511
+ )
512
+
513
 
514
  demo.launch()