yoyolicoris committed on
Commit
6bd65db
·
1 Parent(s): 6fb8ca1

update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -50
app.py CHANGED
@@ -47,7 +47,7 @@ Try to play around with the sliders and buttons and see what you can come up wit
47
  > **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
48
  """
49
 
50
- DEVICE = "cuda"
51
  SLIDER_MAX = 3
52
  SLIDER_MIN = -3
53
  NUMBER_OF_PCS = 4
@@ -180,77 +180,82 @@ def convert2float(sr: int, x: np.ndarray) -> np.ndarray:
180
  def inference(
181
  input_audio,
182
  ref_audio,
183
- ratio,
184
  method,
185
  dataset,
186
  embedding,
187
- remove_approx,
188
  mid_side,
189
  steps,
190
  prior_weight,
191
  optimiser,
192
  lr,
193
  ):
194
- y = convert2float(*input_audio)
 
 
195
  ref = convert2float(*ref_audio)
 
 
 
196
 
 
197
  loudness = meter.integrated_loudness(y)
198
  y = pyln.normalize.loudness(y, loudness, -18.0)
199
  y = torch.from_numpy(y).float().T.unsqueeze(0).to(DEVICE)
200
 
201
- ref_loudness = meter.integrated_loudness(ref)
202
- ref = pyln.normalize.loudness(ref, ref_loudness, -18.0)
203
- ref = torch.from_numpy(ref).float().T.unsqueeze(0).to(DEVICE)
204
-
205
  if y.shape[1] != 1:
206
  y = y.mean(dim=1, keepdim=True)
207
 
208
  fx = deepcopy(global_fx).to(DEVICE)
209
  fx.train()
210
 
 
 
 
 
 
211
  match method:
212
- case "Mean":
213
- vec = gaussian_params_dict[dataset][0]
214
- case "Nearest Neighbour" | "ST-ITO":
215
- two_chs_emb_fn = chain_functions(
216
- hadamard if mid_side else lambda x: x,
217
- get_embedding_model(embedding),
218
  )
219
- vec = (
220
- find_closest_training_sample(
221
- fx, two_chs_emb_fn, to_fx_state_dict, preset_dict[dataset], ref, y
222
- )
223
- if method == "Nearest Neighbour"
224
- else one_evaluation(
225
- fx,
226
- two_chs_emb_fn,
227
- to_fx_state_dict,
228
- partial(
229
- logp_x, *[x.to(DEVICE) for x in gaussian_params_dict[dataset]]
230
- ),
231
- internal_mean.to(DEVICE),
232
- ref,
233
- y,
234
- optimiser_type=optimiser,
235
- lr=lr,
236
- steps=steps,
237
- weight=prior_weight,
238
- )
239
  )
 
240
  case _:
241
  raise ValueError(f"Unknown method: {method}")
242
 
 
 
 
 
 
 
 
 
243
  if remove_approx:
244
  infer_fx = instantiate(rt_config).to(DEVICE)
245
  else:
246
- infer_fx = fx
247
 
248
  infer_fx.load_state_dict(vec2dict(vec), strict=False)
249
  # fx.apply(partial(clip_delay_eq_Q, Q=0.707))
250
  infer_fx.eval()
251
 
252
  with torch.no_grad():
253
- direct, wet = fx(y)
254
  direct = direct.squeeze(0).T.cpu().numpy()
255
  wet = wet.squeeze(0).T.cpu().numpy()
256
  angle = ratio * math.pi * 0.5
@@ -424,7 +429,7 @@ def vec2fx(x):
424
 
425
 
426
  with gr.Blocks() as demo:
427
- # fx_params = gr.State(internal_mean)
428
  # fx = vec2fx(fx_params.value)
429
  # sr, y = read(EXAMPLE_PATH)
430
 
@@ -514,15 +519,16 @@ with gr.Blocks() as demo:
514
  with gr.Row():
515
  optimisation_steps = gr.Slider(
516
  minimum=1,
517
- maximum=10000,
518
- value=1000,
 
519
  label="Number of Optimisation Steps",
520
  interactive=True,
521
  )
522
- prior_weight = gr.Slider(
523
- minimum=0.0,
524
- maximum=1.0,
525
- value=0.1,
526
  label="Prior Weight",
527
  interactive=True,
528
  )
@@ -544,10 +550,9 @@ with gr.Blocks() as demo:
544
  label="Optimiser",
545
  interactive=True,
546
  )
547
- lr_slider = gr.Slider(
548
- minimum=1e-6,
549
- maximum=1.0,
550
- value=1e-3,
551
  label="Learning Rate",
552
  interactive=True,
553
  )
@@ -559,16 +564,22 @@ with gr.Blocks() as demo:
559
  # ratio,
560
  # # assign_fx_params(vec2fx(x), *all_s),
561
  # ),
562
- inference,
 
 
 
 
 
 
563
  ),
564
  inputs=[
565
  audio_input,
566
- audio_reference,
567
  dry_wet_ratio,
 
568
  method_dropdown,
569
  dataset_dropdown,
570
  embedding_dropdown,
571
- remove_approx_checkbox,
572
  mid_side_checkbox,
573
  optimisation_steps,
574
  prior_weight,
@@ -580,6 +591,7 @@ with gr.Blocks() as demo:
580
  audio_output,
581
  direct_output,
582
  wet_output,
 
583
  ],
584
  )
585
 
 
47
  > **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
48
  """
49
 
50
+ DEVICE = "cpu"
51
  SLIDER_MAX = 3
52
  SLIDER_MIN = -3
53
  NUMBER_OF_PCS = 4
 
180
  def inference(
181
  input_audio,
182
  ref_audio,
 
183
  method,
184
  dataset,
185
  embedding,
 
186
  mid_side,
187
  steps,
188
  prior_weight,
189
  optimiser,
190
  lr,
191
  ):
192
+ if method == "Mean":
193
+ return gaussian_params_dict[dataset][0].to(DEVICE)
194
+
195
  ref = convert2float(*ref_audio)
196
+ ref_loudness = meter.integrated_loudness(ref)
197
+ ref = pyln.normalize.loudness(ref, ref_loudness, -18.0)
198
+ ref = torch.from_numpy(ref).float().T.unsqueeze(0).to(DEVICE)
199
 
200
+ y = convert2float(*input_audio)
201
  loudness = meter.integrated_loudness(y)
202
  y = pyln.normalize.loudness(y, loudness, -18.0)
203
  y = torch.from_numpy(y).float().T.unsqueeze(0).to(DEVICE)
204
 
 
 
 
 
205
  if y.shape[1] != 1:
206
  y = y.mean(dim=1, keepdim=True)
207
 
208
  fx = deepcopy(global_fx).to(DEVICE)
209
  fx.train()
210
 
211
+ two_chs_emb_fn = chain_functions(
212
+ hadamard if mid_side else lambda x: x,
213
+ get_embedding_model(embedding),
214
+ )
215
+
216
  match method:
217
+ case "Nearest Neighbour":
218
+ vec = find_closest_training_sample(
219
+ fx, two_chs_emb_fn, to_fx_state_dict, preset_dict[dataset], ref, y
 
 
 
220
  )
221
+
222
+ case "ST-ITO":
223
+ vec = one_evaluation(
224
+ fx,
225
+ two_chs_emb_fn,
226
+ to_fx_state_dict,
227
+ partial(logp_x, *[x.to(DEVICE) for x in gaussian_params_dict[dataset]]),
228
+ internal_mean.to(DEVICE),
229
+ ref,
230
+ y,
231
+ optimiser_type=optimiser,
232
+ lr=lr,
233
+ steps=steps,
234
+ weight=prior_weight,
 
 
 
 
 
 
235
  )
236
+
237
  case _:
238
  raise ValueError(f"Unknown method: {method}")
239
 
240
+ return vec
241
+
242
+
243
+ def render(y, remove_approx, ratio, vec):
244
+ y = convert2float(*y)
245
+ loudness = meter.integrated_loudness(y)
246
+ y = pyln.normalize.loudness(y, loudness, -18.0)
247
+ y = torch.from_numpy(y).float().T.unsqueeze(0).to(DEVICE)
248
  if remove_approx:
249
  infer_fx = instantiate(rt_config).to(DEVICE)
250
  else:
251
+ infer_fx = instantiate(fx_config).to(DEVICE)
252
 
253
  infer_fx.load_state_dict(vec2dict(vec), strict=False)
254
  # fx.apply(partial(clip_delay_eq_Q, Q=0.707))
255
  infer_fx.eval()
256
 
257
  with torch.no_grad():
258
+ direct, wet = infer_fx(y)
259
  direct = direct.squeeze(0).T.cpu().numpy()
260
  wet = wet.squeeze(0).T.cpu().numpy()
261
  angle = ratio * math.pi * 0.5
 
429
 
430
 
431
  with gr.Blocks() as demo:
432
+ fx_params = gr.State(internal_mean)
433
  # fx = vec2fx(fx_params.value)
434
  # sr, y = read(EXAMPLE_PATH)
435
 
 
519
  with gr.Row():
520
  optimisation_steps = gr.Slider(
521
  minimum=1,
522
+ maximum=100,
523
+ value=100,
524
+ step=1,
525
  label="Number of Optimisation Steps",
526
  interactive=True,
527
  )
528
+ prior_weight = gr.Dropdown(
529
+ [("0", 0.0), ("0.001", 0.001), ("0.01", 0.01), ("0.1", 0.1), ("1", 1.0)],
530
+ info="Weight of the prior distribution in the loss function. A higher value means the model will try to stay closer to the prior distribution.",
531
+ value=0.01,
532
  label="Prior Weight",
533
  interactive=True,
534
  )
 
550
  label="Optimiser",
551
  interactive=True,
552
  )
553
+ lr_slider = gr.Dropdown(
554
+ [("0.0001", 1e-4), ("0.001", 1e-3), ("0.01", 1e-2), ("0.1", 1e-1)],
555
+ value=1e-2,
 
556
  label="Learning Rate",
557
  interactive=True,
558
  )
 
564
  # ratio,
565
  # # assign_fx_params(vec2fx(x), *all_s),
566
  # ),
567
+ lambda audio, approx, ratio, *args: (
568
+ audio,
569
+ approx,
570
+ ratio,
571
+ inference(audio, *args),
572
+ ),
573
+ lambda audio, approx, ratio, vec: (*render(audio, approx, ratio, vec), vec),
574
  ),
575
  inputs=[
576
  audio_input,
577
+ remove_approx_checkbox,
578
  dry_wet_ratio,
579
+ audio_reference,
580
  method_dropdown,
581
  dataset_dropdown,
582
  embedding_dropdown,
 
583
  mid_side_checkbox,
584
  optimisation_steps,
585
  prior_weight,
 
591
  audio_output,
592
  direct_output,
593
  wet_output,
594
+ fx_params,
595
  ],
596
  )
597