Spaces:

yoyolicoris
/

diffvox

Running

App Files Community

yoyolicoris committed on May 15

Commit

f455314

1 Parent(s): 4059958

feat: add dry/wet ratio slider to inference and update rendering logic

Browse files

Files changed (1) hide show

app.py +36 -6

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt
 import torch
 import yaml
 import json
 import pyloudnorm as pyln
@@ -134,7 +135,7 @@ def x2z(x):
 @torch.no_grad()
-def inference(audio, fx):
     sr, y = audio
     if sr != 44100:
         y = resample(y, sr, 44100)
@@ -153,13 +154,16 @@ def inference(audio, fx):
     direct, wet = fx(y)
     direct = direct.squeeze(0).T.numpy()
     wet = wet.squeeze(0).T.numpy()
-    rendered = direct + wet
     # rendered = fx(y).squeeze(0).T.numpy()
-    if np.max(np.abs(rendered)) > 1:
-        scaler = np.max(np.abs(rendered))
-        rendered = rendered / scaler
         direct = direct / scaler
         wet = wet / scaler
     return (
         (44100, (rendered * 32768).astype(np.int16)),
         (44100, (direct * 32768).astype(np.int16)),
@@ -420,6 +424,13 @@ with gr.Blocks() as demo:
         with gr.Column():
             audio_output = default_audio_block(label="Output Audio", interactive=False)
             direct_output = default_audio_block(label="Direct Audio", interactive=False)
             wet_output = default_audio_block(label="Wet Audio", interactive=False)
@@ -782,11 +793,16 @@ with gr.Blocks() as demo:
     render_button.click(
         chain_functions(
-            lambda audio, x, *all_s: (audio, assign_fx_params(vec2fx(x), *all_s)),
             inference,
         ),
         inputs=[
             audio_input,
             fx_params,
         ]
         + all_effect_sliders,
@@ -946,4 +962,18 @@ with gr.Blocks() as demo:
         outputs=[z, fx_params] + update_all_outputs,
     )
 demo.launch()

 import numpy as np
 import matplotlib.pyplot as plt
 import torch
+import math
 import yaml
 import json
 import pyloudnorm as pyln
 @torch.no_grad()
+def inference(audio, ratio, fx):
     sr, y = audio
     if sr != 44100:
         y = resample(y, sr, 44100)
     direct, wet = fx(y)
     direct = direct.squeeze(0).T.numpy()
     wet = wet.squeeze(0).T.numpy()
+    angle = ratio * math.pi * 0.5
+    test_clipping = direct + wet
     # rendered = fx(y).squeeze(0).T.numpy()
+    if np.max(np.abs(test_clipping)) > 1:
+        scaler = np.max(np.abs(test_clipping))
+        # rendered = rendered / scaler
         direct = direct / scaler
         wet = wet / scaler
+    rendered = math.sqrt(2) * (math.cos(angle) * direct + math.sin(angle) * wet)
     return (
         (44100, (rendered * 32768).astype(np.int16)),
         (44100, (direct * 32768).astype(np.int16)),
         with gr.Column():
             audio_output = default_audio_block(label="Output Audio", interactive=False)
+            dry_wet_ratio = gr.Slider(
+                minimum=0,
+                maximum=1,
+                value=0.5,
+                label="Dry/Wet Ratio",
+                interactive=True,
+            )
             direct_output = default_audio_block(label="Direct Audio", interactive=False)
             wet_output = default_audio_block(label="Wet Audio", interactive=False)
     render_button.click(
         chain_functions(
+            lambda audio, ratio, x, *all_s: (
+                audio,
+                ratio,
+                assign_fx_params(vec2fx(x), *all_s),
+            ),
             inference,
         ),
         inputs=[
             audio_input,
+            dry_wet_ratio,
             fx_params,
         ]
         + all_effect_sliders,
         outputs=[z, fx_params] + update_all_outputs,
     )
+    dry_wet_ratio.input(
+        chain_functions(
+            lambda _, *args: (_, *map(lambda x: x[1] / 32768, args)),
+            lambda ratio, d, w: math.sqrt(2)
+            * (
+                math.cos(ratio * math.pi * 0.5) * d
+                + math.sin(ratio * math.pi * 0.5) * w
+            ),
+            lambda x: (44100, (x * 32768).astype(np.int16)),
+        ),
+        inputs=[dry_wet_ratio, direct_output, wet_output],
+        outputs=[audio_output],
+    )
 demo.launch()