Spaces:
Sleeping
Sleeping
Commit
·
bbb9d09
1
Parent(s):
f4d4abb
feat: read device configuration from device.txt for dynamic device management
Browse files
app.py
CHANGED
|
@@ -47,8 +47,7 @@ Try to play around with the sliders and buttons and see what you can come up wit
|
|
| 47 |
> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
|
| 48 |
"""
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
DEVICE = Path("DEVICE.txt").read_text()
|
| 52 |
SLIDER_MAX = 3
|
| 53 |
SLIDER_MIN = -3
|
| 54 |
NUMBER_OF_PCS = 4
|
|
@@ -153,15 +152,16 @@ meter = pyln.Meter(44100)
|
|
| 153 |
|
| 154 |
|
| 155 |
def get_embedding_model(embedding: str) -> Callable:
|
|
|
|
| 156 |
match embedding:
|
| 157 |
case "afx-rep":
|
| 158 |
-
afx_rep = load_param_model().to(
|
| 159 |
two_chs_emb_fn = lambda x: get_param_embeds(x, afx_rep, 44100)
|
| 160 |
case "mfcc":
|
| 161 |
-
mfcc = load_mfcc_feature_extractor().to(
|
| 162 |
two_chs_emb_fn = lambda x: get_feature_embeds(x, mfcc)
|
| 163 |
case "mir":
|
| 164 |
-
mir = load_mir_feature_extractor().to(
|
| 165 |
two_chs_emb_fn = lambda x: get_feature_embeds(x, mir)
|
| 166 |
case _:
|
| 167 |
raise ValueError(f"Unknown encoder: {embedding}")
|
|
@@ -190,23 +190,24 @@ def inference(
|
|
| 190 |
optimiser,
|
| 191 |
lr,
|
| 192 |
):
|
|
|
|
| 193 |
if method == "Mean":
|
| 194 |
-
return gaussian_params_dict[dataset][0].to(
|
| 195 |
|
| 196 |
ref = convert2float(*ref_audio)
|
| 197 |
ref_loudness = meter.integrated_loudness(ref)
|
| 198 |
ref = pyln.normalize.loudness(ref, ref_loudness, -18.0)
|
| 199 |
-
ref = torch.from_numpy(ref).float().T.unsqueeze(0).to(
|
| 200 |
|
| 201 |
y = convert2float(*input_audio)
|
| 202 |
loudness = meter.integrated_loudness(y)
|
| 203 |
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 204 |
-
y = torch.from_numpy(y).float().T.unsqueeze(0).to(
|
| 205 |
|
| 206 |
if y.shape[1] != 1:
|
| 207 |
y = y.mean(dim=1, keepdim=True)
|
| 208 |
|
| 209 |
-
fx = deepcopy(global_fx).to(
|
| 210 |
fx.train()
|
| 211 |
|
| 212 |
two_chs_emb_fn = chain_functions(
|
|
@@ -225,8 +226,8 @@ def inference(
|
|
| 225 |
fx,
|
| 226 |
two_chs_emb_fn,
|
| 227 |
to_fx_state_dict,
|
| 228 |
-
partial(logp_x, *[x.to(
|
| 229 |
-
internal_mean.to(
|
| 230 |
ref,
|
| 231 |
y,
|
| 232 |
optimiser_type=optimiser,
|
|
@@ -242,14 +243,15 @@ def inference(
|
|
| 242 |
|
| 243 |
|
| 244 |
def render(y, remove_approx, ratio, vec):
|
|
|
|
| 245 |
y = convert2float(*y)
|
| 246 |
loudness = meter.integrated_loudness(y)
|
| 247 |
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 248 |
-
y = torch.from_numpy(y).float().T.unsqueeze(0).to(
|
| 249 |
if remove_approx:
|
| 250 |
-
infer_fx = instantiate(rt_config).to(
|
| 251 |
else:
|
| 252 |
-
infer_fx = instantiate(fx_config).to(
|
| 253 |
|
| 254 |
infer_fx.load_state_dict(vec2dict(vec), strict=False)
|
| 255 |
# fx.apply(partial(clip_delay_eq_Q, Q=0.707))
|
|
|
|
| 47 |
> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
|
| 48 |
"""
|
| 49 |
|
| 50 |
+
# device = "cpu"
|
|
|
|
| 51 |
SLIDER_MAX = 3
|
| 52 |
SLIDER_MIN = -3
|
| 53 |
NUMBER_OF_PCS = 4
|
|
|
|
| 152 |
|
| 153 |
|
| 154 |
def get_embedding_model(embedding: str) -> Callable:
|
| 155 |
+
device = Path("device.txt").read_text()
|
| 156 |
match embedding:
|
| 157 |
case "afx-rep":
|
| 158 |
+
afx_rep = load_param_model().to(device)
|
| 159 |
two_chs_emb_fn = lambda x: get_param_embeds(x, afx_rep, 44100)
|
| 160 |
case "mfcc":
|
| 161 |
+
mfcc = load_mfcc_feature_extractor().to(device)
|
| 162 |
two_chs_emb_fn = lambda x: get_feature_embeds(x, mfcc)
|
| 163 |
case "mir":
|
| 164 |
+
mir = load_mir_feature_extractor().to(device)
|
| 165 |
two_chs_emb_fn = lambda x: get_feature_embeds(x, mir)
|
| 166 |
case _:
|
| 167 |
raise ValueError(f"Unknown encoder: {embedding}")
|
|
|
|
| 190 |
optimiser,
|
| 191 |
lr,
|
| 192 |
):
|
| 193 |
+
device = Path("device.txt").read_text()
|
| 194 |
if method == "Mean":
|
| 195 |
+
return gaussian_params_dict[dataset][0].to(device)
|
| 196 |
|
| 197 |
ref = convert2float(*ref_audio)
|
| 198 |
ref_loudness = meter.integrated_loudness(ref)
|
| 199 |
ref = pyln.normalize.loudness(ref, ref_loudness, -18.0)
|
| 200 |
+
ref = torch.from_numpy(ref).float().T.unsqueeze(0).to(device)
|
| 201 |
|
| 202 |
y = convert2float(*input_audio)
|
| 203 |
loudness = meter.integrated_loudness(y)
|
| 204 |
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 205 |
+
y = torch.from_numpy(y).float().T.unsqueeze(0).to(device)
|
| 206 |
|
| 207 |
if y.shape[1] != 1:
|
| 208 |
y = y.mean(dim=1, keepdim=True)
|
| 209 |
|
| 210 |
+
fx = deepcopy(global_fx).to(device)
|
| 211 |
fx.train()
|
| 212 |
|
| 213 |
two_chs_emb_fn = chain_functions(
|
|
|
|
| 226 |
fx,
|
| 227 |
two_chs_emb_fn,
|
| 228 |
to_fx_state_dict,
|
| 229 |
+
partial(logp_x, *[x.to(device) for x in gaussian_params_dict[dataset]]),
|
| 230 |
+
internal_mean.to(device),
|
| 231 |
ref,
|
| 232 |
y,
|
| 233 |
optimiser_type=optimiser,
|
|
|
|
| 243 |
|
| 244 |
|
| 245 |
def render(y, remove_approx, ratio, vec):
|
| 246 |
+
device = Path("device.txt").read_text()
|
| 247 |
y = convert2float(*y)
|
| 248 |
loudness = meter.integrated_loudness(y)
|
| 249 |
y = pyln.normalize.loudness(y, loudness, -18.0)
|
| 250 |
+
y = torch.from_numpy(y).float().T.unsqueeze(0).to(device)
|
| 251 |
if remove_approx:
|
| 252 |
+
infer_fx = instantiate(rt_config).to(device)
|
| 253 |
else:
|
| 254 |
+
infer_fx = instantiate(fx_config).to(device)
|
| 255 |
|
| 256 |
infer_fx.load_state_dict(vec2dict(vec), strict=False)
|
| 257 |
# fx.apply(partial(clip_delay_eq_Q, Q=0.707))
|