Spaces:

LocalAI-io
/

LocalVQE-demo

Running

richiejp committed on Apr 27

Commit

6e0a6e4

verified ·

1 Parent(s): 3a7d44e

Initial upload: LocalVQE demo Space

Files changed (4) hide show

app.py CHANGED Viewed

@@ -36,11 +36,11 @@ def _build_model() -> LocalVQE:
     del peek
     model = LocalVQE.from_config(cfg).to("cpu")
     load_checkpoint(ckpt_path, model)
-    # Bake the trained AlignBlock softmax temperature into the smoothing
-    # conv weights — checkpoints don't persist the temperature scalar, so
-    # without this the model runs at the default 1.0 instead of the
-    # trained 0.1, costing several dB of FE-ST ERLE on real recordings.
-    model.align.fold_temperature(cfg.model.align_temp_end)
     model.eval()
     n_params = sum(p.numel() for p in model.parameters())
     print(f"LocalVQE loaded: {n_params:,} params from {ckpt_path}")

     del peek
     model = LocalVQE.from_config(cfg).to("cpu")
     load_checkpoint(ckpt_path, model)
+    # Fold the trained AlignBlock softmax temperature (carried in the
+    # checkpoint as a buffer) into the smoothing conv weights — without
+    # this the model runs at the default 1.0 instead of the trained value
+    # and loses several dB of FE-ST ERLE on real recordings.
+    model.align.fold_temperature()
     model.eval()
     n_params = sum(p.numel() for p in model.parameters())
     print(f"LocalVQE loaded: {n_params:,} params from {ckpt_path}")

localvqe_model/align.py CHANGED Viewed

@@ -20,7 +20,9 @@ class AlignBlock(nn.Module):
         self.in_channels = in_channels
         self.hidden_channels = hidden_channels
         self.dmax = dmax
-        self.temperature = temperature
         # Pointwise projections for Q and K
         self.pconv_mic = nn.Conv2d(in_channels, hidden_channels, 1)
@@ -39,21 +41,18 @@ class AlignBlock(nn.Module):
         )
     def fold_temperature(self, temperature=None):
-        """Bake a softmax temperature into the smoothing-conv weights.
-        After this, forward() at self.temperature=1.0 is mathematically
-        equivalent to running with the given temperature on the original
-        weights. Used at inference-load time so PyTorch eval and the GGML
-        graph (whose softmax has no temperature parameter) both produce
-        the trained-temperature distribution.
         """
-        t = temperature if temperature is not None else self.temperature
         if t == 1.0:
             return
         with torch.no_grad():
             self.conv[1].weight.div_(t)
             self.conv[1].bias.div_(t)
-        self.temperature = 1.0
     def forward(self, x_mic, x_ref, return_delay=False):
         """

         self.in_channels = in_channels
         self.hidden_channels = hidden_channels
         self.dmax = dmax
+        # Registered as a buffer so the trained value persists in state_dict.
+        # Mutate via .fill_(), never re-assign.
+        self.register_buffer("temperature", torch.tensor(float(temperature)))
         # Pointwise projections for Q and K
         self.pconv_mic = nn.Conv2d(in_channels, hidden_channels, 1)
         )
     def fold_temperature(self, temperature=None):
+        """Bake the AlignBlock softmax temperature into the smoothing-conv
+        weights. Reads `self.temperature` (a buffer carried by the
+        checkpoint) when called with no argument; after folding the buffer
+        is reset to 1.0 so subsequent no-argument calls are no-ops.
         """
+        t = float(temperature if temperature is not None else self.temperature)
         if t == 1.0:
             return
         with torch.no_grad():
             self.conv[1].weight.div_(t)
             self.conv[1].bias.div_(t)
+            self.temperature.fill_(1.0)
     def forward(self, x_mic, x_ref, return_delay=False):
         """

localvqe_model/config.py CHANGED Viewed

@@ -11,7 +11,6 @@ class ModelConfig:
     power_law_c: float = 0.3
     kernel_size: Tuple[int, int] = (4, 4)
     bottleneck_hidden: int = 0
-    align_temp_end: float = 0.1
 @dataclass

     power_law_c: float = 0.3
     kernel_size: Tuple[int, int] = (4, 4)
     bottleneck_hidden: int = 0
 @dataclass

localvqe_model/utils.py CHANGED Viewed

@@ -28,5 +28,8 @@ def load_checkpoint(path, model):
     state = ckpt["model_state_dict"]
     state = {k.removeprefix("_orig_mod."): v for k, v in state.items()}
     state.pop("decoder._overlap_count", None)
     _unwrap(model).load_state_dict(state)
     return ckpt["epoch"], ckpt.get("loss")

     state = ckpt["model_state_dict"]
     state = {k.removeprefix("_orig_mod."): v for k, v in state.items()}
     state.pop("decoder._overlap_count", None)
+    # Pre-buffer checkpoints lack align.temperature; default to 1.0.
+    if "align.temperature" not in state:
+        state["align.temperature"] = torch.tensor(1.0)
     _unwrap(model).load_state_dict(state)
     return ckpt["epoch"], ckpt.get("loss")