Spaces:

adowu
/

ACE-Step-Training

Sleeping

App Files Files Community

pedroapfilho committed on Feb 11

Commit

011d30d

unverified ·

1 Parent(s): 0c47532

Patch both residual_fsq and finite_scalar_quantization tensor refs

Browse files

FSQ.__init__ also calls .prod().item() on a meta tensor created via
its own 'from torch import tensor'. Patch both VQ module namespaces.

Files changed (1) hide show

acestep/handler.py +23 -19

acestep/handler.py CHANGED Viewed

@@ -483,29 +483,32 @@ class AceStepHandler:
                 if "eager" not in attn_candidates:
                     attn_candidates.append("eager")
-                # Patch ResidualFSQ to avoid meta tensor failures.
-                # ResidualFSQ uses `from torch import tensor` (a direct
-                # module-level reference). During model init, transformers
-                # sets a meta device context that makes all `tensor()` calls
-                # create meta tensors. ResidualFSQ then does:
-                #   levels_tensor = tensor(levels)
-                #   assert (levels_tensor > 1).all()  # fails on meta
-                # Fix: replace the `tensor` name in the residual_fsq module
-                # namespace with a CPU-forcing version. All derived operations
-                # on CPU tensors stay on CPU, so the assertion works.
-                _fsq_patched = False
-                _orig_tensor_fn = None
                 try:
-                    from vector_quantize_pytorch import residual_fsq as _fsq_mod
-                    _orig_tensor_fn = _fsq_mod.tensor
                     def _cpu_tensor(data, *args, **kwargs):
                         kwargs["device"] = "cpu"
-                        return _orig_tensor_fn(data, *args, **kwargs)
                     _fsq_mod.tensor = _cpu_tensor
-                    _fsq_patched = True
-                    logger.info("[initialize_service] Patched residual_fsq.tensor for meta device compat")
                 except (ImportError, AttributeError):
                     pass
@@ -529,8 +532,9 @@ class AceStepHandler:
                             last_attn_error = e
                             logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
                 finally:
-                    if _fsq_patched:
-                        _fsq_mod.tensor = _orig_tensor_fn
                 if self.model is None:
                     raise RuntimeError(

                 if "eager" not in attn_candidates:
                     attn_candidates.append("eager")
+                # Patch vector_quantize_pytorch to avoid meta tensor failures.
+                # Both ResidualFSQ and FSQ use `from torch import tensor`,
+                # creating a direct module-level reference. During model init
+                # transformers sets a meta device context that redirects all
+                # tensor() calls to the meta device. FSQ then calls .item() on
+                # meta tensors, which fails.
+                # Fix: replace `tensor` in both module namespaces with a
+                # CPU-forcing version during model loading.
+                _vq_patched = False
+                _vq_originals = {}
                 try:
+                    from vector_quantize_pytorch import residual_fsq as _rfsq_mod
+                    from vector_quantize_pytorch import finite_scalar_quantization as _fsq_mod
+                    _orig_tensor = _rfsq_mod.tensor
                     def _cpu_tensor(data, *args, **kwargs):
                         kwargs["device"] = "cpu"
+                        return _orig_tensor(data, *args, **kwargs)
+                    _vq_originals["rfsq"] = _rfsq_mod.tensor
+                    _vq_originals["fsq"] = _fsq_mod.tensor
+                    _rfsq_mod.tensor = _cpu_tensor
                     _fsq_mod.tensor = _cpu_tensor
+                    _vq_patched = True
+                    logger.info("[initialize_service] Patched vector_quantize_pytorch for meta device compat")
                 except (ImportError, AttributeError):
                     pass
                             last_attn_error = e
                             logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
                 finally:
+                    if _vq_patched:
+                        _rfsq_mod.tensor = _vq_originals["rfsq"]
+                        _fsq_mod.tensor = _vq_originals["fsq"]
                 if self.model is None:
                     raise RuntimeError(