Spaces:
Running
Running
Fix meta tensor: patch residual_fsq module's tensor reference, not torch.tensor
Browse files
ResidualFSQ uses 'from torch import tensor' - a direct module-level
reference that bypasses our torch.tensor patch. Now we replace the
'tensor' name in the residual_fsq module namespace directly, forcing
CPU device for all tensor creation during FSQ init.
- acestep/handler.py +20 -22
acestep/handler.py
CHANGED
|
@@ -483,32 +483,30 @@ class AceStepHandler:
|
|
| 483 |
if "eager" not in attn_candidates:
|
| 484 |
attn_candidates.append("eager")
|
| 485 |
|
| 486 |
-
#
|
| 487 |
-
#
|
| 488 |
-
#
|
| 489 |
-
#
|
| 490 |
-
#
|
| 491 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
_fsq_patched = False
|
|
|
|
| 493 |
try:
|
| 494 |
from vector_quantize_pytorch import residual_fsq as _fsq_mod
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
def
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
kw["device"] = "cpu"
|
| 501 |
-
return _real_torch_tensor(data, *a, **kw)
|
| 502 |
-
torch.tensor = _cpu_tensor
|
| 503 |
-
try:
|
| 504 |
-
_orig_fsq_init(self_fsq, **kwargs)
|
| 505 |
-
finally:
|
| 506 |
-
torch.tensor = _real_torch_tensor
|
| 507 |
|
| 508 |
-
_fsq_mod.
|
| 509 |
_fsq_patched = True
|
| 510 |
-
logger.info("[initialize_service] Patched
|
| 511 |
-
except ImportError:
|
| 512 |
pass
|
| 513 |
|
| 514 |
last_attn_error = None
|
|
@@ -532,7 +530,7 @@ class AceStepHandler:
|
|
| 532 |
logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
|
| 533 |
finally:
|
| 534 |
if _fsq_patched:
|
| 535 |
-
_fsq_mod.
|
| 536 |
|
| 537 |
if self.model is None:
|
| 538 |
raise RuntimeError(
|
|
|
|
| 483 |
if "eager" not in attn_candidates:
|
| 484 |
attn_candidates.append("eager")
|
| 485 |
|
| 486 |
+
# Patch ResidualFSQ to avoid meta tensor failures.
|
| 487 |
+
# ResidualFSQ uses `from torch import tensor` (a direct
|
| 488 |
+
# module-level reference). During model init, transformers
|
| 489 |
+
# sets a meta device context that makes all `tensor()` calls
|
| 490 |
+
# create meta tensors. ResidualFSQ then does:
|
| 491 |
+
# levels_tensor = tensor(levels)
|
| 492 |
+
# assert (levels_tensor > 1).all() # fails on meta
|
| 493 |
+
# Fix: replace the `tensor` name in the residual_fsq module
|
| 494 |
+
# namespace with a CPU-forcing version. All derived operations
|
| 495 |
+
# on CPU tensors stay on CPU, so the assertion works.
|
| 496 |
_fsq_patched = False
|
| 497 |
+
_orig_tensor_fn = None
|
| 498 |
try:
|
| 499 |
from vector_quantize_pytorch import residual_fsq as _fsq_mod
|
| 500 |
+
_orig_tensor_fn = _fsq_mod.tensor
|
| 501 |
+
|
| 502 |
+
def _cpu_tensor(data, *args, **kwargs):
|
| 503 |
+
kwargs["device"] = "cpu"
|
| 504 |
+
return _orig_tensor_fn(data, *args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
+
_fsq_mod.tensor = _cpu_tensor
|
| 507 |
_fsq_patched = True
|
| 508 |
+
logger.info("[initialize_service] Patched residual_fsq.tensor for meta device compat")
|
| 509 |
+
except (ImportError, AttributeError):
|
| 510 |
pass
|
| 511 |
|
| 512 |
last_attn_error = None
|
|
|
|
| 530 |
logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
|
| 531 |
finally:
|
| 532 |
if _fsq_patched:
|
| 533 |
+
_fsq_mod.tensor = _orig_tensor_fn
|
| 534 |
|
| 535 |
if self.model is None:
|
| 536 |
raise RuntimeError(
|