Spaces:
Running
Running
Fix meta tensor device mismatch by also patching torch.arange
FSQ.__init__ calls torch.arange() through the torch module to build
its implicit codebook. The meta device context set by newer transformers
(which ignores low_cpu_mem_usage=False) redirects this to meta device,
causing a device mismatch with our CPU-forced _levels buffer.
Patch torch.arange globally during model loading to force device="cpu",
alongside the existing tensor() patches for both VQ module namespaces.
- acestep/handler.py +26 -8
acestep/handler.py (CHANGED, +26 −8)

@@ -484,13 +484,22 @@ class AceStepHandler:
         attn_candidates.append("eager")

         # Patch vector_quantize_pytorch to avoid meta tensor failures.
-        #
-        #
-        #
-        #
-        #
-        #
-        #
+        #
+        # Newer transformers ignores low_cpu_mem_usage/fast_init and
+        # always wraps model __init__ in `with torch.device("meta"):`,
+        # which redirects ALL factory functions to meta device.
+        #
+        # vector_quantize_pytorch's FSQ and ResidualFSQ do real
+        # computation during __init__ (assertions, codebook building)
+        # that is incompatible with meta tensors.
+        #
+        # We patch two layers:
+        # 1. `tensor` in both VQ module namespaces (they use
+        #    `from torch import tensor`, a direct reference)
+        # 2. `torch.arange` globally (FSQ calls `torch.arange()`
+        #    through the torch module to build its implicit codebook)
+        #
+        # Both patches force device="cpu", then we restore after loading.
         _vq_patched = False
         _vq_originals = {}
         try:
@@ -498,17 +507,25 @@ class AceStepHandler:
             from vector_quantize_pytorch import finite_scalar_quantization as _fsq_mod

             _orig_tensor = _rfsq_mod.tensor
+            _orig_arange = torch.arange

             def _cpu_tensor(data, *args, **kwargs):
                 kwargs["device"] = "cpu"
                 return _orig_tensor(data, *args, **kwargs)

+            def _cpu_arange(*args, **kwargs):
+                if "device" not in kwargs:
+                    kwargs["device"] = "cpu"
+                return _orig_arange(*args, **kwargs)
+
             _vq_originals["rfsq"] = _rfsq_mod.tensor
             _vq_originals["fsq"] = _fsq_mod.tensor
+            _vq_originals["arange"] = _orig_arange
             _rfsq_mod.tensor = _cpu_tensor
             _fsq_mod.tensor = _cpu_tensor
+            torch.arange = _cpu_arange
             _vq_patched = True
-            logger.info("[initialize_service] Patched vector_quantize_pytorch for meta device compat")
+            logger.info("[initialize_service] Patched vector_quantize_pytorch + torch.arange for meta device compat")
         except (ImportError, AttributeError):
             pass

@@ -535,6 +552,7 @@ class AceStepHandler:
         if _vq_patched:
             _rfsq_mod.tensor = _vq_originals["rfsq"]
             _fsq_mod.tensor = _vq_originals["fsq"]
+            torch.arange = _vq_originals["arange"]

         if self.model is None:
             raise RuntimeError(