pedroapfilho committed on
Commit
bf4c82d
·
unverified ·
1 Parent(s): b0a0560

Fix meta tensor: monkey-patch ResidualFSQ to force CPU tensor creation

Browse files

ZeroGPU's __torch_function__ hooks redirect ALL tensor creation to
meta device, even with torch.device('cpu') context. The fix patches
torch.tensor within ResidualFSQ.__init__ to explicitly pass device='cpu',
so the levels assertion runs on real CPU tensors instead of meta tensors.

Files changed (1) hide show
  1. acestep/handler.py +40 -12
acestep/handler.py CHANGED
@@ -483,15 +483,40 @@ class AceStepHandler:
483
  if "eager" not in attn_candidates:
484
  attn_candidates.append("eager")
485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  last_attn_error = None
487
  self.model = None
488
- for candidate in attn_candidates:
489
- try:
490
- logger.info(f"[initialize_service] Attempting to load model with attention implementation: {candidate}")
491
- # Force CPU device context to override ZeroGPU's meta device
492
- # redirection. ResidualFSQ asserts on tensor values during
493
- # __init__, which fails on meta tensors.
494
- with torch.device("cpu"):
495
  self.model = AutoModel.from_pretrained(
496
  acestep_v15_checkpoint_path,
497
  trust_remote_code=True,
@@ -500,11 +525,14 @@ class AceStepHandler:
500
  low_cpu_mem_usage=False,
501
  _fast_init=False,
502
  )
503
- attn_implementation = candidate
504
- break
505
- except Exception as e:
506
- last_attn_error = e
507
- logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
 
 
 
508
 
509
  if self.model is None:
510
  raise RuntimeError(
 
483
  if "eager" not in attn_candidates:
484
  attn_candidates.append("eager")
485
 
486
+ # Monkey-patch ResidualFSQ to handle ZeroGPU meta tensors.
487
+ # ZeroGPU redirects tensor creation to meta device via
488
+ # __torch_function__. ResidualFSQ.__init__ does
489
+ # `assert (torch.tensor(levels) > 1).all()` which fails
490
+ # because .all() can't evaluate meta tensors.
491
+ # Fix: force torch.tensor calls in FSQ init to use CPU.
492
+ _fsq_patched = False
493
+ try:
494
+ from vector_quantize_pytorch import residual_fsq as _fsq_mod
495
+ _orig_fsq_init = _fsq_mod.ResidualFSQ.__init__
496
+
497
+ def _meta_safe_fsq_init(self_fsq, **kwargs):
498
+ _real_torch_tensor = torch.tensor
499
+ def _cpu_tensor(data, *a, **kw):
500
+ kw["device"] = "cpu"
501
+ return _real_torch_tensor(data, *a, **kw)
502
+ torch.tensor = _cpu_tensor
503
+ try:
504
+ _orig_fsq_init(self_fsq, **kwargs)
505
+ finally:
506
+ torch.tensor = _real_torch_tensor
507
+
508
+ _fsq_mod.ResidualFSQ.__init__ = _meta_safe_fsq_init
509
+ _fsq_patched = True
510
+ logger.info("[initialize_service] Patched ResidualFSQ for ZeroGPU compat")
511
+ except ImportError:
512
+ pass
513
+
514
  last_attn_error = None
515
  self.model = None
516
+ try:
517
+ for candidate in attn_candidates:
518
+ try:
519
+ logger.info(f"[initialize_service] Attempting to load model with attention implementation: {candidate}")
 
 
 
520
  self.model = AutoModel.from_pretrained(
521
  acestep_v15_checkpoint_path,
522
  trust_remote_code=True,
 
525
  low_cpu_mem_usage=False,
526
  _fast_init=False,
527
  )
528
+ attn_implementation = candidate
529
+ break
530
+ except Exception as e:
531
+ last_attn_error = e
532
+ logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
533
+ finally:
534
+ if _fsq_patched:
535
+ _fsq_mod.ResidualFSQ.__init__ = _orig_fsq_init
536
 
537
  if self.model is None:
538
  raise RuntimeError(