pedroapfilho committed on
Commit
011d30d
·
unverified ·
1 Parent(s): 0c47532

Patch both residual_fsq and finite_scalar_quantization tensor refs

Browse files

FSQ.__init__ also calls .prod().item() on a meta tensor created via
its own 'from torch import tensor'. Patch both VQ module namespaces.

Files changed (1) hide show
  1. acestep/handler.py +23 -19
acestep/handler.py CHANGED
@@ -483,29 +483,32 @@ class AceStepHandler:
483
  if "eager" not in attn_candidates:
484
  attn_candidates.append("eager")
485
 
486
- # Patch ResidualFSQ to avoid meta tensor failures.
487
- # ResidualFSQ uses `from torch import tensor` (a direct
488
- # module-level reference). During model init, transformers
489
- # sets a meta device context that makes all `tensor()` calls
490
- # create meta tensors. ResidualFSQ then does:
491
- # levels_tensor = tensor(levels)
492
- # assert (levels_tensor > 1).all() # fails on meta
493
- # Fix: replace the `tensor` name in the residual_fsq module
494
- # namespace with a CPU-forcing version. All derived operations
495
- # on CPU tensors stay on CPU, so the assertion works.
496
- _fsq_patched = False
497
- _orig_tensor_fn = None
498
  try:
499
- from vector_quantize_pytorch import residual_fsq as _fsq_mod
500
- _orig_tensor_fn = _fsq_mod.tensor
 
 
501
 
502
  def _cpu_tensor(data, *args, **kwargs):
503
  kwargs["device"] = "cpu"
504
- return _orig_tensor_fn(data, *args, **kwargs)
505
 
 
 
 
506
  _fsq_mod.tensor = _cpu_tensor
507
- _fsq_patched = True
508
- logger.info("[initialize_service] Patched residual_fsq.tensor for meta device compat")
509
  except (ImportError, AttributeError):
510
  pass
511
 
@@ -529,8 +532,9 @@ class AceStepHandler:
529
  last_attn_error = e
530
  logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
531
  finally:
532
- if _fsq_patched:
533
- _fsq_mod.tensor = _orig_tensor_fn
 
534
 
535
  if self.model is None:
536
  raise RuntimeError(
 
483
  if "eager" not in attn_candidates:
484
  attn_candidates.append("eager")
485
 
486
+ # Patch vector_quantize_pytorch to avoid meta tensor failures.
487
+ # Both ResidualFSQ and FSQ use `from torch import tensor`,
488
+ # creating a direct module-level reference. During model init
489
+ # transformers sets a meta device context that redirects all
490
+ # tensor() calls to meta device. FSQ then calls .item() on
491
+ # meta tensors which fails.
492
+ # Fix: replace `tensor` in both module namespaces with a
493
+ # CPU-forcing version during model loading.
494
+ _vq_patched = False
495
+ _vq_originals = {}
 
 
496
  try:
497
+ from vector_quantize_pytorch import residual_fsq as _rfsq_mod
498
+ from vector_quantize_pytorch import finite_scalar_quantization as _fsq_mod
499
+
500
+ _orig_tensor = _rfsq_mod.tensor
501
 
502
  def _cpu_tensor(data, *args, **kwargs):
503
  kwargs["device"] = "cpu"
504
+ return _orig_tensor(data, *args, **kwargs)
505
 
506
+ _vq_originals["rfsq"] = _rfsq_mod.tensor
507
+ _vq_originals["fsq"] = _fsq_mod.tensor
508
+ _rfsq_mod.tensor = _cpu_tensor
509
  _fsq_mod.tensor = _cpu_tensor
510
+ _vq_patched = True
511
+ logger.info("[initialize_service] Patched vector_quantize_pytorch for meta device compat")
512
  except (ImportError, AttributeError):
513
  pass
514
 
 
532
  last_attn_error = e
533
  logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
534
  finally:
535
+ if _vq_patched:
536
+ _rfsq_mod.tensor = _vq_originals["rfsq"]
537
+ _fsq_mod.tensor = _vq_originals["fsq"]
538
 
539
  if self.model is None:
540
  raise RuntimeError(