Spaces:
Running
Running
Patch both residual_fsq and finite_scalar_quantization tensor refs
Browse files
FSQ.__init__ also calls .prod().item() on a meta tensor created via
its own 'from torch import tensor'. Patch both VQ module namespaces.
- acestep/handler.py +23 -19
acestep/handler.py
CHANGED
|
@@ -483,29 +483,32 @@ class AceStepHandler:
|
|
| 483 |
if "eager" not in attn_candidates:
|
| 484 |
attn_candidates.append("eager")
|
| 485 |
|
| 486 |
-
# Patch
|
| 487 |
-
# ResidualFSQ
|
| 488 |
-
# module-level reference
|
| 489 |
-
# sets a meta device context that
|
| 490 |
-
#
|
| 491 |
-
#
|
| 492 |
-
#
|
| 493 |
-
#
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
_fsq_patched = False
|
| 497 |
-
_orig_tensor_fn = None
|
| 498 |
try:
|
| 499 |
-
from vector_quantize_pytorch import residual_fsq as
|
| 500 |
-
|
|
|
|
|
|
|
| 501 |
|
| 502 |
def _cpu_tensor(data, *args, **kwargs):
|
| 503 |
kwargs["device"] = "cpu"
|
| 504 |
-
return
|
| 505 |
|
|
|
|
|
|
|
|
|
|
| 506 |
_fsq_mod.tensor = _cpu_tensor
|
| 507 |
-
|
| 508 |
-
logger.info("[initialize_service] Patched
|
| 509 |
except (ImportError, AttributeError):
|
| 510 |
pass
|
| 511 |
|
|
@@ -529,8 +532,9 @@ class AceStepHandler:
|
|
| 529 |
last_attn_error = e
|
| 530 |
logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
|
| 531 |
finally:
|
| 532 |
-
if
|
| 533 |
-
|
|
|
|
| 534 |
|
| 535 |
if self.model is None:
|
| 536 |
raise RuntimeError(
|
|
|
|
| 483 |
if "eager" not in attn_candidates:
|
| 484 |
attn_candidates.append("eager")
|
| 485 |
|
| 486 |
+
# Patch vector_quantize_pytorch to avoid meta tensor failures.
|
| 487 |
+
# Both ResidualFSQ and FSQ use `from torch import tensor`,
|
| 488 |
+
# creating a direct module-level reference. During model init
|
| 489 |
+
# transformers sets a meta device context that redirects all
|
| 490 |
+
# tensor() calls to meta device. FSQ then calls .item() on
|
| 491 |
+
# meta tensors which fails.
|
| 492 |
+
# Fix: replace `tensor` in both module namespaces with a
|
| 493 |
+
# CPU-forcing version during model loading.
|
| 494 |
+
_vq_patched = False
|
| 495 |
+
_vq_originals = {}
|
|
|
|
|
|
|
| 496 |
try:
|
| 497 |
+
from vector_quantize_pytorch import residual_fsq as _rfsq_mod
|
| 498 |
+
from vector_quantize_pytorch import finite_scalar_quantization as _fsq_mod
|
| 499 |
+
|
| 500 |
+
_orig_tensor = _rfsq_mod.tensor
|
| 501 |
|
| 502 |
def _cpu_tensor(data, *args, **kwargs):
|
| 503 |
kwargs["device"] = "cpu"
|
| 504 |
+
return _orig_tensor(data, *args, **kwargs)
|
| 505 |
|
| 506 |
+
_vq_originals["rfsq"] = _rfsq_mod.tensor
|
| 507 |
+
_vq_originals["fsq"] = _fsq_mod.tensor
|
| 508 |
+
_rfsq_mod.tensor = _cpu_tensor
|
| 509 |
_fsq_mod.tensor = _cpu_tensor
|
| 510 |
+
_vq_patched = True
|
| 511 |
+
logger.info("[initialize_service] Patched vector_quantize_pytorch for meta device compat")
|
| 512 |
except (ImportError, AttributeError):
|
| 513 |
pass
|
| 514 |
|
|
|
|
| 532 |
last_attn_error = e
|
| 533 |
logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
|
| 534 |
finally:
|
| 535 |
+
if _vq_patched:
|
| 536 |
+
_rfsq_mod.tensor = _vq_originals["rfsq"]
|
| 537 |
+
_fsq_mod.tensor = _vq_originals["fsq"]
|
| 538 |
|
| 539 |
if self.model is None:
|
| 540 |
raise RuntimeError(
|