Spaces:

Gamahea
/

ACE-Step-Custom

Running on Zero

App Files Community

ACE-Step Custom committed on 5 days ago

Commit

fc9be45

1 Parent(s): aa918f7

Fix: Force CPU device context during model init to prevent meta tensor operations

Browse files

Files changed (1) hide show

acestep/handler.py +21 -16

acestep/handler.py CHANGED Viewed

@@ -483,24 +483,29 @@ class AceStepHandler:
                 if "eager" not in attn_candidates:
                     attn_candidates.append("eager")
                 last_attn_error = None
                 self.model = None
-                for candidate in attn_candidates:
-                    try:
-                        logger.info(f"[initialize_service] Attempting to load model with attention implementation: {candidate}")
-                        self.model = AutoModel.from_pretrained(
-                            acestep_v15_checkpoint_path,
-                            trust_remote_code=True,
-                            attn_implementation=candidate,
-                            torch_dtype=self.dtype,
-                            low_cpu_mem_usage=False,  # Disable memory-efficient weight loading
-                            _fast_init=False,  # Disable fast initialization (prevents meta device)
-                        )
-                        attn_implementation = candidate
-                        break
-                    except Exception as e:
-                        last_attn_error = e
-                        logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
                 if self.model is None:
                     raise RuntimeError(

                 if "eager" not in attn_candidates:
                     attn_candidates.append("eager")
                 last_attn_error = None
                 self.model = None
+                # Use device context to force model initialization on CPU instead of meta device
+                # ACE-Step's ResidualFSQ performs tensor assertions during __init__ that fail on meta device
+                with torch.device("cpu"):
+                    for candidate in attn_candidates:
+                        try:
+                            logger.info(f"[initialize_service] Attempting to load model with attention implementation: {candidate}")
+                            self.model = AutoModel.from_pretrained(
+                                acestep_v15_checkpoint_path,
+                                trust_remote_code=True,
+                                attn_implementation=candidate,
+                                torch_dtype=self.dtype,
+                                low_cpu_mem_usage=False,  # Disable memory-efficient weight loading
+                                _fast_init=False,  # Disable fast initialization
+                            )
+                            attn_implementation = candidate
+                            break
+                        except Exception as e:
+                            last_attn_error = e
+                            logger.warning(f"[initialize_service] Failed to load model with {candidate}: {e}")
                 if self.model is None:
                     raise RuntimeError(