waypoint-1-small

Paused

dn6 HF Staff committed on Jan 21

Commit

6d071e0

1 Parent(s): 8bb9483

update

Files changed (3) hide show

__pycache__/aoti.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/aoti.cpython-310.pyc and b/__pycache__/aoti.cpython-310.pyc differ

aoti.py CHANGED Viewed

@@ -2,13 +2,27 @@ import torch
 from huggingface_hub import hf_hub_download
 from spaces.zero.torch.aoti import ZeroGPUCompiledModel
 from spaces.zero.torch.aoti import ZeroGPUWeights
-from spaces.zero.torch.aoti import drain_module_parameters
-def aoti_load_(module: torch.nn.Module, repo_id: str, filename: str):
     compiled_graph_file = hf_hub_download(repo_id, filename)
-    state_dict = module.state_dict()
-    zerogpu_weights = ZeroGPUWeights({name: weight for name, weight in state_dict.items()})
     compiled = ZeroGPUCompiledModel(compiled_graph_file, zerogpu_weights)
     setattr(module, "forward", compiled)

 from huggingface_hub import hf_hub_download
 from spaces.zero.torch.aoti import ZeroGPUCompiledModel
 from spaces.zero.torch.aoti import ZeroGPUWeights
+def aoti_load_(
+    module: torch.nn.Module,
+    repo_id: str,
+    filename: str,
+    constants_filename: str,
+):
+    """Load an AOT compiled model and replace the module's forward method.
+    Args:
+        module: The module to replace forward with AOT compiled version
+        repo_id: HuggingFace repo ID containing the compiled model
+        filename: Filename of the compiled .pt2 file
+        constants_filename: Filename of the saved constants (from compiled.weights.constants_map)
+    """
     compiled_graph_file = hf_hub_download(repo_id, filename)
+    constants_file = hf_hub_download(repo_id, constants_filename)
+    constants_map = torch.load(constants_file, map_location="cpu", weights_only=True)
+    zerogpu_weights = ZeroGPUWeights(constants_map, to_cuda=True)
     compiled = ZeroGPUCompiledModel(compiled_graph_file, zerogpu_weights)
     setattr(module, "forward", compiled)

app.py CHANGED Viewed

@@ -113,14 +113,23 @@ def create_gpu_game_loop(command_queue: Queue):
         """
         pipe.to("cuda")
         pipe.blocks.sub_blocks['before_denoise'].sub_blocks['setup_kv_cache']._setup_kv_cache(pipe.transformer, pipe.device, torch.bfloat16)
-        pipe.transformer.apply_inference_patches()
         #pipe.transformer.quantize("fp8")
-        #aoti_load_(pipe.transformer, "diffusers-internal-dev/world-engine-aot", "transformer-fp8.pt2")
-        aoti_load_(pipe.transformer, "diffusers-internal-dev/world-engine-aot", "transformer-inference-patch.pt2")
-        pipe.vae.bake_weight_norm()
-        aoti_load_(pipe.vae.encoder, "diffusers-internal-dev/world-engine-aot", "encoder.pt2")
-        aoti_load_(pipe.vae.decoder, "diffusers-internal-dev/world-engine-aot", "decoder.pt2")
         n_frames = pipe.transformer.config.n_frames
         print(f"Model loaded! (n_frames={n_frames})")

         """
         pipe.to("cuda")
         pipe.blocks.sub_blocks['before_denoise'].sub_blocks['setup_kv_cache']._setup_kv_cache(pipe.transformer, pipe.device, torch.bfloat16)
+        #pipe.transformer.apply_inference_patches()
         #pipe.transformer.quantize("fp8")
+        aoti_load_(
+            pipe.transformer,
+            "diffusers-internal-dev/world-engine-aot",
+            "transformer-fp8.pt2",
+            "transformer-fp8-constants.pt"
+        )
+        #aoti_load_(pipe.transformer, "diffusers-internal-dev/world-engine-aot", "transformer-inference-patch.pt2")
+        #pipe.vae.bake_weight_norm()
+        #aoti_load_(pipe.vae.encoder, "diffusers-internal-dev/world-engine-aot", "encoder.pt2")
+        aoti_load_(
+            pipe.vae.decoder,
+            "diffusers-internal-dev/world-engine-aot",
+            "decoder.pt2",
+            "decoder-constants.pt"
+        )
         n_frames = pipe.transformer.config.n_frames
         print(f"Model loaded! (n_frames={n_frames})")