Fabrice-TIERCELIN commited on
Commit
30d1371
·
verified ·
1 Parent(s): 34937e3

loader Hub + delete duplicate

Browse files
Files changed (1) hide show
  1. optimization.py +158 -172
optimization.py CHANGED
@@ -1,173 +1,159 @@
1
- from typing import Any
2
- from typing import Callable
3
- from typing import ParamSpec
4
-
5
- import os
6
- import spaces
7
- import torch
8
- from torch.utils._pytree import tree_map_only
9
- from torchao.quantization import quantize_
10
- from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
11
- from torchao.quantization import Int8WeightOnlyConfig
12
- from huggingface_hub import hf_hub_download
13
-
14
- from io import BytesIO
15
-
16
- from optimization_utils import capture_component_call
17
- from optimization_utils import aoti_compile
18
- from optimization_utils import drain_module_parameters
19
-
20
- # NEW: import classes to rebuild compiled objects
21
- from optimization_utils import ZeroGPUCompiledModel, ZeroGPUWeights
22
-
23
-
24
- P = ParamSpec('P')
25
-
26
- # Expose compiled models so app.py can offer them for download
27
- COMPILED_TRANSFORMER_1 = None
28
- COMPILED_TRANSFORMER_2 = None
29
-
30
- LATENT_FRAMES_DIM = torch.export.Dim('num_latent_frames', min=8, max=81)
31
- LATENT_PATCHED_HEIGHT_DIM = torch.export.Dim('latent_patched_height', min=30, max=52)
32
- LATENT_PATCHED_WIDTH_DIM = torch.export.Dim('latent_patched_width', min=30, max=52)
33
-
34
- TRANSFORMER_DYNAMIC_SHAPES = {
35
- 'hidden_states': {
36
- 2: LATENT_FRAMES_DIM,
37
- 3: 2 * LATENT_PATCHED_HEIGHT_DIM,
38
- 4: 2 * LATENT_PATCHED_WIDTH_DIM,
39
- },
40
- }
41
-
42
- INDUCTOR_CONFIGS = {
43
- 'conv_1x1_as_mm': True,
44
- 'epilogue_fusion': False,
45
- 'coordinate_descent_tuning': True,
46
- 'coordinate_descent_check_all_directions': True,
47
- 'max_autotune': True,
48
- 'triton.cudagraphs': True,
49
- }
50
-
51
-
52
- def _deserialize_zerogpu_aoti(payload: dict[str, Any]) -> ZeroGPUCompiledModel:
53
- """
54
- Rebuild a ZeroGPUCompiledModel from a stable serialized dict produced by
55
- ZeroGPUCompiledModel.to_serializable_dict().
56
- """
57
- if not isinstance(payload, dict):
58
- raise ValueError(f"Expected dict payload, got: {type(payload)}")
59
-
60
- fmt = payload.get("format")
61
- if fmt != "zerogpu_aoti_v1":
62
- raise ValueError(f"Unsupported payload format: {fmt!r}")
63
-
64
- archive_bytes = payload.get("archive_bytes")
65
- constants_map = payload.get("constants_map")
66
-
67
- if not isinstance(archive_bytes, (bytes, bytearray)):
68
- raise ValueError("payload['archive_bytes'] must be bytes")
69
- if not isinstance(constants_map, dict):
70
- raise ValueError("payload['constants_map'] must be a dict of tensors")
71
-
72
- # Recreate in-memory archive file (what aoti_load_package expects)
73
- archive_file = BytesIO(archive_bytes)
74
-
75
- # Ensure constants are CPU tensors (ZeroGPUWeights will pin/copy for runtime)
76
- constants_map = {k: v.detach().to("cpu") for k, v in constants_map.items()}
77
-
78
- weights = ZeroGPUWeights(constants_map, to_cuda=False)
79
- return ZeroGPUCompiledModel(archive_file, weights)
80
-
81
-
82
- def load_compiled_transformers_from_hub(
83
- repo_id: str,
84
- filename_1: str = "compiled_transformer_1.pt",
85
- filename_2: str = "compiled_transformer_2.pt",
86
- ):
87
- """
88
- Charge les artefacts précompilés depuis le Hub.
89
-
90
- IMPORTANT: les fichiers .pt doivent contenir le dict sérialisé produit par
91
- ZeroGPUCompiledModel.to_serializable_dict() (format "zerogpu_aoti_v1").
92
- """
93
- path_1 = hf_hub_download(repo_id=repo_id, filename=filename_1)
94
- path_2 = hf_hub_download(repo_id=repo_id, filename=filename_2)
95
-
96
- payload_1 = torch.load(path_1, map_location="cpu", weights_only=False)
97
- payload_2 = torch.load(path_2, map_location="cpu", weights_only=False)
98
-
99
- compiled_1 = _deserialize_zerogpu_aoti(payload_1)
100
- compiled_2 = _deserialize_zerogpu_aoti(payload_2)
101
-
102
- return compiled_1, compiled_2
103
-
104
-
105
- def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
106
- global COMPILED_TRANSFORMER_1, COMPILED_TRANSFORMER_2
107
-
108
- @spaces.GPU(duration=1500)
109
- def compile_transformer():
110
- pipeline.load_lora_weights(
111
- "Kijai/WanVideo_comfy",
112
- weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
113
- adapter_name="lightx2v",
114
- )
115
- kwargs_lora = {"load_into_transformer_2": True}
116
- pipeline.load_lora_weights(
117
- "Kijai/WanVideo_comfy",
118
- weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
119
- adapter_name="lightx2v_2",
120
- **kwargs_lora,
121
- )
122
- pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
123
- pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
124
- pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
125
- pipeline.unload_lora_weights()
126
-
127
- with capture_component_call(pipeline, "transformer") as call:
128
- pipeline(*args, **kwargs)
129
-
130
- dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
131
- dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
132
-
133
- quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
134
- quantize_(pipeline.transformer_2, Float8DynamicActivationFloat8WeightConfig())
135
-
136
- exported_1 = torch.export.export(
137
- mod=pipeline.transformer,
138
- args=call.args,
139
- kwargs=call.kwargs,
140
- dynamic_shapes=dynamic_shapes,
141
- )
142
- exported_2 = torch.export.export(
143
- mod=pipeline.transformer_2,
144
- args=call.args,
145
- kwargs=call.kwargs,
146
- dynamic_shapes=dynamic_shapes,
147
- )
148
-
149
- compiled_1 = aoti_compile(exported_1, INDUCTOR_CONFIGS)
150
- compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
151
- return compiled_1, compiled_2
152
-
153
- quantize_(pipeline.text_encoder, Int8WeightOnlyConfig())
154
-
155
- use_precompiled = False
156
- precompiled_repo = "Fabrice-TIERCELIN/Wan_2.2_compiled"
157
-
158
- if use_precompiled:
159
- compiled_transformer_1, compiled_transformer_2 = load_compiled_transformers_from_hub(
160
- repo_id=precompiled_repo
161
- )
162
- else:
163
- compiled_transformer_1, compiled_transformer_2 = compile_transformer()
164
-
165
- # expose for downloads
166
- COMPILED_TRANSFORMER_1 = compiled_transformer_1
167
- COMPILED_TRANSFORMER_2 = compiled_transformer_2
168
-
169
- pipeline.transformer.forward = compiled_transformer_1
170
- drain_module_parameters(pipeline.transformer)
171
-
172
- pipeline.transformer_2.forward = compiled_transformer_2
173
  drain_module_parameters(pipeline.transformer_2)
 
1
+ """
2
+ """
3
+
4
+ from typing import Any
5
+ from typing import Callable
6
+ from typing import ParamSpec
7
+
8
+ import os
9
+ import spaces
10
+ import torch
11
+ from torch.utils._pytree import tree_map_only
12
+ from torchao.quantization import quantize_
13
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
14
+ from torchao.quantization import Int8WeightOnlyConfig
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ from optimization_utils import capture_component_call
18
+ from optimization_utils import aoti_compile
19
+ from optimization_utils import drain_module_parameters
20
+ from optimization_utils import zerogpu_compiled_from_serializable_dict
21
+ from optimization_utils import ZeroGPUCompiledModel
22
+
23
+
24
+ P = ParamSpec('P')
25
+
26
+ LATENT_FRAMES_DIM = torch.export.Dim('num_latent_frames', min=8, max=81)
27
+ LATENT_PATCHED_HEIGHT_DIM = torch.export.Dim('latent_patched_height', min=30, max=52)
28
+ LATENT_PATCHED_WIDTH_DIM = torch.export.Dim('latent_patched_width', min=30, max=52)
29
+
30
+ TRANSFORMER_DYNAMIC_SHAPES = {
31
+ 'hidden_states': {
32
+ 2: LATENT_FRAMES_DIM,
33
+ 3: 2 * LATENT_PATCHED_HEIGHT_DIM,
34
+ 4: 2 * LATENT_PATCHED_WIDTH_DIM,
35
+ },
36
+ }
37
+
38
+ INDUCTOR_CONFIGS = {
39
+ 'conv_1x1_as_mm': True,
40
+ 'epilogue_fusion': False,
41
+ 'coordinate_descent_tuning': True,
42
+ 'coordinate_descent_check_all_directions': True,
43
+ 'max_autotune': True,
44
+ 'triton.cudagraphs': True,
45
+ }
46
+
47
+
48
+ def _strtobool(v: str | None, default: bool = True) -> bool:
49
+ if v is None:
50
+ return default
51
+ return v.strip().lower() in ("1", "true", "yes", "y", "on")
52
+
53
+
54
def _load_compiled_pt(path: str):
    """Deserialize a compiled-transformer artifact stored at *path*.

    Two on-disk formats are accepted:
      * the stable dict produced by ``to_serializable_dict()``
        (``payload["format"] == "zerogpu_aoti_v1"``), or
      * a legacy direct pickle of a ``ZeroGPUCompiledModel``.

    Raises ValueError for anything else.
    """
    # weights_only=False: artifacts may contain pickled project objects.
    # NOTE(review): only load artifacts from trusted repos — this is pickle.
    obj = torch.load(path, map_location="cpu", weights_only=False)

    # Current format: a plain dict payload with an explicit format tag.
    if isinstance(obj, dict) and obj.get("format") == "zerogpu_aoti_v1":
        return zerogpu_compiled_from_serializable_dict(obj)

    # Legacy format: the compiled model object was pickled directly.
    if isinstance(obj, ZeroGPUCompiledModel):
        return obj

    dict_keys = list(obj.keys()) if isinstance(obj, dict) else None
    raise ValueError(
        f"Unsupported compiled transformer file format at {path}. "
        f"Got type={type(obj)} keys={dict_keys}"
    )
74
+
75
+
76
def load_compiled_transformers_from_hub(
    repo_id: str,
    filename_1: str = "compiled_transformer_1.pt",
    filename_2: str = "compiled_transformer_2.pt",
):
    """Download and deserialize both precompiled transformers from the Hub.

    IMPORTANT:
        The expected files are either the dict exported via
        ``to_serializable_dict()`` (format ``'zerogpu_aoti_v1'``) or a direct
        pickle of ``ZeroGPUCompiledModel``.
    """
    local_paths = [
        hf_hub_download(repo_id=repo_id, filename=name)
        for name in (filename_1, filename_2)
    ]
    compiled_1, compiled_2 = (_load_compiled_pt(p) for p in local_paths)
    return compiled_1, compiled_2
94
+
95
+
96
def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Quantize and AoT-compile the pipeline's two transformers, in place.

    By default the precompiled artifacts are fetched from the Hub; set the
    ``WAN_USE_PRECOMPILED`` env var to a falsy value to run the full
    fuse/quantize/export/compile path on GPU instead. In both cases the
    compiled callables replace ``transformer.forward`` and
    ``transformer_2.forward``, and the original module parameters are
    drained to free memory.

    Args:
        pipeline: The video pipeline to optimize (mutated in place).
        *args, **kwargs: A representative pipeline call used to trace the
            transformer inputs when compiling from scratch.
    """

    @spaces.GPU(duration=1500)
    def compile_transformer():
        # Fuse the Lightx2v step-distill LoRA into both transformers so the
        # exported graphs already contain the fused weights.
        pipeline.load_lora_weights(
            "Kijai/WanVideo_comfy",
            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
            adapter_name="lightx2v",
        )
        kwargs_lora = {"load_into_transformer_2": True}
        pipeline.load_lora_weights(
            "Kijai/WanVideo_comfy",
            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
            adapter_name="lightx2v_2",
            **kwargs_lora,
        )
        pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
        pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
        pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
        pipeline.unload_lora_weights()

        # Run one real pipeline call to capture the exact args/kwargs the
        # transformer receives; these become the export example inputs.
        with capture_component_call(pipeline, "transformer") as call:
            pipeline(*args, **kwargs)

        # Mark every captured tensor/bool kwarg static (None), then overlay
        # the latent dims that must remain dynamic across resolutions.
        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

        quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
        quantize_(pipeline.transformer_2, Float8DynamicActivationFloat8WeightConfig())

        exported_1 = torch.export.export(
            mod=pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )
        exported_2 = torch.export.export(
            mod=pipeline.transformer_2,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        compiled_1 = aoti_compile(exported_1, INDUCTOR_CONFIGS)
        compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
        return compiled_1, compiled_2

    # Text encoder quantization (same on both paths).
    quantize_(pipeline.text_encoder, Int8WeightOnlyConfig())

    # Both knobs are env-overridable; defaults preserve prior behavior
    # (precompiled artifacts from the default repo). This also puts the
    # _strtobool helper to use instead of leaving it dead code.
    use_precompiled = _strtobool(os.getenv("WAN_USE_PRECOMPILED"), default=True)
    precompiled_repo = os.getenv("WAN_PRECOMPILED_REPO", "Fabrice-TIERCELIN/Wan_2.2_compiled")

    if use_precompiled:
        compiled_transformer_1, compiled_transformer_2 = load_compiled_transformers_from_hub(
            repo_id=precompiled_repo
        )
    else:
        compiled_transformer_1, compiled_transformer_2 = compile_transformer()

    # Swap in the compiled callables and release the now-unused parameters.
    pipeline.transformer.forward = compiled_transformer_1
    drain_module_parameters(pipeline.transformer)

    pipeline.transformer_2.forward = compiled_transformer_2
    drain_module_parameters(pipeline.transformer_2)