Update README.md
Browse files

README.md — CHANGED

@@ -51,7 +51,7 @@ Folder Structure

# Blockers

1) NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

-
```python
@staticmethod
def _default_build(
```

@@ -160,6 +160,138 @@ Loading pipeline components...: 100%|██████████████

2) KeyError: <class 'diffusers.models.transformers.transformer_flux.FluxAttention'>
https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformers/transformer_flux.py#L266

https://github.com/nunchaku-tech/deepcompressor/blob/main/deepcompressor/nn/struct/attn.py
# Blockers

1) NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

Potential fix: app.diffusion.pipeline.config.py

```python
@staticmethod
def _default_build(
    ...
```

2) KeyError: <class 'diffusers.models.transformers.transformer_flux.FluxAttention'>

Potential fix: app.diffusion.nn.struct.py

```python
@staticmethod
def _default_construct(
    module: Attention,
    /,
    parent: tp.Optional["DiffusionTransformerBlockStruct"] = None,
    fname: str = "",
    rname: str = "",
    rkey: str = "",
    idx: int = 0,
    **kwargs,
) -> "DiffusionAttentionStruct":
    if isinstance(module, FluxAttention):
        # FluxAttention has different attribute names than standard attention
        with_rope = True
        num_query_heads = module.heads  # FluxAttention uses 'heads', not 'num_heads'
        num_key_value_heads = module.heads  # FLUX typically uses same for q/k/v

        # FluxAttention doesn't have 'to_out', but may have other output projections
        # Check what output projection attributes actually exist
        o_proj = None
        o_proj_rname = ""

        # Try to find the correct output projection
        if hasattr(module, 'to_out') and module.to_out is not None:
            o_proj = module.to_out[0] if isinstance(module.to_out, (list, tuple)) else module.to_out
            o_proj_rname = "to_out.0" if isinstance(module.to_out, (list, tuple)) else "to_out"
        elif hasattr(module, 'to_add_out'):
            o_proj = module.to_add_out
            o_proj_rname = "to_add_out"

        q_proj, k_proj, v_proj = module.to_q, module.to_k, module.to_v
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "to_k", "to_v"
        q, k, v = module.to_q, module.to_k, module.to_v
        q_rname, k_rname, v_rname = "to_q", "to_k", "to_v"

        # Handle the add_* projections that FluxAttention has
        add_q_proj = getattr(module, "add_q_proj", None)
        add_k_proj = getattr(module, "add_k_proj", None)
        add_v_proj = getattr(module, "add_v_proj", None)
        add_o_proj = getattr(module, "to_add_out", None)
        add_q_proj_rname = "add_q_proj" if add_q_proj else ""
        add_k_proj_rname = "add_k_proj" if add_k_proj else ""
        add_v_proj_rname = "add_v_proj" if add_v_proj else ""
        add_o_proj_rname = "to_add_out" if add_o_proj else ""

        kwargs = (
            "encoder_hidden_states",
            "attention_mask",
            "image_rotary_emb",
        )
        cross_attention = add_k_proj is not None
    elif module.is_cross_attention:
        q_proj, k_proj, v_proj = module.to_q, None, None
        add_q_proj, add_k_proj, add_v_proj, add_o_proj = None, module.to_k, module.to_v, None
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "", ""
        add_q_proj_rname, add_k_proj_rname, add_v_proj_rname, add_o_proj_rname = "", "to_k", "to_v", ""
    else:
        q_proj, k_proj, v_proj = module.to_q, module.to_k, module.to_v
        add_q_proj = getattr(module, "add_q_proj", None)
        add_k_proj = getattr(module, "add_k_proj", None)
        add_v_proj = getattr(module, "add_v_proj", None)
        add_o_proj = getattr(module, "to_add_out", None)
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "to_k", "to_v"
        add_q_proj_rname, add_k_proj_rname, add_v_proj_rname = "add_q_proj", "add_k_proj", "add_v_proj"
        add_o_proj_rname = "to_add_out"
    if getattr(module, "to_out", None) is not None:
        o_proj = module.to_out[0]
        o_proj_rname = "to_out.0"
        assert isinstance(o_proj, nn.Linear)
    elif parent is not None:
        assert isinstance(parent.module, FluxSingleTransformerBlock)
        assert isinstance(parent.module.proj_out, ConcatLinear)
        assert len(parent.module.proj_out.linears) == 2
        o_proj = parent.module.proj_out.linears[0]
        o_proj_rname = ".proj_out.linears.0"
    else:
        raise RuntimeError("Cannot find the output projection.")
    if isinstance(module.processor, DiffusionAttentionProcessor):
        with_rope = module.processor.rope is not None
    elif module.processor.__class__.__name__.startswith("Flux"):
        with_rope = True
    else:
        with_rope = False  # TODO: fix for other processors
    config = AttentionConfigStruct(
        hidden_size=q_proj.weight.shape[1],
        add_hidden_size=add_k_proj.weight.shape[1] if add_k_proj is not None else 0,
        inner_size=q_proj.weight.shape[0],
        num_query_heads=module.heads,
        num_key_value_heads=module.to_k.weight.shape[0] // (module.to_q.weight.shape[0] // module.heads),
        with_qk_norm=module.norm_q is not None,
        with_rope=with_rope,
        linear_attn=isinstance(module.processor, SanaLinearAttnProcessor2_0),
    )
    return DiffusionAttentionStruct(
        module=module,
        parent=parent,
        fname=fname,
        idx=idx,
        rname=rname,
        rkey=rkey,
        config=config,
        q_proj=q_proj,
        k_proj=k_proj,
        v_proj=v_proj,
        o_proj=o_proj,
        add_q_proj=add_q_proj,
        add_k_proj=add_k_proj,
        add_v_proj=add_v_proj,
        add_o_proj=add_o_proj,
        q=None,  # TODO: add q, k, v
        k=None,
        v=None,
        q_proj_rname=q_proj_rname,
        k_proj_rname=k_proj_rname,
        v_proj_rname=v_proj_rname,
        o_proj_rname=o_proj_rname,
        add_q_proj_rname=add_q_proj_rname,
        add_k_proj_rname=add_k_proj_rname,
        add_v_proj_rname=add_v_proj_rname,
        add_o_proj_rname=add_o_proj_rname,
        q_rname="",
        k_rname="",
        v_rname="",
    )
```

3) ValueError: Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.

References

https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformers/transformer_flux.py#L266

https://github.com/nunchaku-tech/deepcompressor/blob/main/deepcompressor/nn/struct/attn.py