Update README.md
Browse files

README.md — CHANGED

@@ -51,7 +51,7 @@ Folder Structure

# Blockers

1) NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

-
```python
@staticmethod
def _default_build(
```

@@ -160,6 +160,138 @@ Loading pipeline components...: 100%|██████████████

2) KeyError: <class 'diffusers.models.transformers.transformer_flux.FluxAttention'>
https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformers/transformer_flux.py#L266

https://github.com/nunchaku-tech/deepcompressor/blob/main/deepcompressor/nn/struct/attn.py
# Blockers

1) NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

Potential fix: app.diffusion.pipeline.config.py

```python
@staticmethod
def _default_build(
    ...
```

2) KeyError: <class 'diffusers.models.transformers.transformer_flux.FluxAttention'>

Potential fix: app.diffusion.nn.struct.py

```python
@staticmethod
def _default_construct(
    module: Attention,
    /,
    parent: tp.Optional["DiffusionTransformerBlockStruct"] = None,
    fname: str = "",
    rname: str = "",
    rkey: str = "",
    idx: int = 0,
    **kwargs,
) -> "DiffusionAttentionStruct":
    if isinstance(module, FluxAttention):
        # FluxAttention has different attribute names than standard attention
        with_rope = True
        num_query_heads = module.heads  # FluxAttention uses 'heads', not 'num_heads'
        num_key_value_heads = module.heads  # FLUX typically uses same for q/k/v

        # FluxAttention doesn't have 'to_out', but may have other output projections
        # Check what output projection attributes actually exist
        o_proj = None
        o_proj_rname = ""

        # Try to find the correct output projection
        if hasattr(module, 'to_out') and module.to_out is not None:
            o_proj = module.to_out[0] if isinstance(module.to_out, (list, tuple)) else module.to_out
            o_proj_rname = "to_out.0" if isinstance(module.to_out, (list, tuple)) else "to_out"
        elif hasattr(module, 'to_add_out'):
            o_proj = module.to_add_out
            o_proj_rname = "to_add_out"

        q_proj, k_proj, v_proj = module.to_q, module.to_k, module.to_v
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "to_k", "to_v"
        q, k, v = module.to_q, module.to_k, module.to_v
        q_rname, k_rname, v_rname = "to_q", "to_k", "to_v"

        # Handle the add_* projections that FluxAttention has
        add_q_proj = getattr(module, "add_q_proj", None)
        add_k_proj = getattr(module, "add_k_proj", None)
        add_v_proj = getattr(module, "add_v_proj", None)
        add_o_proj = getattr(module, "to_add_out", None)
        add_q_proj_rname = "add_q_proj" if add_q_proj else ""
        add_k_proj_rname = "add_k_proj" if add_k_proj else ""
        add_v_proj_rname = "add_v_proj" if add_v_proj else ""
        add_o_proj_rname = "to_add_out" if add_o_proj else ""

        kwargs = (
            "encoder_hidden_states",
            "attention_mask",
            "image_rotary_emb",
        )
        cross_attention = add_k_proj is not None
    elif module.is_cross_attention:
        q_proj, k_proj, v_proj = module.to_q, None, None
        add_q_proj, add_k_proj, add_v_proj, add_o_proj = None, module.to_k, module.to_v, None
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "", ""
        add_q_proj_rname, add_k_proj_rname, add_v_proj_rname, add_o_proj_rname = "", "to_k", "to_v", ""
    else:
        q_proj, k_proj, v_proj = module.to_q, module.to_k, module.to_v
        add_q_proj = getattr(module, "add_q_proj", None)
        add_k_proj = getattr(module, "add_k_proj", None)
        add_v_proj = getattr(module, "add_v_proj", None)
        add_o_proj = getattr(module, "to_add_out", None)
        q_proj_rname, k_proj_rname, v_proj_rname = "to_q", "to_k", "to_v"
        add_q_proj_rname, add_k_proj_rname, add_v_proj_rname = "add_q_proj", "add_k_proj", "add_v_proj"
        add_o_proj_rname = "to_add_out"
    if getattr(module, "to_out", None) is not None:
        o_proj = module.to_out[0]
        o_proj_rname = "to_out.0"
        assert isinstance(o_proj, nn.Linear)
    elif parent is not None:
        assert isinstance(parent.module, FluxSingleTransformerBlock)
        assert isinstance(parent.module.proj_out, ConcatLinear)
        assert len(parent.module.proj_out.linears) == 2
        o_proj = parent.module.proj_out.linears[0]
        o_proj_rname = ".proj_out.linears.0"
    else:
        raise RuntimeError("Cannot find the output projection.")
    if isinstance(module.processor, DiffusionAttentionProcessor):
        with_rope = module.processor.rope is not None
    elif module.processor.__class__.__name__.startswith("Flux"):
        with_rope = True
    else:
        with_rope = False  # TODO: fix for other processors
    config = AttentionConfigStruct(
        hidden_size=q_proj.weight.shape[1],
        add_hidden_size=add_k_proj.weight.shape[1] if add_k_proj is not None else 0,
        inner_size=q_proj.weight.shape[0],
        num_query_heads=module.heads,
        num_key_value_heads=module.to_k.weight.shape[0] // (module.to_q.weight.shape[0] // module.heads),
        with_qk_norm=module.norm_q is not None,
        with_rope=with_rope,
        linear_attn=isinstance(module.processor, SanaLinearAttnProcessor2_0),
    )
    return DiffusionAttentionStruct(
        module=module,
        parent=parent,
        fname=fname,
        idx=idx,
        rname=rname,
        rkey=rkey,
        config=config,
        q_proj=q_proj,
        k_proj=k_proj,
        v_proj=v_proj,
        o_proj=o_proj,
        add_q_proj=add_q_proj,
        add_k_proj=add_k_proj,
        add_v_proj=add_v_proj,
        add_o_proj=add_o_proj,
        q=None,  # TODO: add q, k, v
        k=None,
        v=None,
        q_proj_rname=q_proj_rname,
        k_proj_rname=k_proj_rname,
        v_proj_rname=v_proj_rname,
        o_proj_rname=o_proj_rname,
        add_q_proj_rname=add_q_proj_rname,
        add_k_proj_rname=add_k_proj_rname,
        add_v_proj_rname=add_v_proj_rname,
        add_o_proj_rname=add_o_proj_rname,
        q_rname="",
        k_rname="",
        v_rname="",
    )
```

3) ValueError: Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.

References

https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformers/transformer_flux.py#L266

https://github.com/nunchaku-tech/deepcompressor/blob/main/deepcompressor/nn/struct/attn.py