Spaces:
Runtime error
Runtime error
nsfwalex Claude Opus 4.8 (1M context) committed on
Commit ·
877c9af
1
Parent(s): 66e8e32
Fix Anima loader: de-prefix CivitAI checkpoint so finetuned DiT+adapter actually load
Browse files
CivitAI Anima checkpoints are ComfyUI-format (model.diffusion_model.* prefix). The
diffusers Cosmos single-file converter only strips a 'net.' prefix, so the raw file
matched no transformer param -> the load failed and a silent try/except fell back to
the BASE Anima DiT, making all three Anima finetunes render identical base weights
(differing only by prompt prefix). De-prefix and load both finetuned components
explicitly (567 DiT tensors -> transformer, 118 llm_adapter tensors -> text_conditioner),
verified exact key match vs the base repo, and let failures raise loudly.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
app.py
CHANGED
|
@@ -22,6 +22,7 @@ from diffusers import (
|
|
| 22 |
)
|
| 23 |
from compel import Compel, ReturnedEmbeddingsType
|
| 24 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 25 |
from transformers import (
|
| 26 |
AutoProcessor,
|
| 27 |
AutoModelForImageTextToText,
|
|
@@ -289,18 +290,63 @@ def _load_anima(entry):
|
|
| 289 |
# Anima = Cosmos-Predict2 DiT + Qwen3 text encoder + Qwen-Image VAE. Stock
|
| 290 |
# diffusers ships it only as an experimental *modular* pipeline. Load the base
|
| 291 |
# components (TE / VAE / scheduler / text-conditioner) from the converted repo,
|
| 292 |
-
# then swap the finetuned
|
| 293 |
pipe = AnimaModularPipeline.from_pretrained(ANIMA_BASE)
|
| 294 |
pipe.load_components(torch_dtype=torch.bfloat16)
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
pipe.to("cuda")
|
| 305 |
return pipe
|
| 306 |
|
|
|
|
| 22 |
)
|
| 23 |
from compel import Compel, ReturnedEmbeddingsType
|
| 24 |
from huggingface_hub import hf_hub_download
|
| 25 |
+
from safetensors.torch import load_file
|
| 26 |
from transformers import (
|
| 27 |
AutoProcessor,
|
| 28 |
AutoModelForImageTextToText,
|
|
|
|
| 290 |
# Anima = Cosmos-Predict2 DiT + Qwen3 text encoder + Qwen-Image VAE. Stock
|
| 291 |
# diffusers ships it only as an experimental *modular* pipeline. Load the base
|
| 292 |
# components (TE / VAE / scheduler / text-conditioner) from the converted repo,
|
| 293 |
+
# then swap the finetuned weights in from the single-file checkpoint.
|
| 294 |
pipe = AnimaModularPipeline.from_pretrained(ANIMA_BASE)
|
| 295 |
pipe.load_components(torch_dtype=torch.bfloat16)
|
| 296 |
+
|
| 297 |
+
# CivitAI Anima checkpoints are ComfyUI-format: every tensor is prefixed
|
| 298 |
+
# `model.diffusion_model.`, and the finetune trains TWO diffusers components:
|
| 299 |
+
# * 567 DiT tensors -> the `transformer` (CosmosTransformer3DModel)
|
| 300 |
+
# * 118 `llm_adapter.*` tensors -> the separate `text_conditioner` component
|
| 301 |
+
# The diffusers Cosmos single-file converter only strips a `net.` prefix (it has
|
| 302 |
+
# no `model.diffusion_model.` rule, unlike the Z-Image converter), so feeding the
|
| 303 |
+
# raw file leaves every key prefixed -> it matches NO param -> the load fails and
|
| 304 |
+
# (previously, behind a silent try/except) fell back to the BASE DiT. That made
|
| 305 |
+
# all three Anima finetunes render identical base weights, differing only by
|
| 306 |
+
# prompt prefix. We de-prefix and load each component explicitly instead, and let
|
| 307 |
+
# any real failure raise loudly rather than silently degrade to base weights.
|
| 308 |
+
DIT_PREFIX = "model.diffusion_model."
|
| 309 |
+
ADAPTER_PREFIX = "model.diffusion_model.llm_adapter."
|
| 310 |
+
raw = load_file(_checkpoint_path(entry["checkpoint"]))
|
| 311 |
+
dit_sd = {
|
| 312 |
+
k[len(DIT_PREFIX):]: v for k, v in raw.items()
|
| 313 |
+
if k.startswith(DIT_PREFIX) and not k.startswith(ADAPTER_PREFIX)
|
| 314 |
+
}
|
| 315 |
+
adapter_sd = {
|
| 316 |
+
k[len(ADAPTER_PREFIX):]: v for k, v in raw.items()
|
| 317 |
+
if k.startswith(ADAPTER_PREFIX)
|
| 318 |
+
}
|
| 319 |
+
if not dit_sd:
|
| 320 |
+
raise RuntimeError(
|
| 321 |
+
f"[anima] {entry['label']}: no '{DIT_PREFIX}*' keys in checkpoint "
|
| 322 |
+
f"(got prefixes {sorted({k.split('.')[0] for k in raw})}); "
|
| 323 |
+
f"the single-file format changed — fix the de-prefix logic.")
|
| 324 |
+
|
| 325 |
+
# Finetuned DiT: from_single_file runs the Cosmos converter over the de-prefixed
|
| 326 |
+
# state dict (keys now line up with what the converter expects).
|
| 327 |
+
transformer = CosmosTransformer3DModel.from_single_file(
|
| 328 |
+
dit_sd, config=ANIMA_BASE, subfolder="transformer",
|
| 329 |
+
torch_dtype=torch.bfloat16,
|
| 330 |
+
)
|
| 331 |
+
pipe.update_components(transformer=transformer)
|
| 332 |
+
|
| 333 |
+
# Finetuned LLM adapter -> the `text_conditioner` component. Its keys match the
|
| 334 |
+
# base component's names verbatim, so load straight in (strict=False just so a
|
| 335 |
+
# future key drift logs instead of crashing the whole boot).
|
| 336 |
+
tc = getattr(pipe, "text_conditioner", None)
|
| 337 |
+
if adapter_sd and tc is not None:
|
| 338 |
+
info = tc.load_state_dict(adapter_sd, strict=False)
|
| 339 |
+
tc.to(torch.bfloat16)
|
| 340 |
+
if info.missing_keys or info.unexpected_keys:
|
| 341 |
+
print(f"[anima] {entry['label']} text_conditioner: "
|
| 342 |
+
f"{len(info.missing_keys)} missing / "
|
| 343 |
+
f"{len(info.unexpected_keys)} unexpected keys")
|
| 344 |
+
else:
|
| 345 |
+
print(f"[anima] {entry['label']}: no text_conditioner / adapter weights "
|
| 346 |
+
f"(adapter_tensors={len(adapter_sd)}); using base adapter")
|
| 347 |
+
|
| 348 |
+
print(f"[anima] {entry['label']}: loaded finetuned DiT ({len(dit_sd)} tensors) "
|
| 349 |
+
f"+ adapter ({len(adapter_sd)} tensors)")
|
| 350 |
pipe.to("cuda")
|
| 351 |
return pipe
|
| 352 |
|