athul020 commited on
Commit
12cfa0f
·
verified ·
1 Parent(s): 63db017

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +53 -0
  2. adapter_config.json +46 -0
  3. adapter_model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: THUDM/CogVideoX-2b
3
+ library_name: peft
4
+ tags:
5
+ - lora
6
+ - dora
7
+ - cogvideox
8
+ - physics
9
+ - video-generation
10
+ - warp
11
+ ---
12
+
13
+ # PDW — Physics-Corrected CogVideoX-2b World Model (DoRA Adapter)
14
+
15
+ A **DoRA (Weight-Decomposed Low-Rank Adaptation)** adapter for [CogVideoX-2b](https://huggingface.co/THUDM/CogVideoX-2b), fine-tuned to generate physically accurate videos using **NVIDIA Warp** physics simulation data and **TRD (Temporal Representation Distillation)** with DINOv2-large as the teacher.
16
+
17
+ ## Model Details
18
+
19
+ - **Base model:** THUDM/CogVideoX-2b (1.7B params)
20
+ - **Adapter:** DoRA (r=16, lora_alpha=32, use_dora=True)
21
+ - **Target modules:** `to_q`, `to_k`, `to_v`, `to_out.0`
22
+ - **Trainable params:** 7.6M / 1.7B (0.45%)
23
+ - **Physics engine:** NVIDIA Warp (28-scenario 7×4 grid)
24
+ - **TRD teacher:** DINOv2-large
25
+ - **Hardware:** NVIDIA H100 NVL
26
+ - **Training steps:** 400
27
+
28
+ ## Evaluation Results
29
+
30
+ | Metric | Delta |
31
+ |---|---|
32
+ | Diffusion MSE | +94.1% |
33
+ | Motion score | +1.7% |
34
+ | Overall | +47.9% |
35
+
36
+ ## How to Use
37
+
38
+ ```python
39
+ from peft import PeftModel
40
+ from diffusers import CogVideoXTransformer3DModel
41
+
42
+ # Load base transformer
43
+ base_transformer = CogVideoXTransformer3DModel.from_pretrained(
44
+ "THUDM/CogVideoX-2b", subfolder="transformer"
45
+ )
46
+
47
+ # Load DoRA adapter
48
+ model = PeftModel.from_pretrained(base_transformer, "athul020/pdw_final_dora")
49
+ ```
50
+
51
+ ### Framework versions
52
+
53
+ - PEFT 0.18.1
adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "CogVideoXTransformer3DModel",
7
+ "parent_library": "diffusers.models.transformers.cogvideox_transformer_3d"
8
+ },
9
+ "base_model_name_or_path": null,
10
+ "bias": "none",
11
+ "corda_config": null,
12
+ "ensure_weight_tying": false,
13
+ "eva_config": null,
14
+ "exclude_modules": null,
15
+ "fan_in_fan_out": false,
16
+ "inference_mode": true,
17
+ "init_lora_weights": true,
18
+ "layer_replication": null,
19
+ "layers_pattern": null,
20
+ "layers_to_transform": null,
21
+ "loftq_config": {},
22
+ "lora_alpha": 32,
23
+ "lora_bias": false,
24
+ "lora_dropout": 0.05,
25
+ "megatron_config": null,
26
+ "megatron_core": "megatron.core",
27
+ "modules_to_save": null,
28
+ "peft_type": "LORA",
29
+ "peft_version": "0.18.1",
30
+ "qalora_group_size": 16,
31
+ "r": 16,
32
+ "rank_pattern": {},
33
+ "revision": null,
34
+ "target_modules": [
35
+ "to_q",
36
+ "to_out.0",
37
+ "to_k",
38
+ "to_v"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": null,
42
+ "trainable_token_indices": null,
43
+ "use_dora": true,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a7ca557a5abb369d2d67122eb3bef6d40685a4bc1ec25cc86e08d98ac4f485
3
+ size 30462024