Upload folder using huggingface_hub
Browse files- README.md +14 -0
- lora_diffusion_pytorch_model.safetensors +3 -0
- transformer/config.json +30 -0
- transformer/diffusion_pytorch_model-00001-of-00004.safetensors +3 -0
- transformer/diffusion_pytorch_model-00002-of-00004.safetensors +3 -0
- transformer/diffusion_pytorch_model-00003-of-00004.safetensors +3 -0
- transformer/diffusion_pytorch_model-00004-of-00004.safetensors +3 -0
- transformer/diffusion_pytorch_model.safetensors.index.json +0 -0
README.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: gpl-3.0
|
| 3 |
+
---
|
| 4 |
+
In this work, we introduce Micro-World, an action-controlled interactive world model designed to generate high-quality, open-domain scenes. Building on the Wan2.1 family of models, we train both image-to-video (I2V) and text-to-video (T2V) variants to support a wide range of use cases. To foster open research and practical adoption in the community, we release the model weights, full training and inference code, as well as a curated dataset specifically tailored for controllable world modeling.
|
| 5 |
+
|
| 6 |
+
For action injection, we favor adaLN for its lightweight parameter footprint, and ControlNet for its strong empirical stability during training.
|
| 7 |
+
|
| 8 |
+
Note that the released I2V model is trained using the adaLN architecture.
|
| 9 |
+
|
| 10 |
+
For more information, please refer to the code.
|
| 11 |
+
<div style="margin: 0; padding: 0; text-align: center;">
|
| 12 |
+
<img src="https://github.com/user-attachments/assets/680b87ac-0c95-4a27-b4fd-fcafb9fdf609" alt="t2v architecture" title="t2v architecture" class="t2v architecture">
|
| 13 |
+
<img src="https://github.com/user-attachments/assets/c9cd8d9e-9555-42d3-b884-04705d1e329c" alt="t2v architecture" title="t2v architecture" class="t2v architecture">
|
| 14 |
+
</div>
|
lora_diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20d52fc5ef9171a1aaa7f5560352772ebdc52906008bd95a71b60d10092c1213
|
| 3 |
+
size 1458497816
|
transformer/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "WanActionAdaLNModel",
|
| 3 |
+
"_diffusers_version": "0.34.0",
|
| 4 |
+
"action_dim": 1536,
|
| 5 |
+
"cross_attn_norm": true,
|
| 6 |
+
"dim": 5120,
|
| 7 |
+
"eps": 1e-06,
|
| 8 |
+
"ffn_dim": 13824,
|
| 9 |
+
"freq_dim": 256,
|
| 10 |
+
"in_channels": 16,
|
| 11 |
+
"in_dim": 36,
|
| 12 |
+
"keyboard_dim": 7,
|
| 13 |
+
"model_type": "i2v",
|
| 14 |
+
"mouse_dim": 2,
|
| 15 |
+
"num_heads": 40,
|
| 16 |
+
"num_layers": 40,
|
| 17 |
+
"out_dim": 16,
|
| 18 |
+
"patch_size": [
|
| 19 |
+
1,
|
| 20 |
+
2,
|
| 21 |
+
2
|
| 22 |
+
],
|
| 23 |
+
"qk_norm": true,
|
| 24 |
+
"text_dim": 4096,
|
| 25 |
+
"text_len": 512,
|
| 26 |
+
"window_size": [
|
| 27 |
+
-1,
|
| 28 |
+
-1
|
| 29 |
+
]
|
| 30 |
+
}
|
transformer/diffusion_pytorch_model-00001-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b93198ad45411face10c0d764b4c512883b859d56e205fd81319f3af05a0007f
|
| 3 |
+
size 9957502392
|
transformer/diffusion_pytorch_model-00002-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18739d9ef54f21fb8500701b8100ea82c7783916c64712ff8ac08bfff07793c7
|
| 3 |
+
size 9954400440
|
transformer/diffusion_pytorch_model-00003-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7be3524698564432f6af1f8bc2a4ab2af5121df495e9e2330d99f15acc0d7306
|
| 3 |
+
size 9901951000
|
transformer/diffusion_pytorch_model-00004-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71d18792f93a9a45d3802c73cae60177d9e2bdeb95554fb8fa10b87979d32c6a
|
| 3 |
+
size 6761695152
|
transformer/diffusion_pytorch_model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|