yugeng-amd committed on
Commit
dc39c21
·
verified ·
1 Parent(s): f593644

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ ---
4
+ In this work, we introduce Micro-World, an action-controlled interactive world model designed to generate high-quality, open-domain scenes. Built on top of the Wan2.1 family of models, we train both image-to-video (I2V) and text-to-video (T2V) variants to support a wide range of use cases. To foster open research and practical adoption in the community, we release the model weights, full training and inference code, as well as a curated dataset specifically tailored for controllable world modeling.
5
+
6
+ For action injection, we favor AdaLN for its lightweight parameter footprint, and ControlNet for its strong empirical stability during training.
7
+
8
+ Note that the released I2V model is trained using the AdaLN architecture.
9
+
10
+ For more information, please refer to the code.
11
+ <div style="margin: 0; padding: 0; text-align: center;">
12
+ <img src="https://github.com/user-attachments/assets/680b87ac-0c95-4a27-b4fd-fcafb9fdf609" alt="t2v architecture" title="t2v architecture" class="t2v architecture">
13
+ <img src="https://github.com/user-attachments/assets/c9cd8d9e-9555-42d3-b884-04705d1e329c" alt="t2v architecture" title="t2v architecture" class="t2v architecture">
14
+ </div>
lora_diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20d52fc5ef9171a1aaa7f5560352772ebdc52906008bd95a71b60d10092c1213
3
+ size 1458497816
transformer/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "WanActionAdaLNModel",
3
+ "_diffusers_version": "0.34.0",
4
+ "action_dim": 1536,
5
+ "cross_attn_norm": true,
6
+ "dim": 5120,
7
+ "eps": 1e-06,
8
+ "ffn_dim": 13824,
9
+ "freq_dim": 256,
10
+ "in_channels": 16,
11
+ "in_dim": 36,
12
+ "keyboard_dim": 7,
13
+ "model_type": "i2v",
14
+ "mouse_dim": 2,
15
+ "num_heads": 40,
16
+ "num_layers": 40,
17
+ "out_dim": 16,
18
+ "patch_size": [
19
+ 1,
20
+ 2,
21
+ 2
22
+ ],
23
+ "qk_norm": true,
24
+ "text_dim": 4096,
25
+ "text_len": 512,
26
+ "window_size": [
27
+ -1,
28
+ -1
29
+ ]
30
+ }
transformer/diffusion_pytorch_model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93198ad45411face10c0d764b4c512883b859d56e205fd81319f3af05a0007f
3
+ size 9957502392
transformer/diffusion_pytorch_model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18739d9ef54f21fb8500701b8100ea82c7783916c64712ff8ac08bfff07793c7
3
+ size 9954400440
transformer/diffusion_pytorch_model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be3524698564432f6af1f8bc2a4ab2af5121df495e9e2330d99f15acc0d7306
3
+ size 9901951000
transformer/diffusion_pytorch_model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d18792f93a9a45d3802c73cae60177d9e2bdeb95554fb8fa10b87979d32c6a
3
+ size 6761695152
transformer/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff