Upload folder using huggingface_hub
Browse files
.gitattributes
CHANGED
|
@@ -41,3 +41,7 @@ la_ddwc_phase_2/inference_0000.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
| 41 |
la_ddwc_phase_2/inference_0001.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
la_ddwc_phase_2/inference_0002.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
la_ddwc_phase_2/inference_0003.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
la_ddwc_phase_2/inference_0001.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
la_ddwc_phase_2/inference_0002.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
la_ddwc_phase_2/inference_0003.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
gla_ddwc_phase_2/inference_0000.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
gla_ddwc_phase_2/inference_0001.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
gla_ddwc_phase_2/inference_0002.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
gla_ddwc_phase_2/inference_0003.mp4 filter=lfs diff=lfs merge=lfs -text
|
gla_ddwc_phase_2/inference_0000.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3be29d2b5864cc813999b6cb6278b5a386a9cc182442560e74d22eda9a4ac88
|
| 3 |
+
size 326760
|
gla_ddwc_phase_2/inference_0001.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9b1058a78d6035b0a645f44d09c4759be2d5242e36fbe03a6de68d8417bf694
|
| 3 |
+
size 1250017
|
gla_ddwc_phase_2/inference_0002.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de144291bb0fcc2fb82fea581e51d94859794048bc3c92d1a1d263af3fe6cbac
|
| 3 |
+
size 461301
|
gla_ddwc_phase_2/inference_0003.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:056b4dcfd4edf3012c36cfa29983d3506bcdda2d99e353ff455d5214dc2bd35b
|
| 3 |
+
size 223736
|
gla_ddwc_phase_2/model.txt
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
JetWanT2V(
|
| 2 |
+
(patch_embedding): Conv3d(16, 1536, kernel_size=(1, 2, 2), stride=(1, 2, 2))
|
| 3 |
+
(text_embedding): Sequential(
|
| 4 |
+
(0): Linear(in_features=4096, out_features=1536, bias=True)
|
| 5 |
+
(1): GELU(approximate='tanh')
|
| 6 |
+
(2): Linear(in_features=1536, out_features=1536, bias=True)
|
| 7 |
+
)
|
| 8 |
+
(time_embedding): Sequential(
|
| 9 |
+
(0): Linear(in_features=256, out_features=1536, bias=True)
|
| 10 |
+
(1): SiLU()
|
| 11 |
+
(2): Linear(in_features=1536, out_features=1536, bias=True)
|
| 12 |
+
)
|
| 13 |
+
(time_projection): Sequential(
|
| 14 |
+
(0): SiLU()
|
| 15 |
+
(1): Linear(in_features=1536, out_features=9216, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(blocks): ModuleList(
|
| 18 |
+
(0-29): 30 x JetWanBlock(
|
| 19 |
+
(norm1): WanLayerNorm(normalized_shape=(1536,), eps=1e-06, elementwise_affine=False)
|
| 20 |
+
(self_attn): GatedLinearAttentionWithDynamicDWC(
|
| 21 |
+
(q): Linear(in_features=1536, out_features=1536, bias=True)
|
| 22 |
+
(k): Linear(in_features=1536, out_features=1536, bias=True)
|
| 23 |
+
(v): Linear(in_features=1536, out_features=1536, bias=True)
|
| 24 |
+
(o): Linear(in_features=1536, out_features=1536, bias=True)
|
| 25 |
+
(norm_q): WanRMSNorm(dim=1536, eps=1e-06)
|
| 26 |
+
(norm_k): WanRMSNorm(dim=1536, eps=1e-06)
|
| 27 |
+
(norm_o): WanRMSNorm(dim=128, eps=1e-06)
|
| 28 |
+
(g): Linear(in_features=1536, out_features=1536, bias=True)
|
| 29 |
+
(gate_act): Sigmoid()
|
| 30 |
+
(dwc): DynamicDWC3D(
|
| 31 |
+
channels=128, kernel_size=(3, 3, 3)
|
| 32 |
+
(kernel_generator): Sequential(
|
| 33 |
+
(conv1): Conv3d(128, 1024, kernel_size=(1, 1, 1), stride=(1, 1, 1))
|
| 34 |
+
(act1): SiLU()
|
| 35 |
+
(conv2): Conv3d(1024, 3456, kernel_size=(1, 1, 1), stride=(1, 1, 1))
|
| 36 |
+
)
|
| 37 |
+
)
|
| 38 |
+
)
|
| 39 |
+
(norm3): WanLayerNorm(normalized_shape=(1536,), eps=1e-06, elementwise_affine=True)
|
| 40 |
+
(cross_attn): WanT2VCrossAttention(
|
| 41 |
+
(q): Linear(in_features=1536, out_features=1536, bias=True)
|
| 42 |
+
(k): Linear(in_features=1536, out_features=1536, bias=True)
|
| 43 |
+
(v): Linear(in_features=1536, out_features=1536, bias=True)
|
| 44 |
+
(o): Linear(in_features=1536, out_features=1536, bias=True)
|
| 45 |
+
(norm_q): WanRMSNorm(dim=1536, eps=1e-06)
|
| 46 |
+
(norm_k): WanRMSNorm(dim=1536, eps=1e-06)
|
| 47 |
+
)
|
| 48 |
+
(norm2): WanLayerNorm(normalized_shape=(1536,), eps=1e-06, elementwise_affine=False)
|
| 49 |
+
(ffn): Sequential(
|
| 50 |
+
(0): Linear(in_features=1536, out_features=8960, bias=True)
|
| 51 |
+
(1): GELU(approximate='tanh')
|
| 52 |
+
(2): Linear(in_features=8960, out_features=1536, bias=True)
|
| 53 |
+
)
|
| 54 |
+
)
|
| 55 |
+
)
|
| 56 |
+
(head): WanHead(
|
| 57 |
+
(norm): WanLayerNorm(normalized_shape=(1536,), eps=1e-06, elementwise_affine=False)
|
| 58 |
+
(head): Linear(in_features=1536, out_features=64, bias=True)
|
| 59 |
+
)
|
| 60 |
+
)
|