thanks to iic ❤
Browse files- README.md +41 -0
- configuration.json +1 -0
- dw-ll_ucoco_384.onnx +3 -0
- open_clip_pytorch_model.bin +3 -0
- unianimate_16f_32f_non_ema_223000.pth +3 -0
- v2-1_512-ema-pruned.ckpt +3 -0
- yolox_l.onnx +3 -0
README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# UniAnimate: Taming Unified Video Diffusion Models for Consistent Human Image Animation
|
| 2 |
+
|
| 3 |
+
## This repo includes the checkpoints for UniAnimate:
|
| 4 |
+
|
| 5 |
+
- "models/dw-ll_ucoco_384.onnx": the checkpoint for dwpose extraction.
|
| 6 |
+
|
| 7 |
+
- "models/open_clip_pytorch_model.bin": the checkpoint for clip embedding.
|
| 8 |
+
|
| 9 |
+
- "models/unianimate_16f_32f_non_ema_223000.pth": the checkpoint for human image animation in UniAnimate (16/32 frames).
|
| 10 |
+
|
| 11 |
+
- "models/yolox_l.onnx": the YOLOX-L detection checkpoint used for person detection in dwpose extraction.
|
| 12 |
+
|
| 13 |
+
- "models/v2-1_512-ema-pruned.ckpt": the checkpoint for Stable Diffusion.
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
## BibTeX
|
| 19 |
+
|
| 20 |
+
If this repo is useful to you, please cite our corresponding technical paper.
|
| 21 |
+
|
| 22 |
+
```bibtex
|
| 23 |
+
@article{wang2024unianimate,
|
| 24 |
+
title={UniAnimate: Taming Unified Video Diffusion Models for Consistent Human Image Animation},
|
| 25 |
+
author={Wang, Xiang and Zhang, Shiwei and Gao, Changxin and Wang, Jiayu and Zhou, Xiaoqiang and Zhang, Yingya and Yan, Luxin and Sang, Nong},
|
| 26 |
+
journal={arXiv preprint arXiv:2406.01188},
|
| 27 |
+
year={2024}
|
| 28 |
+
}
|
| 29 |
+
@inproceedings{TFT2V,
|
| 30 |
+
title={A Recipe for Scaling up Text-to-Video Generation with Text-free Videos},
|
| 31 |
+
author={Wang, Xiang and Zhang, Shiwei and Yuan, Hangjie and Qing, Zhiwu and Gong, Biao and Zhang, Yingya and Shen, Yujun and Gao, Changxin and Sang, Nong},
|
| 32 |
+
booktitle={CVPR},
|
| 33 |
+
year={2024}
|
| 34 |
+
}
|
| 35 |
+
@article{VideoComposer,
|
| 36 |
+
title={VideoComposer: Compositional Video Synthesis with Motion Controllability},
|
| 37 |
+
author={Wang, Xiang and Yuan, Hangjie and Zhang, Shiwei and Chen, Dayou and Wang, Jiuniu and Zhang, Yingya and Shen, Yujun and Zhao, Deli and Zhou, Jingren},
|
| 38 |
+
journal={NeurIPS},
|
| 39 |
+
year={2023}
|
| 40 |
+
}
|
| 41 |
+
```
|
configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"framework":"Pytorch","task":"text-to-video-synthesis"}
|
dw-ll_ucoco_384.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
|
| 3 |
+
size 134399116
|
open_clip_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a78ef8e8c73fd0df621682e7a8e8eb36c6916cb3c16b291a082ecd52ab79cc4
|
| 3 |
+
size 3944692325
|
unianimate_16f_32f_non_ema_223000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b78e68c02d5974d9f8198b2e96f6868c1b17921dea3eb7f729ce6dacdc481b45
|
| 3 |
+
size 5666897525
|
v2-1_512-ema-pruned.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88ecb782561455673c4b78d05093494b9c539fc6bfc08f3a9a4a0dd7b0b10f36
|
| 3 |
+
size 5214865159
|
yolox_l.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
|
| 3 |
+
size 216746733
|