vita-video-gen committed on
Commit
cd53bfa
·
verified ·
1 Parent(s): a57a9d7

Add demo data and ablation videos

Browse files
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/ablation/stage1.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ data/ablation/stage2.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ data/test/faces/demo_000001_faces.mp4 filter=lfs diff=lfs merge=lfs -text
39
+ data/test/frames/demo_000001.png filter=lfs diff=lfs merge=lfs -text
40
+ data/test/poses/demo_000001_poses.mp4 filter=lfs diff=lfs merge=lfs -text
41
+ data/train/faces/sample_000001_faces.mp4 filter=lfs diff=lfs merge=lfs -text
42
+ data/train/poses/sample_000001_poses.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ data/train/videos/sample_000001.mp4 filter=lfs diff=lfs merge=lfs -text
data/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimal Data
2
+
3
+ This directory contains a tiny smoke-test dataset for checking training and inference wiring.
4
+
5
+ ## Training
6
+
7
+ `train/metadata.csv` contains one training sample with the same columns expected by `examples/wanvideo/model_training/train_svi.py`:
8
+
9
+ ```text
10
+ video,prompt,animate_pose_video,animate_face_video
11
+ ```
12
+
13
+ The paths in the CSV are relative to `data/train/`.
14
+
15
+ The toy training videos are stored with normalized sample names:
16
+
17
+ ```text
18
+ videos/sample_000001.mp4
19
+ poses/sample_000001_poses.mp4
20
+ faces/sample_000001_faces.mp4
21
+ ```
22
+
23
+ ## Inference
24
+
25
+ `test/` contains a single inference case (paths relative to `data/test/`):
26
+
27
+ ```text
28
+ frames/demo_000001.png
29
+ poses/demo_000001_poses.mp4
30
+ faces/demo_000001_faces.mp4
31
+ ```
32
+
33
+ This matches the default layout used by the top-level `inference.sh`.
34
+
35
+ ## Ablation
36
+
37
+ `ablation/` contains the two-stage ablation outputs (paths relative to `data/`):
38
+
39
+ ```text
40
+ ablation/stage1.mp4
41
+ ablation/stage2.mp4
42
+ ```
data/ablation/stage1.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a05b14713902db41d25753f22aabe82ea01b84d0dfbcc1f7bed569d881994a2
3
+ size 102321871
data/ablation/stage2.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820ba9df94274e7bdce68949024965c9762f6a8c889069d92880201c6b10c93b
3
+ size 67100373
data/test/faces/demo_000001_faces.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72603e64c81ab96408eb87249887294a10fb5becd2ee2cf58316335e8f9e0033
3
+ size 22094107
data/test/frames/demo_000001.png ADDED

Git LFS Details

  • SHA256: 048985a11c40c6c2606d97f0c2a6eeaca7b8bc0b699c6fe41c28bfcfab2e6005
  • Pointer size: 131 Bytes
  • Size of remote file: 470 kB
data/test/poses/demo_000001_poses.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d3143beaa8dbfa547f0f91b472fc96f9557dd65520068d92163df93f5bfa89
3
+ size 15528027
data/train/faces/sample_000001_faces.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cad74551dbd9de60a7350731cc01c393bd051257f340557035fbe1705b439e2
3
+ size 713201
data/train/metadata.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ video,prompt,animate_pose_video,animate_face_video
2
+ videos/sample_000001.mp4,A person is moving naturally.,poses/sample_000001_poses.mp4,faces/sample_000001_faces.mp4
data/train/poses/sample_000001_poses.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f84eb13fcb45b08adea9d4b940823ca38356dd5fa7a96debbc7245b3a22b8f8
3
+ size 481328
data/train/videos/sample_000001.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d135995d2275f594a48cba96dbf5c7317387200df121a6f55e1da5c1e46cf868
3
+ size 1026939