Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +275 -0
- Meissonic/.github/FUNDING.yml +15 -0
- Meissonic/.gitignore +166 -0
- Meissonic/InfinityStar/.gitignore +59 -0
- Meissonic/InfinityStar/LICENSE +21 -0
- Meissonic/InfinityStar/README.md +187 -0
- Meissonic/InfinityStar/__pycache__/train.cpython-310.pyc +0 -0
- Meissonic/InfinityStar/assets/Infinitystar_image_gen_benchmark.png +3 -0
- Meissonic/InfinityStar/assets/Infinitystar_videogen_benchmark.png +3 -0
- Meissonic/InfinityStar/assets/Infinitystar_videogen_humaneval.png +3 -0
- Meissonic/InfinityStar/assets/framework.png +3 -0
- Meissonic/InfinityStar/assets/i2v_examples.png +3 -0
- Meissonic/InfinityStar/assets/logo.png +3 -0
- Meissonic/InfinityStar/assets/reference_image.webp +3 -0
- Meissonic/InfinityStar/assets/supp_show_images.png +3 -0
- Meissonic/InfinityStar/assets/v2v_examples.png +3 -0
- Meissonic/InfinityStar/cog.yaml +46 -0
- Meissonic/InfinityStar/data/README.md +57 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0001_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0002_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0003_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0004_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0005_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0006_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0007_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0008_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0009_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0010_0010_000000100.jsonl +0 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls_for_training.py +118 -0
- Meissonic/InfinityStar/data/infinitystar_toy_data/videos/e06b8ca5dbc6.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/002a061bdbc110ca8fb48e7e0a663c94/0000_refine_720p.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/002a061bdbc110ca8fb48e7e0a663c94/prompt.txt +4 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/0037784abafa76e2b84fe746750a7988/0000_refine_720p.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/0037784abafa76e2b84fe746750a7988/prompt.txt +4 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/00a79efb495c29e082c246e9ca9a7e8f/0000_refine_720p.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/00a79efb495c29e082c246e9ca9a7e8f/prompt.txt +4 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/011341aa8bea615b76d69423862f5f31/0000_refine_720p.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/011341aa8bea615b76d69423862f5f31/prompt.txt +4 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/012900f0605d2e4777119aeefaa7f31b/0000_refine_720p.mp4 +3 -0
- Meissonic/InfinityStar/data/interactive_toy_videos/012900f0605d2e4777119aeefaa7f31b/prompt.txt +4 -0
- Meissonic/InfinityStar/evaluation/README.md +2 -0
- Meissonic/InfinityStar/evaluation/VBench_rewrited_prompt.json +0 -0
- Meissonic/InfinityStar/infinity/__init__.py +2 -0
- Meissonic/InfinityStar/infinity/__pycache__/__init__.cpython-310.pyc +0 -0
- Meissonic/InfinityStar/infinity/dataset/__pycache__/build.cpython-310.pyc +0 -0
- Meissonic/InfinityStar/infinity/dataset/__pycache__/dataset_joint_vi.cpython-310.pyc +0 -0
- Meissonic/InfinityStar/infinity/dataset/build.py +218 -0
- Meissonic/InfinityStar/infinity/dataset/dataset_joint_vi.py +689 -0
- Meissonic/InfinityStar/infinity/models/__init__.py +29 -0
- Meissonic/InfinityStar/infinity/models/__pycache__/__init__.cpython-310.pyc +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,278 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
OpenVid1M/video_reorg/OpenVid1M_reorganized.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
OpenVid1M/video_reorg/OpenVid1M_reorganized.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
Meissonic/InfinityStar/assets/Infinitystar_image_gen_benchmark.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Meissonic/InfinityStar/assets/Infinitystar_videogen_benchmark.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
Meissonic/InfinityStar/assets/Infinitystar_videogen_humaneval.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Meissonic/InfinityStar/assets/framework.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
Meissonic/InfinityStar/assets/i2v_examples.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
Meissonic/InfinityStar/assets/logo.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
Meissonic/InfinityStar/assets/reference_image.webp filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
Meissonic/InfinityStar/assets/supp_show_images.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
Meissonic/InfinityStar/assets/v2v_examples.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
Meissonic/InfinityStar/data/infinitystar_toy_data/videos/e06b8ca5dbc6.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
Meissonic/InfinityStar/data/interactive_toy_videos/002a061bdbc110ca8fb48e7e0a663c94/0000_refine_720p.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
Meissonic/InfinityStar/data/interactive_toy_videos/0037784abafa76e2b84fe746750a7988/0000_refine_720p.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
Meissonic/InfinityStar/data/interactive_toy_videos/00a79efb495c29e082c246e9ca9a7e8f/0000_refine_720p.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
Meissonic/InfinityStar/data/interactive_toy_videos/011341aa8bea615b76d69423862f5f31/0000_refine_720p.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
Meissonic/InfinityStar/data/interactive_toy_videos/012900f0605d2e4777119aeefaa7f31b/0000_refine_720p.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_grid_video_0.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_grid_video_1.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_grid_video_2.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_grid_video_3.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_video_0.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_video_1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_video_2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
Meissonic/InfinityStar/infinity_vqvae_test_output/comparison_video_3.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
Meissonic/InfinityStar/vae_reconstruction_test/comparison.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
Meissonic/InfinityStar/vae_reconstruction_test/comparison_grid.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
Meissonic/InfinityStar/vae_reconstruction_test/frame_000_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
Meissonic/InfinityStar/vae_reconstruction_test/frame_001_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
Meissonic/InfinityStar/vae_reconstruction_test/frame_002_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
Meissonic/InfinityStar/vae_reconstruction_test/frame_003_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
Meissonic/InfinityStar/vae_reconstruction_test/frame_004_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
Meissonic/VidTok/assets/example.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
Meissonic/VidTok/assets/radar.png filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
Meissonic/VidTok/assets/vidtwin.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
Meissonic/VidTok/assets/vidtwin_demo.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
Meissonic/VidTok/vidtok_cache/VidTok/assets/example.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
Meissonic/VidTok/vidtok_cache/VidTok/assets/radar.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
Meissonic/VidTok/vidtok_cache/VidTok/assets/vidtwin.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
Meissonic/VidTok/vidtok_cache/VidTok/assets/vidtwin_demo.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
Meissonic/VidTok/vidtok_test_output/comparison_grid_video_0.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
Meissonic/VidTok/vidtok_test_output/comparison_grid_video_1.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
Meissonic/VidTok/vidtok_test_output/comparison_grid_video_2.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
Meissonic/VidTok/vidtok_test_output/comparison_grid_video_3.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
Meissonic/VidTok/vidtok_test_output/comparison_video_0.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
Meissonic/VidTok/vidtok_test_output/comparison_video_1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
Meissonic/VidTok/vidtok_test_output/comparison_video_2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
Meissonic/VidTok/vidtok_test_output/comparison_video_3.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
Meissonic/assets/architecture.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
Meissonic/assets/demos.pdf filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
Meissonic/assets/demos.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
Meissonic/assets/inpaint/0eKR4M2uuL8.jpg filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
Meissonic/assets/inpaint/__Owak0IgJk.jpg filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
Meissonic/assets/outpaint/__G2yFuW7jQ.jpg filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
Meissonic/cosmos_test_output/comparison_grid_video_0.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
Meissonic/cosmos_test_output/comparison_grid_video_1.png filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
Meissonic/cosmos_test_output/comparison_grid_video_2.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
Meissonic/cosmos_test_output/comparison_grid_video_3.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
Meissonic/cosmos_test_output/comparison_video_1.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
Meissonic/cosmos_test_output/comparison_video_2.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
Meissonic/cosmos_test_output/comparison_video_3.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
Meissonic/output/9_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
Meissonic/output/9_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
Meissonic/output/A[[:space:]]black[[:space:]]an_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
Meissonic/output/A[[:space:]]cat[[:space:]]wear_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
Meissonic/output/A[[:space:]]dog[[:space:]]in[[:space:]]a_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
Meissonic/output/A[[:space:]]large[[:space:]]bo_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
Meissonic/output/A[[:space:]]robot[[:space:]]pl_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
Meissonic/output/A[[:space:]]white[[:space:]]an_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
Meissonic/output/The[[:space:]]sun[[:space:]]is_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
Meissonic/output/Three[[:space:]]boat_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
Meissonic/output/Two[[:space:]]actors_1024_64_9_fp8.png filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/1499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/1499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
Meissonic/output_128x128_17f_2\*4bs_4\*8\*8vqvae_0_2_ratio/999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
Meissonic/output_180x320_16f_2bs_4\*8\*8vqvae_0_2_ratio/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
Meissonic/output_180x320_16f_2bs_4\*8\*8vqvae_0_2_ratio/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/1499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/1499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/1999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/1999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/2499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/2499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/2999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/2999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/3499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/3499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/3999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/3999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio/999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue/1499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue/999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue/999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/1499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/1499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/1999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/1999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/2499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/2499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/2999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/2999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/3499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/3499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_00_ratio_continue_tmp/999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/1499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/1499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/1999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/1999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/2499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/2499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/2999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/2999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/3499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/3499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/3999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/3999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/4499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/4499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/4999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/4999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/5499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/5499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/5999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/5999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/6499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/6499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/6999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/6999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/7499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/7499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/7999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/7999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/8499_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/8999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/8999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/9499_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/9999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/9999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/999_video_0_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
Meissonic/output_256x448_4f_2bs_4\*8\*8vqvae_0_10_ratio/999_video_1_CFG-9.png filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV4x8x8/autoencoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV4x8x8/decoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV4x8x8/encoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV8x8x8/autoencoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV8x8x8/decoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
Meissonic/pretrained_ckpts/Cosmos-0.1-Tokenizer-DV8x8x8/encoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
Meissonic/pretrained_ckpts/Cosmos-1.0-Tokenizer-DV8x16x16/autoencoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
Meissonic/pretrained_ckpts/Cosmos-1.0-Tokenizer-DV8x16x16/decoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
Meissonic/pretrained_ckpts/Cosmos-1.0-Tokenizer-DV8x16x16/encoder.jit filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
Meissonic/vidtok_cache/VidTok/assets/example.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
Meissonic/vidtok_cache/VidTok/assets/radar.png filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
Meissonic/vidtok_cache/VidTok/assets/vidtwin.png filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
Meissonic/vidtok_cache/VidTok/assets/vidtwin_demo.png filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
Meissonic/wandb/run-20251207_092554-l16v7o9l/run-l16v7o9l.wandb filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
Meissonic/wandb/run-20251207_094329-qf4q6gjw/run-qf4q6gjw.wandb filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
Meissonic/wandb/run-20251207_094715-uvgb9hvt/run-uvgb9hvt.wandb filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
Meissonic/wandb/run-20251207_102454-nnww5mz8/run-nnww5mz8.wandb filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
Meissonic/wandb/run-20251207_111518-slrbepi0/run-slrbepi0.wandb filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
Meissonic/wandb/run-20251207_113103-ijl2gw6b/run-ijl2gw6b.wandb filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
Meissonic/wandb/run-20251207_113607-aryc95f2/files/media/images/generated_videos_first_frame_10_2f39bee6c4969d94f6d2.png filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
Meissonic/wandb/run-20251207_113607-aryc95f2/files/media/images/generated_videos_first_frame_10_a0ddb52b457bceac4774.png filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
Meissonic/wandb/run-20251207_114426-5sh31nrg/files/media/images/generated_videos_first_frame_1000_1fc345a8cdc18e62468b.png filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
Meissonic/wandb/run-20251207_114426-5sh31nrg/files/media/images/generated_videos_first_frame_1000_f4b36308698e96e11163.png filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
Meissonic/wandb/run-20251207_114426-5sh31nrg/files/media/images/generated_videos_first_frame_500_0798147230daa742b054.png filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
Meissonic/wandb/run-20251207_114426-5sh31nrg/files/media/images/generated_videos_first_frame_500_aed08910c4a8dcdc87f6.png filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
Meissonic/wandb/run-20251207_114426-5sh31nrg/run-5sh31nrg.wandb filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
Meissonic/wandb/run-20251207_162442-54o4hegd/run-54o4hegd.wandb filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
Meissonic/wandb/run-20251208_032955-tl61pd0t/run-tl61pd0t.wandb filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
Meissonic/wandb/run-20251208_040606-2dcjc9k8/run-2dcjc9k8.wandb filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
Meissonic/wandb/run-20251208_062741-qalkbn80/run-qalkbn80.wandb filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
Meissonic/wandb/run-20251208_071823-0hjx73rw/run-0hjx73rw.wandb filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_10000_8328d2d0556a95ff2759.png filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_10000_980ee3261a5cf9cce942.png filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_1000_8fd26361f0705a90a632.png filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_1000_cec203cb5c36d2873217.png filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_1500_c061c65a6ce343b1660e.png filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_1500_f047fb97b642dc30b33c.png filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_2000_2805ac51dfa6ef4de083.png filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_2000_e98ce360ce92d75f9a36.png filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_2500_430592107b01c838d952.png filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_2500_ecca4db815beca263f13.png filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_3000_52422bc6ab7caedd5b8c.png filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_3000_8e35b9b7d6b7a0806553.png filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_3500_227067a6cd64b7cdced4.png filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_3500_55ba9221da0bf3c49190.png filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_4000_9a50d3903fd31767c616.png filter=lfs diff=lfs merge=lfs -text
|
| 234 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_4000_ffacfcca81b53cb27319.png filter=lfs diff=lfs merge=lfs -text
|
| 235 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_4500_935711ba29b3ab613691.png filter=lfs diff=lfs merge=lfs -text
|
| 236 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_4500_bf885e1339d92cc386d1.png filter=lfs diff=lfs merge=lfs -text
|
| 237 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_5000_bdd3a8c8c0c8a7a7d4dd.png filter=lfs diff=lfs merge=lfs -text
|
| 238 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_5000_c8333d970fbc70e45c64.png filter=lfs diff=lfs merge=lfs -text
|
| 239 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_500_3d483725c07baf8663d3.png filter=lfs diff=lfs merge=lfs -text
|
| 240 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_500_b0f06ea56e9a9c08850c.png filter=lfs diff=lfs merge=lfs -text
|
| 241 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_5500_60d433cf43a3cb8d1412.png filter=lfs diff=lfs merge=lfs -text
|
| 242 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_5500_7cd8c962e4d1b79b5dcc.png filter=lfs diff=lfs merge=lfs -text
|
| 243 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_6000_41402987f48490139945.png filter=lfs diff=lfs merge=lfs -text
|
| 244 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_6000_c6c41d57fcadc12fd69b.png filter=lfs diff=lfs merge=lfs -text
|
| 245 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_6500_2d21e8a2ea1688bffb9d.png filter=lfs diff=lfs merge=lfs -text
|
| 246 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_6500_a609810c96cec2279a46.png filter=lfs diff=lfs merge=lfs -text
|
| 247 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_7000_4a1fe2fe98784f7b8841.png filter=lfs diff=lfs merge=lfs -text
|
| 248 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_7000_6119b9f39242430c319b.png filter=lfs diff=lfs merge=lfs -text
|
| 249 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_7500_7e0ee18074e9b8d85c45.png filter=lfs diff=lfs merge=lfs -text
|
| 250 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_7500_b01a808f5a897296f898.png filter=lfs diff=lfs merge=lfs -text
|
| 251 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_8000_15555bb3e2ce8b16ddcf.png filter=lfs diff=lfs merge=lfs -text
|
| 252 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_8000_9652b904aa757dce7aeb.png filter=lfs diff=lfs merge=lfs -text
|
| 253 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_8500_c2d1b91c197ca101b350.png filter=lfs diff=lfs merge=lfs -text
|
| 254 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_9000_03ba3747205343bd9935.png filter=lfs diff=lfs merge=lfs -text
|
| 255 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_9000_cc8a6153b15016f58ad3.png filter=lfs diff=lfs merge=lfs -text
|
| 256 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/files/media/images/generated_videos_first_frame_9500_342589ce9380e8bb866b.png filter=lfs diff=lfs merge=lfs -text
|
| 257 |
+
Meissonic/wandb/run-20251208_155943-j5rc8ish/run-j5rc8ish.wandb filter=lfs diff=lfs merge=lfs -text
|
| 258 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_1000_4ea9441b252682155006.png filter=lfs diff=lfs merge=lfs -text
|
| 259 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_1000_be5afcc9b61ce7cc9765.png filter=lfs diff=lfs merge=lfs -text
|
| 260 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_1500_7c59a605f746fefa06f3.png filter=lfs diff=lfs merge=lfs -text
|
| 261 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_1500_e846322d8d1fe1da0c06.png filter=lfs diff=lfs merge=lfs -text
|
| 262 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_2000_e7251adb287026b97ff8.png filter=lfs diff=lfs merge=lfs -text
|
| 263 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_2000_fb353e591b1b0dbac386.png filter=lfs diff=lfs merge=lfs -text
|
| 264 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_2500_4254c55c5a44dae8222b.png filter=lfs diff=lfs merge=lfs -text
|
| 265 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_2500_880fb5b7bb7d55a41102.png filter=lfs diff=lfs merge=lfs -text
|
| 266 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_3000_0af47ac2b0fd0a7b83b9.png filter=lfs diff=lfs merge=lfs -text
|
| 267 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_3000_38859ead3b87553090be.png filter=lfs diff=lfs merge=lfs -text
|
| 268 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_3500_1b3f708ccf2664b9bd84.png filter=lfs diff=lfs merge=lfs -text
|
| 269 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_3500_96fc2c23d9374b5c001f.png filter=lfs diff=lfs merge=lfs -text
|
| 270 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_4000_7f60fcf85257e0427cb4.png filter=lfs diff=lfs merge=lfs -text
|
| 271 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_4000_f36bf77eea280b84a34e.png filter=lfs diff=lfs merge=lfs -text
|
| 272 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_500_92b8064a4e25f8ad3702.png filter=lfs diff=lfs merge=lfs -text
|
| 273 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/files/media/images/generated_videos_first_frame_500_f6969510d28d905ce414.png filter=lfs diff=lfs merge=lfs -text
|
| 274 |
+
Meissonic/wandb/run-20251209_060856-ctbp97lz/run-ctbp97lz.wandb filter=lfs diff=lfs merge=lfs -text
|
| 275 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/files/media/images/generated_videos_first_frame_1000_2a764e89458c3c8d15fb.png filter=lfs diff=lfs merge=lfs -text
|
| 276 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/files/media/images/generated_videos_first_frame_1000_80cf7f467b6a4ea9a5d4.png filter=lfs diff=lfs merge=lfs -text
|
| 277 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/files/media/images/generated_videos_first_frame_1500_fb32391d5c492e093a1a.png filter=lfs diff=lfs merge=lfs -text
|
| 278 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/files/media/images/generated_videos_first_frame_500_9a388a1a15b60d9f4438.png filter=lfs diff=lfs merge=lfs -text
|
| 279 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/files/media/images/generated_videos_first_frame_500_c2c619bff47ae122a524.png filter=lfs diff=lfs merge=lfs -text
|
| 280 |
+
Meissonic/wandb/run-20251209_102651-55o5soqg/run-55o5soqg.wandb filter=lfs diff=lfs merge=lfs -text
|
| 281 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_1000_7b2c7dbea7c77c3a3523.png filter=lfs diff=lfs merge=lfs -text
|
| 282 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_1000_d3b01b8e129b539a85ed.png filter=lfs diff=lfs merge=lfs -text
|
| 283 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_1500_287117d5d7643ba31ec4.png filter=lfs diff=lfs merge=lfs -text
|
| 284 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_1500_f6b18ba278e34d44baab.png filter=lfs diff=lfs merge=lfs -text
|
| 285 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_2000_321720abba124381620b.png filter=lfs diff=lfs merge=lfs -text
|
| 286 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_2000_fa7af054654656754134.png filter=lfs diff=lfs merge=lfs -text
|
| 287 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_2500_e6c1efef5a74bd11c582.png filter=lfs diff=lfs merge=lfs -text
|
| 288 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_2500_f00b3e2c752ac3cf926a.png filter=lfs diff=lfs merge=lfs -text
|
| 289 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_3000_67d5ba7897e123897b95.png filter=lfs diff=lfs merge=lfs -text
|
| 290 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_3000_9c128d777c7dab549107.png filter=lfs diff=lfs merge=lfs -text
|
| 291 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_3500_4274b237825ef8cf5d05.png filter=lfs diff=lfs merge=lfs -text
|
| 292 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_3500_de7aecbbb4729ab5af9d.png filter=lfs diff=lfs merge=lfs -text
|
| 293 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_500_09fa45bbfff36049e141.png filter=lfs diff=lfs merge=lfs -text
|
| 294 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/files/media/images/generated_videos_first_frame_500_d8fc778d368d5c2cb79c.png filter=lfs diff=lfs merge=lfs -text
|
| 295 |
+
Meissonic/wandb/run-20251209_141739-fk5kdvzr/run-fk5kdvzr.wandb filter=lfs diff=lfs merge=lfs -text
|
| 296 |
+
Meissonic/wandb/run-20251209_162337-uv3abozu/run-uv3abozu.wandb filter=lfs diff=lfs merge=lfs -text
|
| 297 |
+
Meissonic/wandb/run-20251210_030325-gkrz1ykg/run-gkrz1ykg.wandb filter=lfs diff=lfs merge=lfs -text
|
| 298 |
+
Meissonic/wandb/run-20251210_032745-o7so78o8/run-o7so78o8.wandb filter=lfs diff=lfs merge=lfs -text
|
| 299 |
+
Meissonic/wandb/run-20251210_035336-u8db4xs3/run-u8db4xs3.wandb filter=lfs diff=lfs merge=lfs -text
|
| 300 |
+
Meissonic/wandb/run-20251210_043009-5878wpml/run-5878wpml.wandb filter=lfs diff=lfs merge=lfs -text
|
| 301 |
+
Meissonic/wandb/run-20251210_045934-tcqz8xbx/run-tcqz8xbx.wandb filter=lfs diff=lfs merge=lfs -text
|
| 302 |
+
Meissonic/wandb/run-20251210_065438-svzut638/run-svzut638.wandb filter=lfs diff=lfs merge=lfs -text
|
| 303 |
+
Meissonic/wandb/run-20251210_071716-kc9aapl4/run-kc9aapl4.wandb filter=lfs diff=lfs merge=lfs -text
|
| 304 |
+
Meissonic/wandb/run-20251210_105833-im5q8jfr/run-im5q8jfr.wandb filter=lfs diff=lfs merge=lfs -text
|
| 305 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_1000_654e84862d8c0a13f1b5.png filter=lfs diff=lfs merge=lfs -text
|
| 306 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_1000_cd153009051a7d605018.png filter=lfs diff=lfs merge=lfs -text
|
| 307 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_1500_56d7d215080b273e9155.png filter=lfs diff=lfs merge=lfs -text
|
| 308 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_1500_736c017ca88662cd1d11.png filter=lfs diff=lfs merge=lfs -text
|
| 309 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_500_4f69c990d95a223b9d06.png filter=lfs diff=lfs merge=lfs -text
|
| 310 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/files/media/images/generated_videos_grid_500_5fc1dbdaeeaef4847234.png filter=lfs diff=lfs merge=lfs -text
|
| 311 |
+
Meissonic/wandb/run-20251210_114439-mrtah7xe/run-mrtah7xe.wandb filter=lfs diff=lfs merge=lfs -text
|
Meissonic/.github/FUNDING.yml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# These are supported funding model platforms
|
| 2 |
+
|
| 3 |
+
github: viiika
|
| 4 |
+
patreon: # Replace with a single Patreon username
|
| 5 |
+
open_collective: # Replace with a single Open Collective username
|
| 6 |
+
ko_fi: # Replace with a single Ko-fi username
|
| 7 |
+
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
| 8 |
+
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
| 9 |
+
liberapay: # Replace with a single Liberapay username
|
| 10 |
+
issuehunt: # Replace with a single IssueHunt username
|
| 11 |
+
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
| 12 |
+
polar: # Replace with a single Polar username
|
| 13 |
+
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
|
| 14 |
+
thanks_dev: # Replace with a single thanks.dev username
|
| 15 |
+
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
|
Meissonic/.gitignore
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Mac OS related
|
| 40 |
+
.DS_Store
|
| 41 |
+
*.DS_Store
|
| 42 |
+
|
| 43 |
+
# Unit test / coverage reports
|
| 44 |
+
htmlcov/
|
| 45 |
+
.tox/
|
| 46 |
+
.nox/
|
| 47 |
+
.coverage
|
| 48 |
+
.coverage.*
|
| 49 |
+
.cache
|
| 50 |
+
nosetests.xml
|
| 51 |
+
coverage.xml
|
| 52 |
+
*.cover
|
| 53 |
+
*.py,cover
|
| 54 |
+
.hypothesis/
|
| 55 |
+
.pytest_cache/
|
| 56 |
+
cover/
|
| 57 |
+
|
| 58 |
+
# Translations
|
| 59 |
+
*.mo
|
| 60 |
+
*.pot
|
| 61 |
+
|
| 62 |
+
# Django stuff:
|
| 63 |
+
*.log
|
| 64 |
+
local_settings.py
|
| 65 |
+
db.sqlite3
|
| 66 |
+
db.sqlite3-journal
|
| 67 |
+
|
| 68 |
+
# Flask stuff:
|
| 69 |
+
instance/
|
| 70 |
+
.webassets-cache
|
| 71 |
+
|
| 72 |
+
# Scrapy stuff:
|
| 73 |
+
.scrapy
|
| 74 |
+
|
| 75 |
+
# Sphinx documentation
|
| 76 |
+
docs/_build/
|
| 77 |
+
|
| 78 |
+
# PyBuilder
|
| 79 |
+
.pybuilder/
|
| 80 |
+
target/
|
| 81 |
+
|
| 82 |
+
# Jupyter Notebook
|
| 83 |
+
.ipynb_checkpoints
|
| 84 |
+
|
| 85 |
+
# IPython
|
| 86 |
+
profile_default/
|
| 87 |
+
ipython_config.py
|
| 88 |
+
|
| 89 |
+
# pyenv
|
| 90 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 91 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 92 |
+
# .python-version
|
| 93 |
+
|
| 94 |
+
# pipenv
|
| 95 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 96 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 97 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 98 |
+
# install all needed dependencies.
|
| 99 |
+
#Pipfile.lock
|
| 100 |
+
|
| 101 |
+
# poetry
|
| 102 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 103 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 104 |
+
# commonly ignored for libraries.
|
| 105 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 106 |
+
#poetry.lock
|
| 107 |
+
|
| 108 |
+
# pdm
|
| 109 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 110 |
+
#pdm.lock
|
| 111 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 112 |
+
# in version control.
|
| 113 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 114 |
+
.pdm.toml
|
| 115 |
+
.pdm-python
|
| 116 |
+
.pdm-build/
|
| 117 |
+
|
| 118 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 119 |
+
__pypackages__/
|
| 120 |
+
|
| 121 |
+
# Celery stuff
|
| 122 |
+
celerybeat-schedule
|
| 123 |
+
celerybeat.pid
|
| 124 |
+
|
| 125 |
+
# SageMath parsed files
|
| 126 |
+
*.sage.py
|
| 127 |
+
|
| 128 |
+
# Environments
|
| 129 |
+
.env
|
| 130 |
+
.venv
|
| 131 |
+
env/
|
| 132 |
+
venv/
|
| 133 |
+
ENV/
|
| 134 |
+
env.bak/
|
| 135 |
+
venv.bak/
|
| 136 |
+
|
| 137 |
+
# Spyder project settings
|
| 138 |
+
.spyderproject
|
| 139 |
+
.spyproject
|
| 140 |
+
|
| 141 |
+
# Rope project settings
|
| 142 |
+
.ropeproject
|
| 143 |
+
|
| 144 |
+
# mkdocs documentation
|
| 145 |
+
/site
|
| 146 |
+
|
| 147 |
+
# mypy
|
| 148 |
+
.mypy_cache/
|
| 149 |
+
.dmypy.json
|
| 150 |
+
dmypy.json
|
| 151 |
+
|
| 152 |
+
# Pyre type checker
|
| 153 |
+
.pyre/
|
| 154 |
+
|
| 155 |
+
# pytype static type analyzer
|
| 156 |
+
.pytype/
|
| 157 |
+
|
| 158 |
+
# Cython debug symbols
|
| 159 |
+
cython_debug/
|
| 160 |
+
|
| 161 |
+
# PyCharm
|
| 162 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 163 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 164 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 165 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 166 |
+
#.idea/
|
Meissonic/InfinityStar/.gitignore
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.swp
|
| 2 |
+
**/__pycache__/**
|
| 3 |
+
**/.ipynb_checkpoints/**
|
| 4 |
+
.idea/*
|
| 5 |
+
llava/
|
| 6 |
+
_vis_cached/
|
| 7 |
+
_vqgan/
|
| 8 |
+
_vae/
|
| 9 |
+
_vae*/
|
| 10 |
+
ckpt/
|
| 11 |
+
log/
|
| 12 |
+
tb*/
|
| 13 |
+
img*/
|
| 14 |
+
local_output*
|
| 15 |
+
_auto_*
|
| 16 |
+
sd-vae-ft-mse/
|
| 17 |
+
stable-diffusion-v1-4/
|
| 18 |
+
*.pth
|
| 19 |
+
*.pth.tar
|
| 20 |
+
*.ckpt
|
| 21 |
+
*.log
|
| 22 |
+
*.ipynb
|
| 23 |
+
toscli
|
| 24 |
+
*.hydra
|
| 25 |
+
wandb
|
| 26 |
+
*.jpg
|
| 27 |
+
*.csv
|
| 28 |
+
*.tar.gz
|
| 29 |
+
*.bin
|
| 30 |
+
tmp
|
| 31 |
+
output
|
| 32 |
+
*.tsv
|
| 33 |
+
output/*
|
| 34 |
+
results/
|
| 35 |
+
*.JPEG
|
| 36 |
+
debug/
|
| 37 |
+
weights
|
| 38 |
+
checkpoints
|
| 39 |
+
ref.py
|
| 40 |
+
wandb
|
| 41 |
+
.DS_Store
|
| 42 |
+
ref.sh
|
| 43 |
+
ref.py
|
| 44 |
+
checkpoints_bk
|
| 45 |
+
*.avi
|
| 46 |
+
infinity/VideoVAE
|
| 47 |
+
saves/
|
| 48 |
+
tmp.sh
|
| 49 |
+
ref_*.sh
|
| 50 |
+
tmpp.sh
|
| 51 |
+
ref2.sh
|
| 52 |
+
checkpoints_new
|
| 53 |
+
checkpoints_*
|
| 54 |
+
tmp_images
|
| 55 |
+
tmp_videos
|
| 56 |
+
shm
|
| 57 |
+
wget-log
|
| 58 |
+
data/interactive_toy_data
|
| 59 |
+
tools/infer_interact_480p.py.bk
|
Meissonic/InfinityStar/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 FoundationVision
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
Meissonic/InfinityStar/README.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<p align="center">
|
| 2 |
+
<img src="assets/logo.png" width="400" style="border:none;box-shadow:none;border-radius:0;background:none;">
|
| 3 |
+
</p>
|
| 4 |
+
|
| 5 |
+
# Infinity**⭐️**: Unified **S**pace**T**ime **A**uto**R**egressive Modeling for Visual Generation
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
<div align="center">
|
| 9 |
+
|
| 10 |
+
[](http://opensource.bytedance.com/discord/invite)
|
| 11 |
+
[](https://arxiv.org/abs/2511.04675)
|
| 12 |
+
[](https://huggingface.co/FoundationVision/InfinityStar)
|
| 13 |
+
|
| 14 |
+
</div>
|
| 15 |
+
<p align="center" style="font-size: larger;">
|
| 16 |
+
<a href="http://arxiv.org/abs/2511.04675">Infinity⭐️: Unified Spacetime AutoRegressive Modeling for Visual Generation</a>
|
| 17 |
+
</p>
|
| 18 |
+
|
| 19 |
+
<!-- <p align="center">
|
| 20 |
+
<img src="assets/show_images.jpg" width=95%>
|
| 21 |
+
<p> -->
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
## 🔥 Updates!!
|
| 25 |
+
* Nov 7, 2025: 🔥 Paper, Training and Inference Codes && Checkpoints && Demo Website released!
|
| 26 |
+
* Sep 18, 2025: 🎉 InfinityStar is accepted as NeurIPS 2025 Oral.
|
| 27 |
+
|
| 28 |
+
## 🕹️ Try and Play with Infinity⭐️!
|
| 29 |
+
|
| 30 |
+
We provide a [demo website](http://opensource.bytedance.com/discord/invite) for you to play with InfinityStar and generate videos. Enjoy the fun of bitwise video autoregressive modeling!
|
| 31 |
+
|
| 32 |
+
## ✨ Overview
|
| 33 |
+
We introduce InfinityStar, a unified spacetime autoregressive framework for high-resolution image and dynamic video synthesis.
|
| 34 |
+
|
| 35 |
+
- 🧠 **Unified Spacetime Model**: A purely discrete, autoregressive approach that jointly captures spatial and temporal dependencies within a single, elegant architecture.
|
| 36 |
+
|
| 37 |
+
- 🎬 **Versatile Generation**: This unified design naturally supports a variety of generation tasks such as **text-to-image**, **text-to-video**, **image-to-video**, and **long interactive video synthesis** via straightforward temporal autoregression.
|
| 38 |
+
|
| 39 |
+
- 🏆 **Leading Performance & Speed**: In extensive experiments, InfinityStar scores **83.74** on VBench, outperforming all autoregressive models by large margins and even surpassing diffusion competitors like HunyuanVideo, while running approximately **10x** faster than leading diffusion-based methods.
|
| 40 |
+
|
| 41 |
+
- 📖 **Pioneering High-Resolution Autoregressive Generation**: To our knowledge, InfinityStar is the first discrete autoregressive video generator capable of producing industrial-level 720p videos, setting a new standard for quality in its class.
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
### 🔥 Unified modeling for image, video generation and long interactive video synthesis 📈:
|
| 45 |
+
|
| 46 |
+
<div align="left">
|
| 47 |
+
<img src="assets/framework.png" alt="" style="width: 100%;" />
|
| 48 |
+
</div>
|
| 49 |
+
|
| 50 |
+
## 🎬 Video Demos
|
| 51 |
+
#### General Aesthetics
|
| 52 |
+
<div align="left">
|
| 53 |
+
<video src="https://github.com/user-attachments/assets/14e2b18b-9234-42ce-bdab-670faeef4b2a" width="100%" controls autoplay loop></video>
|
| 54 |
+
</div>
|
| 55 |
+
|
| 56 |
+
#### Anime & 3D Animation
|
| 57 |
+
<div align="left">
|
| 58 |
+
<video src="https://github.com/user-attachments/assets/478e9571-b550-4c23-a567-6fee9a0afb5b" width="100%" controls autoplay loop></video>
|
| 59 |
+
</div>
|
| 60 |
+
|
| 61 |
+
#### Motion
|
| 62 |
+
<div align="left">
|
| 63 |
+
<video src="https://github.com/user-attachments/assets/adab669b-d38f-4607-9a52-32d8d0bf0e53" width="100%" controls autoplay loop></video>
|
| 64 |
+
</div>
|
| 65 |
+
|
| 66 |
+
#### Extended Application: Long Interactive Videos
|
| 67 |
+
<div align="center">
|
| 68 |
+
<video src="https://github.com/user-attachments/assets/411666a6-563d-4551-a3f8-dc5de00436c1" width="100%" controls autoplay loop></video>
|
| 69 |
+
</div>
|
| 70 |
+
|
| 71 |
+
## Benchmark
|
| 72 |
+
|
| 73 |
+
### Achieves SOTA performance on image generation benchmarks:
|
| 74 |
+
|
| 75 |
+
<div align="left">
|
| 76 |
+
<img src="assets/Infinitystar_image_gen_benchmark.png" alt="Image Generation Evaluation" style="width: 100%;" />
|
| 77 |
+
</div>
|
| 78 |
+
|
| 79 |
+
### Achieves SOTA performance on video generation benchmarks:
|
| 80 |
+
|
| 81 |
+
<div align="left">
|
| 82 |
+
<img src="assets/Infinitystar_videogen_benchmark.png" alt="" style="width: 100%;" />
|
| 83 |
+
</div>
|
| 84 |
+
|
| 85 |
+
### Surpassing diffusion competitors like HunyuanVideo*:
|
| 86 |
+
|
| 87 |
+
<div align="left">
|
| 88 |
+
<img src="assets/Infinitystar_videogen_humaneval.png" alt="" style="width: 100%;" />
|
| 89 |
+
</div>
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
## Visualization
|
| 93 |
+
|
| 94 |
+
### Text to image examples
|
| 95 |
+
|
| 96 |
+
<div align="left">
|
| 97 |
+
<img src="assets/supp_show_images.png" alt="Text to Image Examples" style="width: 100%;" />
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
### Image to video examples
|
| 101 |
+
|
| 102 |
+
<div align="left">
|
| 103 |
+
<img src="assets/i2v_examples.png" alt="Image to Video Examples" style="width: 100%;" />
|
| 104 |
+
</div>
|
| 105 |
+
|
| 106 |
+
### Video extrapolation examples
|
| 107 |
+
|
| 108 |
+
<div align="left">
|
| 109 |
+
<img src="assets/v2v_examples.png" alt="Video Extrapolation Examples" style="width: 100%;" />
|
| 110 |
+
</div>
|
| 111 |
+
|
| 112 |
+
## 📑 Open-Source Plan
|
| 113 |
+
- [x] Training Code
|
| 114 |
+
- [x] Web Demo
|
| 115 |
+
- [x] InfinityStar Inference Code
|
| 116 |
+
- [x] InfinityStar Models Checkpoints
|
| 117 |
+
- [x] InfinityStar-Interact Inference Code
|
| 118 |
+
- [x] InfinityStar-Interact Checkpoints
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
## Installation
|
| 122 |
+
1. We use FlexAttention to speed up training, which requires `torch>=2.5.1`.
|
| 123 |
+
2. Install other pip packages via `pip3 install -r requirements.txt`.
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
## Training Scripts
|
| 127 |
+
We provide a comprehensive workflow for training and finetuning our model, covering data organization, feature extraction, and training scripts. For detailed instructions, please refer to `data/README.md`.
|
| 128 |
+
|
| 129 |
+
## Inference
|
| 130 |
+
* **720p Video Generation:**
|
| 131 |
+
Use `tools/infer_video_720p.py` to generate 5-second videos at 720p resolution. Due to the high computational cost of training, our released 720p model is trained for 5-second video generation. This script also supports image-to-video generation by specifying an image path.
|
| 132 |
+
```bash
|
| 133 |
+
python3 tools/infer_video_720p.py
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
* **480p Variable-Length Video Generation:**
|
| 137 |
+
We also provide an intermediate checkpoint for 480p resolution, capable of generating videos of 5 and 10 seconds. Since this model is not specifically optimized for Text-to-Video (T2V), we recommend using the experimental Image-to-Video (I2V) and Video-to-Video (V2V) modes for better results. To specify the video duration, you can edit the `generation_duration` variable in `tools/infer_video_480p.py` to either 5 or 10. This script also supports image-to-video and video continuation by providing a path to an image or a video.
|
| 138 |
+
```bash
|
| 139 |
+
python3 tools/infer_video_480p.py
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
* **480p Long Interactive Video Generation:**
|
| 143 |
+
Use `tools/infer_interact_480p.py` to generate a long interactive video in 480p. This script supports interactive video generation. You can provide a reference video and multiple prompts. The model will generate a video interactively with your assistance.
|
| 144 |
+
```bash
|
| 145 |
+
python3 tools/infer_interact_480p.py
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
## Citation
|
| 149 |
+
If our work assists your research, feel free to give us a star ⭐ or cite us using:
|
| 150 |
+
|
| 151 |
+
```
|
| 152 |
+
@Article{VAR,
|
| 153 |
+
title={Visual Autoregressive Modeling: Scalable Image Generation via Next-Scale Prediction},
|
| 154 |
+
author={Keyu Tian and Yi Jiang and Zehuan Yuan and Bingyue Peng and Liwei Wang},
|
| 155 |
+
year={2024},
|
| 156 |
+
eprint={2404.02905},
|
| 157 |
+
archivePrefix={arXiv},
|
| 158 |
+
primaryClass={cs.CV}
|
| 159 |
+
}
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
```
|
| 163 |
+
@misc{Infinity,
|
| 164 |
+
title={Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis},
|
| 165 |
+
author={Jian Han and Jinlai Liu and Yi Jiang and Bin Yan and Yuqi Zhang and Zehuan Yuan and Bingyue Peng and Xiaobing Liu},
|
| 166 |
+
year={2024},
|
| 167 |
+
eprint={2412.04431},
|
| 168 |
+
archivePrefix={arXiv},
|
| 169 |
+
primaryClass={cs.CV},
|
| 170 |
+
url={https://arxiv.org/abs/2412.04431},
|
| 171 |
+
}
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
```
|
| 175 |
+
@misc{InfinityStar,
|
| 176 |
+
title={InfinityStar: Unified Spacetime AutoRegressive Modeling for Visual Generation},
|
| 177 |
+
author={Jinlai Liu and Jian Han and Bin Yan and Hui Wu and Fengda Zhu and Xing Wang and Yi Jiang and Bingyue Peng and Zehuan Yuan},
|
| 178 |
+
year={2025},
|
| 179 |
+
eprint={2511.04675},
|
| 180 |
+
archivePrefix={arXiv},
|
| 181 |
+
primaryClass={cs.CV},
|
| 182 |
+
url={https://arxiv.org/abs/2511.04675},
|
| 183 |
+
}
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
## License
|
| 187 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
Meissonic/InfinityStar/__pycache__/train.cpython-310.pyc
ADDED
|
Binary file (15.6 kB). View file
|
|
|
Meissonic/InfinityStar/assets/Infinitystar_image_gen_benchmark.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/Infinitystar_videogen_benchmark.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/Infinitystar_videogen_humaneval.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/framework.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/i2v_examples.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/logo.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/reference_image.webp
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/supp_show_images.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/assets/v2v_examples.png
ADDED
|
Git LFS Details
|
Meissonic/InfinityStar/cog.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for Cog ⚙️
|
| 2 |
+
# Reference: https://cog.run/yaml
|
| 3 |
+
|
| 4 |
+
build:
|
| 5 |
+
# set to true if your model requires a GPU
|
| 6 |
+
gpu: true
|
| 7 |
+
|
| 8 |
+
# a list of ubuntu apt packages to install
|
| 9 |
+
system_packages:
|
| 10 |
+
- "libgl1-mesa-glx"
|
| 11 |
+
- "libglib2.0-0"
|
| 12 |
+
|
| 13 |
+
# python version in the form '3.11' or '3.11.4'
|
| 14 |
+
python_version: "3.11"
|
| 15 |
+
|
| 16 |
+
# a list of packages in the format <package-name>==<version>
|
| 17 |
+
python_packages:
|
| 18 |
+
- torch
|
| 19 |
+
- transformers
|
| 20 |
+
- easydict
|
| 21 |
+
- typed-argument-parser
|
| 22 |
+
- seaborn
|
| 23 |
+
- kornia
|
| 24 |
+
- gputil
|
| 25 |
+
- colorama
|
| 26 |
+
- omegaconf
|
| 27 |
+
- pandas
|
| 28 |
+
- timm==0.9.6
|
| 29 |
+
- decord
|
| 30 |
+
- pytz
|
| 31 |
+
- pandas
|
| 32 |
+
- wandb
|
| 33 |
+
- colorama
|
| 34 |
+
- imageio
|
| 35 |
+
- einops
|
| 36 |
+
- openai
|
| 37 |
+
- httpx==0.20.0
|
| 38 |
+
- opencv-python
|
| 39 |
+
- ipython
|
| 40 |
+
|
| 41 |
+
# commands run after the environment is setup
|
| 42 |
+
run:
|
| 43 |
+
- pip install "pydantic<2.0"
|
| 44 |
+
- pip install -U flash-attn --no-build-isolation
|
| 45 |
+
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.6.0/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
|
| 46 |
+
predict: "predict.py:Predictor"
|
Meissonic/InfinityStar/data/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Preparing and Training with Video Metadata
|
| 2 |
+
|
| 3 |
+
This guide walks you through preparing your video metadata, splitting it for efficient training, and running the training scripts.
|
| 4 |
+
|
| 5 |
+
## 1. Prepare Your Data in `.jsonl` Format
|
| 6 |
+
|
| 7 |
+
Your video metadata should be organized in JSON Lines (`.jsonl`) format, where each line is a valid JSON object representing one video.
|
| 8 |
+
|
| 9 |
+
**Example:**
|
| 10 |
+
```json
|
| 11 |
+
{
|
| 12 |
+
"video_path": "data/infinitystar_toy_data/videos/e06b8ca5dbc6.mp4",
|
| 13 |
+
"begin_frame_id": 0,
|
| 14 |
+
"end_frame_id": 120,
|
| 15 |
+
"tarsier2_caption": "The video features an animated character with long light orange hair and brown eyes.",
|
| 16 |
+
"width": 1280,
|
| 17 |
+
"height": 720,
|
| 18 |
+
"h_div_w": 0.5625,
|
| 19 |
+
"fps": 24
|
| 20 |
+
}
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
## 2. Split Metadata for Training
|
| 24 |
+
|
| 25 |
+
For efficient training, large `.jsonl` files can be split into smaller chunks.
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
python3 data/infinitystar_toy_data/split_jsonls_for_training.py --jsonl_folder_list JSONL_DIR --save_dir SAVE_DIR --chunk_size 100
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## 3. Extract Video Features
|
| 32 |
+
|
| 33 |
+
To extract video features, modify the `scripts/extract_video_features.sh` script. Set the `video_data_path` and choose the desired resolution.
|
| 34 |
+
|
| 35 |
+
* **480p (5s):** `pn=0.40M`
|
| 36 |
+
* **480p (10s):** `pn=0.40M` with `video_frames=161`
|
| 37 |
+
* **720p (5s):** `pn=0.90M`
|
| 38 |
+
|
| 39 |
+
Then, run the script:
|
| 40 |
+
```bash
|
| 41 |
+
bash scripts/extract_video_features.sh
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## 4. Run Training Scripts
|
| 45 |
+
|
| 46 |
+
Once your metadata is prepared and features are extracted, you can start training.
|
| 47 |
+
|
| 48 |
+
**480p Training (5s or 10s):**
|
| 49 |
+
```bash
|
| 50 |
+
bash scripts/train_480p.sh
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
**720p Training (only 5s):**
|
| 54 |
+
```bash
|
| 55 |
+
bash scripts/train_720p.sh
|
| 56 |
+
```
|
| 57 |
+
The 480p configuration supports both 5-second and 10-second video training. For 10-second training, ensure that `video_frames` is set to `161` in both `scripts/extract_video_features.sh` and `scripts/train_480p.sh`.
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0001_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0002_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0003_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0004_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0005_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0006_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0007_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0008_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0009_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls/000001/0010_0010_000000100.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/data/infinitystar_toy_data/split_jsonls_for_training.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 FoundationVision
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
import os
|
| 4 |
+
import os.path as osp
|
| 5 |
+
import time
|
| 6 |
+
import itertools
|
| 7 |
+
import shutil
|
| 8 |
+
import glob
|
| 9 |
+
import argparse
|
| 10 |
+
import json
|
| 11 |
+
|
| 12 |
+
import tqdm
|
| 13 |
+
import numpy as np
|
| 14 |
+
import threading
|
| 15 |
+
|
| 16 |
+
def save_lines(lines, filename):
    """Write ``lines`` verbatim to ``filename``, creating parent directories.

    Args:
        lines: Iterable of strings. Each one is written as-is; no newline is
            appended.
        filename: Destination path. An existing file is overwritten.
    """
    parent = osp.dirname(filename)
    # A bare file name has an empty dirname and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'w') as f:
        f.writelines(lines)
|
| 21 |
+
|
| 22 |
+
def get_part_jsonls(save_dir, total_line_number, ext='.jsonl', chunk_size=1000):
    """Plan the output files for splitting ``total_line_number`` lines into chunks.

    WARNING: an existing ``save_dir`` is removed recursively before planning.

    Each planned file is named
    ``<save_dir>/<bucket:06d>/<chunk_id:04d>_<parts:04d>_<num_lines:09d><ext>``
    where ``bucket`` groups 1000 consecutive chunk ids per sub-directory and
    ``num_lines`` is the number of lines that chunk is expected to hold.

    Args:
        save_dir: Output root directory (wiped if it already exists).
        total_line_number: Total number of lines to distribute.
        ext: File extension for the chunk files.
        chunk_size: Maximum number of lines per chunk; must be positive.

    Returns:
        ``(missing, chunk_id2save_files)`` where ``chunk_id2save_files`` maps
        1-based chunk ids to paths and ``missing`` is True when at least one
        planned file does not exist yet.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # Validate before touching the file system so a bad argument can never
    # cost the caller an existing output directory.
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size}')
    if osp.exists(save_dir):
        shutil.rmtree(save_dir)

    # Exact integer ceiling division (no float round-trip).
    parts = -(-total_line_number // chunk_size)
    chunk_id2save_files = {}
    for chunk_id in range(1, parts + 1):
        if chunk_id == parts:
            # The last chunk receives whatever is left over.
            num_of_lines = total_line_number - chunk_size * (parts - 1)
        else:
            num_of_lines = chunk_size
        bucket = (chunk_id - 1) // 1000 + 1
        chunk_id2save_files[chunk_id] = osp.join(
            save_dir,
            f'{bucket:06d}',
            f'{chunk_id:04d}_{parts:04d}_{num_of_lines:09d}{ext}',
        )
    missing = any(not osp.exists(path) for path in chunk_id2save_files.values())
    return missing, chunk_id2save_files
|
| 38 |
+
|
| 39 |
+
def split_large_txt_files(all_lines, chunk_id2save_files):
    """Distribute ``all_lines`` over the planned chunk files.

    The number of lines each chunk receives is read from its file name: the
    last ``_``-separated field of the stem (e.g. ``0001_0010_000000100.jsonl``
    holds 100 lines). Each full chunk is written by ``save_lines`` on its own
    thread; all threads are joined before returning.

    Args:
        all_lines: Sequence of text lines to split, in output order.
        chunk_id2save_files: Mapping of 1-based chunk id to destination path.

    Raises:
        KeyError: If there are more lines than the planned files can hold.
        ValueError: If lines remain that do not fill the current chunk, i.e.
            the plan does not match ``len(all_lines)``.
    """
    def planned_size(cid):
        stem = osp.splitext(osp.basename(chunk_id2save_files[cid]))[0]
        return int(stem.split('_')[-1])

    thread_list = []
    pbar = tqdm.tqdm(total=len(chunk_id2save_files))
    chunk_id = 1
    chunk = []
    target = None  # planned size of the chunk being filled; parsed once per chunk
    try:
        for line in all_lines:
            if target is None:
                target = planned_size(chunk_id)
            chunk.append(line)
            if len(chunk) >= target:
                pbar.update(1)
                writer = threading.Thread(
                    target=save_lines,
                    args=(chunk, chunk_id2save_files[chunk_id]),
                )
                writer.start()
                thread_list.append(writer)
                chunk = []
                target = None
                chunk_id += 1
    finally:
        # Always wait for writers already started, even on error, so no chunk
        # file is left half-written when the exception propagates.
        for thread in thread_list:
            thread.join()
        pbar.close()

    if chunk:
        raise ValueError(
            f'{len(chunk)} line(s) left over after filling the planned chunks; '
            'chunk plan does not match the number of input lines'
        )
|
| 60 |
+
|
| 61 |
+
from multiprocessing import Manager  # retained so the module's importable names are unchanged

# read_jsonl is dispatched through a ThreadPool (same process), so a plain
# threading.Lock is sufficient. Creating Manager().Lock() here would start a
# manager server process at import time, which fails under the "spawn" start
# method because this statement is outside the __main__ guard.
lock = threading.Lock()


def read_jsonl(jsonl_file):
    """Read one ``.jsonl`` file and return its lines with newlines preserved.

    Also advances the module-level progress bar ``pbar`` by one, under
    ``lock``; ``pbar`` must have been assigned before this is called.
    """
    with open(jsonl_file, 'r') as f:
        lines = f.readlines()
    with lock:
        pbar.update(1)
    return lines
|
| 70 |
+
|
| 71 |
+
def read_jsonls(jsonl_files, worker):
    """Load every line from ``jsonl_files`` and return them in shuffled order.

    A single file is streamed line by line; several files are read
    concurrently by ``read_jsonl`` on a thread pool. Progress is reported
    through the module-level ``pbar``, which this function (re)assigns.

    Args:
        jsonl_files: List of ``.jsonl`` paths.
        worker: Number of threads used when more than one file is given.

    Returns:
        A list with all lines (newlines preserved), shuffled in place with
        ``np.random.shuffle``.
    """
    global pbar
    from multiprocessing.pool import ThreadPool

    print(f'[Data Loading] Reading {len(jsonl_files)} meta files...')
    all_lines = []
    if len(jsonl_files) == 1:
        single = jsonl_files[0]
        # Chunk files produced by this script carry their line count as the
        # last "_" field of the stem; any other name just gets total=0.
        try:
            lines_num = int(osp.splitext(single)[0].split('_')[-1])
        except ValueError:
            lines_num = 0
        pbar = tqdm.tqdm(total=lines_num)
        with open(single, 'r') as f:
            for line in f:
                pbar.update(1)
                all_lines.append(line)
    else:
        # One tick per file; read_jsonl updates this bar.
        pbar = tqdm.tqdm(total=len(jsonl_files))
        with ThreadPool(worker) as pool:
            for file_lines in pool.map(read_jsonl, jsonl_files):
                all_lines.extend(file_lines)
    np.random.shuffle(all_lines)
    return all_lines
|
| 93 |
+
|
| 94 |
+
if __name__ == '__main__':
    # Command-line entry point: gather *.jsonl files from the given folders,
    # load and shuffle their lines, then re-split them into fixed-size chunk
    # files under --save_dir (which is wiped first by get_part_jsonls).
    parser = argparse.ArgumentParser()
    parser.add_argument('--jsonl_folder_list', type=str, default=[], nargs='+', help='patha pathb pathc')
    parser.add_argument('--save_dir', type=str, default='')
    parser.add_argument('--chunk_size', type=int, default=1000)
    parser.add_argument('--worker', type=int, default=128)
    parser.add_argument(
        '--repeat', type=int, default=1000,
        help='number of times the loaded lines are replicated before splitting '
             '(1000 reproduces the previously hard-coded behaviour; use 1 to keep '
             'each record exactly once)',
    )
    args = parser.parse_args()
    if args.repeat < 1:
        parser.error('--repeat must be >= 1')

    t1 = time.time()
    jsonl_files = []
    for item in args.jsonl_folder_list:
        jsonl_files += glob.glob(osp.join(item, '*.jsonl'))
    if not jsonl_files:
        # Stop before get_part_jsonls removes save_dir for nothing.
        parser.error('no .jsonl files found in --jsonl_folder_list')
    np.random.shuffle(jsonl_files)

    lines = read_jsonls(jsonl_files, args.worker)
    lines = lines * args.repeat
    print(f'total {len(lines)} lines')
    line_num = len(lines)
    missing, chunk_id2save_files = get_part_jsonls(args.save_dir, line_num, chunk_size=args.chunk_size)

    split_large_txt_files(lines, chunk_id2save_files)
    t2 = time.time()
    print(f'split takes {t2-t1}s')
|
Meissonic/InfinityStar/data/infinitystar_toy_data/videos/e06b8ca5dbc6.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aba75b0f17a90f9a150bd331f36b64ad4ef5bd298c3cf09e6b77a005b70b8df
|
| 3 |
+
size 4908102
|
Meissonic/InfinityStar/data/interactive_toy_videos/002a061bdbc110ca8fb48e7e0a663c94/0000_refine_720p.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b83cf64485c51f1cbbfdc1d627d7ce15de72ddce8c46de54adaf1231bf4a9313
|
| 3 |
+
size 8972169
|
Meissonic/InfinityStar/data/interactive_toy_videos/002a061bdbc110ca8fb48e7e0a663c94/prompt.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The office is tidy, with a large desk covered in papers, a laptop, and a cup of coffee. A man in a white shirt sits at the desk, typing on the laptop keyboard. A desk lamp is turned on, casting light on the workspace.
|
| 2 |
+
The man stops typing and picks up the cup of coffee from the desk.
|
| 3 |
+
The man takes a sip from the coffee cup.
|
| 4 |
+
The man sets the coffee cup down and opens a notebook lying next to the laptop.
|
Meissonic/InfinityStar/data/interactive_toy_videos/0037784abafa76e2b84fe746750a7988/0000_refine_720p.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1f64057df76d22164b3104c50894f61927aff7897cc15b684ead3622f231937
|
| 3 |
+
size 15799597
|
Meissonic/InfinityStar/data/interactive_toy_videos/0037784abafa76e2b84fe746750a7988/prompt.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A young boy wearing a yellow t-shirt and denim shorts is in a backyard garden. A red ball, a blue watering can, and a green garden hose lie on the grass nearby. The boy is standing next to a flower bed filled with colorful flowers, holding the blue watering can in his right hand. The sun is shining brightly overhead.
|
| 2 |
+
The boy lifts the watering can and starts pouring water onto the flowers in the flower bed.
|
| 3 |
+
The boy sets the watering can down on the grass and picks up the red ball with both hands.
|
| 4 |
+
The boy throws the red ball forward into the garden while standing near the flower bed.
|
Meissonic/InfinityStar/data/interactive_toy_videos/00a79efb495c29e082c246e9ca9a7e8f/0000_refine_720p.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd141840a07bb0b06f87df221eec2b705417a8aad13622f7b2298cf91d2eb2c7
|
| 3 |
+
size 10632210
|
Meissonic/InfinityStar/data/interactive_toy_videos/00a79efb495c29e082c246e9ca9a7e8f/prompt.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A young woman dressed in a light gray hoodie and black leggings is sitting on a wooden bench in a city park. Around her, there are green trees, a paved walking path, and a metal water bottle placed on the bench beside her. She is holding a closed book in her lap and looking ahead thoughtfully. The sky is clear with soft afternoon sunlight filtering through the leaves.
|
| 2 |
+
The woman opens the book and begins to read, her eyes scanning the pages.
|
| 3 |
+
The woman lifts the metal water bottle and takes a sip from it.
|
| 4 |
+
The woman closes the book and looks up, observing the park surroundings.
|
Meissonic/InfinityStar/data/interactive_toy_videos/011341aa8bea615b76d69423862f5f31/0000_refine_720p.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b53e8ebcffd4dd3a6a92acb4f7c836ec51ff874258d3c43b2aa56387b06c4384
|
| 3 |
+
size 13742016
|
Meissonic/InfinityStar/data/interactive_toy_videos/011341aa8bea615b76d69423862f5f31/prompt.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The garden is filled with blooming flowers and a wooden bench near a stone path. A watering can and a pair of gardening gloves rest on the bench. A woman in a light green dress stands by the bench, holding a small potted plant with soil visible in the pot. She looks at the plant attentively.
|
| 2 |
+
The woman places the potted plant on the bench next to the watering can.
|
| 3 |
+
The woman picks up the watering can from the bench and lifts it.
|
| 4 |
+
The woman waters the flowers along the stone path using the watering can.
|
Meissonic/InfinityStar/data/interactive_toy_videos/012900f0605d2e4777119aeefaa7f31b/0000_refine_720p.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:933e9fd3b2dfe640dc193357576dd8f7f894cdde1e2e9f7eba753de09a5a1ef7
|
| 3 |
+
size 12703988
|
Meissonic/InfinityStar/data/interactive_toy_videos/012900f0605d2e4777119aeefaa7f31b/prompt.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A young boy wearing a yellow t-shirt and denim shorts stands in a park next to a wooden bench. On the bench lies a red soccer ball and a blue backpack. Trees with green leaves surround the area, and sunlight filters through the branches. The boy looks at the soccer ball while holding the straps of his backpack.
|
| 2 |
+
The boy bends down and picks up the red soccer ball from the bench.
|
| 3 |
+
The boy holds the soccer ball with both hands and begins to bounce it on the ground.
|
| 4 |
+
The boy kicks the soccer ball forward, sending it rolling across the grass.
|
Meissonic/InfinityStar/evaluation/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Overview
|
| 2 |
+
To facilitate reproducibility and evaluation, we provide the rewritten prompts used in our VBench evaluations. After generating videos with our inference script, you can evaluate their performance using the scoring tools available at [VBench](https://github.com/Vchitect/VBench).
|
Meissonic/InfinityStar/evaluation/VBench_rewrited_prompt.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Meissonic/InfinityStar/infinity/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 FoundationVision
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
Meissonic/InfinityStar/infinity/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (141 Bytes). View file
|
|
|
Meissonic/InfinityStar/infinity/dataset/__pycache__/build.cpython-310.pyc
ADDED
|
Binary file (7.53 kB). View file
|
|
|
Meissonic/InfinityStar/infinity/dataset/__pycache__/dataset_joint_vi.cpython-310.pyc
ADDED
|
Binary file (21.8 kB). View file
|
|
|
Meissonic/InfinityStar/infinity/dataset/build.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 FoundationVision
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
|
| 4 |
+
import datetime
|
| 5 |
+
import os
|
| 6 |
+
import os.path as osp
|
| 7 |
+
import random
|
| 8 |
+
import subprocess
|
| 9 |
+
from functools import partial
|
| 10 |
+
from typing import Optional
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
import pytz
|
| 14 |
+
|
| 15 |
+
from infinity.dataset.dataset_joint_vi import JointViIterableDataset
|
| 16 |
+
from infinity.utils.sequence_parallel import SequenceParallelManager as sp_manager
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
from grp import getgrgid
|
| 20 |
+
from pwd import getpwuid
|
| 21 |
+
except:
|
| 22 |
+
pass
|
| 23 |
+
import PIL.Image as PImage
|
| 24 |
+
from PIL import ImageFile
|
| 25 |
+
import numpy as np
|
| 26 |
+
from torchvision.transforms import transforms
|
| 27 |
+
from torchvision.transforms.functional import resize, to_tensor
|
| 28 |
+
import torch.distributed as tdist
|
| 29 |
+
|
| 30 |
+
from torchvision.transforms import InterpolationMode
|
| 31 |
+
bicubic = InterpolationMode.BICUBIC
|
| 32 |
+
lanczos = InterpolationMode.LANCZOS
|
| 33 |
+
PImage.MAX_IMAGE_PIXELS = (1024 * 1024 * 1024 // 4 // 3) * 5
|
| 34 |
+
ImageFile.LOAD_TRUNCATED_IMAGES = False
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def time_str(fmt='[%m-%d %H:%M:%S]'):
    """Return the current time in the Asia/Shanghai zone formatted with ``fmt``."""
    shanghai = pytz.timezone('Asia/Shanghai')
    now = datetime.datetime.now(tz=shanghai)
    return now.strftime(fmt)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def normalize_01_into_pm1(x):
    """Map values from [0, 1] to [-1, 1] via ``x * 2 - 1``.

    ``x`` must expose torch-style ``add`` / ``add_``; the input itself is not
    modified because the first ``add`` is out-of-place.
    """
    doubled = x.add(x)
    doubled.add_(-1)  # in-place on the fresh result only
    return doubled
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def denormalize_pm1_into_01(x):
    """Map values from [-1, 1] back to [0, 1] via ``(x + 1) * 0.5``.

    ``x`` must expose torch-style ``add`` / ``mul_``; the input itself is not
    modified because the first ``add`` is out-of-place.
    """
    shifted = x.add(1)
    shifted.mul_(0.5)  # in-place on the fresh result only
    return shifted
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def center_crop_arr(pil_image, image_size):
    """
    Resize so the shorter side equals ``image_size``, then center-crop a square.

    Center cropping implementation from ADM.
    https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126
    """
    # Halve with a box filter for as long as the shorter side is at least
    # twice the target size.
    shortest = min(*pil_image.size)
    while shortest >= 2 * image_size:
        halved = tuple(side // 2 for side in pil_image.size)
        pil_image = pil_image.resize(halved, resample=PImage.BOX)
        shortest = min(*pil_image.size)

    # Final Lanczos resize bringing the shorter side to image_size.
    factor = image_size / shortest
    target = tuple(round(side * factor) for side in pil_image.size)
    pil_image = pil_image.resize(target, resample=PImage.LANCZOS)

    pixels = np.array(pil_image)
    top = (pixels.shape[0] - image_size) // 2
    left = (pixels.shape[1] - image_size) // 2
    return PImage.fromarray(pixels[top: top + image_size, left: left + image_size])
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class RandomResize:
    """Resize an image to a size drawn uniformly from ``[reso_lb, reso_ub]``.

    ``reso_lb`` is ``final_reso``; ``reso_ub`` is
    ``mid_reso + (mid_reso - final_reso) / 8`` rounded to a multiple of 4 and
    never smaller than ``mid_reso``.
    """

    def __init__(self, mid_reso, final_reso, interpolation):
        stretched = mid_reso + (mid_reso - final_reso) / 8
        snapped = round(stretched / 4) * 4
        self.reso_lb = final_reso
        self.reso_ub = max(snapped, mid_reso)
        self.interpolation = interpolation

    def __call__(self, img):
        target = random.randint(self.reso_lb, self.reso_ub)
        return resize(img, size=target, interpolation=self.interpolation)

    def __repr__(self):
        return f'RandomResize(reso=({self.reso_lb}, {self.reso_ub}), interpolation={self.interpolation})'
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def load_save(reso=512):
    """Developer utility: crop five fixed images and stitch them side by side.

    Reads ``1.jpg``, ``2.jpg``, ``3.png``, ``4.png`` and ``5.png`` from a
    hard-coded source folder, resizes each (Lanczos) so its shorter side is
    ``reso`` and center-crops it to ``reso`` x ``reso``. Each crop is saved
    next to its source as ``crop<name>`` (``.jpg`` becomes ``.png``), and a
    horizontal strip of all crops is saved as ``junfeng.png``.

    Args:
        reso: Side length of the square crops. It was previously ignored in
            favour of a hard-coded 512, which remains the default.

    Raises:
        FileNotFoundError: If one of the expected source images is missing.
    """
    import os
    from PIL import Image as PImage
    from torchvision.transforms import transforms, InterpolationMode
    aug = transforms.Compose([
        transforms.Resize(reso, interpolation=InterpolationMode.LANCZOS),
        transforms.CenterCrop((reso, reso))
    ])
    src_folder = r'C:\Users\16333\Pictures\imgs_to_visual_v2'
    ls = [os.path.join(src_folder, x) for x in ('1.jpg', '2.jpg', '3.png', '4.png', '5.png')]
    print(ls)
    imgs = []
    for fname in ls:
        # Explicit check instead of assert, which is stripped under -O.
        if not os.path.exists(fname):
            raise FileNotFoundError(fname)
        with PImage.open(fname) as img:
            img = img.convert('RGB')
            img = aug(img)
            imgs.append(img)
            dst_d, dst_f = os.path.split(fname)
            dst = os.path.join(dst_d, f'crop{dst_f.replace(".jpg", ".png")}')
            img.save(dst)

    # All crops share the same size, so the strip is len(imgs) crops wide.
    W, H = imgs[0].size
    WW = W * len(imgs)
    new_im = PImage.new('RGB', (WW, H))
    x_offset = 0
    for img in imgs:
        new_im.paste(img, (x_offset, 0))
        x_offset += W
    dst = os.path.join(src_folder, 'junfeng.png')
    new_im.save(dst)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def print_aug(transform, label):
    """Print a transform pipeline, one step per line, under a ``label`` header.

    If ``transform`` has a ``transforms`` attribute its items are printed
    individually; otherwise ``transform`` itself is printed.
    """
    print(f'Transform {label} = ')
    steps = transform.transforms if hasattr(transform, 'transforms') else [transform]
    for step in steps:
        print(step)
    print('---------------------------\n')
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def build_t2i_dataset(
    args,
    data_path: str,
    max_caption_len: int,
    short_prob=0.2,
    load_vae_instead_of_image=False
):
    """Build the streaming text-to-image dataset described by ``args``.

    Only ``args.use_streaming_dataset`` being truthy is supported. Shard
    count and rank come from the sequence-parallel manager when sequence
    parallelism is on, otherwise from ``torch.distributed``.

    Raises:
        ValueError: If ``args.use_streaming_dataset`` is falsy.
    """
    if not args.use_streaming_dataset:
        raise ValueError(f'args.use_streaming_dataset={args.use_streaming_dataset} unsupported')

    num_replicas = sp_manager.get_sp_group_nums() if sp_manager.sp_on() else tdist.get_world_size()
    rank = sp_manager.get_sp_group_rank() if sp_manager.sp_on() else tdist.get_rank()
    # NOTE(review): T2IIterableDataset is not among this module's visible
    # imports - confirm where it is expected to come from.
    return T2IIterableDataset(
        data_path,
        max_caption_len=max_caption_len,
        short_prob=short_prob,
        load_vae_instead_of_image=load_vae_instead_of_image,
        buffersize=args.iterable_data_buffersize,
        pn=args.pn,
        online_t5=args.online_t5,
        batch_size=args.batch_size,
        num_replicas=num_replicas,
        rank=rank,
        dataloader_workers=args.workers,
        dynamic_resolution_across_gpus=args.dynamic_resolution_across_gpus,
        enable_dynamic_length_prompt=args.enable_dynamic_length_prompt,
        seed=args.seed,
        dynamic_scale_schedule=args.dynamic_scale_schedule,
    )
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def build_joint_dataset(
    args,
    image_data_path: str,
    video_data_path: str,
    max_caption_len: int,
    short_prob=0.2,
    load_vae_instead_of_image=False
):
    """Build the streaming joint image/video dataset described by ``args``.

    Only ``args.use_streaming_dataset`` being truthy is supported. Shard
    count and rank come from the sequence-parallel manager when sequence
    parallelism is on, otherwise from ``torch.distributed``. The full
    ``args`` object is also forwarded as ``other_args``.

    Raises:
        ValueError: If ``args.use_streaming_dataset`` is falsy.
    """
    if not args.use_streaming_dataset:
        raise ValueError(f'args.use_streaming_dataset={args.use_streaming_dataset} unsupported')

    num_replicas = sp_manager.get_sp_group_nums() if sp_manager.sp_on() else tdist.get_world_size()
    rank = sp_manager.get_sp_group_rank() if sp_manager.sp_on() else tdist.get_rank()
    return JointViIterableDataset(
        image_meta_folder=image_data_path,
        video_meta_folder=video_data_path,
        max_caption_len=max_caption_len,
        short_prob=short_prob,
        load_vae_instead_of_image=load_vae_instead_of_image,
        buffersize=args.iterable_data_buffersize,
        pn=args.pn,
        video_fps=args.video_fps,
        num_frames=args.video_frames,
        online_t5=args.online_t5,
        num_replicas=num_replicas,
        rank=rank,
        dataloader_workers=args.workers,
        dynamic_resolution_across_gpus=args.dynamic_resolution_across_gpus,
        enable_dynamic_length_prompt=args.enable_dynamic_length_prompt,
        dynamic_scale_schedule=args.dynamic_scale_schedule,
        add_motion_score2caption=args.add_motion_score2caption,
        seed=args.seed,
        other_args=args,
    )
|
| 187 |
+
|
| 188 |
+
def pil_load(path: str, proposal_size):
    """Open the image at ``path`` and return it converted to RGB.

    When the shorter side exceeds ``proposal_size``, the size passed to
    ``Image.draft`` is scaled so the shorter side equals ``proposal_size``;
    otherwise the original size is passed.
    """
    with open(path, 'rb') as fp:
        image: PImage.Image = PImage.open(fp)
        width, height = image.width, image.height
        short_side = min(height, width)
        if short_side > proposal_size:
            shrink = proposal_size / short_side
            width, height = round(shrink * width), round(shrink * height)
        image.draft('RGB', (width, height))
        # Convert while the file is still open.
        image = image.convert('RGB')
    return image
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def rewrite(im: PImage.Image, file: str, info: str):
    """Re-encode ``im`` over ``file`` while keeping its owner, group and mode.

    The image is first saved under the file's base name in the current
    working directory (quality 100; uncompressed for TIFF, lossless for
    WebP), then moved over ``file`` with ``sudo mv`` and the original
    owner/group and permission bits are restored with ``sudo chown`` /
    ``sudo chmod``.

    ``getpwuid`` / ``getgrgid`` come from a guarded import at module top, so
    this raises NameError on platforms without ``pwd`` / ``grp``.

    Args:
        im: Image to write.
        file: Path of the existing file to replace.
        info: Free-form tag included in the log line.
    """
    kw = dict(quality=100)
    lowered = file.lower()
    if lowered.endswith(('.tif', '.tiff')):
        kw['compression'] = 'none'
    elif lowered.endswith('.webp'):
        kw['lossless'] = True

    st = os.stat(file)
    uname = getpwuid(st.st_uid).pw_name
    gname = getgrgid(st.st_gid).gr_name
    mode = oct(st.st_mode)[-3:]  # last three octal digits, e.g. '644'

    local_file = osp.basename(file)
    im.save(local_file, **kw)
    print(f'************* <REWRITE: {info}> ************* @ {file}')
    # Argument lists (no shell) so paths with spaces or shell metacharacters
    # are passed through literally. As with the previous ';'-joined command
    # line, each step runs regardless of the previous step's exit status.
    for cmd in (
        ['sudo', 'mv', local_file, file],
        ['sudo', 'chown', f'{uname}:{gname}', file],
        ['sudo', 'chmod', mode, file],
    ):
        subprocess.call(cmd)
|
Meissonic/InfinityStar/infinity/dataset/dataset_joint_vi.py
ADDED
|
@@ -0,0 +1,689 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 FoundationVision
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
import glob
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
from os import path as osp
|
| 7 |
+
from typing import List, Tuple
|
| 8 |
+
import json
|
| 9 |
+
import hashlib
|
| 10 |
+
import copy
|
| 11 |
+
import collections
|
| 12 |
+
|
| 13 |
+
import tqdm
|
| 14 |
+
import numpy as np
|
| 15 |
+
import torch
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from decord import VideoReader
|
| 18 |
+
from PIL import Image as PImage
|
| 19 |
+
from torchvision.transforms.functional import to_tensor
|
| 20 |
+
from torch.utils.data import IterableDataset, DataLoader
|
| 21 |
+
import torch.distributed as tdist
|
| 22 |
+
from PIL import Image
|
| 23 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 24 |
+
|
| 25 |
+
from infinity.schedules.dynamic_resolution import get_dynamic_resolution_meta
|
| 26 |
+
from infinity.utils.video_decoder import EncodedVideoDecord, EncodedVideoOpencv
|
| 27 |
+
from transformers import AutoTokenizer
|
| 28 |
+
|
| 29 |
+
def transform(pil_img, tgt_h, tgt_w):
    """Resize a PIL image to cover ``tgt_h x tgt_w``, center-crop it and rescale it.

    The image is resized with LANCZOS resampling, keeping its aspect ratio, so
    that it covers the whole target rectangle. The central ``tgt_h x tgt_w``
    window is then cut out, converted with ``to_tensor`` and mapped through
    ``2 * x - 1`` (for 8-bit input ``to_tensor`` yields values in [0, 1], so
    the result spans [-1, 1]).

    Args:
        pil_img: Source ``PIL.Image``.
        tgt_h: Target height in pixels.
        tgt_w: Target width in pixels.

    Returns:
        The tensor returned by ``to_tensor`` for the cropped window, rescaled
        with ``2 * x - 1``.
    """
    width, height = pil_img.size
    aspect = width / height
    if aspect <= tgt_w / tgt_h:
        resized_width = tgt_w
        # int() truncates; floating-point error can leave the quotient a hair
        # below tgt_h, which would make crop_y negative and the crop too small.
        resized_height = max(tgt_h, int(tgt_w / aspect))
    else:
        resized_height = tgt_h
        resized_width = max(tgt_w, int(aspect * tgt_h))
    pil_img = pil_img.resize((resized_width, resized_height), resample=PImage.LANCZOS)
    # crop the center out
    arr = np.array(pil_img)
    crop_y = (arr.shape[0] - tgt_h) // 2
    crop_x = (arr.shape[1] - tgt_w) // 2
    im = to_tensor(arr[crop_y: crop_y + tgt_h, crop_x: crop_x + tgt_w])
    return im.add(im).add_(-1)
|
| 45 |
+
|
| 46 |
+
def get_prompt_id(prompt):
    """Return the hexadecimal MD5 digest of ``prompt`` encoded as UTF-8."""
    return hashlib.md5(prompt.encode('utf-8')).hexdigest()
|
| 51 |
+
|
| 52 |
+
def prepend_motion_score(prompt, motion_score):
    """Prefix ``prompt`` with a ``<<<motion_score: X>>>`` tag.

    The score is first rounded to the nearest integer with ``round`` and then
    printed with one decimal place, so the tag always ends in ``.0``.
    """
    rounded_score = round(motion_score)
    return f'<<<motion_score: {rounded_score:.1f}>>> {prompt}'
|
| 54 |
+
|
| 55 |
+
class VideoReaderWrapper(VideoReader):
    """``VideoReader`` subclass that seeks back to frame 0 after each access.

    ``seek(0)`` is called once right after construction and again after every
    ``__getitem__`` call, before the fetched frames are returned.
    """

    def __init__(self, *args, **kwargs):
        super(VideoReaderWrapper, self).__init__(*args, **kwargs)
        self.seek(0)

    def __getitem__(self, key):
        fetched = super(VideoReaderWrapper, self).__getitem__(key)
        # Rewind before handing the frames back to the caller.
        self.seek(0)
        return fetched
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class JointViIterableDataset(IterableDataset):
|
| 66 |
+
def __init__(
    self,
    video_meta_folder: str = '',
    buffersize: int = 1000000 * 300,
    seed: int = 0,
    pn: str = '',
    video_fps: int = 1,
    num_replicas: int = 1,
    rank: int = 0,
    dataloader_workers: int = 2,
    dynamic_resolution_across_gpus: bool = True,
    enable_dynamic_length_prompt: bool = True,
    shuffle: bool = True,
    short_prob: float = 0.2,
    verbose=False,
    temp_dir= "/dev/shm",
    add_motion_score2caption=False,
    other_args=None,
    **kwargs,
):
    """Store the configuration, load this rank's metadata and pre-pack it into batches.

    Args:
        video_meta_folder: Folder searched (``*/*.jsonl``) for metadata shards.
        buffersize: Stored as ``self.buffer_size``.
        seed: Base seed of the per-epoch random generators.
        pn: Key used to index the dynamic-resolution table.
        video_fps: Sampling rate used to convert durations into frame counts.
        num_replicas: Number of data-parallel ranks.
        rank: Index of this rank.
        dataloader_workers: DataLoader worker count (clamped to at least 1).
        dynamic_resolution_across_gpus: Stored as-is.
        enable_dynamic_length_prompt: Enables random caption shortening.
        shuffle: Stored as-is.
        short_prob: Probability used when shortening captions.
        verbose: Stored as-is.
        temp_dir: Scratch directory; a trailing ``/`` is stripped.
        add_motion_score2caption: Whether image captions get a motion-score tag.
        other_args: Namespace with the remaining options. Its attributes are
            read unconditionally below, so despite the ``None`` default it
            must be provided.
        **kwargs: Accepted and ignored.
    """
    self.video_meta_folder = video_meta_folder
    self.pn = pn
    self.verbose = verbose
    self.buffer_size = buffersize
    self.num_replicas = num_replicas
    self.rank = rank
    # Worker ids default to 0 here; set_global_worker_id() overwrites them
    # when iteration starts.
    self.worker_id = 0
    self.global_worker_id = 0
    self.short_prob = short_prob
    self.dataloader_workers = max(1, dataloader_workers)
    self.shuffle = shuffle
    self.global_workers = self.num_replicas * self.dataloader_workers
    self.add_motion_score2caption = add_motion_score2caption
    self.seed = seed
    self.text_tokenizer = other_args.text_tokenizer
    self.feature_extraction = other_args.cache_check_mode < 0 # no sequence packing, for feature extraction
    self.epoch_generator = None
    self.epoch_worker_generator = None
    self.epoch_global_worker_generator = None
    self.epoch_rank_generator = None
    self.other_args = other_args
    self.drop_long_video = other_args.drop_long_video
    self.dynamic_resolution_across_gpus = dynamic_resolution_across_gpus
    self.enable_dynamic_length_prompt = enable_dynamic_length_prompt
    # Creates the four generators; needs seed, rank and the worker ids set above.
    self.set_epoch(other_args.epoch)
    self.temporal_compress_rate = other_args.temporal_compress_rate
    self.dynamic_resolution_h_w, self.h_div_w_templates = get_dynamic_resolution_meta(other_args.dynamic_scale_schedule, other_args.video_frames) # here video_frames is the max video frames
    self.train_h_div_w_list = self.h_div_w_templates
    self.video_fps = video_fps
    # Frame limits converted to whole seconds at the sampling rate.
    self.min_training_duration = (other_args.min_video_frames-1) // self.video_fps
    self.max_training_duration = (other_args.video_frames-1) // self.video_fps
    self.append_duration2caption = other_args.append_duration2caption
    print(f"{self.rank=} dataset {self.seed=}, {self.append_duration2caption=} add_motion_score2caption={add_motion_score2caption}, {self.min_training_duration=} {self.max_training_duration=}, cache_check_mode={self.other_args.cache_check_mode}")
    self.token_cache_dir = other_args.token_cache_dir
    self.use_vae_token_cache = other_args.use_vae_token_cache
    self.allow_online_vae_feature_extraction = other_args.allow_online_vae_feature_extraction
    self.use_text_token_cache = other_args.use_text_token_cache
    self.max_video_frames = other_args.video_frames
    self.cached_video_frames = other_args.cached_video_frames # cached max video frames
    self.image_batches_multiply = other_args.image_batches_multiply
    self.down_size_limit = other_args.down_size_limit
    # addition_pn_list arrives as a JSON-encoded string.
    self.addition_pn_list = json.loads(other_args.addition_pn_list)
    self.video_caption_type = other_args.video_caption_type
    self.train_max_token_len = other_args.train_max_token_len
    self.duration_resolution = other_args.duration_resolution
    # NOTE(review): duplicate of the assignment made before the first print.
    self.append_duration2caption = other_args.append_duration2caption
    self.device = other_args.device
    print(f'self.down_size_limit: {self.down_size_limit}')
    self.max_text_len = other_args.tlen
    self.temp_dir = temp_dir.rstrip("/")
    # Metadata loading and batch packing happen eagerly at construction time.
    self.metas = self.get_meta()
    self.batches, self.batch_nums = self.form_batches(self.metas)
    print(f'{num_replicas=}, {rank=}, {dataloader_workers=}, {self.batch_nums=}, {self.drop_long_video=} {self.max_text_len=}')
|
| 139 |
+
|
| 140 |
+
def append_duration_info(self, meta, mapped_duration):
    """Prefix ``meta['caption']`` with a ``<<<t=...s>>>`` tag, in place.

    Returns:
        The same ``meta`` dict that was passed in.
    """
    duration_tag = f'<<<t={mapped_duration}s>>>'
    meta['caption'] = duration_tag + meta['caption']
    return meta
|
| 143 |
+
|
| 144 |
+
def get_captions_lens(self, captions):
    """Return the number of attended (non-padding) tokens of each caption.

    Args:
        captions: Batch of caption strings handed to
            ``self.other_args.text_tokenizer``.

    Returns:
        A list with one token count per caption.
    """
    tokenizer = self.other_args.text_tokenizer
    if self.other_args.text_tokenizer_type == 'flan_t5':
        tokens = tokenizer(text=captions, max_length=tokenizer.model_max_length, padding='max_length', truncation=True, return_tensors='pt')
        # The mask is only summed and converted to a Python list, so it stays
        # on the host; copying it to the GPU first was pure overhead and
        # failed on machines without CUDA.
        lens = tokens.attention_mask.sum(dim=-1).tolist()
    else: # umt5-xxl
        _, mask = tokenizer(captions, return_mask=True, add_special_tokens=True)
        lens = mask.gt(0).sum(dim=1).tolist()
    return lens
|
| 153 |
+
|
| 154 |
+
def get_meta(self):
    """Load this rank's jsonl metadata and attach caption and token bookkeeping.

    Every ``*/*.jsonl`` file under ``self.video_meta_folder`` is listed,
    shuffled, split across ranks with a stride and parsed line by line.
    Each record gets a ``caption``, video records also get
    ``sample_frames``, and ``cum_text_visual_tokens`` is initialised from
    the scale schedule. Text token counts are added afterwards by
    ``append_text_tokens``.

    Returns:
        The shuffled list of metadata dicts kept for this rank.

    Raises:
        ValueError: If a record has neither ``video_path`` nor ``image_path``.
    """
    part_filepaths = sorted(glob.glob(osp.join(self.video_meta_folder, '*/*.jsonl')))
    # epoch_generator is seeded without the rank (see set_generator), so the
    # strided split below partitions one common ordering.
    self.epoch_generator.shuffle(part_filepaths)
    print(f'jsonls sample: {part_filepaths[:4]}')
    if self.num_replicas > 1:
        part_filepaths = part_filepaths[self.rank::self.num_replicas]

    metas = []
    pbar = tqdm.tqdm(total=len(part_filepaths))
    mapped_duration2freqs = collections.defaultdict(int)
    total, corrupt = 0, 0
    stop_read = False
    # Template closest to 9/16; its scale schedule is used for every record
    # when counting visual tokens below.
    rough_h_div_w = self.h_div_w_templates[np.argmin(np.abs((9/16-self.h_div_w_templates)))]
    for part_filepath in part_filepaths:
        if stop_read:
            break
        pbar.update(1)
        with open(part_filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            for line in lines:
                total += 1
                try:
                    meta = json.loads(line)
                except Exception as e:
                    # Unparseable lines are counted and skipped.
                    print(e)
                    corrupt += 1
                    print(e, corrupt, total, corrupt/total)
                    continue
                if 'h_div_w' in meta:
                    del meta['h_div_w']
                if 'video_path' in meta:
                    begin_frame_id, end_frame_id, fps = meta['begin_frame_id'], meta['end_frame_id'], meta['fps']
                    real_duration = (end_frame_id - begin_frame_id) / fps
                    # Round the duration down to a multiple of duration_resolution.
                    mapped_duration = int(real_duration / self.duration_resolution) * self.duration_resolution
                    if mapped_duration < self.min_training_duration:
                        continue
                    if mapped_duration > self.max_training_duration:
                        if self.drop_long_video:
                            continue
                        else:
                            # Long clips are kept but clamped to the maximum duration.
                            mapped_duration = self.max_training_duration
                    caption_type = 'tarsier2_caption'
                    if ('MiniCPM_V_2_6_caption' in meta) and meta['MiniCPM_V_2_6_caption']:
                        caption_type = self.epoch_rank_generator.choice(['tarsier2_caption', 'MiniCPM_V_2_6_caption'])
                    meta['caption'] = meta[caption_type]
                    if self.enable_dynamic_length_prompt and (self.epoch_rank_generator.random() < self.short_prob):
                        meta['caption'] = self.random_drop_sentences(meta['caption'])
                    if 'quality_prompt' in meta:
                        meta['caption'] = meta['caption'] + ' ' + meta['quality_prompt']
                    if self.append_duration2caption:
                        meta = self.append_duration_info(meta, mapped_duration)
                    assert meta['caption']
                    sample_frames = int(mapped_duration * self.video_fps + 1)
                    # Temporal length after compression; key into pt2scale_schedule.
                    pt = (sample_frames-1) // self.temporal_compress_rate + 1
                    scale_schedule = self.dynamic_resolution_h_w[rough_h_div_w][self.pn]['pt2scale_schedule'][pt]
                    meta['sample_frames'] = sample_frames
                elif 'image_path' in meta:
                    # Images are tallied under duration -1 and use the pt == 1 schedule.
                    mapped_duration = -1
                    scale_schedule = self.dynamic_resolution_h_w[rough_h_div_w][self.pn]['pt2scale_schedule'][1]
                    if not meta['text']:
                        meta['caption'] = meta['long_caption']
                    elif not meta['long_caption']:
                        meta['caption'] = meta['text']
                    else:
                        if self.epoch_rank_generator.random() < self.other_args.short_cap_prob:
                            meta['caption'] = meta['text']
                        else:
                            meta['caption'] = meta['long_caption']
                    if self.enable_dynamic_length_prompt and (self.epoch_rank_generator.random() < self.short_prob):
                        meta['caption'] = self.random_drop_sentences(meta['caption'])
                else:
                    raise ValueError(f'video_path or image_path not exist in meta: {meta}')

                # Cumulative visual token count after each scale of the schedule;
                # append_text_tokens() later adds the text token count to it.
                cum_visual_tokens = np.array(scale_schedule).prod(-1).cumsum()
                meta['cum_text_visual_tokens'] = cum_visual_tokens
                if self.other_args.cache_check_mode == 1: # check at the beginning: keep only cached samples
                    if self.exists_cache_file(meta):
                        metas.append(meta)
                elif self.other_args.cache_check_mode == -1: # keep only samples without a cache file, used for token caching
                    if not self.exists_cache_file(meta):
                        metas.append(meta)
                else:
                    metas.append(meta)
                mapped_duration2freqs[mapped_duration] += 1
                if (self.other_args.restrict_data_size > 0) and (len(metas) > self.other_args.restrict_data_size / self.num_replicas):
                    # Leave the line loop now and the file loop via stop_read.
                    stop_read = True
                    break

    # metas = sorted(metas, key=lambda x: -x['text_visual_tokens'])

    # append text tokens
    metas = self.append_text_tokens(metas)

    self.epoch_rank_generator.shuffle(metas)
    # NOTE(review): raises ZeroDivisionError when records were counted but
    # none was kept (len(metas) == 0) - confirm whether that can occur.
    for mapped_duration in sorted(mapped_duration2freqs.keys()):
        freq = mapped_duration2freqs[mapped_duration]
        proportion = freq / len(metas) * 100
        print(f'{mapped_duration=}, {freq=}, {proportion=:.1f}%')
    return metas
|
| 253 |
+
|
| 254 |
+
def append_text_tokens(self, metas, bucket_size=100):
    """Add text-token counts to every meta, tokenizing captions in buckets.

    For each meta this sets ``text_tokens`` (capped at ``self.max_text_len``),
    adds that count to ``cum_text_visual_tokens`` and stores the final
    cumulative value as ``text_visual_tokens``. In feature-extraction mode
    no tokenizer is called and every caption counts as zero tokens.

    Args:
        metas: List of metadata dicts, updated in place.
        bucket_size: Number of captions tokenized per call.

    Returns:
        The same ``metas`` list.
    """
    started_at = time.time()
    max_text_visual_tokens = -1
    num_buckets = len(metas) // bucket_size + 1
    pbar = tqdm.tqdm(total=num_buckets, desc='append text tokens')
    for bucket_id in range(num_buckets):
        pbar.update(1)
        lo = bucket_id * bucket_size
        hi = min(lo + bucket_size, len(metas))
        if lo >= hi:
            break
        bucket = metas[lo:hi]
        if self.feature_extraction:
            lens = [0] * len(bucket)
        else:
            captions = [entry['caption'] for entry in bucket]
            assert len(captions), f'{len(captions)=}'
            lens = self.get_captions_lens(captions)
        for offset, entry in enumerate(bucket):
            entry['text_tokens'] = min(self.max_text_len, lens[offset])
            entry['cum_text_visual_tokens'] = entry['cum_text_visual_tokens'] + entry['text_tokens']
            entry['text_visual_tokens'] = entry['cum_text_visual_tokens'][-1]
            max_text_visual_tokens = max(max_text_visual_tokens, entry['text_visual_tokens'])
    if not self.other_args.allow_less_one_elem_in_seq:
        assert max_text_visual_tokens <= self.train_max_token_len, f'{self.train_max_token_len=} should > {max_text_visual_tokens=}'
    finished_at = time.time()
    print(f'append text tokens: {finished_at-started_at:.1f}s')
    return metas
|
| 280 |
+
|
| 281 |
+
def exists_cache_file(self, meta):
    """Return whether the feature cache file of ``meta`` exists on disk.

    Image records are looked up through ``get_image_cache_file``. Video
    records go through ``get_video_cache_file``; when the path contains
    ``/vdataset/clip`` the frame range is re-based to start at 0.
    """
    if 'image_path' in meta:
        cache_file = self.get_image_cache_file(meta['image_path'])
        return osp.exists(cache_file)
    video_path = meta['video_path']
    if '/vdataset/clip' in video_path: # clip
        first_frame, last_frame = 0, meta['end_frame_id'] - meta['begin_frame_id']
    else:
        first_frame, last_frame = meta['begin_frame_id'], meta['end_frame_id']
    cache_file = self.get_video_cache_file(video_path, first_frame, last_frame, self.video_fps)
    return osp.exists(cache_file)
|
| 290 |
+
|
| 291 |
+
def form_batches(self, metas):
    """Group meta indices into batches that fit ``self.train_max_token_len``.

    In feature-extraction mode every sample is its own batch. Otherwise the
    metas are visited with a stride of ``step`` and packed greedily by their
    ``text_visual_tokens``, and any leftover token budget is filled with
    further samples chosen via ``cum_text_visual_tokens``.

    Args:
        metas: List of metadata dicts produced by ``get_meta``.

    Returns:
        ``(batches, batch_num)``: the list of index lists, and the number of
        batches to use, which is the minimum across ranks when distributed
        and is rounded down to a multiple of ``self.dataloader_workers``.
    """
    st = time.time()
    if self.feature_extraction: # no sequence packing, for feature extraction
        batches = [[item] for item in range(len(metas))]
    else:
        batches = []
        has_been_used = [False for _ in range(len(metas))]
        # NOTE(review): with an empty metas list bucket_size is 0 and the
        # division below raises ZeroDivisionError.
        bucket_size = min(len(metas), self.other_args.seq_pack_bucket)
        print(f'[data preprocess] form_batches form {len(metas)} metas, bucket_size={bucket_size}...')
        step = len(metas) // bucket_size + 1
        for bucket_id in range(step):
            # Bucket bucket_id consists of indices bucket_id, bucket_id+step, ...
            left_ptr = bucket_id
            while left_ptr < len(metas):
                # NOTE(review): the batch's first index is never marked in
                # has_been_used, and it may already have been consumed by the
                # look-ahead loop of an earlier batch - confirm duplicates
                # are acceptable.
                tmp_batch = [left_ptr]
                tokens_remain = self.train_max_token_len - metas[left_ptr]['text_visual_tokens']
                left_ptr += step
                # Take consecutive bucket members while they still fit.
                while (left_ptr < len(metas)) and (metas[left_ptr]['text_visual_tokens'] <= tokens_remain):
                    if not has_been_used[left_ptr]:
                        has_been_used[left_ptr] = True
                        tokens_remain -= metas[left_ptr]['text_visual_tokens']
                        tmp_batch.append(left_ptr)
                    left_ptr += step
                # Look ahead past the first non-fitting member for unused
                # samples that fit into the remaining budget.
                tmp_ptr = left_ptr + step
                while tmp_ptr < len(metas) and tokens_remain > 0:
                    if (not has_been_used[tmp_ptr]) and (metas[tmp_ptr]['text_visual_tokens'] <= tokens_remain):
                        has_been_used[tmp_ptr] = True
                        tokens_remain -= metas[tmp_ptr]['text_visual_tokens']
                        tmp_batch.append(tmp_ptr)
                    tmp_ptr += step

                # Fill the leftover budget from samples whose first cumulative
                # token count fits in tokens_remain, to raise sequence
                # utilisation. (Translated from the original comment; the
                # selected sample is presumably truncated downstream - TODO confirm.)
                if tokens_remain > 0:
                    increase_seq_usage_times = 0
                    while increase_seq_usage_times == 0 or (tokens_remain > self.max_text_len):
                        increase_seq_usage_times += 1
                        if increase_seq_usage_times >= 3: break
                        # Indices already in this batch are not eligible again.
                        select_map = {}
                        for ind in tmp_batch:
                            select_map[ind] = True
                        candidates = []
                        min_val = 99999999
                        for tmp_ind in range(bucket_id, len(metas), step):
                            if (metas[tmp_ind]['cum_text_visual_tokens'][0] <= tokens_remain) and (tmp_ind not in select_map):
                                import bisect
                                # Largest cumulative prefix not exceeding the budget.
                                idx = bisect.bisect_right(metas[tmp_ind]['cum_text_visual_tokens'], tokens_remain)
                                if tokens_remain - metas[tmp_ind]['cum_text_visual_tokens'][idx-1] < min_val:
                                    min_val = tokens_remain - metas[tmp_ind]['cum_text_visual_tokens'][idx-1]
                                    candidates = [tmp_ind]
                                elif tokens_remain - metas[tmp_ind]['cum_text_visual_tokens'][idx-1] == min_val:
                                    candidates.append(tmp_ind)
                        if len(candidates):
                            # Ties for the smallest leftover are broken at random.
                            tmp_batch.append(self.epoch_rank_generator.choice(candidates))
                            tokens_remain = min_val
                        else:
                            break
                batches.append(tmp_batch)
                if len(batches) % 1000 == 0:
                    print(f'form {len(batches)} batches, left_ptr={left_ptr}, len(metas)={len(metas)}')
    batch_num = len(batches)
    print(f'[data preprocess] form_batches done, got {len(batches)} batches, cost {time.time()-st:.2f}s')
    try:
        if self.num_replicas > 1:
            # Use the smallest batch count of any rank.
            batch_num = torch.tensor([batch_num], device=self.device)
            if tdist.is_initialized():
                tdist.all_reduce(batch_num, op=tdist.ReduceOp.MIN)
            batch_num = batch_num.item()
    except Exception as e:
        print(e)
    # Round down so the batches divide evenly among the DataLoader workers.
    batch_num = batch_num // self.dataloader_workers * self.dataloader_workers
    print(f'[data preprocess] form_batches done, got {batch_num} batches')
    return batches, batch_num
|
| 362 |
+
|
| 363 |
+
def set_global_worker_id(self):
    """Record this process's DataLoader worker id and its id across all ranks.

    Outside a DataLoader worker process ``get_worker_info()`` returns
    ``None``; that case is treated as a single worker with id 0.

    Raises:
        AssertionError: If the actual worker count differs from
            ``self.dataloader_workers``.
    """
    worker_info = torch.utils.data.get_worker_info()
    if worker_info:
        worker_total_num, worker_id = worker_info.num_workers, worker_info.id
    else:
        worker_total_num, worker_id = 1, 0
    # A formatted message replaces ``assert cond, print(...)``, whose message
    # evaluated to None.
    assert worker_total_num == self.dataloader_workers, (
        f'DataLoader runs {worker_total_num} worker(s) but the dataset was '
        f'configured with dataloader_workers={self.dataloader_workers}'
    )
    self.worker_id = worker_id
    self.global_worker_id = self.rank * self.dataloader_workers + worker_id
|
| 374 |
+
|
| 375 |
+
def set_epoch(self, epoch):
    """Store ``epoch`` and rebuild the epoch-dependent random generators."""
    self.epoch = epoch
    self.set_generator()
|
| 378 |
+
|
| 379 |
+
def set_generator(self, ):
    """Create the four NumPy generators used for this epoch.

    Every generator is seeded with ``seed + epoch`` plus an offset: nothing
    (``epoch_generator``), the worker id, the global worker id, or the rank.
    """
    epoch_seed = self.seed + self.epoch
    make_rng = np.random.default_rng
    self.epoch_generator = make_rng(epoch_seed)
    self.epoch_worker_generator = make_rng(epoch_seed + self.worker_id)
    self.epoch_global_worker_generator = make_rng(epoch_seed + self.global_worker_id)
    self.epoch_rank_generator = make_rng(epoch_seed + self.rank)
|
| 384 |
+
|
| 385 |
+
def __iter__(self):
    """Yield packed batches assigned to the current DataLoader worker.

    The worker starts at batch index ``self.worker_id`` and advances by
    ``self.dataloader_workers``, wrapping around ``self.batches``, until it
    has yielded ``self.batch_nums // self.dataloader_workers`` batches.
    Batches rejected by the cache check, batches in which no sample could
    be prepared, and batches that raise are skipped.

    Yields:
        A dict with the keys ``captions``, ``images``, ``addition_pn_images``,
        ``feature_cache_files4images``, ``raw_features_bcthw``,
        ``text_cond_tuple``, ``text_feature_cache_files`` and ``media``.

    Raises:
        ValueError: If more than 600 seconds pass without yielding a batch.
    """
    self.set_global_worker_id()
    self.set_generator()
    self.epoch_rank_generator.shuffle(self.batches)
    yield_data_cnt = 0
    batch_ind_ptr = self.worker_id
    failed_batch_cnt = 0
    last_yield_data_time = time.time()
    while yield_data_cnt < self.batch_nums // self.dataloader_workers:
        # Checked outside the try block: inside it the error was swallowed by
        # ``except Exception`` below and the loop kept spinning forever.
        if time.time() - last_yield_data_time > 600:
            raise ValueError(f'[dataset] it takes too long to yield data, please check your code')
        try:
            batch_inds = self.batches[batch_ind_ptr%len(self.batches)]
            cache_check_mode = self.other_args.cache_check_mode
            if cache_check_mode in [-2, 2, 3]: # -2, 2 means check vae token cache at each iteration
                cached_flags = [self.exists_cache_file(self.metas[j]) for j in batch_inds]
                all_has_been_cached = all(cached_flags)
                all_has_not_been_cached = not any(cached_flags)
                skip_batch = (
                    (cache_check_mode == 2 and not all_has_been_cached)  # every sample must be cached
                    or (cache_check_mode == -2 and all_has_been_cached)  # at least one sample must be uncached
                    or (cache_check_mode == 3 and all_has_not_been_cached)  # at least one sample must be cached
                )
                if skip_batch:
                    batch_ind_ptr += self.dataloader_workers
                    continue

            batch_data = []
            for j in batch_inds:
                meta = self.metas[j]
                if 'image_path' in meta:
                    ret, model_input = self.prepare_image_input(meta)
                elif 'video_path' in meta:
                    ret, model_input = self.prepare_video_input(meta)
                else:
                    # Never reuse the result of a previous sample.
                    ret, model_input = False, None
                if ret:
                    batch_data.append(model_input)
            if not len(batch_data):
                batch_ind_ptr += self.dataloader_workers
                continue

            captions4images, captions4raw_features = [], []
            images, raw_features_bcthw, feature_cache_files4images = [], [], []
            text_feature_cache_files = []
            addition_pn_images = {}
            for item in batch_data:
                if item['raw_features_cthw'] is None:
                    # No cached features: pass the pixels on for online encoding.
                    images.append(item['img_T3HW'].permute(1,0,2,3)) # tchw -> cthw
                    for key in item:
                        if key.startswith('img_T3HW_'):
                            addition_pn_images.setdefault(key, []).append(item[key].permute(1,0,2,3))
                    feature_cache_files4images.append(item['feature_cache_file'])
                    captions4images.append(item['text_input'])
                else:
                    raw_features_bcthw.append(item['raw_features_cthw'])
                    captions4raw_features.append(item['text_input'])
                text_feature_cache_files.append(item['text_feature_cache_file'])
            # Captions of pixel inputs come first, then those of cached features.
            captions = captions4images + captions4raw_features
            assert len(batch_data), f'len(batch_data)={len(batch_data)}'
            text_cond_tuple = None
            yield {
                'captions': captions,
                'images': images,
                'addition_pn_images': addition_pn_images,
                'feature_cache_files4images': feature_cache_files4images,
                'raw_features_bcthw': raw_features_bcthw,
                'text_cond_tuple': text_cond_tuple,
                'text_feature_cache_files': text_feature_cache_files,
                'media': 'videos',
            }
            yield_data_cnt += 1
            batch_ind_ptr += self.dataloader_workers
            del batch_data
            del images
            del captions
            last_yield_data_time = time.time()
        except Exception as e:
            # Best effort: skip the failing batch and report every 400 failures.
            batch_ind_ptr += self.dataloader_workers
            failed_batch_cnt += 1
            if failed_batch_cnt % 400 == 0:
                print(f'failed_batch_cnt: {failed_batch_cnt}, yield_data_cnt: {yield_data_cnt}')
                print(f'[dataset] error: {e}')
|
| 479 |
+
|
| 480 |
+
def prepare_image_input(self, info) -> Tuple:
    """Build the model input for one image record.

    Cached text features and cached VAE features are loaded when the
    corresponding options are enabled and the files exist. If no VAE
    features are available, the image is opened, converted to RGB and run
    through ``transform`` at the resolution of the closest aspect-ratio
    template.

    Args:
        info: Metadata dict with at least ``image_path`` and ``caption``.

    Returns:
        ``(True, data_item)`` on success, ``(False, None)`` when cached
        features are required but missing, or when any exception occurs.
    """
    try:
        img_path = osp.abspath(info['image_path'])
        text_input = info['caption']
        img_T3HW = raw_features_cthw = feature_cache_file = None
        text_features_lenxdim = text_feature_cache_file = None
        # text_input = process_short_text(text_input)
        if self.use_text_token_cache:
            # The cache key is derived from the caption before any motion tag is added.
            text_feature_cache_file = osp.join(self.token_cache_dir, 'flan-t5-xl-official', get_prompt_id(text_input)+'.pt')
            if osp.exists(text_feature_cache_file):
                text_features_lenxdim = torch.load(text_feature_cache_file, weights_only=True)

        if self.add_motion_score2caption:
            rand_motion_score = -1 + self.epoch_rank_generator.random() * 21.0 # -1.0 ~ 20.0
            text_input = prepend_motion_score(text_input, rand_motion_score)
        if self.use_vae_token_cache:
            feature_cache_file = self.get_image_cache_file(img_path)
            if osp.exists(feature_cache_file):
                try:
                    raw_features_cthw = torch.load(feature_cache_file, weights_only=True)
                except Exception as e:
                    # An unreadable cache file is deleted.
                    print(f'load cache file error: {e}')
                    os.remove(feature_cache_file)
            if raw_features_cthw is None and (not self.allow_online_vae_feature_extraction):
                return False, None
        if raw_features_cthw is None:
            with open(img_path, 'rb') as f:
                img: PImage.Image = PImage.open(f)
                w, h = img.size
                h_div_w = h / w
                # Pick the template whose aspect ratio is closest to the image's.
                nearest = np.argmin(np.abs((h_div_w-self.h_div_w_templates)))
                h_div_w_template = self.h_div_w_templates[nearest]
                tgt_h, tgt_w = self.dynamic_resolution_h_w[h_div_w_template][self.pn]['pixel']
                img = img.convert('RGB')
                img_T3HW = transform(img, tgt_h, tgt_w)
                # Add a leading axis of length 1 in front of the channel axis.
                img_T3HW = img_T3HW.unsqueeze(0)
                assert img_T3HW.shape[1] == 3
        data_item = dict(
            text_input=text_input,
            img_T3HW=img_T3HW,
            raw_features_cthw=raw_features_cthw,
            feature_cache_file=feature_cache_file,
            text_features_lenxdim=text_features_lenxdim,
            text_feature_cache_file=text_feature_cache_file,
        )
        return True, data_item
    except Exception as e:
        print(f'prepare_image_input error: {e}')
        return False, None
|
| 526 |
+
|
| 527 |
+
def prepare_pair_image_input(self, info) -> Tuple:
    """Placeholder for paired-image input; not implemented, returns ``None``."""
    pass
|
| 529 |
+
|
| 530 |
+
def prepare_pair_video_input(self, info) -> Tuple:
    """Load the "win" and "lose" videos of a preference pair as one stacked input.

    Both videos are prepared with ``prepare_video_input`` on a deep copy of
    ``info`` whose ``video_path`` is set to ``win_video_path`` and then to
    ``lose_video_path``; ``info`` itself is not modified.

    Args:
        info: Metadata dict with ``win_video_path``, ``lose_video_path`` and
            the keys ``prepare_video_input`` needs.

    Returns:
        ``(True, data_item)``, where ``data_item`` is the win item with
        ``img_T3HW`` replaced by the two clips stacked on a new leading
        axis, or ``(False, None)`` if either video could not be prepared.

    Raises:
        AssertionError: If either item carries cached ``raw_features_cthw``
            instead of pixels.
    """
    tmp_info = copy.deepcopy(info)
    tmp_info['video_path'] = info['win_video_path']
    win_flag, win_data_item = self.prepare_video_input(tmp_info)
    # A failed load returns (False, None); check the flag before subscripting
    # the item, which previously raised TypeError on None.
    if not win_flag:
        return False, None
    assert win_data_item['raw_features_cthw'] is None

    tmp_info['video_path'] = info['lose_video_path']
    lose_flag, lose_data_item = self.prepare_video_input(tmp_info)
    if not lose_flag:
        return False, None
    assert lose_data_item['raw_features_cthw'] is None

    win_data_item['img_T3HW'] = torch.stack([win_data_item['img_T3HW'], lose_data_item['img_T3HW']], dim=0) # [2,T,C,H,W]
    return True, win_data_item
|
| 544 |
+
|
| 545 |
+
def prepare_video_input(self, info) -> Tuple:
    """Build the model input for one video record.

    Cached VAE features are used when enabled and present, cut to the
    temporal length needed for ``info['sample_frames']``. Otherwise the clip
    is decoded with ``EncodedVideoOpencv`` and every frame is run through
    ``transform``.

    Args:
        info: Metadata dict with ``video_path``, ``begin_frame_id``,
            ``end_frame_id``, ``caption`` and ``sample_frames``.

    Returns:
        ``(True, data_item)`` on success, ``(False, None)`` when cached
        features are required but missing, or when the video file does not
        exist. Decoding errors are not caught here.
    """
    filename = info["video_path"]
    begin_frame_id = info["begin_frame_id"]
    end_frame_id = info["end_frame_id"]

    img_T3HW = raw_features_cthw = feature_cache_file = None
    text_features_lenxdim = text_feature_cache_file = None
    img_T3HW_4additional_pn = {}
    text_input = info['caption']
    if '/vdataset/clip' in filename: # clip
        # Re-base the frame range so that it starts at 0.
        begin_frame_id, end_frame_id = 0, end_frame_id - begin_frame_id
    sample_frames = info['sample_frames']
    if self.use_vae_token_cache:
        feature_cache_file = self.get_video_cache_file(info["video_path"], begin_frame_id, end_frame_id, self.video_fps)
        if osp.exists(feature_cache_file):
            try:
                pt = (sample_frames-1) // self.temporal_compress_rate + 1
                raw_features_cthw = torch.load(feature_cache_file, weights_only=True)
                cached_pt = raw_features_cthw.shape[1]
                assert cached_pt >= pt, f'raw_features_cthw.shape[1] >= pt: {raw_features_cthw.shape[1]} vs {pt}'
                if cached_pt > pt:
                    raw_features_cthw = raw_features_cthw[:,:pt]
            except Exception as e:
                # A cache file that is unreadable or too short is deleted.
                print(f'load video cache file error: {e}')
                os.remove(feature_cache_file)
                raw_features_cthw = None
        if raw_features_cthw is None and (not self.allow_online_vae_feature_extraction):
            return False, None
    pn_list = [self.pn]
    if raw_features_cthw is None:
        local_path = info["video_path"]
        if not local_path:
            return False, None
        if not osp.exists(local_path):
            return False, None
        video = EncodedVideoOpencv(local_path, os.path.basename(local_path), num_threads=0)
        start_interval = max(0, begin_frame_id / video._fps)
        end_interval = start_interval+(sample_frames-1)/self.video_fps
        assert end_interval <= video.duration + 0.2, f'{end_interval=}, but {video.duration=}' # 0.2s margin
        end_interval = min(end_interval, video.duration)
        raw_video, _ = video.get_clip(start_interval, end_interval, sample_frames)
        h, w, _ = raw_video[0].shape
        h_div_w = h / w
        # Pick the template whose aspect ratio is closest to the video's.
        nearest = np.argmin(np.abs((h_div_w-self.h_div_w_templates)))
        h_div_w_template = self.h_div_w_templates[nearest]
        tgt_h, tgt_w = self.dynamic_resolution_h_w[h_div_w_template][self.pn]['pixel']

        pn_list = [*pn_list, *self.addition_pn_list]
        decord_backed = isinstance(video, EncodedVideoDecord)
        # NOTE(review): tgt_h / tgt_w come from self.pn for every entry of
        # pn_list, so the additional entries receive the same-size frames.
        for pn in pn_list:
            if decord_backed:
                frame_tensors = [transform(Image.fromarray(frame).convert("RGB"), tgt_h, tgt_w) for frame in raw_video]
            else:
                # Reverse the channel axis before building the PIL image.
                frame_tensors = [transform(Image.fromarray(frame[:,:,::-1]), tgt_h, tgt_w) for frame in raw_video]
            img_T3HW = torch.stack(frame_tensors, 0)
            img_T3HW_4additional_pn[pn] = img_T3HW
        del video
        assert img_T3HW.shape[1] == 3
    data_item = {
        'text_input': text_input,
        'img_T3HW': img_T3HW_4additional_pn.get(self.pn, None),
        'raw_features_cthw': raw_features_cthw,
        'feature_cache_file': feature_cache_file,
        'text_features_lenxdim': text_features_lenxdim,
        'text_feature_cache_file': text_feature_cache_file,
    }
    for pn in pn_list[1:]:
        data_item[f'img_T3HW_{pn}'] = img_T3HW_4additional_pn.get(pn, None)
    return True, data_item
|
| 617 |
+
# except Exception as e:
|
| 618 |
+
# # print(f'prepare_video_input error: {e}, info: {info}')
|
| 619 |
+
# return False, None
|
| 620 |
+
# finally:
|
| 621 |
+
# try:
|
| 622 |
+
# if (img_T3HW is not None) and local_path and (local_path != filename):
|
| 623 |
+
# os.remove(local_path)
|
| 624 |
+
# except Exception as e:
|
| 625 |
+
# print(f'delete local_path: {local_path} error: {e}, info: {info}')
|
| 626 |
+
|
| 627 |
+
@staticmethod
|
| 628 |
+
def collate_function(batch, online_t5: bool = False) -> None:
|
| 629 |
+
pass
|
| 630 |
+
|
| 631 |
+
def random_drop_sentences(self, caption):
    """Keep a random-length prefix of the sentences in ``caption``.

    The caption is split on ``'.'`` and empty pieces are discarded. If fewer
    than two pieces remain, the caption is returned untouched. Otherwise an
    integer ``k`` is requested from ``self.epoch_global_worker_generator``
    via ``integers(1, n + 1)`` and the first ``k`` pieces are re-joined with
    ``'.'`` and terminated with a final ``'.'``.

    Args:
        caption: Text whose sentences are separated by ``'.'``.

    Returns:
        Either the original caption or a truncated, ``'.'``-terminated prefix.
    """
    pieces = list(filter(None, caption.split('.')))
    if len(pieces) <= 1:
        return caption
    keep_count = self.epoch_global_worker_generator.integers(1, len(pieces) + 1)
    kept = pieces[:keep_count]
    return '.'.join(kept) + '.'
|
| 637 |
+
|
| 638 |
+
def get_text_input(self, long_text_input, short_text_input, long_text_type):
    """Pick the caption (long or short) to use for one sample.

    If only one of the two captions is non-empty, that one is returned
    directly. Otherwise a value is drawn from
    ``self.epoch_global_worker_generator.random()`` and the short caption is
    returned when that value is below ``self.short_prob``; the long caption
    (or, in the dynamic-length branch, a random-length sentence prefix of it)
    is returned otherwise.

    Args:
        long_text_input: The long caption; may be empty/None.
        short_text_input: The short caption; may be empty/None.
        long_text_type: Tag of the long caption; the value ``'user_prompt'``
            opts out of the dynamic-length branch.

    Returns:
        The selected caption string.

    Raises:
        AssertionError: If both captions are empty, or if
            ``self.enable_dynamic_length_prompt`` is truthy (that mode is
            asserted off here).
    """
    assert long_text_input or short_text_input
    if not long_text_input:
        return short_text_input
    if not short_text_input:
        return long_text_input

    # Drawn before branching so the generator advances the same way on every path.
    random_value = self.epoch_global_worker_generator.random()

    # Guard kept from the original: the dynamic-length branch below is only
    # reachable when asserts are stripped (python -O).
    assert not self.enable_dynamic_length_prompt
    if self.enable_dynamic_length_prompt and long_text_type != 'user_prompt':
        long_text_elems = [item for item in long_text_input.split('.') if item]
        # Fix: the fallback used to be the int 0, which made the len() call
        # below raise TypeError for captions without any non-empty sentence.
        if long_text_elems:
            first_sentence_words = [item for item in long_text_elems[0].split(' ') if item]
        else:
            first_sentence_words = []
        # A long first sentence (>= 15 words) counts as a short text on its own.
        num_sentence4short_text = 1 if len(first_sentence_words) >= 15 else 2
        # short_text_input is guaranteed non-empty here (see the early return
        # above), so no fallback derived from the long caption is needed.
        if random_value < self.short_prob:
            return short_text_input
        if len(long_text_elems) <= num_sentence4short_text:
            return long_text_input
        select_sentence_num = self.epoch_global_worker_generator.integers(
            num_sentence4short_text + 1, len(long_text_elems) + 1
        )
        return '.'.join(long_text_elems[:select_sentence_num])

    if random_value < self.short_prob:
        return short_text_input
    return long_text_input
|
| 668 |
+
|
| 669 |
+
def __len__(self):
    """Report the length of this object as the value stored in ``self.batch_nums``."""
    num_batches = self.batch_nums
    return num_batches
|
| 671 |
+
|
| 672 |
+
def get_image_cache_file(self, image_path):
    """Build the token-cache ``.pt`` path that corresponds to an image path.

    The image path is split on ``'/'`` (empty components dropped). The last
    component, without its extension, is passed through ``get_prompt_id`` to
    form the file name; components ``[4:-1]`` are kept as the sub-directory
    (the first four are dropped -- presumably a fixed storage prefix, confirm
    against the data layout).

    Args:
        image_path: ``'/'``-separated path of the source image.

    Returns:
        ``<token_cache_dir>/images_pn_<pn>/<sub-directory>/<prompt id>.pt``.
    """
    parts = [segment for segment in image_path.split('/') if segment]
    stem, _ = osp.splitext(parts[-1])
    prompt_id = get_prompt_id(stem)
    sub_dir = '/'.join(parts[4:-1])
    return osp.join(self.token_cache_dir, f'images_pn_{self.pn}', sub_dir, f'{prompt_id}.pt')
|
| 679 |
+
|
| 680 |
+
def get_video_cache_file(self, video_path, begin_frame_id, end_frame_id, video_fps):
    """Build the token-cache ``.pt`` path for one clip of a video.

    The video path is split on ``'/'`` (empty components dropped). The last
    component, without its extension, is passed through ``get_prompt_id``;
    components ``[4:-1]`` are kept as the sub-directory (the first four are
    dropped -- presumably a fixed storage prefix, confirm against the data
    layout). The frame range is encoded in the file name.

    Args:
        video_path: ``'/'``-separated path of the source video.
        begin_frame_id: Value written after ``_sf_`` in the file name.
        end_frame_id: Value written after ``_ef_`` in the file name.
        video_fps: Value written into the ``pn_<pn>_sample_fps_<fps>`` directory name.

    Returns:
        ``<token_cache_dir>/pn_<pn>_sample_fps_<fps>/<sub-directory>/<prompt id>_sf_<begin>_ef_<end>.pt``.
    """
    parts = [segment for segment in video_path.split('/') if segment]
    stem, _ = osp.splitext(parts[-1])
    prompt_id = get_prompt_id(stem)
    bucket = f'pn_{self.pn}_sample_fps_{video_fps}'
    sub_dir = '/'.join(parts[4:-1])
    cache_name = f'{prompt_id}_sf_{begin_frame_id}_ef_{end_frame_id}.pt'
    return osp.join(self.token_cache_dir, bucket, sub_dir, cache_name)
|
| 687 |
+
|
| 688 |
+
if __name__ == '__main__':
|
| 689 |
+
pass
|
Meissonic/InfinityStar/infinity/models/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025 FoundationVision
|
| 2 |
+
# SPDX-License-Identifier: MIT
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from timm.loss import SoftTargetCrossEntropy
|
| 6 |
+
|
| 7 |
+
from timm.models.layers import DropPath
|
| 8 |
+
|
| 9 |
+
from .infinity import Infinity, sample_with_top_k_top_p_also_inplace_modifying_logits_
|
| 10 |
+
|
| 11 |
+
def _ex_repr(self):
    """Render the public, non-tensor attributes of ``self`` as ``k=v, k=v``.

    Attributes are taken from ``vars(self)`` in their stored order. An
    attribute is skipped when its name starts with ``'_'``, when it is named
    ``'training'``, or when its value is a ``torch.nn.Module`` or
    ``torch.Tensor``. Floats are formatted with ``:g``; everything else with
    ``str``.

    Returns:
        The comma-separated ``name=value`` string (empty if nothing qualifies).
    """
    rendered = []
    for name, value in vars(self).items():
        if name.startswith('_') or name == 'training':
            continue
        if isinstance(value, (torch.nn.Module, torch.Tensor)):
            continue
        text = f'{value:g}' if isinstance(value, float) else str(value)
        rendered.append(f'{name}=' + text)
    return ', '.join(rendered)
|
| 18 |
+
# Give the loss modules a compact repr built from their plain attributes.
# DropPath is handled separately below and no longer prints its drop_prob.
for clz in (torch.nn.CrossEntropyLoss, SoftTargetCrossEntropy):
    if not hasattr(clz, 'extra_repr'):
        # No extra_repr hook available: replace __repr__ wholesale.
        clz.__repr__ = lambda self: f'{type(self).__name__}({_ex_repr(self)})'
    else:
        clz.extra_repr = _ex_repr

# Collapse every DropPath instance to "<ClassName>(...)" when printed.
DropPath.__repr__ = lambda self: f'{type(self).__name__}(...)'

# Short aliases 'd6', 'd8', ..., 'd40' -> 'infinity_d6', ..., 'infinity_d40',
# plus the reverse lookup table.
alias_dict = {}
for d in range(6, 42, 2):
    alias_dict[f'd{d}'] = f'infinity_d{d}'
alias_dict_inv = {full_name: short_name for short_name, full_name in alias_dict.items()}
|
Meissonic/InfinityStar/infinity/models/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (1.47 kB). View file
|
|
|