diff --git a/.gitattributes b/.gitattributes index 64aa7cc4218c46c0506a7f0c6fb28cc60f8f09c6..c5bcea2b42f57bed44d573cddbab6e2c13f22437 100644 --- a/.gitattributes +++ b/.gitattributes @@ -134,3 +134,61 @@ base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_q base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_950_a048891b4bb2e302eb8c.mp4 filter=lfs diff=lfs merge=lfs -text base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_975_dad91748c15462b26fb3.mp4 filter=lfs diff=lfs merge=lfs -text base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/run-p2mcszig.wandb filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_2_a619d3adb2f03b80bc21.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_3_256510a892d28f79e85e.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_4_b55a09705365f1f7bc7c.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_5_e65019ee1ebe9882b0b7.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_6_f4f71170b27199f67c46.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_7_ee4dfc69e0f0ff992917.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_8_3a191d37e945e0e0dc25.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_9_e121e2ba5f686fd4950c.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/run-c3u3silm.wandb filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_25_f90c3d650c4a707c1ec3.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_26_fcb0b2b66064e1cc523a.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_27_fb495aa0725107fd4084.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_28_f3d85415155c518fccc7.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_25_9060c16221f1e99f7b6e.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_26_aed6f3de431c91b8cae8.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_27_31f4a1a3e263ac0f4120.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_28_3aa6f2f161e847e89032.mp4 filter=lfs diff=lfs merge=lfs -text +k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/run-c3u3silm.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/best_fvd_105.24.pth b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/best_fvd_105.24.pth new file mode 100644 index 0000000000000000000000000000000000000000..7fe89fe6a12d81e7f9e86641e965b8a861de29f2 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/best_fvd_105.24.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94381c6f59e56b006cfb2704277816901a6222f0470205e5ad06ea787ef13447 +size 8223495896 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/cfg.yaml b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/cfg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb199467ef44e340e0436c410d13382521b04cc7 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/cfg.yaml @@ -0,0 +1,477 @@ +!!python/object/new:easydict.EasyDict +state: + trainer: our_tokenizer_trainer + train_dataset: &id020 !!python/object/new:easydict.EasyDict + state: + name: video_dataset + args: &id001 !!python/object/new:easydict.EasyDict + state: + root_path: data/metadata + split: train + frame_num: 16 + rand_augment: 'no' + csv_file: k600_train.csv+ucf101_train.csv + cls_vid_num: -1_-1 + crop_size: 128 + scale: 1.0 + aspect_ratio: 1.0 + rand_flip: 'yes' + use_all_frames: false + pre_load: false + dictitems: + root_path: data/metadata + split: train + frame_num: 16 + rand_augment: 'no' + csv_file: k600_train.csv+ucf101_train.csv + cls_vid_num: -1_-1 + crop_size: 128 + scale: 1.0 + aspect_ratio: 1.0 + rand_flip: 'yes' + use_all_frames: false + pre_load: false + loader: &id002 !!python/object/new:easydict.EasyDict + state: + batch_size: 128 + num_workers: 32 + dictitems: + batch_size: 128 + num_workers: 32 + dictitems: + name: video_dataset + args: *id001 + loader: *id002 + test_dataset: &id021 !!python/object/new:easydict.EasyDict + state: + name: video_dataset + args: &id003 !!python/object/new:easydict.EasyDict + state: + root_path: data/metadata + frame_num: 16 + cls_vid_num: -1_-1 + crop_size: 128 + use_all_frames: false + pre_load: false + dictitems: + root_path: data/metadata + frame_num: 16 + cls_vid_num: -1_-1 + crop_size: 128 + use_all_frames: false + pre_load: false + csv_paths: &id004 !!python/object/new:easydict.EasyDict + state: + ucf101_val: ucf101_val.csv + dictitems: + ucf101_val: ucf101_val.csv + loader: &id005 !!python/object/new:easydict.EasyDict + state: + batch_size: 128 + num_workers: 32 + dictitems: + batch_size: 128 + num_workers: 32 + dictitems: + name: video_dataset + args: *id003 + csv_paths: *id004 + loader: *id005 + model: &id022 !!python/object/new:easydict.EasyDict + state: + name: dyn_tokenizer + args: &id016 !!python/object/new:easydict.EasyDict + state: + noise_schedule: &id012 !!python/object/new:easydict.EasyDict + state: + name: min_rf_noise_module + args: &id006 !!python/object/new:easydict.EasyDict + state: + clean_data_read_key: clean_data + noised_data_write_key: noisy_input + noise_write_key: flow_noise + timesteps_write_key: timesteps + sigmas_write_key: sigmas + ln: false + stratisfied: false + mode_scale: 0.25 + dictitems: + clean_data_read_key: clean_data + noised_data_write_key: noisy_input + noise_write_key: flow_noise + timesteps_write_key: timesteps + sigmas_write_key: sigmas + ln: false + stratisfied: false + mode_scale: 0.25 + dictitems: + name: min_rf_noise_module + args: *id006 + bottleneck: &id013 !!python/object/new:easydict.EasyDict + state: + name: bottleneck + args: &id009 !!python/object/new:easydict.EasyDict + state: + regularizer: &id008 !!python/object/new:easydict.EasyDict + state: + name: vector_quantize + args: &id007 !!python/object/new:easydict.EasyDict + state: + codebook_dim: 16 + codebook_size: 8192 + ema_update: true + decay: 0.99 + kmeans_init: true + kmeans_iters: 10 + threshold_ema_dead_code: 0.2 + use_cosine_sim: true + commitment_weight: 1.0 + diversity_weight: 0.0 + smart_re_K: 0 + continuous: false + reg: + - 0.1 + - 0.3 + reset_cluster_size: 0.2 + ema_entropy_ratio: 0.8 + vq_start_step: 0 + dictitems: + codebook_dim: 16 + codebook_size: 8192 + ema_update: true + decay: 0.99 + kmeans_init: true + kmeans_iters: 10 + threshold_ema_dead_code: 0.2 + use_cosine_sim: true + commitment_weight: 1.0 + diversity_weight: 0.0 + smart_re_K: 0 + continuous: false + reg: + - 0.1 + - 0.3 + reset_cluster_size: 0.2 + ema_entropy_ratio: 0.8 + vq_start_step: 0 + dictitems: + name: vector_quantize + args: *id007 + dictitems: + regularizer: *id008 + dictitems: + name: bottleneck + args: *id009 + prior_model: &id014 !!python/object/new:easydict.EasyDict + state: + name: none + use_mix_ss: true + mix_ss_max_ratio: 0.5 + mix_ss_peak_steps_ratio: 0.3 + n_rounds: 2 + avg_loss_over_rounds: true + no_grad_before_last_round: false + no_dropout: false + latent_ce_temperature: 1.0 + args: &id010 !!python/object/new:easydict.EasyDict + state: + l2_normalized: true + dictitems: + l2_normalized: true + dictitems: + name: none + use_mix_ss: true + mix_ss_max_ratio: 0.5 + mix_ss_peak_steps_ratio: 0.3 + n_rounds: 2 + avg_loss_over_rounds: true + no_grad_before_last_round: false + no_dropout: false + latent_ce_temperature: 1.0 + args: *id010 + dec_time_embedder: &id015 !!python/object/new:easydict.EasyDict + state: + name: timestep_embedder + args: &id011 !!python/object/new:easydict.EasyDict + state: + timesteps_read_key: timesteps + time_embedding_write_key: dec_temb + dim: 1152 + frequency_embedding_size: 256 + max_timestep: 1000.0 + dictitems: + timesteps_read_key: timesteps + time_embedding_write_key: dec_temb + dim: 1152 + frequency_embedding_size: 256 + max_timestep: 1000.0 + dictitems: + name: timestep_embedder + args: *id011 + transformer_name: transformer_encoder_parallel + encoder_name: none + decoder_name: transformer_AdaLN_decoder_parallel + bottleneck_token_num: 1024 + input_size: 128 + frame_num: 16 + temporal_patch_size: 4 + patch_size: 8 + decoder_temporal_patch_size: 4 + decoder_patch_size: 8 + in_channels: 3 + encoder_hidden_size: 768 + decoder_hidden_size: 1152 + encoder_num_heads: 12 + decoder_num_heads: 18 + encoder_depth: 12 + decoder_depth: 18 + encoder_block_name: block_timm + decoder_block_name: adaLN_block_timm + encoder_mask_mode: full + decoder_mask_mode: full + learned_encoder_patch_pe: false + learned_encoder_latent_query_embed: true + learned_decoder_latent_pe: false + learned_decoder_patch_query_embed: false + use_encoder_patch_token_type_embed: false + use_encoder_latent_query_token_type_embed: false + enable_decoder_query: false + learned_decoder_pe: false + use_decoder_latent_token_type_embed: false + use_decoder_patch_query_token_type_embed: true + encoder_query_gaussian_init: true + latent_pe_scale_factor: 10000 + query_init_std: 0.02 + adaLN_expansion: 2 + final_layer_init: xavier_uniform + enable_vq: true + qk_norm: true + use_rope: true + rope_dim: + - 16 + - 24 + - 24 + final_layer_type: adanorm + dictitems: + noise_schedule: *id012 + bottleneck: *id013 + prior_model: *id014 + dec_time_embedder: *id015 + transformer_name: transformer_encoder_parallel + encoder_name: none + decoder_name: transformer_AdaLN_decoder_parallel + bottleneck_token_num: 1024 + input_size: 128 + frame_num: 16 + temporal_patch_size: 4 + patch_size: 8 + decoder_temporal_patch_size: 4 + decoder_patch_size: 8 + in_channels: 3 + encoder_hidden_size: 768 + decoder_hidden_size: 1152 + encoder_num_heads: 12 + decoder_num_heads: 18 + encoder_depth: 12 + decoder_depth: 18 + encoder_block_name: block_timm + decoder_block_name: adaLN_block_timm + encoder_mask_mode: full + decoder_mask_mode: full + learned_encoder_patch_pe: false + learned_encoder_latent_query_embed: true + learned_decoder_latent_pe: false + learned_decoder_patch_query_embed: false + use_encoder_patch_token_type_embed: false + use_encoder_latent_query_token_type_embed: false + enable_decoder_query: false + learned_decoder_pe: false + use_decoder_latent_token_type_embed: false + use_decoder_patch_query_token_type_embed: true + encoder_query_gaussian_init: true + latent_pe_scale_factor: 10000 + query_init_std: 0.02 + adaLN_expansion: 2 + final_layer_init: xavier_uniform + enable_vq: true + qk_norm: true + use_rope: true + rope_dim: + - 16 + - 24 + - 24 + final_layer_type: adanorm + dictitems: + name: dyn_tokenizer + args: *id016 + loss: &id023 !!python/object/new:easydict.EasyDict + state: + name: fm_disc_loss + args: &id017 !!python/object/new:easydict.EasyDict + state: + disc_type: transformer + disc_start: 999999 + disc_self_start: -1 + perceptual_weight: 0.5 + perceptual_loss: lpips + perceptual_fp16: false + lecam_weight: 0.001 + disc_loss: ns_smooth + disc_weight: 0.0 + r1_gp_weight: 0.0 + d_update_freq: 5 + spectral_norm: false + disc_tran_hidden_size: 512 + disc_tran_n_heads: 8 + disc_tran_n_layers: 12 + disc_tran_temporal_patch_size: 4 + disc_tran_patch_size: 8 + input_spatial_size: 128 + frame_num: 16 + fm_loss_weight: 1.0 + dictitems: + disc_type: transformer + disc_start: 999999 + disc_self_start: -1 + perceptual_weight: 0.5 + perceptual_loss: lpips + perceptual_fp16: false + lecam_weight: 0.001 + disc_loss: ns_smooth + disc_weight: 0.0 + r1_gp_weight: 0.0 + d_update_freq: 5 + spectral_norm: false + disc_tran_hidden_size: 512 + disc_tran_n_heads: 8 + disc_tran_n_layers: 12 + disc_tran_temporal_patch_size: 4 + disc_tran_patch_size: 8 + input_spatial_size: 128 + frame_num: 16 + fm_loss_weight: 1.0 + dictitems: + name: fm_disc_loss + args: *id017 + optimizer: &id024 !!python/object/new:easydict.EasyDict + state: + name: adamw + loss_name: adam + args: &id018 !!python/object/new:easydict.EasyDict + state: + lr: 0.0001 + betas: + - 0.9 + - 0.99 + dictitems: + lr: 0.0001 + betas: + - 0.9 + - 0.99 + loss_args: &id019 !!python/object/new:easydict.EasyDict + state: + lr: 3.0e-05 + betas: + - 0.5 + - 0.9 + dictitems: + lr: 3.0e-05 + betas: + - 0.5 + - 0.9 + lr_type: step + lr_step_pcts: 0.9_0.95 + warmup_epoch: 10 + min_lr_mult: 0.1 + prior_lr_mult: 1.0 + emb_lr_mult: 1.0 + dictitems: + name: adamw + loss_name: adam + args: *id018 + loss_args: *id019 + lr_type: step + lr_step_pcts: 0.9_0.95 + warmup_epoch: 10 + min_lr_mult: 0.1 + prior_lr_mult: 1.0 + emb_lr_mult: 1.0 + max_epoch: 200 + eval_epoch: 1 + vis_epoch: 1 + latest_interval: 1 + save_epoch: 100000000 + save_best: true + stepwise_logging: false + ema_decay: _ + use_amp: true + amp_dtype: float16 + compile: true + compile_mode: default + flash_attn: false + loss_q_weight: 0.1 + loss_q_warmup: '1.0_1' + loss_kl_weight: 0.0 + kl_decay_epoch: -1 + loss_latent_ce_weight: 0.0 + sqt_start_end_epoch: 0.0_0.0_0 + clip_grad_max_norm: 0.0 + init_checkpoint: '' + timesteps: 25 + verbose: false + guidance_scale: 1.0 + env: &id025 !!python/object/new:easydict.EasyDict + state: + tot_gpus: 8 + cudnn: false + wandb_upload: true + wandb_entity: lingmin + wandb_project: dyn_tokenizer + exp_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__ + save_dir: /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__ + port: '12778' + dictitems: + tot_gpus: 8 + cudnn: false + wandb_upload: true + wandb_entity: lingmin + wandb_project: dyn_tokenizer + exp_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__ + save_dir: /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__ + port: '12778' + comment: '' + manualSeed: 66667 +dictitems: + trainer: our_tokenizer_trainer + train_dataset: *id020 + test_dataset: *id021 + model: *id022 + loss: *id023 + optimizer: *id024 + max_epoch: 200 + eval_epoch: 1 + vis_epoch: 1 + latest_interval: 1 + save_epoch: 100000000 + save_best: true + stepwise_logging: false + ema_decay: _ + use_amp: true + amp_dtype: float16 + compile: true + compile_mode: default + flash_attn: false + loss_q_weight: 0.1 + loss_q_warmup: '1.0_1' + loss_kl_weight: 0.0 + kl_decay_epoch: -1 + loss_latent_ce_weight: 0.0 + sqt_start_end_epoch: 0.0_0.0_0 + clip_grad_max_norm: 0.0 + init_checkpoint: '' + timesteps: 25 + verbose: false + guidance_scale: 1.0 + env: *id025 + comment: '' + manualSeed: 66667 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth new file mode 100644 index 0000000000000000000000000000000000000000..7fe89fe6a12d81e7f9e86641e965b8a861de29f2 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94381c6f59e56b006cfb2704277816901a6222f0470205e5ad06ea787ef13447 +size 8223495896 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_26_03_58_19.txt b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_26_03_58_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5832ee13adc02809608408df46eb0d29f49b872 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_26_03_58_19.txt @@ -0,0 +1,393 @@ +[10-26 03:58:19] Distributed training enabled. +[10-26 03:58:20] Environment setup done. +[10-26 03:58:21] Train dataset: len=435743 +[10-26 03:58:21] Test dataset: ucf101_val, len=3783 +[10-26 03:58:34] DYNTokenizer( + (x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 768, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) + (encoder): TransformerEncoderParallel( + (blocks): ModuleList( + (0-11): 12 x AttentionBlock( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=768, out_features=2304, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=768, out_features=768, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + ) + ) + ) + (decoder): TransformerAdaLNDecoderParallel( + (blocks): ModuleList( + (0-17): 18 x DiffusionAttentionBlock( + (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=1152, out_features=3456, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=1152, out_features=1152, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=1152, out_features=4608, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=4608, out_features=1152, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + (adaLN_modulation): Sequential( + (0): SiLU() + (1): Linear(in_features=1152, out_features=13824, bias=True) + ) + ) + ) + (rope): HunyuanVideoRotaryPosEmbed() + ) + (bottleneck): VectorQuantize( + (project_in): Linear(in_features=768, out_features=16, bias=True) + (project_out): Linear(in_features=16, out_features=1152, bias=True) + (_codebook): CosineSimCodebook() + ) + (final_layer): AdaLNOutputLayer( + (norm_final): AdaLayerNormContinuous( + (silu): SiLU() + (linear): Linear(in_features=1152, out_features=2304, bias=True) + (norm): LayerNorm((1152,), eps=1e-06, elementwise_affine=False) + ) + (linear): Linear(in_features=1152, out_features=768, bias=True) + ) + (flow_matching_noise_module): MinRFNoiseModule() + (dec_time_embedder): TimestepEmbedder( + (mlp): Sequential( + (0): Linear(in_features=256, out_features=1152, bias=True) + (1): SiLU() + (2): Linear(in_features=1152, out_features=1152, bias=True) + ) + ) + (dec_x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 1152, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) +) +[10-26 03:58:34] Model: #params=666.3M +[10-26 03:58:34] SLURM_JOB_ID: None +[10-26 03:58:34] SLUMR_ARRAY_JOB_ID: None +[10-26 03:58:34] SLURM_ARRAY_TASK_ID: None +[10-26 03:58:34] wandb_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192_____ +[10-26 03:58:34] Compiling model with mode: default +[10-26 03:58:55] Discriminator: #params=38.2M +[10-26 03:58:55] compiling loss with mode default +[10-26 03:58:55] Epoch 1 started. +[10-26 05:34:28] Epoch 1 training done. Time: 5732.70s +[10-26 06:03:25] Calculating FVD with running real stats +[10-26 06:04:53] Converting video data to uint8 +[10-26 06:05:59] Converting video data to uint8 +[10-26 06:05:59] Preparing to save rng states... +[10-26 06:05:59] Saving checkpoint... +[10-26 06:06:35] New best checkpoint saved: best_fvd_7983.63.pth +[10-26 06:06:35] Epoch 1, train: fm_loss=0.4965 perceptual_loss=0.7230 rp_loss=0.8580 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=11.8697 ssim=0.1345 loss_q=0.0000 index_usage_batch=0.0224 perplexity=6358.6162 loss_commit=0.0000 loss=0.8580 fps=24.2793, + eval: ucf101_val_psnr=6.4218 ucf101_val_ssim=0.0048 ucf101_val_fps=3.1429 ucf101_val_fvd=7983.6285, +Latest checkpoint saved. Time: 35.78s +, 2.1h (d 0.47) 2.1h/425.6h +[10-26 06:06:35] Epoch 2 started. +[10-26 06:58:07] Epoch 2 training done. Time: 3091.38s +[10-26 07:00:34] Calculating FVD with running real stats +[10-26 07:01:54] Converting video data to uint8 +[10-26 07:03:06] Converting video data to uint8 +[10-26 07:03:07] Preparing to save rng states... +[10-26 07:03:07] Saving checkpoint... +[10-26 07:03:44] New best checkpoint saved: best_fvd_4078.66.pth +[10-26 07:03:44] Epoch 2, train: fm_loss=0.2040 perceptual_loss=0.6439 rp_loss=0.5259 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=15.9498 ssim=0.1968 loss_q=0.0002 index_usage_batch=0.0195 perplexity=7807.2646 loss_commit=0.0002 loss=0.5259 fps=23.9054, + eval: ucf101_val_psnr=8.9663 ucf101_val_ssim=0.0189 ucf101_val_fps=3.1781 ucf101_val_fvd=4078.6604, +Latest checkpoint saved. Time: 37.56s +, 57.2m (d 0.04) 3.1h/308.0h +[10-26 07:03:44] Epoch 3 started. +[10-26 07:56:18] Epoch 3 training done. Time: 3154.18s +[10-26 07:58:45] Calculating FVD with running real stats +[10-26 08:00:09] Converting video data to uint8 +[10-26 08:01:18] Converting video data to uint8 +[10-26 08:01:18] Preparing to save rng states... +[10-26 08:01:18] Saving checkpoint... +[10-26 08:01:58] New best checkpoint saved: best_fvd_3830.81.pth +[10-26 08:01:58] Epoch 3, train: fm_loss=0.1239 perceptual_loss=0.5882 rp_loss=0.4180 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=18.5751 ssim=0.2626 loss_q=0.0003 index_usage_batch=0.0115 perplexity=7967.7891 loss_commit=0.0003 loss=0.4181 fps=23.6399, + eval: ucf101_val_psnr=9.5385 ucf101_val_ssim=0.0304 ucf101_val_fps=3.1865 ucf101_val_fvd=3830.8149, +Latest checkpoint saved. Time: 39.61s +, 58.2m (d 0.02) 4.1h/270.1h +[10-26 08:01:58] Epoch 4 started. +[10-26 08:54:48] Epoch 4 training done. Time: 3169.99s +[10-26 08:57:15] Calculating FVD with running real stats +[10-26 08:58:33] Converting video data to uint8 +[10-26 08:59:41] Converting video data to uint8 +[10-26 08:59:43] Preparing to save rng states... +[10-26 08:59:49] Saving checkpoint... +[10-26 09:00:27] New best checkpoint saved: best_fvd_3380.49.pth +[10-26 09:00:27] Epoch 4, train: fm_loss=0.0941 perceptual_loss=0.5426 rp_loss=0.3654 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=20.0186 ssim=0.3119 loss_q=0.0008 index_usage_batch=0.0103 perplexity=8027.0205 loss_commit=0.0008 loss=0.3654 fps=23.6290, + eval: ucf101_val_psnr=9.6446 ucf101_val_ssim=0.0359 ucf101_val_fps=3.1718 ucf101_val_fvd=3380.4911, +Latest checkpoint saved. Time: 44.50s +, 58.5m (d 0.03) 5.0h/251.3h +[10-26 09:00:27] Epoch 5 started. +[10-26 09:53:14] Epoch 5 training done. Time: 3167.01s +[10-26 09:55:41] Calculating FVD with running real stats +[10-26 09:57:00] Converting video data to uint8 +[10-26 09:58:08] Converting video data to uint8 +[10-26 09:58:09] Preparing to save rng states... +[10-26 09:58:10] Saving checkpoint... +[10-26 09:58:47] New best checkpoint saved: best_fvd_2447.01.pth +[10-26 09:58:47] Epoch 5, train: fm_loss=0.0751 perceptual_loss=0.5013 rp_loss=0.3258 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=21.1530 ssim=0.3623 loss_q=0.0020 index_usage_batch=0.0285 perplexity=8064.0542 loss_commit=0.0020 loss=0.3260 fps=23.6636, + eval: ucf101_val_psnr=11.4611 ucf101_val_ssim=0.0623 ucf101_val_fps=3.1759 ucf101_val_fvd=2447.0120, +Latest checkpoint saved. Time: 37.86s +, 58.3m (d 0.01) 6.0h/239.9h +[10-26 09:58:47] Epoch 6 started. +[10-26 10:50:47] Epoch 6 training done. Time: 3120.14s +[10-26 10:53:16] Calculating FVD with running real stats +[10-26 10:54:37] Converting video data to uint8 +[10-26 10:55:44] Converting video data to uint8 +[10-26 10:55:46] Preparing to save rng states... +[10-26 10:55:49] Saving checkpoint... +[10-26 10:56:25] New best checkpoint saved: best_fvd_2160.28.pth +[10-26 10:56:25] Epoch 6, train: fm_loss=0.0625 perceptual_loss=0.4607 rp_loss=0.2928 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=22.0782 ssim=0.4143 loss_q=0.0022 index_usage_batch=0.1266 perplexity=8087.0312 loss_commit=0.0022 loss=0.2930 fps=23.6909, + eval: ucf101_val_psnr=13.6264 ucf101_val_ssim=0.0992 ucf101_val_fps=3.1386 ucf101_val_fvd=2160.2844, +Latest checkpoint saved. Time: 39.57s +, 57.6m (d 0.00) 7.0h/231.9h +[10-26 10:56:25] Epoch 7 started. +[10-26 11:45:07] Epoch 7 training done. Time: 2921.50s +[10-26 11:47:34] Calculating FVD with running real stats +[10-26 11:48:53] Converting video data to uint8 +[10-26 11:50:02] Converting video data to uint8 +[10-26 11:50:03] Preparing to save rng states... +[10-26 11:50:06] Saving checkpoint... +[10-26 11:50:42] New best checkpoint saved: best_fvd_1674.94.pth +[10-26 11:50:42] Epoch 7, train: fm_loss=0.0541 perceptual_loss=0.4081 rp_loss=0.2581 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=22.7474 ssim=0.4579 loss_q=0.0018 index_usage_batch=0.2620 perplexity=8086.5469 loss_commit=0.0018 loss=0.2583 fps=24.0509, + eval: ucf101_val_psnr=15.8265 ucf101_val_ssim=0.1793 ucf101_val_fps=3.1762 ucf101_val_fvd=1674.9365, +Latest checkpoint saved. Time: 38.41s +, 54.3m (d 0.00) 7.9h/224.7h +[10-26 11:50:42] Epoch 8 started. +[10-26 12:38:25] Epoch 8 training done. Time: 2862.67s +[10-26 12:40:51] Calculating FVD with running real stats +[10-26 12:42:13] Converting video data to uint8 +[10-26 12:43:24] Converting video data to uint8 +[10-26 12:43:24] Preparing to save rng states... +[10-26 12:43:24] Saving checkpoint... +[10-26 12:43:58] New best checkpoint saved: best_fvd_1113.78.pth +[10-26 12:43:58] Epoch 8, train: fm_loss=0.0474 perceptual_loss=0.3506 rp_loss=0.2227 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=23.4041 ssim=0.5031 loss_q=0.0020 index_usage_batch=0.4202 perplexity=8058.2388 loss_commit=0.0020 loss=0.2229 fps=24.2708, + eval: ucf101_val_psnr=16.7288 ucf101_val_ssim=0.2578 ucf101_val_fps=3.1883 ucf101_val_fvd=1113.7755, +Latest checkpoint saved. Time: 33.97s +, 53.3m (d 0.02) 8.8h/218.8h +[10-26 12:43:58] Epoch 9 started. +[10-26 13:31:57] Epoch 9 training done. Time: 2878.38s +[10-26 13:34:23] Calculating FVD with running real stats +[10-26 13:35:46] Converting video data to uint8 +[10-26 13:36:54] Converting video data to uint8 +[10-26 13:36:54] Preparing to save rng states... +[10-26 13:36:54] Saving checkpoint... +[10-26 13:37:29] New best checkpoint saved: best_fvd_827.66.pth +[10-26 13:37:29] Epoch 9, train: fm_loss=0.0423 perceptual_loss=0.3054 rp_loss=0.1950 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=23.9759 ssim=0.5432 loss_q=0.0027 index_usage_batch=0.5526 perplexity=8024.5972 loss_commit=0.0027 loss=0.1952 fps=24.1793, + eval: ucf101_val_psnr=18.4337 ucf101_val_ssim=0.3501 ucf101_val_fps=3.1849 ucf101_val_fvd=827.6603, +Latest checkpoint saved. Time: 34.83s +, 53.5m (d 0.04) 9.6h/214.3h +[10-26 13:37:29] Epoch 10 started. +[10-26 14:25:32] Epoch 10 training done. Time: 2882.85s +[10-26 14:27:58] Calculating FVD with running real stats +[10-26 14:29:22] Converting video data to uint8 +[10-26 14:30:28] Converting video data to uint8 +[10-26 14:30:29] Preparing to save rng states... +[10-26 14:30:29] Saving checkpoint... +[10-26 14:31:05] New best checkpoint saved: best_fvd_610.79.pth +[10-26 14:31:05] Epoch 10, train: fm_loss=0.0383 perceptual_loss=0.2705 rp_loss=0.1736 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=24.5157 ssim=0.5798 loss_q=0.0037 index_usage_batch=0.6788 perplexity=8023.9727 loss_commit=0.0037 loss=0.1740 fps=24.1696, + eval: ucf101_val_psnr=19.2263 ucf101_val_ssim=0.4042 ucf101_val_fps=3.1910 ucf101_val_fvd=610.7945, +Latest checkpoint saved. Time: 36.02s +, 53.6m (d 0.02) 10.5h/210.7h +[10-26 14:31:05] Epoch 11 started. +[10-26 15:19:04] Epoch 11 training done. Time: 2879.19s +[10-26 15:21:30] Calculating FVD with running real stats +[10-26 15:22:50] Converting video data to uint8 +[10-26 15:24:00] Converting video data to uint8 +[10-26 15:24:01] Preparing to save rng states... +[10-26 15:24:01] Saving checkpoint... +[10-26 15:24:37] New best checkpoint saved: best_fvd_381.65.pth +[10-26 15:24:37] Epoch 11, train: fm_loss=0.0351 perceptual_loss=0.2409 rp_loss=0.1555 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.0424 ssim=0.6155 loss_q=0.0043 index_usage_batch=0.7751 perplexity=8051.9331 loss_commit=0.0043 loss=0.1559 fps=24.1750, + eval: ucf101_val_psnr=20.3549 ucf101_val_ssim=0.4929 ucf101_val_fps=3.1925 ucf101_val_fvd=381.6456, +Latest checkpoint saved. Time: 36.06s +, 53.5m (d 0.07) 11.4h/207.8h +[10-26 15:24:37] Epoch 12 started. +[10-26 16:12:57] Epoch 12 training done. Time: 2900.10s +[10-26 16:15:24] Calculating FVD with running real stats +[10-26 16:16:51] Converting video data to uint8 +[10-26 16:18:01] Converting video data to uint8 +[10-26 16:18:02] Preparing to save rng states... +[10-26 16:18:02] Saving checkpoint... +[10-26 16:18:37] New best checkpoint saved: best_fvd_323.49.pth +[10-26 16:18:37] Epoch 12, train: fm_loss=0.0328 perceptual_loss=0.2185 rp_loss=0.1420 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.5186 ssim=0.6458 loss_q=0.0044 index_usage_batch=0.8184 perplexity=8070.7065 loss_commit=0.0044 loss=0.1425 fps=24.0804, + eval: ucf101_val_psnr=21.4852 ucf101_val_ssim=0.5541 ucf101_val_fps=3.1760 ucf101_val_fvd=323.4891, +Latest checkpoint saved. Time: 35.00s +, 54.0m (d 0.03) 12.3h/205.5h +[10-26 16:18:37] Epoch 13 started. +[10-26 17:07:51] Epoch 13 training done. Time: 2954.12s +[10-26 17:10:17] Calculating FVD with running real stats +[10-26 17:11:39] Converting video data to uint8 +[10-26 17:12:46] Converting video data to uint8 +[10-26 17:12:47] Preparing to save rng states... +[10-26 17:12:47] Saving checkpoint... +[10-26 17:13:23] New best checkpoint saved: best_fvd_270.49.pth +[10-26 17:13:23] Epoch 13, train: fm_loss=0.0308 perceptual_loss=0.2025 rp_loss=0.1320 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.8723 ssim=0.6692 loss_q=0.0044 index_usage_batch=0.8237 perplexity=8076.1089 loss_commit=0.0044 loss=0.1325 fps=24.0107, + eval: ucf101_val_psnr=21.3858 ucf101_val_ssim=0.5712 ucf101_val_fps=3.2044 ucf101_val_fvd=270.4910, +Latest checkpoint saved. Time: 35.80s +, 54.8m (d 0.06) 13.2h/203.7h +[10-26 17:13:23] Epoch 14 started. +[10-26 18:01:49] Epoch 14 training done. Time: 2905.86s +[10-26 18:04:15] Calculating FVD with running real stats +[10-26 18:05:37] Converting video data to uint8 +[10-26 18:06:44] Converting video data to uint8 +[10-26 18:06:45] Preparing to save rng states... +[10-26 18:06:45] Saving checkpoint... +[10-26 18:07:23] New best checkpoint saved: best_fvd_266.42.pth +[10-26 18:07:23] Epoch 14, train: fm_loss=0.0294 perceptual_loss=0.1902 rp_loss=0.1245 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.1650 ssim=0.6876 loss_q=0.0045 index_usage_batch=0.8265 perplexity=8077.9287 loss_commit=0.0045 loss=0.1249 fps=24.1597, + eval: ucf101_val_psnr=22.1036 ucf101_val_ssim=0.6195 ucf101_val_fps=3.1930 ucf101_val_fvd=266.4189, +Latest checkpoint saved. Time: 37.61s +, 54.0m (d 0.06) 14.1h/202.0h +[10-26 18:07:23] Epoch 15 started. +[10-26 18:55:32] Epoch 15 training done. Time: 2889.90s +[10-26 18:57:59] Calculating FVD with running real stats +[10-26 18:59:21] Converting video data to uint8 +[10-26 19:00:29] Converting video data to uint8 +[10-26 19:00:29] Preparing to save rng states... +[10-26 19:00:29] Saving checkpoint... +[10-26 19:01:04] New best checkpoint saved: best_fvd_218.01.pth +[10-26 19:01:04] Epoch 15, train: fm_loss=0.0283 perceptual_loss=0.1776 rp_loss=0.1171 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.4998 ssim=0.7085 loss_q=0.0048 index_usage_batch=0.8284 perplexity=8078.5635 loss_commit=0.0048 loss=0.1176 fps=24.3035, + eval: ucf101_val_psnr=22.0786 ucf101_val_ssim=0.6269 ucf101_val_fps=3.1910 ucf101_val_fvd=218.0063, +Latest checkpoint saved. Time: 34.62s +, 53.7m (d 0.04) 15.0h/200.5h +[10-26 19:01:04] Epoch 16 started. +[10-26 19:49:32] Epoch 16 training done. Time: 2907.65s +[10-26 19:51:57] Calculating FVD with running real stats +[10-26 19:53:20] Converting video data to uint8 +[10-26 19:54:27] Converting video data to uint8 +[10-26 19:54:28] Preparing to save rng states... +[10-26 19:54:28] Saving checkpoint... +[10-26 19:54:39] Epoch 16, train: fm_loss=0.0273 perceptual_loss=0.1666 rp_loss=0.1106 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.7899 ssim=0.7275 loss_q=0.0060 index_usage_batch=0.8263 perplexity=8079.1172 loss_commit=0.0060 loss=0.1112 fps=24.1462, + eval: ucf101_val_psnr=22.4575 ucf101_val_ssim=0.6505 ucf101_val_fps=3.2147 ucf101_val_fvd=221.7525, +Latest checkpoint saved. Time: 10.53s +, 53.6m (d 0.02) 15.9h/199.1h +[10-26 19:54:39] Epoch 17 started. +[10-26 20:43:02] Epoch 17 training done. Time: 2903.26s +[10-26 20:45:28] Calculating FVD with running real stats +[10-26 20:46:53] Converting video data to uint8 +[10-26 20:47:59] Converting video data to uint8 +[10-26 20:48:00] Preparing to save rng states... +[10-26 20:48:00] Saving checkpoint... +[10-26 20:48:35] New best checkpoint saved: best_fvd_200.12.pth +[10-26 20:48:35] Epoch 17, train: fm_loss=0.0262 perceptual_loss=0.1568 rp_loss=0.1046 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.0340 ssim=0.7445 loss_q=0.0061 index_usage_batch=0.8285 perplexity=8079.9019 loss_commit=0.0061 loss=0.1052 fps=24.0968, + eval: ucf101_val_psnr=22.6389 ucf101_val_ssim=0.6659 ucf101_val_fps=3.1874 ucf101_val_fvd=200.1184, +Latest checkpoint saved. Time: 35.77s +, 53.9m (d 0.07) 16.8h/198.0h +[10-26 20:48:35] Epoch 18 started. +[10-26 21:36:21] Epoch 18 training done. Time: 2865.34s +[10-26 21:38:47] Calculating FVD with running real stats +[10-26 21:40:11] Converting video data to uint8 +[10-26 21:41:17] Converting video data to uint8 +[10-26 21:41:18] Preparing to save rng states... +[10-26 21:41:18] Saving checkpoint... +[10-26 21:41:52] New best checkpoint saved: best_fvd_187.29.pth +[10-26 21:41:52] Epoch 18, train: fm_loss=0.0255 perceptual_loss=0.1484 rp_loss=0.0997 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.2793 ssim=0.7591 loss_q=0.0062 index_usage_batch=0.8297 perplexity=8080.2930 loss_commit=0.0062 loss=0.1003 fps=24.1556, + eval: ucf101_val_psnr=22.6294 ucf101_val_ssim=0.6638 ucf101_val_fps=3.2034 ucf101_val_fvd=187.2859, +Latest checkpoint saved. Time: 34.07s +, 53.3m (d 0.07) 17.7h/196.8h +[10-26 21:41:52] Epoch 19 started. +[10-26 22:29:32] Epoch 19 training done. Time: 2860.19s +[10-26 22:31:59] Calculating FVD with running real stats +[10-26 22:33:21] Converting video data to uint8 +[10-26 22:34:32] Converting video data to uint8 +[10-26 22:34:33] Preparing to save rng states... +[10-26 22:34:33] Saving checkpoint... +[10-26 22:35:10] New best checkpoint saved: best_fvd_177.69.pth +[10-26 22:35:10] Epoch 19, train: fm_loss=0.0250 perceptual_loss=0.1422 rp_loss=0.0961 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.4619 ssim=0.7701 loss_q=0.0062 index_usage_batch=0.8307 perplexity=8080.1904 loss_commit=0.0062 loss=0.0967 fps=24.0560, + eval: ucf101_val_psnr=23.0714 ucf101_val_ssim=0.6980 ucf101_val_fps=3.1797 ucf101_val_fvd=177.6935, +Latest checkpoint saved. Time: 37.20s +, 53.3m (d 0.05) 18.6h/195.8h +[10-26 22:35:10] Epoch 20 started. +[10-26 23:23:17] Epoch 20 training done. Time: 2886.99s +[10-26 23:25:44] Calculating FVD with running real stats +[10-26 23:27:10] Converting video data to uint8 +[10-26 23:28:17] Converting video data to uint8 +[10-26 23:28:18] Preparing to save rng states... +[10-26 23:28:18] Saving checkpoint... +[10-26 23:28:52] New best checkpoint saved: best_fvd_164.04.pth +[10-26 23:28:52] Epoch 20, train: fm_loss=0.0244 perceptual_loss=0.1366 rp_loss=0.0927 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.6406 ssim=0.7801 loss_q=0.0062 index_usage_batch=0.8317 perplexity=8079.9258 loss_commit=0.0062 loss=0.0933 fps=24.3060, + eval: ucf101_val_psnr=23.0348 ucf101_val_ssim=0.7059 ucf101_val_fps=3.1822 ucf101_val_fvd=164.0369, +Latest checkpoint saved. Time: 34.57s +, 53.7m (d 0.03) 19.5h/195.0h +[10-26 23:28:52] Epoch 21 started. +[10-27 00:16:29] Epoch 21 training done. Time: 2856.47s +[10-27 00:18:57] Calculating FVD with running real stats +[10-27 00:20:23] Converting video data to uint8 +[10-27 00:21:30] Converting video data to uint8 +[10-27 00:21:30] Preparing to save rng states... +[10-27 00:21:30] Saving checkpoint... +[10-27 00:21:41] Epoch 21, train: fm_loss=0.0239 perceptual_loss=0.1321 rp_loss=0.0900 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.7904 ssim=0.7884 loss_q=0.0062 index_usage_batch=0.8325 perplexity=8079.4365 loss_commit=0.0062 loss=0.0906 fps=24.3157, + eval: ucf101_val_psnr=22.6612 ucf101_val_ssim=0.6816 ucf101_val_fps=3.1624 ucf101_val_fvd=164.3932, +Latest checkpoint saved. Time: 10.43s +, 52.8m (d 0.06) 20.4h/194.1h +[10-27 00:21:41] Epoch 22 started. +[10-27 01:09:35] Epoch 22 training done. Time: 2874.68s +[10-27 01:12:03] Calculating FVD with running real stats +[10-27 01:13:29] Converting video data to uint8 +[10-27 01:14:36] Converting video data to uint8 +[10-27 01:14:36] Preparing to save rng states... +[10-27 01:14:36] Saving checkpoint... +[10-27 01:15:11] New best checkpoint saved: best_fvd_151.94.pth +[10-27 01:15:11] Epoch 22, train: fm_loss=0.0234 perceptual_loss=0.1282 rp_loss=0.0875 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.9169 ssim=0.7950 loss_q=0.0063 index_usage_batch=0.8332 perplexity=8078.8394 loss_commit=0.0063 loss=0.0881 fps=24.1343, + eval: ucf101_val_psnr=23.1404 ucf101_val_ssim=0.6852 ucf101_val_fps=3.1745 ucf101_val_fvd=151.9350, +Latest checkpoint saved. Time: 34.47s +, 53.5m (d 0.05) 21.3h/193.4h +[10-27 01:15:11] Epoch 23 started. +[10-27 02:03:16] Epoch 23 training done. Time: 2885.42s +[10-27 02:05:44] Calculating FVD with running real stats +[10-27 02:07:08] Converting video data to uint8 +[10-27 02:08:16] Converting video data to uint8 +[10-27 02:08:17] Preparing to save rng states... +[10-27 02:08:17] Saving checkpoint... +[10-27 02:08:53] New best checkpoint saved: best_fvd_138.58.pth +[10-27 02:08:53] Epoch 23, train: fm_loss=0.0231 perceptual_loss=0.1252 rp_loss=0.0857 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.0422 ssim=0.8006 loss_q=0.0063 index_usage_batch=0.8338 perplexity=8077.9961 loss_commit=0.0063 loss=0.0864 fps=24.0963, + eval: ucf101_val_psnr=23.3009 ucf101_val_ssim=0.7058 ucf101_val_fps=3.1746 ucf101_val_fvd=138.5815, +Latest checkpoint saved. Time: 36.40s +, 53.7m (d 0.05) 22.2h/192.8h +[10-27 02:08:53] Epoch 24 started. +[10-27 02:57:06] Epoch 24 training done. Time: 2892.43s +[10-27 02:59:32] Calculating FVD with running real stats +[10-27 03:00:55] Converting video data to uint8 +[10-27 03:02:04] Converting video data to uint8 +[10-27 03:02:05] Preparing to save rng states... +[10-27 03:02:05] Saving checkpoint... +[10-27 03:02:40] New best checkpoint saved: best_fvd_130.39.pth +[10-27 03:02:40] Epoch 24, train: fm_loss=0.0227 perceptual_loss=0.1221 rp_loss=0.0837 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.1448 ssim=0.8057 loss_q=0.0063 index_usage_batch=0.8342 perplexity=8077.2231 loss_commit=0.0063 loss=0.0843 fps=24.2673, + eval: ucf101_val_psnr=23.4273 ucf101_val_ssim=0.7316 ucf101_val_fps=3.1914 ucf101_val_fvd=130.3938, +Latest checkpoint saved. Time: 35.15s +, 53.8m (d 0.04) 23.1h/192.2h +[10-27 03:02:40] Epoch 25 started. diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_27_05_21_46.txt b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_27_05_21_46.txt new file mode 100644 index 0000000000000000000000000000000000000000..00c5f334b7151df380f7b59496393584927d3c2c --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_27_05_21_46.txt @@ -0,0 +1,156 @@ +[10-27 05:21:48] Distributed training enabled. +[10-27 05:21:48] Environment setup done. +[10-27 05:21:50] Train dataset: len=435743 +[10-27 05:21:50] Test dataset: ucf101_val, len=3783 +[10-27 05:21:50] Resuming training from /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth +[10-27 05:22:20] DYNTokenizer( + (x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 768, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) + (encoder): TransformerEncoderParallel( + (blocks): ModuleList( + (0-11): 12 x AttentionBlock( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=768, out_features=2304, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=768, out_features=768, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + ) + ) + ) + (decoder): TransformerAdaLNDecoderParallel( + (blocks): ModuleList( + (0-17): 18 x DiffusionAttentionBlock( + (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=1152, out_features=3456, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=1152, out_features=1152, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=1152, out_features=4608, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=4608, out_features=1152, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + (adaLN_modulation): Sequential( + (0): SiLU() + (1): Linear(in_features=1152, out_features=13824, bias=True) + ) + ) + ) + (rope): HunyuanVideoRotaryPosEmbed() + ) + (bottleneck): VectorQuantize( + (project_in): Linear(in_features=768, out_features=16, bias=True) + (project_out): Linear(in_features=16, out_features=1152, bias=True) + (_codebook): CosineSimCodebook() + ) + (final_layer): AdaLNOutputLayer( + (norm_final): AdaLayerNormContinuous( + (silu): SiLU() + (linear): Linear(in_features=1152, out_features=2304, bias=True) + (norm): LayerNorm((1152,), eps=1e-06, elementwise_affine=False) + ) + (linear): Linear(in_features=1152, out_features=768, bias=True) + ) + (flow_matching_noise_module): MinRFNoiseModule() + (dec_time_embedder): TimestepEmbedder( + (mlp): Sequential( + (0): Linear(in_features=256, out_features=1152, bias=True) + (1): SiLU() + (2): Linear(in_features=1152, out_features=1152, bias=True) + ) + ) + (dec_x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 1152, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) +) +[10-27 05:22:20] Model: #params=666.3M +[10-27 05:22:20] SLURM_JOB_ID: None +[10-27 05:22:20] SLUMR_ARRAY_JOB_ID: None +[10-27 05:22:20] SLURM_ARRAY_TASK_ID: None +[10-27 05:22:20] wandb_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192_____ +[10-27 05:22:20] Compiling model with mode: default +[10-27 05:22:37] Discriminator: #params=38.2M +[10-27 05:22:37] compiling loss with mode default +[10-27 05:22:37] Epoch 25 started. +[10-27 06:55:17] Epoch 25 training done. Time: 5560.63s +[10-27 07:24:16] Calculating FVD with running real stats +[10-27 07:25:45] Converting video data to uint8 +[10-27 07:26:48] Converting video data to uint8 +[10-27 07:26:48] Preparing to save rng states... +[10-27 07:26:48] Saving checkpoint... +[10-27 07:27:24] New best checkpoint saved: best_fvd_121.09.pth +[10-27 07:27:24] Epoch 25, train: fm_loss=0.0224 perceptual_loss=0.1194 rp_loss=0.0821 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.2606 ssim=0.8104 loss_q=0.0063 index_usage_batch=0.8348 perplexity=8076.3428 loss_commit=0.0063 loss=0.0827 fps=24.2888, + eval: ucf101_val_psnr=23.3652 ucf101_val_ssim=0.7205 ucf101_val_fps=3.1465 ucf101_val_fvd=121.0894, +Latest checkpoint saved. Time: 35.08s +, 2.1h (d 0.47) 2.1h/415.9h +[10-27 07:27:24] Epoch 26 started. +[10-27 08:15:04] Epoch 26 training done. Time: 2860.67s +[10-27 08:17:31] Calculating FVD with running real stats +[10-27 08:18:53] Converting video data to uint8 +[10-27 08:20:01] Converting video data to uint8 +[10-27 08:20:02] Preparing to save rng states... +[10-27 08:20:02] Saving checkpoint... +[10-27 08:20:37] New best checkpoint saved: best_fvd_107.29.pth +[10-27 08:20:37] Epoch 26, train: fm_loss=0.0219 perceptual_loss=0.1175 rp_loss=0.0807 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.3334 ssim=0.8138 loss_q=0.0063 index_usage_batch=0.8352 perplexity=8075.3994 loss_commit=0.0063 loss=0.0813 fps=24.1446, + eval: ucf101_val_psnr=23.3940 ucf101_val_ssim=0.7037 ucf101_val_fps=3.1935 ucf101_val_fvd=107.2903, +Latest checkpoint saved. Time: 35.02s +, 53.2m (d 0.06) 3.0h/296.7h +[10-27 08:20:37] Epoch 27 started. +[10-27 09:08:13] Epoch 27 training done. Time: 2856.72s +[10-27 09:10:40] Calculating FVD with running real stats +[10-27 09:12:02] Converting video data to uint8 +[10-27 09:13:10] Converting video data to uint8 +[10-27 09:13:11] Preparing to save rng states... +[10-27 09:13:11] Saving checkpoint... +[10-27 09:13:47] New best checkpoint saved: best_fvd_106.67.pth +[10-27 09:13:47] Epoch 27, train: fm_loss=0.0217 perceptual_loss=0.1151 rp_loss=0.0792 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.4303 ssim=0.8178 loss_q=0.0063 index_usage_batch=0.8351 perplexity=8074.3740 loss_commit=0.0063 loss=0.0799 fps=24.0925, + eval: ucf101_val_psnr=23.7231 ucf101_val_ssim=0.7382 ucf101_val_fps=3.1962 ucf101_val_fvd=106.6714, +Latest checkpoint saved. Time: 35.97s +, 53.2m (d 0.02) 3.9h/256.9h +[10-27 09:13:47] Epoch 28 started. +[10-27 10:01:01] Epoch 28 training done. Time: 2834.39s +[10-27 10:03:27] Calculating FVD with running real stats +[10-27 10:04:50] Converting video data to uint8 +[10-27 10:05:58] Converting video data to uint8 +[10-27 10:05:59] Preparing to save rng states... +[10-27 10:05:59] Saving checkpoint... +[10-27 10:06:35] New best checkpoint saved: best_fvd_105.24.pth +[10-27 10:06:35] Epoch 28, train: fm_loss=0.0215 perceptual_loss=0.1129 rp_loss=0.0780 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.5272 ssim=0.8215 loss_q=0.0063 index_usage_batch=0.8354 perplexity=8073.2065 loss_commit=0.0063 loss=0.0786 fps=24.1403, + eval: ucf101_val_psnr=23.6395 ucf101_val_ssim=0.7394 ucf101_val_fps=3.2026 ucf101_val_fvd=105.2365, +Latest checkpoint saved. Time: 35.69s +, 52.8m (d 0.03) 4.7h/236.6h +[10-27 10:06:35] Epoch 29 started. diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761422299.hopper-26.3880041.0 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761422299.hopper-26.3880041.0 new file mode 100644 index 0000000000000000000000000000000000000000..145983cb128fd84eed0218952a0fc7310025ebac --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761422299.hopper-26.3880041.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0490cee65ce3c964710407235680e3fdf961cf02a6449fc6c9f8687a7a38833e +size 29464 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761513706.hopper-10.716544.0 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761513706.hopper-10.716544.0 new file mode 100644 index 0000000000000000000000000000000000000000..395b4bb611dc1180f5187810f5baaa79e7ccaf9c --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761513706.hopper-10.716544.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d35c4937d378b988c0ab9a41b6d878d2282760c87e250ddb62efe3f706a945e +size 4984 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug-internal.log b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a2a262fe70a7ee4c517c3a6a6ddd0d074f86deb2 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2025-10-27T05:22:02.60922833+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"/scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug-core.log"} +{"time":"2025-10-27T05:22:03.20175398+08:00","level":"INFO","msg":"created new stream","id":"c3u3silm"} +{"time":"2025-10-27T05:22:03.202333506+08:00","level":"INFO","msg":"stream: started","id":"c3u3silm"} +{"time":"2025-10-27T05:22:03.202357497+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"c3u3silm"} +{"time":"2025-10-27T05:22:03.203578697+08:00","level":"INFO","msg":"sender: started","stream_id":"c3u3silm"} +{"time":"2025-10-27T05:22:03.20239453+08:00","level":"INFO","msg":"handler: started","stream_id":"c3u3silm"} +{"time":"2025-10-27T05:22:03.818476109+08:00","level":"INFO","msg":"Starting system monitor"} diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug.log b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..797831a48a023b54a2480fbbbdec4b362a99ad70 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug.log @@ -0,0 +1,23 @@ +2025-10-27 05:22:02,538 INFO MainThread:716544 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9 +2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Configure stats pid to 716544 +2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from /home/svu/e0724392/.config/wandb/settings +2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from /scratch/e0724392/work4/LARP/wandb/settings +2025-10-27 05:22:02,540 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from environment variables +2025-10-27 05:22:02,541 INFO MainThread:716544 [wandb_init.py:setup_run_log_directory():662] Logging user logs to /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug.log +2025-10-27 05:22:02,542 INFO MainThread:716544 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug-internal.log +2025-10-27 05:22:02,543 INFO MainThread:716544 [wandb_init.py:init():781] calling init triggers +2025-10-27 05:22:02,543 INFO MainThread:716544 [wandb_init.py:init():786] wandb.init called with sweep_config: {} +config: {'trainer': 'our_tokenizer_trainer', 'train_dataset': {'name': 'video_dataset', 'args': {'root_path': 'data/metadata', 'split': 'train', 'frame_num': 16, 'rand_augment': 'no', 'csv_file': 'k600_train.csv+ucf101_train.csv', 'cls_vid_num': '-1_-1', 'crop_size': 128, 'scale': 1.0, 'aspect_ratio': 1.0, 'rand_flip': 'yes', 'use_all_frames': False, 'pre_load': False}, 'loader': {'batch_size': 128, 'num_workers': 32}}, 'test_dataset': {'name': 'video_dataset', 'args': {'root_path': 'data/metadata', 'frame_num': 16, 'cls_vid_num': '-1_-1', 'crop_size': 128, 'use_all_frames': False, 'pre_load': False}, 'csv_paths': {'ucf101_val': 'ucf101_val.csv'}, 'loader': {'batch_size': 128, 'num_workers': 32}}, 'model': {'name': 'dyn_tokenizer', 'args': {'noise_schedule': {'name': 'min_rf_noise_module', 'args': {'clean_data_read_key': 'clean_data', 'noised_data_write_key': 'noisy_input', 'noise_write_key': 'flow_noise', 'timesteps_write_key': 'timesteps', 'sigmas_write_key': 'sigmas', 'ln': False, 'stratisfied': False, 'mode_scale': 0.25}}, 'bottleneck': {'name': 'bottleneck', 'args': {'regularizer': {'name': 'vector_quantize', 'args': {'codebook_dim': 16, 'codebook_size': 8192, 'ema_update': True, 'decay': 0.99, 'kmeans_init': True, 'kmeans_iters': 10, 'threshold_ema_dead_code': 0.2, 'use_cosine_sim': True, 'commitment_weight': 1.0, 'diversity_weight': 0.0, 'smart_re_K': 0, 'continuous': False, 'reg': [0.1, 0.3], 'reset_cluster_size': 0.2, 'ema_entropy_ratio': 0.8, 'vq_start_step': 0}}}}, 'prior_model': {'name': 'none', 'use_mix_ss': True, 'mix_ss_max_ratio': 0.5, 'mix_ss_peak_steps_ratio': 0.3, 'n_rounds': 2, 'avg_loss_over_rounds': True, 'no_grad_before_last_round': False, 'no_dropout': False, 'latent_ce_temperature': 1.0, 'args': {'l2_normalized': True}}, 'dec_time_embedder': {'name': 'timestep_embedder', 'args': {'timesteps_read_key': 'timesteps', 'time_embedding_write_key': 'dec_temb', 'dim': 1152, 'frequency_embedding_size': 256, 'max_timestep': 1000.0}}, 'transformer_name': 'transformer_encoder_parallel', 'encoder_name': 'none', 'decoder_name': 'transformer_AdaLN_decoder_parallel', 'bottleneck_token_num': 1024, 'input_size': 128, 'frame_num': 16, 'temporal_patch_size': 4, 'patch_size': 8, 'decoder_temporal_patch_size': 4, 'decoder_patch_size': 8, 'in_channels': 3, 'encoder_hidden_size': 768, 'decoder_hidden_size': 1152, 'encoder_num_heads': 12, 'decoder_num_heads': 18, 'encoder_depth': 12, 'decoder_depth': 18, 'encoder_block_name': 'block_timm', 'decoder_block_name': 'adaLN_block_timm', 'encoder_mask_mode': 'full', 'decoder_mask_mode': 'full', 'learned_encoder_patch_pe': False, 'learned_encoder_latent_query_embed': True, 'learned_decoder_latent_pe': False, 'learned_decoder_patch_query_embed': False, 'use_encoder_patch_token_type_embed': False, 'use_encoder_latent_query_token_type_embed': False, 'enable_decoder_query': False, 'learned_decoder_pe': False, 'use_decoder_latent_token_type_embed': False, 'use_decoder_patch_query_token_type_embed': True, 'encoder_query_gaussian_init': True, 'latent_pe_scale_factor': 10000, 'query_init_std': 0.02, 'adaLN_expansion': 2, 'final_layer_init': 'xavier_uniform', 'enable_vq': True, 'qk_norm': True, 'use_rope': True, 'rope_dim': [16, 24, 24], 'final_layer_type': 'adanorm'}}, 'loss': {'name': 'fm_disc_loss', 'args': {'disc_type': 'transformer', 'disc_start': 999999, 'disc_self_start': -1, 'perceptual_weight': 0.5, 'perceptual_loss': 'lpips', 'perceptual_fp16': False, 'lecam_weight': 0.001, 'disc_loss': 'ns_smooth', 'disc_weight': 0.0, 'r1_gp_weight': 0.0, 'd_update_freq': 5, 'spectral_norm': False, 'disc_tran_hidden_size': 512, 'disc_tran_n_heads': 8, 'disc_tran_n_layers': 12, 'disc_tran_temporal_patch_size': 4, 'disc_tran_patch_size': 8, 'input_spatial_size': 128, 'frame_num': 16, 'fm_loss_weight': 1.0}}, 'optimizer': {'name': 'adamw', 'loss_name': 'adam', 'args': {'lr': 0.0001, 'betas': [0.9, 0.99]}, 'loss_args': {'lr': 3e-05, 'betas': [0.5, 0.9]}, 'lr_type': 'step', 'lr_step_pcts': '0.9_0.95', 'warmup_epoch': 10, 'min_lr_mult': 0.1, 'prior_lr_mult': 1.0, 'emb_lr_mult': 1.0}, 'max_epoch': 200, 'eval_epoch': 1, 'vis_epoch': 1, 'latest_interval': 1, 'save_epoch': 100000000, 'save_best': True, 'stepwise_logging': False, 'ema_decay': '_', 'use_amp': True, 'amp_dtype': 'float16', 'compile': True, 'compile_mode': 'default', 'flash_attn': False, 'loss_q_weight': 0.1, 'loss_q_warmup': '1.0_1', 'loss_kl_weight': 0.0, 'kl_decay_epoch': -1, 'loss_latent_ce_weight': 0.0, 'sqt_start_end_epoch': '0.0_0.0_0', 'clip_grad_max_norm': 0.0, 'init_checkpoint': '', 'timesteps': 25, 'verbose': False, 'guidance_scale': 1.0, 'env': {'tot_gpus': 8, 'cudnn': False, 'wandb_upload': True, 'wandb_entity': 'lingmin', 'wandb_project': 'dyn_tokenizer', 'exp_name': 'k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__', 'save_dir': '/scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__', 'port': '12778'}, 'comment': '', 'manualSeed': 66667, 'TrainSize': 435743, 'TestSize_ucf101_val': 3783, '_wandb': {}} +2025-10-27 05:22:02,544 INFO MainThread:716544 [wandb_init.py:init():809] starting backend +2025-10-27 05:22:02,544 INFO MainThread:716544 [wandb_init.py:init():813] sending inform_init request +2025-10-27 05:22:02,596 INFO MainThread:716544 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2025-10-27 05:22:02,597 INFO MainThread:716544 [wandb_init.py:init():823] backend started and connected +2025-10-27 05:22:02,602 INFO MainThread:716544 [wandb_init.py:init():915] updated telemetry +2025-10-27 05:22:02,731 INFO MainThread:716544 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout +2025-10-27 05:22:03,767 INFO MainThread:716544 [wandb_init.py:init():1009] run resumed +2025-10-27 05:22:03,771 INFO MainThread:716544 [wandb_init.py:init():1014] starting run threads in backend +2025-10-27 05:22:10,370 INFO MainThread:716544 [wandb_run.py:_console_start():2454] atexit reg +2025-10-27 05:22:10,371 INFO MainThread:716544 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-10-27 05:22:10,372 INFO MainThread:716544 [wandb_run.py:_redirect():2371] Wrapping output streams. +2025-10-27 05:22:10,373 INFO MainThread:716544 [wandb_run.py:_redirect():2394] Redirects installed. +2025-10-27 05:22:10,663 INFO MainThread:716544 [wandb_init.py:init():1056] run started, returning control to user process diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ba0f2fea57ed9f990e369445351ef9ca2f7d0ed5 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcd7543861eb951d9eb9128002ad8c13ed359ea83bdae4393d4ce3a895546af +size 1447271 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..013f9ee049625dbdf68cc96b296d904381aa5a8b --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c440b37dfe11e1ecb291dc667a185db861185b5e562915887c95bb58e07c1ad2 +size 1361429 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b88a454003aea0a74742041a3fa71dd0f3f875da --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6abd01594c9f04b701e5263b60f0493aa59c673e9fdf04efceac6c15ddebb5 +size 1104671 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c8af3027acdcbdcd5ba8a3a889c49c01304c54df --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ed7abcba2550022a651a87086ca880859a600b134152317b5406cb35652c37 +size 1061247 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..02160d6157b7943d93dde4e58b8d36607e485eef --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d19ba838a2636f6bbb97e8cdab2a1a1ad413ba47d36269393ae97552278e130 +size 971039 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..21eaad2edb8ce8924aeb09f22eff9c49ec752a38 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4b12dc26b33b48899cd40408810e3fabd1ad83a452c333eb2304576fb6e7b67 +size 898506 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..707c3315d7c1f8f52ae2af097a6a3a7031674364 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be836e9a23353717c38d1bc7a52e75ac416615e04c76ace755ce177335410c9f +size 978642 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c2a921a0d96f1833afd256721974e183258843f8 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db636a27733aa82a9288e3b388c4ceb5089ba746d4909ed6adbe141d96ac2603 +size 1003627 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0845627c7bd46961de3497159e3205362db2b63e --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8520ee5928f1948584de506a2dee7748d47912c71657efecd4cc3d6f7e21d6ab +size 975965 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e11eebc83c1caa073c49da99bb0fd4a9065fe698 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9902d6a8e4fcef1843c7e85204d9a67bb8aea067506b585f8b84b84a1f094404 +size 882054 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..90fe1080654ee6c35062ec9edb47d40929a582b2 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4905dd5ddf36de8fa9671e752a11f477c07e6e64107d7df50f65d37fe9686f15 +size 6065915 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2845c9ac769c3a545f92ed515ae2a4d304f631b6 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430fdf292106f4cdd169295b60d4b53c1bded466a659a13b42e361724689ed80 +size 937420 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..808b89b58533ada843021ebce24bb854215fa371 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16346a94669cd22074d836082da0f4dc64523c512248d294be70e676755758d +size 899995 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1cd3d4735c5a86b72860a6fb5cf4e1df324aeb58 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9cd0a98c6efd38dd32640094344de5687e1d0ee16bc09acc0831ccd0a478ccd +size 952132 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..520a333f9c47725582a7bf03723f3a0f05b49d25 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c5160b84b0b93c4d9a35723ac23dc74080d6816c9c386900a0d6aae7e7cbf3 +size 911203 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..524f3b18c53fb6b7b2d4e38dc5521d039b256db8 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa9fc05e947e5c68fe571c83d411c21619387b375f3c3962e4255d625363608 +size 895463 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a4e640dcd1a5aabfe112d3febfe808f105311eca --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15bd6c3ab28bcf2d818887cc710cd9e8c41e7f000f59b09cb1efcad20a3b4cb +size 4538287 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b8e54f8939f801d09bbf6a6837b50224f321cc6d --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d7f7f93663c75403c55ecf763a0a4cb179d042312633f62bdd97228db1c9e9 +size 3564898 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ab779ea154bea503718002af0195b8d2290d4ec5 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518c61a588fe34e8b97b4d5a847ca4503fc147203f2ba655c6c51036ab9f11e0 +size 3098325 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..638c28e0ddb79c2859be2646c5a630d178c22cc9 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a718cd301e53d86c413d314d81c01ae2be78483c38b325fb0f74a8f015dc934f +size 2490105 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fa079b24bd70301988424946a49b44a5da532e9b --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75eb65b267e94c10e7e6736fbc4f28aa33c443da8280a22360cdb26deb1b4fb1 +size 2170170 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ea661da49fb8be69349b73d19abae83011f38ec4 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f140abb6a7cc547d17d4e3b1407f3c67121025bb26aa8918cdaa742227f1963 +size 2086638 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..467c9fc3f5d20399893fe557ee5b597cd2e5a692 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b22b9cea46a63503ee26cb4c3c76f9aa4879d911efe88e213f43d5151b4eaba +size 1653335 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fbc1e738243966155fd8262532aa388981dce87c --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d0cffdb29ebf65107ec0f593ad28c54ff88651007c137a2788cdc878771660 +size 1545553 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6dad6c94595dd9b2012ffa9b6388ad8e8a9acae8 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb55eb08e79a819f931958929f93fad04d815ad67d0d36647eda2840616fd94a +size 1618902 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..20e8fa2ff210449058b2d501f8a5223094999f8f --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c1cb0239fed9f79371044130e055bdabf559600bf3b6773897ae9a4d3ed8f7 +size 1436148 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..46d0af2ca84976a348740985f502947c912b7077 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3c2426eecabf472d8df6e0d2d5bc162cd8c92f4b00df04ac20fc09b45800cd +size 1227217 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4f2bfcf39ee21f7d73f889997a97efc1321022b7 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6876091a79457be67ea70b38e2ae0ebc86379eb82c564e2265c219f6c894ef26 +size 1215670 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..44cc7199947349e5372de04f64888a52f445c5e8 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23bb82f59567ab382809cc37267b9248d3731e77f2f071bf4ac329640fb21b1 +size 1120628 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4834805e8de7c40a20df229704c3903d6b7fc999 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fbe0df36c11295fe0e35f37c7eae508473abaaa9de839230eadf0da9b19364 +size 1071212 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2f04d2a0aacc9ad28f27c572ef59ac9ab2f0a840 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd373b8d21986a89617b4ec4b380228dd5936a3620fee9f99f8c169a4a1a5c2 +size 1144524 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6e3bba561314b7073a4c69ca8759f3b53144ea5f --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8984f5b47aeb710bec1f02c175050ac65cf24c1149dc6cce038585da3a856a +size 1065785 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fad3433625a1536762e59c75d864768c901ae8bf --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b7c06cbf94b4d1290d5c8333878b6a24a44f7cb5faafbc9a294d6b66a30374 +size 1107669 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..37f4429f9c796d9e3f45e9330f9c577e6798c818 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756caade2d1fd4a30c0a7aeabc087b33ef89a52cba4fcb46ced71a39cc4a9984 +size 1067021 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bb5ac229d1c4d79da99f1f20479ecef52048eaf2 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb69ac202a830cd54f3cb6cd11867857813b4deee93e0f022d5006948a3380ee +size 6059426 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3cad0deaea22dd1f57af2a544482d0f61a4686a5 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f34ba9f9b4ff9969489b89ad1eab8ab255a4baa624c880e751379613e52d1089 +size 1040426 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6da5a6aec9b8d2ce27d9f3b95bb7a5eb835e95f7 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d3de1a852a74091ca359278c8f7140102923f88f6a772743ee7e9a250d4d08 +size 1045543 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d71d5c540bf08363bffcdf05ecff313449b2cf05 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ec8ae339ed7f41fe8b500a26f4e7e2a76ad15a79fb4f97b027da4cebf54dcb +size 1013515 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fcf235673bf87f1f3dac052f52ab0d9e47d40d38 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcb034794f3447789b5c4bd4647e3c9c83ac6fe73c24f75e45c09ebefb3fc3a7 +size 1055829 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..810c0fc6b494b3399519ea0924ee26701679d8c1 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4dac4cd93c9f9849e0cb3bc3410531219f1fd8242e050b92935b380c187e07b +size 1116529 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_2_a619d3adb2f03b80bc21.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_2_a619d3adb2f03b80bc21.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..389b8ed633b02067b25ddc0c819e20ef61285b82 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_2_a619d3adb2f03b80bc21.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a619d3adb2f03b80bc21afe0a6915e125519d4353d26236843b392dc48f0dd68 +size 4547197 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_3_256510a892d28f79e85e.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_3_256510a892d28f79e85e.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9dcfc205c4cff98f9a78802c6b2f776390082286 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_3_256510a892d28f79e85e.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256510a892d28f79e85e6e7461d0459e2ef37671e09488a201b50450e5c320d1 +size 3567545 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_4_b55a09705365f1f7bc7c.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_4_b55a09705365f1f7bc7c.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bb79e6f75e4e413de3a1d96cdf3a25875947caa7 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_4_b55a09705365f1f7bc7c.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b55a09705365f1f7bc7cd51aec441ccbcb9594bc7af5ce333d36b7247cef4592 +size 3113284 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_5_e65019ee1ebe9882b0b7.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_5_e65019ee1ebe9882b0b7.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..984972425619f63a277f284b7beaa7be46ad3856 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_5_e65019ee1ebe9882b0b7.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65019ee1ebe9882b0b7c166bf973a97d2c6e60020a4e97524a18640cbf5d8b7 +size 2584563 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_6_f4f71170b27199f67c46.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_6_f4f71170b27199f67c46.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..079dc81c323f6787a1866e51884817c02019bafc --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_6_f4f71170b27199f67c46.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f71170b27199f67c465037b476ceffc2db759444e2c73ef911ca5bea0391d6 +size 2314197 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_7_ee4dfc69e0f0ff992917.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_7_ee4dfc69e0f0ff992917.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..681c487d1a1f32ba6c9d2d6289b41d1b3639c876 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_7_ee4dfc69e0f0ff992917.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4dfc69e0f0ff992917682af6a24dc436a71ff3201bae78bc41c2c6e9c9e3b9 +size 2208605 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_8_3a191d37e945e0e0dc25.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_8_3a191d37e945e0e0dc25.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..eef73232031da5cf4c9c9d864385c1f07396f0d0 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_8_3a191d37e945e0e0dc25.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a191d37e945e0e0dc257c855af75b789f110f9dc96b10ed2bf5791c61c94ee9 +size 1862095 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_9_e121e2ba5f686fd4950c.mp4 b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_9_e121e2ba5f686fd4950c.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4739f4b59738748cfaa5760dd0f645bcbfa83ff1 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_9_e121e2ba5f686fd4950c.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e121e2ba5f686fd4950c891a2eead3356469cc369311462e103b9228acdff976 +size 1738395 diff --git a/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/output.log b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..589f5003e0bef9cca7fdb21ca65bfe3ab15514c4 --- /dev/null +++ b/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/output.log @@ -0,0 +1,516 @@ +[10-26 03:58:34] DYNTokenizer( + (x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 768, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) + (encoder): TransformerEncoderParallel( + (blocks): ModuleList( + (0-11): 12 x AttentionBlock( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=768, out_features=2304, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=768, out_features=768, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + ) + ) + ) + (decoder): TransformerAdaLNDecoderParallel( + (blocks): ModuleList( + (0-17): 18 x DiffusionAttentionBlock( + (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + (qkv): Linear(in_features=1152, out_features=3456, bias=False) + (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True) + (attn_drop): Dropout(p=0.0, inplace=False) + (norm): Identity() + (proj): Linear(in_features=1152, out_features=1152, bias=True) + (proj_drop): Dropout(p=0.0, inplace=False) + ) + (ls1): Identity() + (drop_path1): Identity() + (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=1152, out_features=4608, bias=True) + (act): GELU(approximate='none') + (drop1): Dropout(p=0.0, inplace=False) + (norm): Identity() + (fc2): Linear(in_features=4608, out_features=1152, bias=True) + (drop2): Dropout(p=0.0, inplace=False) + ) + (ls2): Identity() + (drop_path2): Identity() + (adaLN_modulation): Sequential( + (0): SiLU() + (1): Linear(in_features=1152, out_features=13824, bias=True) + ) + ) + ) + (rope): HunyuanVideoRotaryPosEmbed() + ) + (bottleneck): VectorQuantize( + (project_in): Linear(in_features=768, out_features=16, bias=True) + (project_out): Linear(in_features=16, out_features=1152, bias=True) + (_codebook): CosineSimCodebook() + ) + (final_layer): AdaLNOutputLayer( + (norm_final): AdaLayerNormContinuous( + (silu): SiLU() + (linear): Linear(in_features=1152, out_features=2304, bias=True) + (norm): LayerNorm((1152,), eps=1e-06, elementwise_affine=False) + ) + (linear): Linear(in_features=1152, out_features=768, bias=True) + ) + (flow_matching_noise_module): MinRFNoiseModule() + (dec_time_embedder): TimestepEmbedder( + (mlp): Sequential( + (0): Linear(in_features=256, out_features=1152, bias=True) + (1): SiLU() + (2): Linear(in_features=1152, out_features=1152, bias=True) + ) + ) + (dec_x_embedder): PatchEmbed3D( + (proj): Conv3d(3, 1152, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + (norm): Identity() + ) +) +[10-26 03:58:34] Model: #params=666.3M +[10-26 03:58:34] SLURM_JOB_ID: None +[10-26 03:58:34] SLUMR_ARRAY_JOB_ID: None +[10-26 03:58:34] SLURM_ARRAY_TASK_ID: None +[10-26 03:58:34] wandb_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192_____ +[10-26 03:58:34] Compiling model with mode: default +Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off] +/scratch/e0724392/virtualenvs/motiondirector/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + warnings.warn( +/scratch/e0724392/virtualenvs/motiondirector/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights. + warnings.warn(msg) +Loading model from: /scratch/e0724392/virtualenvs/motiondirector/lib/python3.10/site-packages/lpips/weights/v0.1/vgg.pth +/scratch/e0724392/virtualenvs/motiondirector/lib/python3.10/site-packages/lpips/lpips.py:107: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + self.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False) +Using LeCam regularization with weight 0.001. +[10-26 03:58:55] Discriminator: #params=38.2M +[10-26 03:58:55] compiling loss with mode default +/scratch/e0724392/work4/LARP/trainers/our_tokenizer_trainer.py:291: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + model_scaler = torch.cuda.amp.GradScaler(enabled=self.use_amp and self.amp_dtype == torch.float16) +/scratch/e0724392/work4/LARP/trainers/our_tokenizer_trainer.py:297: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead. + discriminator_scaler = torch.cuda.amp.GradScaler(enabled=self.use_amp and self.amp_dtype == torch.float16) +[10-26 03:58:55] Epoch 1 started. +train: 0%| | 0/3404 [00:00