Lingmin-Ran commited on
Commit
f5bf5d2
·
verified ·
1 Parent(s): e6e62e1

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +58 -0
  2. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/best_fvd_105.24.pth +3 -0
  3. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/cfg.yaml +477 -0
  4. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth +3 -0
  5. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_26_03_58_19.txt +393 -0
  6. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_27_05_21_46.txt +156 -0
  7. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761422299.hopper-26.3880041.0 +3 -0
  8. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761513706.hopper-10.716544.0 +3 -0
  9. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug-internal.log +7 -0
  10. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug.log +23 -0
  11. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 +3 -0
  12. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 +3 -0
  13. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 +3 -0
  14. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 +3 -0
  15. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 +3 -0
  16. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 +3 -0
  17. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 +3 -0
  18. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 +3 -0
  19. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 +3 -0
  20. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 +3 -0
  21. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 +3 -0
  22. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 +3 -0
  23. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 +3 -0
  24. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 +3 -0
  25. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 +3 -0
  26. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 +3 -0
  27. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 +3 -0
  28. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 +3 -0
  29. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 +3 -0
  30. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 +3 -0
  31. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 +3 -0
  32. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 +3 -0
  33. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 +3 -0
  34. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 +3 -0
  35. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 +3 -0
  36. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 +3 -0
  37. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 +3 -0
  38. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 +3 -0
  39. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 +3 -0
  40. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 +3 -0
  41. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 +3 -0
  42. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 +3 -0
  43. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 +3 -0
  44. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 +3 -0
  45. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 +3 -0
  46. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 +3 -0
  47. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 +3 -0
  48. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 +3 -0
  49. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 +3 -0
  50. k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 +3 -0
.gitattributes CHANGED
@@ -134,3 +134,61 @@ base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_q
134
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_950_a048891b4bb2e302eb8c.mp4 filter=lfs diff=lfs merge=lfs -text
135
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_975_dad91748c15462b26fb3.mp4 filter=lfs diff=lfs merge=lfs -text
136
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/run-p2mcszig.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_950_a048891b4bb2e302eb8c.mp4 filter=lfs diff=lfs merge=lfs -text
135
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/files/media/videos/vis_train_dataset_975_dad91748c15462b26fb3.mp4 filter=lfs diff=lfs merge=lfs -text
136
  base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b64_btn1024_vector_quantize_rcs8192__/wandb/run-20250929_000209-p2mcszig/run-p2mcszig.wandb filter=lfs diff=lfs merge=lfs -text
137
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 filter=lfs diff=lfs merge=lfs -text
138
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 filter=lfs diff=lfs merge=lfs -text
139
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 filter=lfs diff=lfs merge=lfs -text
140
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 filter=lfs diff=lfs merge=lfs -text
141
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 filter=lfs diff=lfs merge=lfs -text
142
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 filter=lfs diff=lfs merge=lfs -text
143
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 filter=lfs diff=lfs merge=lfs -text
144
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 filter=lfs diff=lfs merge=lfs -text
145
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 filter=lfs diff=lfs merge=lfs -text
146
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 filter=lfs diff=lfs merge=lfs -text
147
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 filter=lfs diff=lfs merge=lfs -text
148
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 filter=lfs diff=lfs merge=lfs -text
149
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 filter=lfs diff=lfs merge=lfs -text
150
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 filter=lfs diff=lfs merge=lfs -text
151
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 filter=lfs diff=lfs merge=lfs -text
152
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 filter=lfs diff=lfs merge=lfs -text
153
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 filter=lfs diff=lfs merge=lfs -text
154
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 filter=lfs diff=lfs merge=lfs -text
155
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 filter=lfs diff=lfs merge=lfs -text
156
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 filter=lfs diff=lfs merge=lfs -text
157
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 filter=lfs diff=lfs merge=lfs -text
158
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 filter=lfs diff=lfs merge=lfs -text
159
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 filter=lfs diff=lfs merge=lfs -text
160
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 filter=lfs diff=lfs merge=lfs -text
161
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 filter=lfs diff=lfs merge=lfs -text
162
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 filter=lfs diff=lfs merge=lfs -text
163
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 filter=lfs diff=lfs merge=lfs -text
164
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 filter=lfs diff=lfs merge=lfs -text
165
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 filter=lfs diff=lfs merge=lfs -text
166
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 filter=lfs diff=lfs merge=lfs -text
167
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 filter=lfs diff=lfs merge=lfs -text
168
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 filter=lfs diff=lfs merge=lfs -text
169
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 filter=lfs diff=lfs merge=lfs -text
170
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 filter=lfs diff=lfs merge=lfs -text
171
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 filter=lfs diff=lfs merge=lfs -text
172
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 filter=lfs diff=lfs merge=lfs -text
173
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 filter=lfs diff=lfs merge=lfs -text
174
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 filter=lfs diff=lfs merge=lfs -text
175
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 filter=lfs diff=lfs merge=lfs -text
176
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 filter=lfs diff=lfs merge=lfs -text
177
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_2_a619d3adb2f03b80bc21.mp4 filter=lfs diff=lfs merge=lfs -text
178
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_3_256510a892d28f79e85e.mp4 filter=lfs diff=lfs merge=lfs -text
179
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_4_b55a09705365f1f7bc7c.mp4 filter=lfs diff=lfs merge=lfs -text
180
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_5_e65019ee1ebe9882b0b7.mp4 filter=lfs diff=lfs merge=lfs -text
181
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_6_f4f71170b27199f67c46.mp4 filter=lfs diff=lfs merge=lfs -text
182
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_7_ee4dfc69e0f0ff992917.mp4 filter=lfs diff=lfs merge=lfs -text
183
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_8_3a191d37e945e0e0dc25.mp4 filter=lfs diff=lfs merge=lfs -text
184
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_9_e121e2ba5f686fd4950c.mp4 filter=lfs diff=lfs merge=lfs -text
185
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/run-c3u3silm.wandb filter=lfs diff=lfs merge=lfs -text
186
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_25_f90c3d650c4a707c1ec3.mp4 filter=lfs diff=lfs merge=lfs -text
187
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_26_fcb0b2b66064e1cc523a.mp4 filter=lfs diff=lfs merge=lfs -text
188
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_27_fb495aa0725107fd4084.mp4 filter=lfs diff=lfs merge=lfs -text
189
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_test_dataset_28_f3d85415155c518fccc7.mp4 filter=lfs diff=lfs merge=lfs -text
190
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_25_9060c16221f1e99f7b6e.mp4 filter=lfs diff=lfs merge=lfs -text
191
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_26_aed6f3de431c91b8cae8.mp4 filter=lfs diff=lfs merge=lfs -text
192
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_27_31f4a1a3e263ac0f4120.mp4 filter=lfs diff=lfs merge=lfs -text
193
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/files/media/videos/vis_train_dataset_28_3aa6f2f161e847e89032.mp4 filter=lfs diff=lfs merge=lfs -text
194
+ k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/run-c3u3silm.wandb filter=lfs diff=lfs merge=lfs -text
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/best_fvd_105.24.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94381c6f59e56b006cfb2704277816901a6222f0470205e5ad06ea787ef13447
3
+ size 8223495896
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/cfg.yaml ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object/new:easydict.EasyDict
2
+ state:
3
+ trainer: our_tokenizer_trainer
4
+ train_dataset: &id020 !!python/object/new:easydict.EasyDict
5
+ state:
6
+ name: video_dataset
7
+ args: &id001 !!python/object/new:easydict.EasyDict
8
+ state:
9
+ root_path: data/metadata
10
+ split: train
11
+ frame_num: 16
12
+ rand_augment: 'no'
13
+ csv_file: k600_train.csv+ucf101_train.csv
14
+ cls_vid_num: -1_-1
15
+ crop_size: 128
16
+ scale: 1.0
17
+ aspect_ratio: 1.0
18
+ rand_flip: 'yes'
19
+ use_all_frames: false
20
+ pre_load: false
21
+ dictitems:
22
+ root_path: data/metadata
23
+ split: train
24
+ frame_num: 16
25
+ rand_augment: 'no'
26
+ csv_file: k600_train.csv+ucf101_train.csv
27
+ cls_vid_num: -1_-1
28
+ crop_size: 128
29
+ scale: 1.0
30
+ aspect_ratio: 1.0
31
+ rand_flip: 'yes'
32
+ use_all_frames: false
33
+ pre_load: false
34
+ loader: &id002 !!python/object/new:easydict.EasyDict
35
+ state:
36
+ batch_size: 128
37
+ num_workers: 32
38
+ dictitems:
39
+ batch_size: 128
40
+ num_workers: 32
41
+ dictitems:
42
+ name: video_dataset
43
+ args: *id001
44
+ loader: *id002
45
+ test_dataset: &id021 !!python/object/new:easydict.EasyDict
46
+ state:
47
+ name: video_dataset
48
+ args: &id003 !!python/object/new:easydict.EasyDict
49
+ state:
50
+ root_path: data/metadata
51
+ frame_num: 16
52
+ cls_vid_num: -1_-1
53
+ crop_size: 128
54
+ use_all_frames: false
55
+ pre_load: false
56
+ dictitems:
57
+ root_path: data/metadata
58
+ frame_num: 16
59
+ cls_vid_num: -1_-1
60
+ crop_size: 128
61
+ use_all_frames: false
62
+ pre_load: false
63
+ csv_paths: &id004 !!python/object/new:easydict.EasyDict
64
+ state:
65
+ ucf101_val: ucf101_val.csv
66
+ dictitems:
67
+ ucf101_val: ucf101_val.csv
68
+ loader: &id005 !!python/object/new:easydict.EasyDict
69
+ state:
70
+ batch_size: 128
71
+ num_workers: 32
72
+ dictitems:
73
+ batch_size: 128
74
+ num_workers: 32
75
+ dictitems:
76
+ name: video_dataset
77
+ args: *id003
78
+ csv_paths: *id004
79
+ loader: *id005
80
+ model: &id022 !!python/object/new:easydict.EasyDict
81
+ state:
82
+ name: dyn_tokenizer
83
+ args: &id016 !!python/object/new:easydict.EasyDict
84
+ state:
85
+ noise_schedule: &id012 !!python/object/new:easydict.EasyDict
86
+ state:
87
+ name: min_rf_noise_module
88
+ args: &id006 !!python/object/new:easydict.EasyDict
89
+ state:
90
+ clean_data_read_key: clean_data
91
+ noised_data_write_key: noisy_input
92
+ noise_write_key: flow_noise
93
+ timesteps_write_key: timesteps
94
+ sigmas_write_key: sigmas
95
+ ln: false
96
+ stratisfied: false
97
+ mode_scale: 0.25
98
+ dictitems:
99
+ clean_data_read_key: clean_data
100
+ noised_data_write_key: noisy_input
101
+ noise_write_key: flow_noise
102
+ timesteps_write_key: timesteps
103
+ sigmas_write_key: sigmas
104
+ ln: false
105
+ stratisfied: false
106
+ mode_scale: 0.25
107
+ dictitems:
108
+ name: min_rf_noise_module
109
+ args: *id006
110
+ bottleneck: &id013 !!python/object/new:easydict.EasyDict
111
+ state:
112
+ name: bottleneck
113
+ args: &id009 !!python/object/new:easydict.EasyDict
114
+ state:
115
+ regularizer: &id008 !!python/object/new:easydict.EasyDict
116
+ state:
117
+ name: vector_quantize
118
+ args: &id007 !!python/object/new:easydict.EasyDict
119
+ state:
120
+ codebook_dim: 16
121
+ codebook_size: 8192
122
+ ema_update: true
123
+ decay: 0.99
124
+ kmeans_init: true
125
+ kmeans_iters: 10
126
+ threshold_ema_dead_code: 0.2
127
+ use_cosine_sim: true
128
+ commitment_weight: 1.0
129
+ diversity_weight: 0.0
130
+ smart_re_K: 0
131
+ continuous: false
132
+ reg:
133
+ - 0.1
134
+ - 0.3
135
+ reset_cluster_size: 0.2
136
+ ema_entropy_ratio: 0.8
137
+ vq_start_step: 0
138
+ dictitems:
139
+ codebook_dim: 16
140
+ codebook_size: 8192
141
+ ema_update: true
142
+ decay: 0.99
143
+ kmeans_init: true
144
+ kmeans_iters: 10
145
+ threshold_ema_dead_code: 0.2
146
+ use_cosine_sim: true
147
+ commitment_weight: 1.0
148
+ diversity_weight: 0.0
149
+ smart_re_K: 0
150
+ continuous: false
151
+ reg:
152
+ - 0.1
153
+ - 0.3
154
+ reset_cluster_size: 0.2
155
+ ema_entropy_ratio: 0.8
156
+ vq_start_step: 0
157
+ dictitems:
158
+ name: vector_quantize
159
+ args: *id007
160
+ dictitems:
161
+ regularizer: *id008
162
+ dictitems:
163
+ name: bottleneck
164
+ args: *id009
165
+ prior_model: &id014 !!python/object/new:easydict.EasyDict
166
+ state:
167
+ name: none
168
+ use_mix_ss: true
169
+ mix_ss_max_ratio: 0.5
170
+ mix_ss_peak_steps_ratio: 0.3
171
+ n_rounds: 2
172
+ avg_loss_over_rounds: true
173
+ no_grad_before_last_round: false
174
+ no_dropout: false
175
+ latent_ce_temperature: 1.0
176
+ args: &id010 !!python/object/new:easydict.EasyDict
177
+ state:
178
+ l2_normalized: true
179
+ dictitems:
180
+ l2_normalized: true
181
+ dictitems:
182
+ name: none
183
+ use_mix_ss: true
184
+ mix_ss_max_ratio: 0.5
185
+ mix_ss_peak_steps_ratio: 0.3
186
+ n_rounds: 2
187
+ avg_loss_over_rounds: true
188
+ no_grad_before_last_round: false
189
+ no_dropout: false
190
+ latent_ce_temperature: 1.0
191
+ args: *id010
192
+ dec_time_embedder: &id015 !!python/object/new:easydict.EasyDict
193
+ state:
194
+ name: timestep_embedder
195
+ args: &id011 !!python/object/new:easydict.EasyDict
196
+ state:
197
+ timesteps_read_key: timesteps
198
+ time_embedding_write_key: dec_temb
199
+ dim: 1152
200
+ frequency_embedding_size: 256
201
+ max_timestep: 1000.0
202
+ dictitems:
203
+ timesteps_read_key: timesteps
204
+ time_embedding_write_key: dec_temb
205
+ dim: 1152
206
+ frequency_embedding_size: 256
207
+ max_timestep: 1000.0
208
+ dictitems:
209
+ name: timestep_embedder
210
+ args: *id011
211
+ transformer_name: transformer_encoder_parallel
212
+ encoder_name: none
213
+ decoder_name: transformer_AdaLN_decoder_parallel
214
+ bottleneck_token_num: 1024
215
+ input_size: 128
216
+ frame_num: 16
217
+ temporal_patch_size: 4
218
+ patch_size: 8
219
+ decoder_temporal_patch_size: 4
220
+ decoder_patch_size: 8
221
+ in_channels: 3
222
+ encoder_hidden_size: 768
223
+ decoder_hidden_size: 1152
224
+ encoder_num_heads: 12
225
+ decoder_num_heads: 18
226
+ encoder_depth: 12
227
+ decoder_depth: 18
228
+ encoder_block_name: block_timm
229
+ decoder_block_name: adaLN_block_timm
230
+ encoder_mask_mode: full
231
+ decoder_mask_mode: full
232
+ learned_encoder_patch_pe: false
233
+ learned_encoder_latent_query_embed: true
234
+ learned_decoder_latent_pe: false
235
+ learned_decoder_patch_query_embed: false
236
+ use_encoder_patch_token_type_embed: false
237
+ use_encoder_latent_query_token_type_embed: false
238
+ enable_decoder_query: false
239
+ learned_decoder_pe: false
240
+ use_decoder_latent_token_type_embed: false
241
+ use_decoder_patch_query_token_type_embed: true
242
+ encoder_query_gaussian_init: true
243
+ latent_pe_scale_factor: 10000
244
+ query_init_std: 0.02
245
+ adaLN_expansion: 2
246
+ final_layer_init: xavier_uniform
247
+ enable_vq: true
248
+ qk_norm: true
249
+ use_rope: true
250
+ rope_dim:
251
+ - 16
252
+ - 24
253
+ - 24
254
+ final_layer_type: adanorm
255
+ dictitems:
256
+ noise_schedule: *id012
257
+ bottleneck: *id013
258
+ prior_model: *id014
259
+ dec_time_embedder: *id015
260
+ transformer_name: transformer_encoder_parallel
261
+ encoder_name: none
262
+ decoder_name: transformer_AdaLN_decoder_parallel
263
+ bottleneck_token_num: 1024
264
+ input_size: 128
265
+ frame_num: 16
266
+ temporal_patch_size: 4
267
+ patch_size: 8
268
+ decoder_temporal_patch_size: 4
269
+ decoder_patch_size: 8
270
+ in_channels: 3
271
+ encoder_hidden_size: 768
272
+ decoder_hidden_size: 1152
273
+ encoder_num_heads: 12
274
+ decoder_num_heads: 18
275
+ encoder_depth: 12
276
+ decoder_depth: 18
277
+ encoder_block_name: block_timm
278
+ decoder_block_name: adaLN_block_timm
279
+ encoder_mask_mode: full
280
+ decoder_mask_mode: full
281
+ learned_encoder_patch_pe: false
282
+ learned_encoder_latent_query_embed: true
283
+ learned_decoder_latent_pe: false
284
+ learned_decoder_patch_query_embed: false
285
+ use_encoder_patch_token_type_embed: false
286
+ use_encoder_latent_query_token_type_embed: false
287
+ enable_decoder_query: false
288
+ learned_decoder_pe: false
289
+ use_decoder_latent_token_type_embed: false
290
+ use_decoder_patch_query_token_type_embed: true
291
+ encoder_query_gaussian_init: true
292
+ latent_pe_scale_factor: 10000
293
+ query_init_std: 0.02
294
+ adaLN_expansion: 2
295
+ final_layer_init: xavier_uniform
296
+ enable_vq: true
297
+ qk_norm: true
298
+ use_rope: true
299
+ rope_dim:
300
+ - 16
301
+ - 24
302
+ - 24
303
+ final_layer_type: adanorm
304
+ dictitems:
305
+ name: dyn_tokenizer
306
+ args: *id016
307
+ loss: &id023 !!python/object/new:easydict.EasyDict
308
+ state:
309
+ name: fm_disc_loss
310
+ args: &id017 !!python/object/new:easydict.EasyDict
311
+ state:
312
+ disc_type: transformer
313
+ disc_start: 999999
314
+ disc_self_start: -1
315
+ perceptual_weight: 0.5
316
+ perceptual_loss: lpips
317
+ perceptual_fp16: false
318
+ lecam_weight: 0.001
319
+ disc_loss: ns_smooth
320
+ disc_weight: 0.0
321
+ r1_gp_weight: 0.0
322
+ d_update_freq: 5
323
+ spectral_norm: false
324
+ disc_tran_hidden_size: 512
325
+ disc_tran_n_heads: 8
326
+ disc_tran_n_layers: 12
327
+ disc_tran_temporal_patch_size: 4
328
+ disc_tran_patch_size: 8
329
+ input_spatial_size: 128
330
+ frame_num: 16
331
+ fm_loss_weight: 1.0
332
+ dictitems:
333
+ disc_type: transformer
334
+ disc_start: 999999
335
+ disc_self_start: -1
336
+ perceptual_weight: 0.5
337
+ perceptual_loss: lpips
338
+ perceptual_fp16: false
339
+ lecam_weight: 0.001
340
+ disc_loss: ns_smooth
341
+ disc_weight: 0.0
342
+ r1_gp_weight: 0.0
343
+ d_update_freq: 5
344
+ spectral_norm: false
345
+ disc_tran_hidden_size: 512
346
+ disc_tran_n_heads: 8
347
+ disc_tran_n_layers: 12
348
+ disc_tran_temporal_patch_size: 4
349
+ disc_tran_patch_size: 8
350
+ input_spatial_size: 128
351
+ frame_num: 16
352
+ fm_loss_weight: 1.0
353
+ dictitems:
354
+ name: fm_disc_loss
355
+ args: *id017
356
+ optimizer: &id024 !!python/object/new:easydict.EasyDict
357
+ state:
358
+ name: adamw
359
+ loss_name: adam
360
+ args: &id018 !!python/object/new:easydict.EasyDict
361
+ state:
362
+ lr: 0.0001
363
+ betas:
364
+ - 0.9
365
+ - 0.99
366
+ dictitems:
367
+ lr: 0.0001
368
+ betas:
369
+ - 0.9
370
+ - 0.99
371
+ loss_args: &id019 !!python/object/new:easydict.EasyDict
372
+ state:
373
+ lr: 3.0e-05
374
+ betas:
375
+ - 0.5
376
+ - 0.9
377
+ dictitems:
378
+ lr: 3.0e-05
379
+ betas:
380
+ - 0.5
381
+ - 0.9
382
+ lr_type: step
383
+ lr_step_pcts: 0.9_0.95
384
+ warmup_epoch: 10
385
+ min_lr_mult: 0.1
386
+ prior_lr_mult: 1.0
387
+ emb_lr_mult: 1.0
388
+ dictitems:
389
+ name: adamw
390
+ loss_name: adam
391
+ args: *id018
392
+ loss_args: *id019
393
+ lr_type: step
394
+ lr_step_pcts: 0.9_0.95
395
+ warmup_epoch: 10
396
+ min_lr_mult: 0.1
397
+ prior_lr_mult: 1.0
398
+ emb_lr_mult: 1.0
399
+ max_epoch: 200
400
+ eval_epoch: 1
401
+ vis_epoch: 1
402
+ latest_interval: 1
403
+ save_epoch: 100000000
404
+ save_best: true
405
+ stepwise_logging: false
406
+ ema_decay: _
407
+ use_amp: true
408
+ amp_dtype: float16
409
+ compile: true
410
+ compile_mode: default
411
+ flash_attn: false
412
+ loss_q_weight: 0.1
413
+ loss_q_warmup: '1.0_1'
414
+ loss_kl_weight: 0.0
415
+ kl_decay_epoch: -1
416
+ loss_latent_ce_weight: 0.0
417
+ sqt_start_end_epoch: 0.0_0.0_0
418
+ clip_grad_max_norm: 0.0
419
+ init_checkpoint: ''
420
+ timesteps: 25
421
+ verbose: false
422
+ guidance_scale: 1.0
423
+ env: &id025 !!python/object/new:easydict.EasyDict
424
+ state:
425
+ tot_gpus: 8
426
+ cudnn: false
427
+ wandb_upload: true
428
+ wandb_entity: lingmin
429
+ wandb_project: dyn_tokenizer
430
+ exp_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__
431
+ save_dir: /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__
432
+ port: '12778'
433
+ dictitems:
434
+ tot_gpus: 8
435
+ cudnn: false
436
+ wandb_upload: true
437
+ wandb_entity: lingmin
438
+ wandb_project: dyn_tokenizer
439
+ exp_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__
440
+ save_dir: /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__
441
+ port: '12778'
442
+ comment: ''
443
+ manualSeed: 66667
444
+ dictitems:
445
+ trainer: our_tokenizer_trainer
446
+ train_dataset: *id020
447
+ test_dataset: *id021
448
+ model: *id022
449
+ loss: *id023
450
+ optimizer: *id024
451
+ max_epoch: 200
452
+ eval_epoch: 1
453
+ vis_epoch: 1
454
+ latest_interval: 1
455
+ save_epoch: 100000000
456
+ save_best: true
457
+ stepwise_logging: false
458
+ ema_decay: _
459
+ use_amp: true
460
+ amp_dtype: float16
461
+ compile: true
462
+ compile_mode: default
463
+ flash_attn: false
464
+ loss_q_weight: 0.1
465
+ loss_q_warmup: '1.0_1'
466
+ loss_kl_weight: 0.0
467
+ kl_decay_epoch: -1
468
+ loss_latent_ce_weight: 0.0
469
+ sqt_start_end_epoch: 0.0_0.0_0
470
+ clip_grad_max_norm: 0.0
471
+ init_checkpoint: ''
472
+ timesteps: 25
473
+ verbose: false
474
+ guidance_scale: 1.0
475
+ env: *id025
476
+ comment: ''
477
+ manualSeed: 66667
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94381c6f59e56b006cfb2704277816901a6222f0470205e5ad06ea787ef13447
3
+ size 8223495896
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_26_03_58_19.txt ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [10-26 03:58:19] Distributed training enabled.
2
+ [10-26 03:58:20] Environment setup done.
3
+ [10-26 03:58:21] Train dataset: len=435743
4
+ [10-26 03:58:21] Test dataset: ucf101_val, len=3783
5
+ [10-26 03:58:34] DYNTokenizer(
6
+ (x_embedder): PatchEmbed3D(
7
+ (proj): Conv3d(3, 768, kernel_size=(4, 8, 8), stride=(4, 8, 8))
8
+ (norm): Identity()
9
+ )
10
+ (encoder): TransformerEncoderParallel(
11
+ (blocks): ModuleList(
12
+ (0-11): 12 x AttentionBlock(
13
+ (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
14
+ (attn): Attention(
15
+ (qkv): Linear(in_features=768, out_features=2304, bias=False)
16
+ (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
17
+ (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
18
+ (attn_drop): Dropout(p=0.0, inplace=False)
19
+ (norm): Identity()
20
+ (proj): Linear(in_features=768, out_features=768, bias=True)
21
+ (proj_drop): Dropout(p=0.0, inplace=False)
22
+ )
23
+ (ls1): Identity()
24
+ (drop_path1): Identity()
25
+ (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
26
+ (mlp): Mlp(
27
+ (fc1): Linear(in_features=768, out_features=3072, bias=True)
28
+ (act): GELU(approximate='none')
29
+ (drop1): Dropout(p=0.0, inplace=False)
30
+ (norm): Identity()
31
+ (fc2): Linear(in_features=3072, out_features=768, bias=True)
32
+ (drop2): Dropout(p=0.0, inplace=False)
33
+ )
34
+ (ls2): Identity()
35
+ (drop_path2): Identity()
36
+ )
37
+ )
38
+ )
39
+ (decoder): TransformerAdaLNDecoderParallel(
40
+ (blocks): ModuleList(
41
+ (0-17): 18 x DiffusionAttentionBlock(
42
+ (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
43
+ (attn): Attention(
44
+ (qkv): Linear(in_features=1152, out_features=3456, bias=False)
45
+ (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
46
+ (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
47
+ (attn_drop): Dropout(p=0.0, inplace=False)
48
+ (norm): Identity()
49
+ (proj): Linear(in_features=1152, out_features=1152, bias=True)
50
+ (proj_drop): Dropout(p=0.0, inplace=False)
51
+ )
52
+ (ls1): Identity()
53
+ (drop_path1): Identity()
54
+ (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
55
+ (mlp): Mlp(
56
+ (fc1): Linear(in_features=1152, out_features=4608, bias=True)
57
+ (act): GELU(approximate='none')
58
+ (drop1): Dropout(p=0.0, inplace=False)
59
+ (norm): Identity()
60
+ (fc2): Linear(in_features=4608, out_features=1152, bias=True)
61
+ (drop2): Dropout(p=0.0, inplace=False)
62
+ )
63
+ (ls2): Identity()
64
+ (drop_path2): Identity()
65
+ (adaLN_modulation): Sequential(
66
+ (0): SiLU()
67
+ (1): Linear(in_features=1152, out_features=13824, bias=True)
68
+ )
69
+ )
70
+ )
71
+ (rope): HunyuanVideoRotaryPosEmbed()
72
+ )
73
+ (bottleneck): VectorQuantize(
74
+ (project_in): Linear(in_features=768, out_features=16, bias=True)
75
+ (project_out): Linear(in_features=16, out_features=1152, bias=True)
76
+ (_codebook): CosineSimCodebook()
77
+ )
78
+ (final_layer): AdaLNOutputLayer(
79
+ (norm_final): AdaLayerNormContinuous(
80
+ (silu): SiLU()
81
+ (linear): Linear(in_features=1152, out_features=2304, bias=True)
82
+ (norm): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
83
+ )
84
+ (linear): Linear(in_features=1152, out_features=768, bias=True)
85
+ )
86
+ (flow_matching_noise_module): MinRFNoiseModule()
87
+ (dec_time_embedder): TimestepEmbedder(
88
+ (mlp): Sequential(
89
+ (0): Linear(in_features=256, out_features=1152, bias=True)
90
+ (1): SiLU()
91
+ (2): Linear(in_features=1152, out_features=1152, bias=True)
92
+ )
93
+ )
94
+ (dec_x_embedder): PatchEmbed3D(
95
+ (proj): Conv3d(3, 1152, kernel_size=(4, 8, 8), stride=(4, 8, 8))
96
+ (norm): Identity()
97
+ )
98
+ )
99
+ [10-26 03:58:34] Model: #params=666.3M
100
+ [10-26 03:58:34] SLURM_JOB_ID: None
101
+ [10-26 03:58:34] SLUMR_ARRAY_JOB_ID: None
102
+ [10-26 03:58:34] SLURM_ARRAY_TASK_ID: None
103
+ [10-26 03:58:34] wandb_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192_____
104
+ [10-26 03:58:34] Compiling model with mode: default
105
+ [10-26 03:58:55] Discriminator: #params=38.2M
106
+ [10-26 03:58:55] compiling loss with mode default
107
+ [10-26 03:58:55] Epoch 1 started.
108
+ [10-26 05:34:28] Epoch 1 training done. Time: 5732.70s
109
+ [10-26 06:03:25] Calculating FVD with running real stats
110
+ [10-26 06:04:53] Converting video data to uint8
111
+ [10-26 06:05:59] Converting video data to uint8
112
+ [10-26 06:05:59] Preparing to save rng states...
113
+ [10-26 06:05:59] Saving checkpoint...
114
+ [10-26 06:06:35] New best checkpoint saved: best_fvd_7983.63.pth
115
+ [10-26 06:06:35] Epoch 1, train: fm_loss=0.4965 perceptual_loss=0.7230 rp_loss=0.8580 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=11.8697 ssim=0.1345 loss_q=0.0000 index_usage_batch=0.0224 perplexity=6358.6162 loss_commit=0.0000 loss=0.8580 fps=24.2793,
116
+ eval: ucf101_val_psnr=6.4218 ucf101_val_ssim=0.0048 ucf101_val_fps=3.1429 ucf101_val_fvd=7983.6285,
117
+ Latest checkpoint saved. Time: 35.78s
118
+ , 2.1h (d 0.47) 2.1h/425.6h
119
+ [10-26 06:06:35] Epoch 2 started.
120
+ [10-26 06:58:07] Epoch 2 training done. Time: 3091.38s
121
+ [10-26 07:00:34] Calculating FVD with running real stats
122
+ [10-26 07:01:54] Converting video data to uint8
123
+ [10-26 07:03:06] Converting video data to uint8
124
+ [10-26 07:03:07] Preparing to save rng states...
125
+ [10-26 07:03:07] Saving checkpoint...
126
+ [10-26 07:03:44] New best checkpoint saved: best_fvd_4078.66.pth
127
+ [10-26 07:03:44] Epoch 2, train: fm_loss=0.2040 perceptual_loss=0.6439 rp_loss=0.5259 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=15.9498 ssim=0.1968 loss_q=0.0002 index_usage_batch=0.0195 perplexity=7807.2646 loss_commit=0.0002 loss=0.5259 fps=23.9054,
128
+ eval: ucf101_val_psnr=8.9663 ucf101_val_ssim=0.0189 ucf101_val_fps=3.1781 ucf101_val_fvd=4078.6604,
129
+ Latest checkpoint saved. Time: 37.56s
130
+ , 57.2m (d 0.04) 3.1h/308.0h
131
+ [10-26 07:03:44] Epoch 3 started.
132
+ [10-26 07:56:18] Epoch 3 training done. Time: 3154.18s
133
+ [10-26 07:58:45] Calculating FVD with running real stats
134
+ [10-26 08:00:09] Converting video data to uint8
135
+ [10-26 08:01:18] Converting video data to uint8
136
+ [10-26 08:01:18] Preparing to save rng states...
137
+ [10-26 08:01:18] Saving checkpoint...
138
+ [10-26 08:01:58] New best checkpoint saved: best_fvd_3830.81.pth
139
+ [10-26 08:01:58] Epoch 3, train: fm_loss=0.1239 perceptual_loss=0.5882 rp_loss=0.4180 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=18.5751 ssim=0.2626 loss_q=0.0003 index_usage_batch=0.0115 perplexity=7967.7891 loss_commit=0.0003 loss=0.4181 fps=23.6399,
140
+ eval: ucf101_val_psnr=9.5385 ucf101_val_ssim=0.0304 ucf101_val_fps=3.1865 ucf101_val_fvd=3830.8149,
141
+ Latest checkpoint saved. Time: 39.61s
142
+ , 58.2m (d 0.02) 4.1h/270.1h
143
+ [10-26 08:01:58] Epoch 4 started.
144
+ [10-26 08:54:48] Epoch 4 training done. Time: 3169.99s
145
+ [10-26 08:57:15] Calculating FVD with running real stats
146
+ [10-26 08:58:33] Converting video data to uint8
147
+ [10-26 08:59:41] Converting video data to uint8
148
+ [10-26 08:59:43] Preparing to save rng states...
149
+ [10-26 08:59:49] Saving checkpoint...
150
+ [10-26 09:00:27] New best checkpoint saved: best_fvd_3380.49.pth
151
+ [10-26 09:00:27] Epoch 4, train: fm_loss=0.0941 perceptual_loss=0.5426 rp_loss=0.3654 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=20.0186 ssim=0.3119 loss_q=0.0008 index_usage_batch=0.0103 perplexity=8027.0205 loss_commit=0.0008 loss=0.3654 fps=23.6290,
152
+ eval: ucf101_val_psnr=9.6446 ucf101_val_ssim=0.0359 ucf101_val_fps=3.1718 ucf101_val_fvd=3380.4911,
153
+ Latest checkpoint saved. Time: 44.50s
154
+ , 58.5m (d 0.03) 5.0h/251.3h
155
+ [10-26 09:00:27] Epoch 5 started.
156
+ [10-26 09:53:14] Epoch 5 training done. Time: 3167.01s
157
+ [10-26 09:55:41] Calculating FVD with running real stats
158
+ [10-26 09:57:00] Converting video data to uint8
159
+ [10-26 09:58:08] Converting video data to uint8
160
+ [10-26 09:58:09] Preparing to save rng states...
161
+ [10-26 09:58:10] Saving checkpoint...
162
+ [10-26 09:58:47] New best checkpoint saved: best_fvd_2447.01.pth
163
+ [10-26 09:58:47] Epoch 5, train: fm_loss=0.0751 perceptual_loss=0.5013 rp_loss=0.3258 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=21.1530 ssim=0.3623 loss_q=0.0020 index_usage_batch=0.0285 perplexity=8064.0542 loss_commit=0.0020 loss=0.3260 fps=23.6636,
164
+ eval: ucf101_val_psnr=11.4611 ucf101_val_ssim=0.0623 ucf101_val_fps=3.1759 ucf101_val_fvd=2447.0120,
165
+ Latest checkpoint saved. Time: 37.86s
166
+ , 58.3m (d 0.01) 6.0h/239.9h
167
+ [10-26 09:58:47] Epoch 6 started.
168
+ [10-26 10:50:47] Epoch 6 training done. Time: 3120.14s
169
+ [10-26 10:53:16] Calculating FVD with running real stats
170
+ [10-26 10:54:37] Converting video data to uint8
171
+ [10-26 10:55:44] Converting video data to uint8
172
+ [10-26 10:55:46] Preparing to save rng states...
173
+ [10-26 10:55:49] Saving checkpoint...
174
+ [10-26 10:56:25] New best checkpoint saved: best_fvd_2160.28.pth
175
+ [10-26 10:56:25] Epoch 6, train: fm_loss=0.0625 perceptual_loss=0.4607 rp_loss=0.2928 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=22.0782 ssim=0.4143 loss_q=0.0022 index_usage_batch=0.1266 perplexity=8087.0312 loss_commit=0.0022 loss=0.2930 fps=23.6909,
176
+ eval: ucf101_val_psnr=13.6264 ucf101_val_ssim=0.0992 ucf101_val_fps=3.1386 ucf101_val_fvd=2160.2844,
177
+ Latest checkpoint saved. Time: 39.57s
178
+ , 57.6m (d 0.00) 7.0h/231.9h
179
+ [10-26 10:56:25] Epoch 7 started.
180
+ [10-26 11:45:07] Epoch 7 training done. Time: 2921.50s
181
+ [10-26 11:47:34] Calculating FVD with running real stats
182
+ [10-26 11:48:53] Converting video data to uint8
183
+ [10-26 11:50:02] Converting video data to uint8
184
+ [10-26 11:50:03] Preparing to save rng states...
185
+ [10-26 11:50:06] Saving checkpoint...
186
+ [10-26 11:50:42] New best checkpoint saved: best_fvd_1674.94.pth
187
+ [10-26 11:50:42] Epoch 7, train: fm_loss=0.0541 perceptual_loss=0.4081 rp_loss=0.2581 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=22.7474 ssim=0.4579 loss_q=0.0018 index_usage_batch=0.2620 perplexity=8086.5469 loss_commit=0.0018 loss=0.2583 fps=24.0509,
188
+ eval: ucf101_val_psnr=15.8265 ucf101_val_ssim=0.1793 ucf101_val_fps=3.1762 ucf101_val_fvd=1674.9365,
189
+ Latest checkpoint saved. Time: 38.41s
190
+ , 54.3m (d 0.00) 7.9h/224.7h
191
+ [10-26 11:50:42] Epoch 8 started.
192
+ [10-26 12:38:25] Epoch 8 training done. Time: 2862.67s
193
+ [10-26 12:40:51] Calculating FVD with running real stats
194
+ [10-26 12:42:13] Converting video data to uint8
195
+ [10-26 12:43:24] Converting video data to uint8
196
+ [10-26 12:43:24] Preparing to save rng states...
197
+ [10-26 12:43:24] Saving checkpoint...
198
+ [10-26 12:43:58] New best checkpoint saved: best_fvd_1113.78.pth
199
+ [10-26 12:43:58] Epoch 8, train: fm_loss=0.0474 perceptual_loss=0.3506 rp_loss=0.2227 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=23.4041 ssim=0.5031 loss_q=0.0020 index_usage_batch=0.4202 perplexity=8058.2388 loss_commit=0.0020 loss=0.2229 fps=24.2708,
200
+ eval: ucf101_val_psnr=16.7288 ucf101_val_ssim=0.2578 ucf101_val_fps=3.1883 ucf101_val_fvd=1113.7755,
201
+ Latest checkpoint saved. Time: 33.97s
202
+ , 53.3m (d 0.02) 8.8h/218.8h
203
+ [10-26 12:43:58] Epoch 9 started.
204
+ [10-26 13:31:57] Epoch 9 training done. Time: 2878.38s
205
+ [10-26 13:34:23] Calculating FVD with running real stats
206
+ [10-26 13:35:46] Converting video data to uint8
207
+ [10-26 13:36:54] Converting video data to uint8
208
+ [10-26 13:36:54] Preparing to save rng states...
209
+ [10-26 13:36:54] Saving checkpoint...
210
+ [10-26 13:37:29] New best checkpoint saved: best_fvd_827.66.pth
211
+ [10-26 13:37:29] Epoch 9, train: fm_loss=0.0423 perceptual_loss=0.3054 rp_loss=0.1950 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=23.9759 ssim=0.5432 loss_q=0.0027 index_usage_batch=0.5526 perplexity=8024.5972 loss_commit=0.0027 loss=0.1952 fps=24.1793,
212
+ eval: ucf101_val_psnr=18.4337 ucf101_val_ssim=0.3501 ucf101_val_fps=3.1849 ucf101_val_fvd=827.6603,
213
+ Latest checkpoint saved. Time: 34.83s
214
+ , 53.5m (d 0.04) 9.6h/214.3h
215
+ [10-26 13:37:29] Epoch 10 started.
216
+ [10-26 14:25:32] Epoch 10 training done. Time: 2882.85s
217
+ [10-26 14:27:58] Calculating FVD with running real stats
218
+ [10-26 14:29:22] Converting video data to uint8
219
+ [10-26 14:30:28] Converting video data to uint8
220
+ [10-26 14:30:29] Preparing to save rng states...
221
+ [10-26 14:30:29] Saving checkpoint...
222
+ [10-26 14:31:05] New best checkpoint saved: best_fvd_610.79.pth
223
+ [10-26 14:31:05] Epoch 10, train: fm_loss=0.0383 perceptual_loss=0.2705 rp_loss=0.1736 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=24.5157 ssim=0.5798 loss_q=0.0037 index_usage_batch=0.6788 perplexity=8023.9727 loss_commit=0.0037 loss=0.1740 fps=24.1696,
224
+ eval: ucf101_val_psnr=19.2263 ucf101_val_ssim=0.4042 ucf101_val_fps=3.1910 ucf101_val_fvd=610.7945,
225
+ Latest checkpoint saved. Time: 36.02s
226
+ , 53.6m (d 0.02) 10.5h/210.7h
227
+ [10-26 14:31:05] Epoch 11 started.
228
+ [10-26 15:19:04] Epoch 11 training done. Time: 2879.19s
229
+ [10-26 15:21:30] Calculating FVD with running real stats
230
+ [10-26 15:22:50] Converting video data to uint8
231
+ [10-26 15:24:00] Converting video data to uint8
232
+ [10-26 15:24:01] Preparing to save rng states...
233
+ [10-26 15:24:01] Saving checkpoint...
234
+ [10-26 15:24:37] New best checkpoint saved: best_fvd_381.65.pth
235
+ [10-26 15:24:37] Epoch 11, train: fm_loss=0.0351 perceptual_loss=0.2409 rp_loss=0.1555 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.0424 ssim=0.6155 loss_q=0.0043 index_usage_batch=0.7751 perplexity=8051.9331 loss_commit=0.0043 loss=0.1559 fps=24.1750,
236
+ eval: ucf101_val_psnr=20.3549 ucf101_val_ssim=0.4929 ucf101_val_fps=3.1925 ucf101_val_fvd=381.6456,
237
+ Latest checkpoint saved. Time: 36.06s
238
+ , 53.5m (d 0.07) 11.4h/207.8h
239
+ [10-26 15:24:37] Epoch 12 started.
240
+ [10-26 16:12:57] Epoch 12 training done. Time: 2900.10s
241
+ [10-26 16:15:24] Calculating FVD with running real stats
242
+ [10-26 16:16:51] Converting video data to uint8
243
+ [10-26 16:18:01] Converting video data to uint8
244
+ [10-26 16:18:02] Preparing to save rng states...
245
+ [10-26 16:18:02] Saving checkpoint...
246
+ [10-26 16:18:37] New best checkpoint saved: best_fvd_323.49.pth
247
+ [10-26 16:18:37] Epoch 12, train: fm_loss=0.0328 perceptual_loss=0.2185 rp_loss=0.1420 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.5186 ssim=0.6458 loss_q=0.0044 index_usage_batch=0.8184 perplexity=8070.7065 loss_commit=0.0044 loss=0.1425 fps=24.0804,
248
+ eval: ucf101_val_psnr=21.4852 ucf101_val_ssim=0.5541 ucf101_val_fps=3.1760 ucf101_val_fvd=323.4891,
249
+ Latest checkpoint saved. Time: 35.00s
250
+ , 54.0m (d 0.03) 12.3h/205.5h
251
+ [10-26 16:18:37] Epoch 13 started.
252
+ [10-26 17:07:51] Epoch 13 training done. Time: 2954.12s
253
+ [10-26 17:10:17] Calculating FVD with running real stats
254
+ [10-26 17:11:39] Converting video data to uint8
255
+ [10-26 17:12:46] Converting video data to uint8
256
+ [10-26 17:12:47] Preparing to save rng states...
257
+ [10-26 17:12:47] Saving checkpoint...
258
+ [10-26 17:13:23] New best checkpoint saved: best_fvd_270.49.pth
259
+ [10-26 17:13:23] Epoch 13, train: fm_loss=0.0308 perceptual_loss=0.2025 rp_loss=0.1320 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=25.8723 ssim=0.6692 loss_q=0.0044 index_usage_batch=0.8237 perplexity=8076.1089 loss_commit=0.0044 loss=0.1325 fps=24.0107,
260
+ eval: ucf101_val_psnr=21.3858 ucf101_val_ssim=0.5712 ucf101_val_fps=3.2044 ucf101_val_fvd=270.4910,
261
+ Latest checkpoint saved. Time: 35.80s
262
+ , 54.8m (d 0.06) 13.2h/203.7h
263
+ [10-26 17:13:23] Epoch 14 started.
264
+ [10-26 18:01:49] Epoch 14 training done. Time: 2905.86s
265
+ [10-26 18:04:15] Calculating FVD with running real stats
266
+ [10-26 18:05:37] Converting video data to uint8
267
+ [10-26 18:06:44] Converting video data to uint8
268
+ [10-26 18:06:45] Preparing to save rng states...
269
+ [10-26 18:06:45] Saving checkpoint...
270
+ [10-26 18:07:23] New best checkpoint saved: best_fvd_266.42.pth
271
+ [10-26 18:07:23] Epoch 14, train: fm_loss=0.0294 perceptual_loss=0.1902 rp_loss=0.1245 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.1650 ssim=0.6876 loss_q=0.0045 index_usage_batch=0.8265 perplexity=8077.9287 loss_commit=0.0045 loss=0.1249 fps=24.1597,
272
+ eval: ucf101_val_psnr=22.1036 ucf101_val_ssim=0.6195 ucf101_val_fps=3.1930 ucf101_val_fvd=266.4189,
273
+ Latest checkpoint saved. Time: 37.61s
274
+ , 54.0m (d 0.06) 14.1h/202.0h
275
+ [10-26 18:07:23] Epoch 15 started.
276
+ [10-26 18:55:32] Epoch 15 training done. Time: 2889.90s
277
+ [10-26 18:57:59] Calculating FVD with running real stats
278
+ [10-26 18:59:21] Converting video data to uint8
279
+ [10-26 19:00:29] Converting video data to uint8
280
+ [10-26 19:00:29] Preparing to save rng states...
281
+ [10-26 19:00:29] Saving checkpoint...
282
+ [10-26 19:01:04] New best checkpoint saved: best_fvd_218.01.pth
283
+ [10-26 19:01:04] Epoch 15, train: fm_loss=0.0283 perceptual_loss=0.1776 rp_loss=0.1171 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.4998 ssim=0.7085 loss_q=0.0048 index_usage_batch=0.8284 perplexity=8078.5635 loss_commit=0.0048 loss=0.1176 fps=24.3035,
284
+ eval: ucf101_val_psnr=22.0786 ucf101_val_ssim=0.6269 ucf101_val_fps=3.1910 ucf101_val_fvd=218.0063,
285
+ Latest checkpoint saved. Time: 34.62s
286
+ , 53.7m (d 0.04) 15.0h/200.5h
287
+ [10-26 19:01:04] Epoch 16 started.
288
+ [10-26 19:49:32] Epoch 16 training done. Time: 2907.65s
289
+ [10-26 19:51:57] Calculating FVD with running real stats
290
+ [10-26 19:53:20] Converting video data to uint8
291
+ [10-26 19:54:27] Converting video data to uint8
292
+ [10-26 19:54:28] Preparing to save rng states...
293
+ [10-26 19:54:28] Saving checkpoint...
294
+ [10-26 19:54:39] Epoch 16, train: fm_loss=0.0273 perceptual_loss=0.1666 rp_loss=0.1106 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=26.7899 ssim=0.7275 loss_q=0.0060 index_usage_batch=0.8263 perplexity=8079.1172 loss_commit=0.0060 loss=0.1112 fps=24.1462,
295
+ eval: ucf101_val_psnr=22.4575 ucf101_val_ssim=0.6505 ucf101_val_fps=3.2147 ucf101_val_fvd=221.7525,
296
+ Latest checkpoint saved. Time: 10.53s
297
+ , 53.6m (d 0.02) 15.9h/199.1h
298
+ [10-26 19:54:39] Epoch 17 started.
299
+ [10-26 20:43:02] Epoch 17 training done. Time: 2903.26s
300
+ [10-26 20:45:28] Calculating FVD with running real stats
301
+ [10-26 20:46:53] Converting video data to uint8
302
+ [10-26 20:47:59] Converting video data to uint8
303
+ [10-26 20:48:00] Preparing to save rng states...
304
+ [10-26 20:48:00] Saving checkpoint...
305
+ [10-26 20:48:35] New best checkpoint saved: best_fvd_200.12.pth
306
+ [10-26 20:48:35] Epoch 17, train: fm_loss=0.0262 perceptual_loss=0.1568 rp_loss=0.1046 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.0340 ssim=0.7445 loss_q=0.0061 index_usage_batch=0.8285 perplexity=8079.9019 loss_commit=0.0061 loss=0.1052 fps=24.0968,
307
+ eval: ucf101_val_psnr=22.6389 ucf101_val_ssim=0.6659 ucf101_val_fps=3.1874 ucf101_val_fvd=200.1184,
308
+ Latest checkpoint saved. Time: 35.77s
309
+ , 53.9m (d 0.07) 16.8h/198.0h
310
+ [10-26 20:48:35] Epoch 18 started.
311
+ [10-26 21:36:21] Epoch 18 training done. Time: 2865.34s
312
+ [10-26 21:38:47] Calculating FVD with running real stats
313
+ [10-26 21:40:11] Converting video data to uint8
314
+ [10-26 21:41:17] Converting video data to uint8
315
+ [10-26 21:41:18] Preparing to save rng states...
316
+ [10-26 21:41:18] Saving checkpoint...
317
+ [10-26 21:41:52] New best checkpoint saved: best_fvd_187.29.pth
318
+ [10-26 21:41:52] Epoch 18, train: fm_loss=0.0255 perceptual_loss=0.1484 rp_loss=0.0997 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.2793 ssim=0.7591 loss_q=0.0062 index_usage_batch=0.8297 perplexity=8080.2930 loss_commit=0.0062 loss=0.1003 fps=24.1556,
319
+ eval: ucf101_val_psnr=22.6294 ucf101_val_ssim=0.6638 ucf101_val_fps=3.2034 ucf101_val_fvd=187.2859,
320
+ Latest checkpoint saved. Time: 34.07s
321
+ , 53.3m (d 0.07) 17.7h/196.8h
322
+ [10-26 21:41:52] Epoch 19 started.
323
+ [10-26 22:29:32] Epoch 19 training done. Time: 2860.19s
324
+ [10-26 22:31:59] Calculating FVD with running real stats
325
+ [10-26 22:33:21] Converting video data to uint8
326
+ [10-26 22:34:32] Converting video data to uint8
327
+ [10-26 22:34:33] Preparing to save rng states...
328
+ [10-26 22:34:33] Saving checkpoint...
329
+ [10-26 22:35:10] New best checkpoint saved: best_fvd_177.69.pth
330
+ [10-26 22:35:10] Epoch 19, train: fm_loss=0.0250 perceptual_loss=0.1422 rp_loss=0.0961 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.4619 ssim=0.7701 loss_q=0.0062 index_usage_batch=0.8307 perplexity=8080.1904 loss_commit=0.0062 loss=0.0967 fps=24.0560,
331
+ eval: ucf101_val_psnr=23.0714 ucf101_val_ssim=0.6980 ucf101_val_fps=3.1797 ucf101_val_fvd=177.6935,
332
+ Latest checkpoint saved. Time: 37.20s
333
+ , 53.3m (d 0.05) 18.6h/195.8h
334
+ [10-26 22:35:10] Epoch 20 started.
335
+ [10-26 23:23:17] Epoch 20 training done. Time: 2886.99s
336
+ [10-26 23:25:44] Calculating FVD with running real stats
337
+ [10-26 23:27:10] Converting video data to uint8
338
+ [10-26 23:28:17] Converting video data to uint8
339
+ [10-26 23:28:18] Preparing to save rng states...
340
+ [10-26 23:28:18] Saving checkpoint...
341
+ [10-26 23:28:52] New best checkpoint saved: best_fvd_164.04.pth
342
+ [10-26 23:28:52] Epoch 20, train: fm_loss=0.0244 perceptual_loss=0.1366 rp_loss=0.0927 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.6406 ssim=0.7801 loss_q=0.0062 index_usage_batch=0.8317 perplexity=8079.9258 loss_commit=0.0062 loss=0.0933 fps=24.3060,
343
+ eval: ucf101_val_psnr=23.0348 ucf101_val_ssim=0.7059 ucf101_val_fps=3.1822 ucf101_val_fvd=164.0369,
344
+ Latest checkpoint saved. Time: 34.57s
345
+ , 53.7m (d 0.03) 19.5h/195.0h
346
+ [10-26 23:28:52] Epoch 21 started.
347
+ [10-27 00:16:29] Epoch 21 training done. Time: 2856.47s
348
+ [10-27 00:18:57] Calculating FVD with running real stats
349
+ [10-27 00:20:23] Converting video data to uint8
350
+ [10-27 00:21:30] Converting video data to uint8
351
+ [10-27 00:21:30] Preparing to save rng states...
352
+ [10-27 00:21:30] Saving checkpoint...
353
+ [10-27 00:21:41] Epoch 21, train: fm_loss=0.0239 perceptual_loss=0.1321 rp_loss=0.0900 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.7904 ssim=0.7884 loss_q=0.0062 index_usage_batch=0.8325 perplexity=8079.4365 loss_commit=0.0062 loss=0.0906 fps=24.3157,
354
+ eval: ucf101_val_psnr=22.6612 ucf101_val_ssim=0.6816 ucf101_val_fps=3.1624 ucf101_val_fvd=164.3932,
355
+ Latest checkpoint saved. Time: 10.43s
356
+ , 52.8m (d 0.06) 20.4h/194.1h
357
+ [10-27 00:21:41] Epoch 22 started.
358
+ [10-27 01:09:35] Epoch 22 training done. Time: 2874.68s
359
+ [10-27 01:12:03] Calculating FVD with running real stats
360
+ [10-27 01:13:29] Converting video data to uint8
361
+ [10-27 01:14:36] Converting video data to uint8
362
+ [10-27 01:14:36] Preparing to save rng states...
363
+ [10-27 01:14:36] Saving checkpoint...
364
+ [10-27 01:15:11] New best checkpoint saved: best_fvd_151.94.pth
365
+ [10-27 01:15:11] Epoch 22, train: fm_loss=0.0234 perceptual_loss=0.1282 rp_loss=0.0875 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=27.9169 ssim=0.7950 loss_q=0.0063 index_usage_batch=0.8332 perplexity=8078.8394 loss_commit=0.0063 loss=0.0881 fps=24.1343,
366
+ eval: ucf101_val_psnr=23.1404 ucf101_val_ssim=0.6852 ucf101_val_fps=3.1745 ucf101_val_fvd=151.9350,
367
+ Latest checkpoint saved. Time: 34.47s
368
+ , 53.5m (d 0.05) 21.3h/193.4h
369
+ [10-27 01:15:11] Epoch 23 started.
370
+ [10-27 02:03:16] Epoch 23 training done. Time: 2885.42s
371
+ [10-27 02:05:44] Calculating FVD with running real stats
372
+ [10-27 02:07:08] Converting video data to uint8
373
+ [10-27 02:08:16] Converting video data to uint8
374
+ [10-27 02:08:17] Preparing to save rng states...
375
+ [10-27 02:08:17] Saving checkpoint...
376
+ [10-27 02:08:53] New best checkpoint saved: best_fvd_138.58.pth
377
+ [10-27 02:08:53] Epoch 23, train: fm_loss=0.0231 perceptual_loss=0.1252 rp_loss=0.0857 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.0422 ssim=0.8006 loss_q=0.0063 index_usage_batch=0.8338 perplexity=8077.9961 loss_commit=0.0063 loss=0.0864 fps=24.0963,
378
+ eval: ucf101_val_psnr=23.3009 ucf101_val_ssim=0.7058 ucf101_val_fps=3.1746 ucf101_val_fvd=138.5815,
379
+ Latest checkpoint saved. Time: 36.40s
380
+ , 53.7m (d 0.05) 22.2h/192.8h
381
+ [10-27 02:08:53] Epoch 24 started.
382
+ [10-27 02:57:06] Epoch 24 training done. Time: 2892.43s
383
+ [10-27 02:59:32] Calculating FVD with running real stats
384
+ [10-27 03:00:55] Converting video data to uint8
385
+ [10-27 03:02:04] Converting video data to uint8
386
+ [10-27 03:02:05] Preparing to save rng states...
387
+ [10-27 03:02:05] Saving checkpoint...
388
+ [10-27 03:02:40] New best checkpoint saved: best_fvd_130.39.pth
389
+ [10-27 03:02:40] Epoch 24, train: fm_loss=0.0227 perceptual_loss=0.1221 rp_loss=0.0837 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.1448 ssim=0.8057 loss_q=0.0063 index_usage_batch=0.8342 perplexity=8077.2231 loss_commit=0.0063 loss=0.0843 fps=24.2673,
390
+ eval: ucf101_val_psnr=23.4273 ucf101_val_ssim=0.7316 ucf101_val_fps=3.1914 ucf101_val_fvd=130.3938,
391
+ Latest checkpoint saved. Time: 35.15s
392
+ , 53.8m (d 0.04) 23.1h/192.2h
393
+ [10-27 03:02:40] Epoch 25 started.
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/log_2025_10_27_05_21_46.txt ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [10-27 05:21:48] Distributed training enabled.
2
+ [10-27 05:21:48] Environment setup done.
3
+ [10-27 05:21:50] Train dataset: len=435743
4
+ [10-27 05:21:50] Test dataset: ucf101_val, len=3783
5
+ [10-27 05:21:50] Resuming training from /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/epoch-last.pth
6
+ [10-27 05:22:20] DYNTokenizer(
7
+ (x_embedder): PatchEmbed3D(
8
+ (proj): Conv3d(3, 768, kernel_size=(4, 8, 8), stride=(4, 8, 8))
9
+ (norm): Identity()
10
+ )
11
+ (encoder): TransformerEncoderParallel(
12
+ (blocks): ModuleList(
13
+ (0-11): 12 x AttentionBlock(
14
+ (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
15
+ (attn): Attention(
16
+ (qkv): Linear(in_features=768, out_features=2304, bias=False)
17
+ (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
18
+ (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
19
+ (attn_drop): Dropout(p=0.0, inplace=False)
20
+ (norm): Identity()
21
+ (proj): Linear(in_features=768, out_features=768, bias=True)
22
+ (proj_drop): Dropout(p=0.0, inplace=False)
23
+ )
24
+ (ls1): Identity()
25
+ (drop_path1): Identity()
26
+ (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
27
+ (mlp): Mlp(
28
+ (fc1): Linear(in_features=768, out_features=3072, bias=True)
29
+ (act): GELU(approximate='none')
30
+ (drop1): Dropout(p=0.0, inplace=False)
31
+ (norm): Identity()
32
+ (fc2): Linear(in_features=3072, out_features=768, bias=True)
33
+ (drop2): Dropout(p=0.0, inplace=False)
34
+ )
35
+ (ls2): Identity()
36
+ (drop_path2): Identity()
37
+ )
38
+ )
39
+ )
40
+ (decoder): TransformerAdaLNDecoderParallel(
41
+ (blocks): ModuleList(
42
+ (0-17): 18 x DiffusionAttentionBlock(
43
+ (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
44
+ (attn): Attention(
45
+ (qkv): Linear(in_features=1152, out_features=3456, bias=False)
46
+ (q_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
47
+ (k_norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
48
+ (attn_drop): Dropout(p=0.0, inplace=False)
49
+ (norm): Identity()
50
+ (proj): Linear(in_features=1152, out_features=1152, bias=True)
51
+ (proj_drop): Dropout(p=0.0, inplace=False)
52
+ )
53
+ (ls1): Identity()
54
+ (drop_path1): Identity()
55
+ (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
56
+ (mlp): Mlp(
57
+ (fc1): Linear(in_features=1152, out_features=4608, bias=True)
58
+ (act): GELU(approximate='none')
59
+ (drop1): Dropout(p=0.0, inplace=False)
60
+ (norm): Identity()
61
+ (fc2): Linear(in_features=4608, out_features=1152, bias=True)
62
+ (drop2): Dropout(p=0.0, inplace=False)
63
+ )
64
+ (ls2): Identity()
65
+ (drop_path2): Identity()
66
+ (adaLN_modulation): Sequential(
67
+ (0): SiLU()
68
+ (1): Linear(in_features=1152, out_features=13824, bias=True)
69
+ )
70
+ )
71
+ )
72
+ (rope): HunyuanVideoRotaryPosEmbed()
73
+ )
74
+ (bottleneck): VectorQuantize(
75
+ (project_in): Linear(in_features=768, out_features=16, bias=True)
76
+ (project_out): Linear(in_features=16, out_features=1152, bias=True)
77
+ (_codebook): CosineSimCodebook()
78
+ )
79
+ (final_layer): AdaLNOutputLayer(
80
+ (norm_final): AdaLayerNormContinuous(
81
+ (silu): SiLU()
82
+ (linear): Linear(in_features=1152, out_features=2304, bias=True)
83
+ (norm): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
84
+ )
85
+ (linear): Linear(in_features=1152, out_features=768, bias=True)
86
+ )
87
+ (flow_matching_noise_module): MinRFNoiseModule()
88
+ (dec_time_embedder): TimestepEmbedder(
89
+ (mlp): Sequential(
90
+ (0): Linear(in_features=256, out_features=1152, bias=True)
91
+ (1): SiLU()
92
+ (2): Linear(in_features=1152, out_features=1152, bias=True)
93
+ )
94
+ )
95
+ (dec_x_embedder): PatchEmbed3D(
96
+ (proj): Conv3d(3, 1152, kernel_size=(4, 8, 8), stride=(4, 8, 8))
97
+ (norm): Identity()
98
+ )
99
+ )
100
+ [10-27 05:22:20] Model: #params=666.3M
101
+ [10-27 05:22:20] SLURM_JOB_ID: None
102
+ [10-27 05:22:20] SLUMR_ARRAY_JOB_ID: None
103
+ [10-27 05:22:20] SLURM_ARRAY_TASK_ID: None
104
+ [10-27 05:22:20] wandb_name: k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192_____
105
+ [10-27 05:22:20] Compiling model with mode: default
106
+ [10-27 05:22:37] Discriminator: #params=38.2M
107
+ [10-27 05:22:37] compiling loss with mode default
108
+ [10-27 05:22:37] Epoch 25 started.
109
+ [10-27 06:55:17] Epoch 25 training done. Time: 5560.63s
110
+ [10-27 07:24:16] Calculating FVD with running real stats
111
+ [10-27 07:25:45] Converting video data to uint8
112
+ [10-27 07:26:48] Converting video data to uint8
113
+ [10-27 07:26:48] Preparing to save rng states...
114
+ [10-27 07:26:48] Saving checkpoint...
115
+ [10-27 07:27:24] New best checkpoint saved: best_fvd_121.09.pth
116
+ [10-27 07:27:24] Epoch 25, train: fm_loss=0.0224 perceptual_loss=0.1194 rp_loss=0.0821 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.2606 ssim=0.8104 loss_q=0.0063 index_usage_batch=0.8348 perplexity=8076.3428 loss_commit=0.0063 loss=0.0827 fps=24.2888,
117
+ eval: ucf101_val_psnr=23.3652 ucf101_val_ssim=0.7205 ucf101_val_fps=3.1465 ucf101_val_fvd=121.0894,
118
+ Latest checkpoint saved. Time: 35.08s
119
+ , 2.1h (d 0.47) 2.1h/415.9h
120
+ [10-27 07:27:24] Epoch 26 started.
121
+ [10-27 08:15:04] Epoch 26 training done. Time: 2860.67s
122
+ [10-27 08:17:31] Calculating FVD with running real stats
123
+ [10-27 08:18:53] Converting video data to uint8
124
+ [10-27 08:20:01] Converting video data to uint8
125
+ [10-27 08:20:02] Preparing to save rng states...
126
+ [10-27 08:20:02] Saving checkpoint...
127
+ [10-27 08:20:37] New best checkpoint saved: best_fvd_107.29.pth
128
+ [10-27 08:20:37] Epoch 26, train: fm_loss=0.0219 perceptual_loss=0.1175 rp_loss=0.0807 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.3334 ssim=0.8138 loss_q=0.0063 index_usage_batch=0.8352 perplexity=8075.3994 loss_commit=0.0063 loss=0.0813 fps=24.1446,
129
+ eval: ucf101_val_psnr=23.3940 ucf101_val_ssim=0.7037 ucf101_val_fps=3.1935 ucf101_val_fvd=107.2903,
130
+ Latest checkpoint saved. Time: 35.02s
131
+ , 53.2m (d 0.06) 3.0h/296.7h
132
+ [10-27 08:20:37] Epoch 27 started.
133
+ [10-27 09:08:13] Epoch 27 training done. Time: 2856.72s
134
+ [10-27 09:10:40] Calculating FVD with running real stats
135
+ [10-27 09:12:02] Converting video data to uint8
136
+ [10-27 09:13:10] Converting video data to uint8
137
+ [10-27 09:13:11] Preparing to save rng states...
138
+ [10-27 09:13:11] Saving checkpoint...
139
+ [10-27 09:13:47] New best checkpoint saved: best_fvd_106.67.pth
140
+ [10-27 09:13:47] Epoch 27, train: fm_loss=0.0217 perceptual_loss=0.1151 rp_loss=0.0792 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.4303 ssim=0.8178 loss_q=0.0063 index_usage_batch=0.8351 perplexity=8074.3740 loss_commit=0.0063 loss=0.0799 fps=24.0925,
141
+ eval: ucf101_val_psnr=23.7231 ucf101_val_ssim=0.7382 ucf101_val_fps=3.1962 ucf101_val_fvd=106.6714,
142
+ Latest checkpoint saved. Time: 35.97s
143
+ , 53.2m (d 0.02) 3.9h/256.9h
144
+ [10-27 09:13:47] Epoch 28 started.
145
+ [10-27 10:01:01] Epoch 28 training done. Time: 2834.39s
146
+ [10-27 10:03:27] Calculating FVD with running real stats
147
+ [10-27 10:04:50] Converting video data to uint8
148
+ [10-27 10:05:58] Converting video data to uint8
149
+ [10-27 10:05:59] Preparing to save rng states...
150
+ [10-27 10:05:59] Saving checkpoint...
151
+ [10-27 10:06:35] New best checkpoint saved: best_fvd_105.24.pth
152
+ [10-27 10:06:35] Epoch 28, train: fm_loss=0.0215 perceptual_loss=0.1129 rp_loss=0.0780 g_loss=0.0000 g_loss_weight=0.0000 length_loss=0.0000 ref_loss=0.0000 ema_model_loss=0.0000 ce_loss=0.0000 psnr=28.5272 ssim=0.8215 loss_q=0.0063 index_usage_batch=0.8354 perplexity=8073.2065 loss_commit=0.0063 loss=0.0786 fps=24.1403,
153
+ eval: ucf101_val_psnr=23.6395 ucf101_val_ssim=0.7394 ucf101_val_fps=3.2026 ucf101_val_fvd=105.2365,
154
+ Latest checkpoint saved. Time: 35.69s
155
+ , 52.8m (d 0.03) 4.7h/236.6h
156
+ [10-27 10:06:35] Epoch 29 started.
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761422299.hopper-26.3880041.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0490cee65ce3c964710407235680e3fdf961cf02a6449fc6c9f8687a7a38833e
3
+ size 29464
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/tensorboard/events.out.tfevents.1761513706.hopper-10.716544.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d35c4937d378b988c0ab9a41b6d878d2282760c87e250ddb62efe3f706a945e
3
+ size 4984
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug-internal.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-27T05:22:02.60922833+08:00","level":"INFO","msg":"stream: starting","core version":"0.19.9","symlink path":"/scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug-core.log"}
2
+ {"time":"2025-10-27T05:22:03.20175398+08:00","level":"INFO","msg":"created new stream","id":"c3u3silm"}
3
+ {"time":"2025-10-27T05:22:03.202333506+08:00","level":"INFO","msg":"stream: started","id":"c3u3silm"}
4
+ {"time":"2025-10-27T05:22:03.202357497+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"c3u3silm"}
5
+ {"time":"2025-10-27T05:22:03.203578697+08:00","level":"INFO","msg":"sender: started","stream_id":"c3u3silm"}
6
+ {"time":"2025-10-27T05:22:03.20239453+08:00","level":"INFO","msg":"handler: started","stream_id":"c3u3silm"}
7
+ {"time":"2025-10-27T05:22:03.818476109+08:00","level":"INFO","msg":"Starting system monitor"}
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-10-27 05:22:02,538 INFO MainThread:716544 [wandb_setup.py:_flush():67] Current SDK version is 0.19.9
2
+ 2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Configure stats pid to 716544
3
+ 2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from /home/svu/e0724392/.config/wandb/settings
4
+ 2025-10-27 05:22:02,539 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from /scratch/e0724392/work4/LARP/wandb/settings
5
+ 2025-10-27 05:22:02,540 INFO MainThread:716544 [wandb_setup.py:_flush():67] Loading settings from environment variables
6
+ 2025-10-27 05:22:02,541 INFO MainThread:716544 [wandb_init.py:setup_run_log_directory():662] Logging user logs to /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug.log
7
+ 2025-10-27 05:22:02,542 INFO MainThread:716544 [wandb_init.py:setup_run_log_directory():663] Logging internal logs to /scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251027_052202-c3u3silm/logs/debug-internal.log
8
+ 2025-10-27 05:22:02,543 INFO MainThread:716544 [wandb_init.py:init():781] calling init triggers
9
+ 2025-10-27 05:22:02,543 INFO MainThread:716544 [wandb_init.py:init():786] wandb.init called with sweep_config: {}
10
+ config: {'trainer': 'our_tokenizer_trainer', 'train_dataset': {'name': 'video_dataset', 'args': {'root_path': 'data/metadata', 'split': 'train', 'frame_num': 16, 'rand_augment': 'no', 'csv_file': 'k600_train.csv+ucf101_train.csv', 'cls_vid_num': '-1_-1', 'crop_size': 128, 'scale': 1.0, 'aspect_ratio': 1.0, 'rand_flip': 'yes', 'use_all_frames': False, 'pre_load': False}, 'loader': {'batch_size': 128, 'num_workers': 32}}, 'test_dataset': {'name': 'video_dataset', 'args': {'root_path': 'data/metadata', 'frame_num': 16, 'cls_vid_num': '-1_-1', 'crop_size': 128, 'use_all_frames': False, 'pre_load': False}, 'csv_paths': {'ucf101_val': 'ucf101_val.csv'}, 'loader': {'batch_size': 128, 'num_workers': 32}}, 'model': {'name': 'dyn_tokenizer', 'args': {'noise_schedule': {'name': 'min_rf_noise_module', 'args': {'clean_data_read_key': 'clean_data', 'noised_data_write_key': 'noisy_input', 'noise_write_key': 'flow_noise', 'timesteps_write_key': 'timesteps', 'sigmas_write_key': 'sigmas', 'ln': False, 'stratisfied': False, 'mode_scale': 0.25}}, 'bottleneck': {'name': 'bottleneck', 'args': {'regularizer': {'name': 'vector_quantize', 'args': {'codebook_dim': 16, 'codebook_size': 8192, 'ema_update': True, 'decay': 0.99, 'kmeans_init': True, 'kmeans_iters': 10, 'threshold_ema_dead_code': 0.2, 'use_cosine_sim': True, 'commitment_weight': 1.0, 'diversity_weight': 0.0, 'smart_re_K': 0, 'continuous': False, 'reg': [0.1, 0.3], 'reset_cluster_size': 0.2, 'ema_entropy_ratio': 0.8, 'vq_start_step': 0}}}}, 'prior_model': {'name': 'none', 'use_mix_ss': True, 'mix_ss_max_ratio': 0.5, 'mix_ss_peak_steps_ratio': 0.3, 'n_rounds': 2, 'avg_loss_over_rounds': True, 'no_grad_before_last_round': False, 'no_dropout': False, 'latent_ce_temperature': 1.0, 'args': {'l2_normalized': True}}, 'dec_time_embedder': {'name': 'timestep_embedder', 'args': {'timesteps_read_key': 'timesteps', 'time_embedding_write_key': 'dec_temb', 'dim': 1152, 'frequency_embedding_size': 256, 'max_timestep': 1000.0}}, 'transformer_name': 'transformer_encoder_parallel', 'encoder_name': 'none', 'decoder_name': 'transformer_AdaLN_decoder_parallel', 'bottleneck_token_num': 1024, 'input_size': 128, 'frame_num': 16, 'temporal_patch_size': 4, 'patch_size': 8, 'decoder_temporal_patch_size': 4, 'decoder_patch_size': 8, 'in_channels': 3, 'encoder_hidden_size': 768, 'decoder_hidden_size': 1152, 'encoder_num_heads': 12, 'decoder_num_heads': 18, 'encoder_depth': 12, 'decoder_depth': 18, 'encoder_block_name': 'block_timm', 'decoder_block_name': 'adaLN_block_timm', 'encoder_mask_mode': 'full', 'decoder_mask_mode': 'full', 'learned_encoder_patch_pe': False, 'learned_encoder_latent_query_embed': True, 'learned_decoder_latent_pe': False, 'learned_decoder_patch_query_embed': False, 'use_encoder_patch_token_type_embed': False, 'use_encoder_latent_query_token_type_embed': False, 'enable_decoder_query': False, 'learned_decoder_pe': False, 'use_decoder_latent_token_type_embed': False, 'use_decoder_patch_query_token_type_embed': True, 'encoder_query_gaussian_init': True, 'latent_pe_scale_factor': 10000, 'query_init_std': 0.02, 'adaLN_expansion': 2, 'final_layer_init': 'xavier_uniform', 'enable_vq': True, 'qk_norm': True, 'use_rope': True, 'rope_dim': [16, 24, 24], 'final_layer_type': 'adanorm'}}, 'loss': {'name': 'fm_disc_loss', 'args': {'disc_type': 'transformer', 'disc_start': 999999, 'disc_self_start': -1, 'perceptual_weight': 0.5, 'perceptual_loss': 'lpips', 'perceptual_fp16': False, 'lecam_weight': 0.001, 'disc_loss': 'ns_smooth', 'disc_weight': 0.0, 'r1_gp_weight': 0.0, 'd_update_freq': 5, 'spectral_norm': False, 'disc_tran_hidden_size': 512, 'disc_tran_n_heads': 8, 'disc_tran_n_layers': 12, 'disc_tran_temporal_patch_size': 4, 'disc_tran_patch_size': 8, 'input_spatial_size': 128, 'frame_num': 16, 'fm_loss_weight': 1.0}}, 'optimizer': {'name': 'adamw', 'loss_name': 'adam', 'args': {'lr': 0.0001, 'betas': [0.9, 0.99]}, 'loss_args': {'lr': 3e-05, 'betas': [0.5, 0.9]}, 'lr_type': 'step', 'lr_step_pcts': '0.9_0.95', 'warmup_epoch': 10, 'min_lr_mult': 0.1, 'prior_lr_mult': 1.0, 'emb_lr_mult': 1.0}, 'max_epoch': 200, 'eval_epoch': 1, 'vis_epoch': 1, 'latest_interval': 1, 'save_epoch': 100000000, 'save_best': True, 'stepwise_logging': False, 'ema_decay': '_', 'use_amp': True, 'amp_dtype': 'float16', 'compile': True, 'compile_mode': 'default', 'flash_attn': False, 'loss_q_weight': 0.1, 'loss_q_warmup': '1.0_1', 'loss_kl_weight': 0.0, 'kl_decay_epoch': -1, 'loss_latent_ce_weight': 0.0, 'sqt_start_end_epoch': '0.0_0.0_0', 'clip_grad_max_norm': 0.0, 'init_checkpoint': '', 'timesteps': 25, 'verbose': False, 'guidance_scale': 1.0, 'env': {'tot_gpus': 8, 'cudnn': False, 'wandb_upload': True, 'wandb_entity': 'lingmin', 'wandb_project': 'dyn_tokenizer', 'exp_name': 'k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__', 'save_dir': '/scratch/Projects/CFP-01/CFP01-CF-033/lingmin/1d_tokenizer/save/tokenizer_ours/tokenizer_e12_d18_k600/k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__', 'port': '12778'}, 'comment': '', 'manualSeed': 66667, 'TrainSize': 435743, 'TestSize_ucf101_val': 3783, '_wandb': {}}
11
+ 2025-10-27 05:22:02,544 INFO MainThread:716544 [wandb_init.py:init():809] starting backend
12
+ 2025-10-27 05:22:02,544 INFO MainThread:716544 [wandb_init.py:init():813] sending inform_init request
13
+ 2025-10-27 05:22:02,596 INFO MainThread:716544 [backend.py:_multiprocessing_setup():101] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
14
+ 2025-10-27 05:22:02,597 INFO MainThread:716544 [wandb_init.py:init():823] backend started and connected
15
+ 2025-10-27 05:22:02,602 INFO MainThread:716544 [wandb_init.py:init():915] updated telemetry
16
+ 2025-10-27 05:22:02,731 INFO MainThread:716544 [wandb_init.py:init():939] communicating run to backend with 90.0 second timeout
17
+ 2025-10-27 05:22:03,767 INFO MainThread:716544 [wandb_init.py:init():1009] run resumed
18
+ 2025-10-27 05:22:03,771 INFO MainThread:716544 [wandb_init.py:init():1014] starting run threads in backend
19
+ 2025-10-27 05:22:10,370 INFO MainThread:716544 [wandb_run.py:_console_start():2454] atexit reg
20
+ 2025-10-27 05:22:10,371 INFO MainThread:716544 [wandb_run.py:_redirect():2306] redirect: wrap_raw
21
+ 2025-10-27 05:22:10,372 INFO MainThread:716544 [wandb_run.py:_redirect():2371] Wrapping output streams.
22
+ 2025-10-27 05:22:10,373 INFO MainThread:716544 [wandb_run.py:_redirect():2394] Redirects installed.
23
+ 2025-10-27 05:22:10,663 INFO MainThread:716544 [wandb_init.py:init():1056] run started, returning control to user process
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_10_0dcd7543861eb951d9eb.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dcd7543861eb951d9eb9128002ad8c13ed359ea83bdae4393d4ce3a895546af
3
+ size 1447271
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_11_c440b37dfe11e1ecb291.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c440b37dfe11e1ecb291dc667a185db861185b5e562915887c95bb58e07c1ad2
3
+ size 1361429
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_12_2c6abd01594c9f04b701.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c6abd01594c9f04b701e5263b60f0493aa59c673e9fdf04efceac6c15ddebb5
3
+ size 1104671
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_13_07ed7abcba2550022a65.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ed7abcba2550022a651a87086ca880859a600b134152317b5406cb35652c37
3
+ size 1061247
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_14_1d19ba838a2636f6bbb9.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d19ba838a2636f6bbb97e8cdab2a1a1ad413ba47d36269393ae97552278e130
3
+ size 971039
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_15_e4b12dc26b33b48899cd.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4b12dc26b33b48899cd40408810e3fabd1ad83a452c333eb2304576fb6e7b67
3
+ size 898506
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_16_be836e9a23353717c38d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be836e9a23353717c38d1bc7a52e75ac416615e04c76ace755ce177335410c9f
3
+ size 978642
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_17_db636a27733aa82a9288.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db636a27733aa82a9288e3b388c4ceb5089ba746d4909ed6adbe141d96ac2603
3
+ size 1003627
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_18_8520ee5928f1948584de.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8520ee5928f1948584de506a2dee7748d47912c71657efecd4cc3d6f7e21d6ab
3
+ size 975965
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_19_9902d6a8e4fcef1843c7.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9902d6a8e4fcef1843c7e85204d9a67bb8aea067506b585f8b84b84a1f094404
3
+ size 882054
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_1_4905dd5ddf36de8fa967.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4905dd5ddf36de8fa9671e752a11f477c07e6e64107d7df50f65d37fe9686f15
3
+ size 6065915
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_20_430fdf292106f4cdd169.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430fdf292106f4cdd169295b60d4b53c1bded466a659a13b42e361724689ed80
3
+ size 937420
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_21_b16346a94669cd22074d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b16346a94669cd22074d836082da0f4dc64523c512248d294be70e676755758d
3
+ size 899995
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_22_b9cd0a98c6efd38dd326.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9cd0a98c6efd38dd32640094344de5687e1d0ee16bc09acc0831ccd0a478ccd
3
+ size 952132
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_23_16c5160b84b0b93c4d9a.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c5160b84b0b93c4d9a35723ac23dc74080d6816c9c386900a0d6aae7e7cbf3
3
+ size 911203
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_24_caa9fc05e947e5c68fe5.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caa9fc05e947e5c68fe571c83d411c21619387b375f3c3962e4255d625363608
3
+ size 895463
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_2_d15bd6c3ab28bcf2d818.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d15bd6c3ab28bcf2d818887cc710cd9e8c41e7f000f59b09cb1efcad20a3b4cb
3
+ size 4538287
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_3_53d7f7f93663c75403c5.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d7f7f93663c75403c55ecf763a0a4cb179d042312633f62bdd97228db1c9e9
3
+ size 3564898
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_4_518c61a588fe34e8b97b.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518c61a588fe34e8b97b4d5a847ca4503fc147203f2ba655c6c51036ab9f11e0
3
+ size 3098325
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_5_a718cd301e53d86c413d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a718cd301e53d86c413d314d81c01ae2be78483c38b325fb0f74a8f015dc934f
3
+ size 2490105
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_6_75eb65b267e94c10e7e6.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75eb65b267e94c10e7e6736fbc4f28aa33c443da8280a22360cdb26deb1b4fb1
3
+ size 2170170
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_7_7f140abb6a7cc547d17d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f140abb6a7cc547d17d4e3b1407f3c67121025bb26aa8918cdaa742227f1963
3
+ size 2086638
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_8_4b22b9cea46a63503ee2.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b22b9cea46a63503ee26cb4c3c76f9aa4879d911efe88e213f43d5151b4eaba
3
+ size 1653335
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_test_dataset_9_09d0cffdb29ebf65107e.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d0cffdb29ebf65107ec0f593ad28c54ff88651007c137a2788cdc878771660
3
+ size 1545553
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_10_cb55eb08e79a819f9319.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb55eb08e79a819f931958929f93fad04d815ad67d0d36647eda2840616fd94a
3
+ size 1618902
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_11_83c1cb0239fed9f79371.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c1cb0239fed9f79371044130e055bdabf559600bf3b6773897ae9a4d3ed8f7
3
+ size 1436148
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_12_5b3c2426eecabf472d8d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3c2426eecabf472d8df6e0d2d5bc162cd8c92f4b00df04ac20fc09b45800cd
3
+ size 1227217
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_13_6876091a79457be67ea7.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6876091a79457be67ea70b38e2ae0ebc86379eb82c564e2265c219f6c894ef26
3
+ size 1215670
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_14_e23bb82f59567ab38280.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e23bb82f59567ab382809cc37267b9248d3731e77f2f071bf4ac329640fb21b1
3
+ size 1120628
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_15_08fbe0df36c11295fe0e.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fbe0df36c11295fe0e35f37c7eae508473abaaa9de839230eadf0da9b19364
3
+ size 1071212
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_16_6dd373b8d21986a89617.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd373b8d21986a89617b4ec4b380228dd5936a3620fee9f99f8c169a4a1a5c2
3
+ size 1144524
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_17_0b8984f5b47aeb710bec.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8984f5b47aeb710bec1f02c175050ac65cf24c1149dc6cce038585da3a856a
3
+ size 1065785
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_18_37b7c06cbf94b4d1290d.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b7c06cbf94b4d1290d5c8333878b6a24a44f7cb5faafbc9a294d6b66a30374
3
+ size 1107669
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_19_756caade2d1fd4a30c0a.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756caade2d1fd4a30c0a7aeabc087b33ef89a52cba4fcb46ced71a39cc4a9984
3
+ size 1067021
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_1_cb69ac202a830cd54f3c.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb69ac202a830cd54f3cb6cd11867857813b4deee93e0f022d5006948a3380ee
3
+ size 6059426
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_20_f34ba9f9b4ff9969489b.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34ba9f9b4ff9969489b89ad1eab8ab255a4baa624c880e751379613e52d1089
3
+ size 1040426
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_21_91d3de1a852a74091ca3.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d3de1a852a74091ca359278c8f7140102923f88f6a772743ee7e9a250d4d08
3
+ size 1045543
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_22_b8ec8ae339ed7f41fe8b.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ec8ae339ed7f41fe8b500a26f4e7e2a76ad15a79fb4f97b027da4cebf54dcb
3
+ size 1013515
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_23_fcb034794f3447789b5c.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb034794f3447789b5c4bd4647e3c9c83ac6fe73c24f75e45c09ebefb3fc3a7
3
+ size 1055829
k600_base_vq_dim_16_decay_0_99_lr_step_p_loss_1_0_dec_rope_fixed/b128_btn1024_vector_quantize_rcs8192__/wandb/run-20251026_035824-c3u3silm/files/media/videos/vis_train_dataset_24_d4dac4cd93c9f9849e0c.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4dac4cd93c9f9849e0cb3bc3410531219f1fd8242e050b92935b380c187e07b
3
+ size 1116529