BryanW committed on Dec 29, 2025

Commit

4b46d4c

verified ·

1 Parent(s): c2925de

Upload code from /mnt/43.oT_eV

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
128_128_17/video_codes.tar.zst +3 -0
256_256_17/video_codes.tar.zst +3 -0
Meissonic/cosmos_test_output/comparison_video_0.mp4 +0 -0
Meissonic/cosmos_test_output/comparison_video_1.mp4 +3 -0
Meissonic/cosmos_test_output/comparison_video_2.mp4 +3 -0
Meissonic/cosmos_test_output/comparison_video_3.mp4 +3 -0
Meissonic/model/diffusion_pytorch_model.safetensors +3 -0
Meissonic/src/__pycache__/pipeline.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/pipeline_video.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/pipeline_video.cpython-313.pyc +0 -0
Meissonic/src/__pycache__/pipeline_video.cpython-314.pyc +0 -0
Meissonic/src/__pycache__/scheduler.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/scheduler.cpython-313.pyc +0 -0
Meissonic/src/__pycache__/scheduler_video.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/scheduler_video.cpython-313.pyc +0 -0
Meissonic/src/__pycache__/scheduler_video.cpython-314.pyc +0 -0
Meissonic/src/__pycache__/transformer.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/transformer.cpython-313.pyc +0 -0
Meissonic/src/__pycache__/transformer_video.cpython-310.pyc +0 -0
Meissonic/src/__pycache__/transformer_video.cpython-313.pyc +0 -0
Meissonic/src/__pycache__/transformer_video.cpython-314.pyc +0 -0
Meissonic/train/__pycache__/dataset_utils.cpython-310.pyc +0 -0
Meissonic/train/__pycache__/dataset_utils.cpython-313.pyc +0 -0
Meissonic/train/__pycache__/trainer_utils.cpython-310.pyc +0 -0
Meissonic/train/__pycache__/trainer_utils.cpython-313.pyc +0 -0
Meissonic/wandb/debug-internal.log +11 -0
Meissonic/wandb/debug.log +24 -0
Meissonic/wandb/run-20251229_081634-hjn0m6c2/files/output.log +17 -0
Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-core.log +14 -0
Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log +11 -0
Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log +24 -0
Meissonic/wandb/run-20251229_081752-78ojckdj/files/output.log +17 -0
Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-core.log +14 -0
Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log +11 -0
Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log +24 -0
Meissonic/wandb/run-20251229_081959-tvb7bjux/files/output.log +8 -0
Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-core.log +7 -0
Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log +6 -0
Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log +22 -0
Meissonic/wandb/run-20251229_082208-d5bens3y/files/output.log +68 -0
Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-core.log +14 -0
Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log +11 -0
Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log +24 -0
Meissonic/wandb/run-20251229_082348-xdcob8vv/files/output.log +68 -0
Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-core.log +14 -0
Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log +11 -0
Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log +24 -0
Meissonic/wandb/run-20251229_082735-s2rbngfj/files/output.log +68 -0
Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-core.log +14 -0

.gitattributes CHANGED Viewed

@@ -876,3 +876,6 @@ Meissonic/wandb/run-20251229_093500-yyrdgepk/run-yyrdgepk.wandb filter=lfs diff=
 OpenVid1M_reorganized.csv filter=lfs diff=lfs merge=lfs -text
 Wan2.1-T2V-1.3B/examples/i2v_input.JPG filter=lfs diff=lfs merge=lfs -text
 Wan2.1-T2V-1.3B/google/umt5-xxl/tokenizer.json filter=lfs diff=lfs merge=lfs -text

 OpenVid1M_reorganized.csv filter=lfs diff=lfs merge=lfs -text
 Wan2.1-T2V-1.3B/examples/i2v_input.JPG filter=lfs diff=lfs merge=lfs -text
 Wan2.1-T2V-1.3B/google/umt5-xxl/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Meissonic/cosmos_test_output/comparison_video_1.mp4 filter=lfs diff=lfs merge=lfs -text
+Meissonic/cosmos_test_output/comparison_video_2.mp4 filter=lfs diff=lfs merge=lfs -text
+Meissonic/cosmos_test_output/comparison_video_3.mp4 filter=lfs diff=lfs merge=lfs -text

128_128_17/video_codes.tar.zst ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09f283283fcbaa8e88678c39ffaf7b37d14c2f234798403f77fbda59ea65b5e0
+size 2966606624

256_256_17/video_codes.tar.zst ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdb5b8c632876b41b319e069c021e517fe3ab6477b49e4ad5ed950646d58bcd5
+size 11880937045

Meissonic/cosmos_test_output/comparison_video_0.mp4 ADDED Viewed

Binary file (36.3 kB). View file

Meissonic/cosmos_test_output/comparison_video_1.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7311b27e36333219d20c8d835432ecadf9ebe5977bcf760bc6706a85a95cabd
+size 1089113

Meissonic/cosmos_test_output/comparison_video_2.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e02445cac3531ab68bda4ba1bc90ac570a7b423f78b9493471acb4d6e5f9a28
+size 1618316

Meissonic/cosmos_test_output/comparison_video_3.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:017fcf1133dc553228724625c5ad6ec7f58f97ddc27c91201aa88a07423a76e2
+size 931953

Meissonic/model/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96b6b242ca1c2f24e9d02cd6596066fab6d310e2d7538f33ae267cb18d957e8f
+size 5676070424

Meissonic/src/__pycache__/pipeline.cpython-310.pyc ADDED Viewed

Binary file (11.4 kB). View file

Meissonic/src/__pycache__/pipeline_video.cpython-310.pyc ADDED Viewed

Binary file (27.9 kB). View file

Meissonic/src/__pycache__/pipeline_video.cpython-313.pyc ADDED Viewed

Binary file (41.8 kB). View file

Meissonic/src/__pycache__/pipeline_video.cpython-314.pyc ADDED Viewed

Binary file (44.8 kB). View file

Meissonic/src/__pycache__/scheduler.cpython-310.pyc ADDED Viewed

Binary file (5.09 kB). View file

Meissonic/src/__pycache__/scheduler.cpython-313.pyc ADDED Viewed

Binary file (9.32 kB). View file

Meissonic/src/__pycache__/scheduler_video.cpython-310.pyc ADDED Viewed

Binary file (5.27 kB). View file

Meissonic/src/__pycache__/scheduler_video.cpython-313.pyc ADDED Viewed

Binary file (9.87 kB). View file

Meissonic/src/__pycache__/scheduler_video.cpython-314.pyc ADDED Viewed

Binary file (11.1 kB). View file

Meissonic/src/__pycache__/transformer.cpython-310.pyc ADDED Viewed

Binary file (33 kB). View file

Meissonic/src/__pycache__/transformer.cpython-313.pyc ADDED Viewed

Binary file (52 kB). View file

Meissonic/src/__pycache__/transformer_video.cpython-310.pyc ADDED Viewed

Binary file (29.4 kB). View file

Meissonic/src/__pycache__/transformer_video.cpython-313.pyc ADDED Viewed

Binary file (49.1 kB). View file

Meissonic/src/__pycache__/transformer_video.cpython-314.pyc ADDED Viewed

Binary file (49 kB). View file

Meissonic/train/__pycache__/dataset_utils.cpython-310.pyc ADDED Viewed

Binary file (28.2 kB). View file

Meissonic/train/__pycache__/dataset_utils.cpython-313.pyc ADDED Viewed

Binary file (50 kB). View file

Meissonic/train/__pycache__/trainer_utils.cpython-310.pyc ADDED Viewed

Binary file (1.27 kB). View file

Meissonic/train/__pycache__/trainer_utils.cpython-313.pyc ADDED Viewed

Binary file (2.03 kB). View file

Meissonic/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-12-29T09:35:00.674748488Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T09:35:00.840745763Z","level":"INFO","msg":"stream: created new stream","id":"yyrdgepk"}
+{"time":"2025-12-29T09:35:00.840887309Z","level":"INFO","msg":"handler: started","stream_id":"yyrdgepk"}
+{"time":"2025-12-29T09:35:00.840989877Z","level":"INFO","msg":"stream: started","id":"yyrdgepk"}
+{"time":"2025-12-29T09:35:00.841004187Z","level":"INFO","msg":"writer: started","stream_id":"yyrdgepk"}
+{"time":"2025-12-29T09:35:00.841006253Z","level":"INFO","msg":"sender: started","stream_id":"yyrdgepk"}
+{"time":"2025-12-29T09:42:02.535940574Z","level":"INFO","msg":"stream: closing","id":"yyrdgepk"}
+{"time":"2025-12-29T09:42:02.752587654Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-29T09:42:02.857589578Z","level":"INFO","msg":"handler: closed","stream_id":"yyrdgepk"}
+{"time":"2025-12-29T09:42:02.857716241Z","level":"INFO","msg":"sender: closed","stream_id":"yyrdgepk"}
+{"time":"2025-12-29T09:42:02.857727173Z","level":"INFO","msg":"stream: closed","id":"yyrdgepk"}

Meissonic/wandb/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_setup.py:_flush():80] Configure stats pid to 843534
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_init.py:init():841] calling init triggers
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 09:35:00,410 INFO    MainThread:843534 [wandb_init.py:init():889] starting backend
+2025-12-29 09:35:00,668 INFO    MainThread:843534 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 09:35:00,673 INFO    MainThread:843534 [wandb_init.py:init():900] backend started and connected
+2025-12-29 09:35:00,674 INFO    MainThread:843534 [wandb_init.py:init():970] updated telemetry
+2025-12-29 09:35:00,678 INFO    MainThread:843534 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 09:35:01,041 INFO    MainThread:843534 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 09:35:01,126 INFO    MainThread:843534 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 09:35:01,126 INFO    MainThread:843534 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 09:35:01,126 INFO    MainThread:843534 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 09:35:01,126 INFO    MainThread:843534 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 09:35:01,128 INFO    MainThread:843534 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 09:35:01,130 INFO    MainThread:843534 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None}
+2025-12-29 09:42:02,535 INFO    wandb-AsyncioManager-main:843534 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-29 09:42:02,535 INFO    wandb-AsyncioManager-main:843534 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.

Meissonic/wandb/run-20251229_081634-hjn0m6c2/files/output.log ADDED Viewed

	@@ -0,0 +1,17 @@

+12/29/2025 08:16:35 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:16:35 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 62.15it/s]
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+12/29/2025 08:16:38 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096)
+Traceback (most recent call last):
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1892, in <module>
+    main(parse_args())
+  File "/mnt/Meissonic/train/train_mei_video.py", line 554, in main
+    dataset.tokenizer = tokenizer
+UnboundLocalError: local variable 'dataset' referenced before assignment
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1892, in <module>
+[rank0]:     main(parse_args())
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 554, in main
+[rank0]:     dataset.tokenizer = tokenizer
+[rank0]: UnboundLocalError: local variable 'dataset' referenced before assignment

Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-12-29T08:16:34.925791368Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpjvxtgfwa/port-680831.txt","pid":680831,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:16:34.92651504Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":680831}
+{"time":"2025-12-29T08:16:34.926493614Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-680831-681084-3226924194/socket","Net":"unix"}}
+{"time":"2025-12-29T08:16:35.112196944Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:16:35.118201645Z","level":"INFO","msg":"handleInformInit: received","streamId":"hjn0m6c2","id":"1(@)"}
+{"time":"2025-12-29T08:16:35.284535005Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"hjn0m6c2","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.409050659Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.409094413Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-29T08:16:38.409089535Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.409131426Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.409243761Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-680831-681084-3226924194/socket","Net":"unix"}}
+{"time":"2025-12-29T08:16:38.912785622Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.912803973Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-29T08:16:38.912818214Z","level":"INFO","msg":"server is closed"}

Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-12-29T08:16:35.118294642Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T08:16:35.284331327Z","level":"INFO","msg":"stream: created new stream","id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:35.28441448Z","level":"INFO","msg":"handler: started","stream_id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:35.284528509Z","level":"INFO","msg":"stream: started","id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:35.284552699Z","level":"INFO","msg":"sender: started","stream_id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:35.284556048Z","level":"INFO","msg":"writer: started","stream_id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:38.40910837Z","level":"INFO","msg":"stream: closing","id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:38.726721311Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-29T08:16:38.907987768Z","level":"INFO","msg":"handler: closed","stream_id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:38.908080631Z","level":"INFO","msg":"sender: closed","stream_id":"hjn0m6c2"}
+{"time":"2025-12-29T08:16:38.908087916Z","level":"INFO","msg":"stream: closed","id":"hjn0m6c2"}

Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_setup.py:_flush():80] Configure stats pid to 680831
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_init.py:init():841] calling init triggers
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 08:16:34,856 INFO    MainThread:680831 [wandb_init.py:init():889] starting backend
+2025-12-29 08:16:35,112 INFO    MainThread:680831 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 08:16:35,116 INFO    MainThread:680831 [wandb_init.py:init():900] backend started and connected
+2025-12-29 08:16:35,118 INFO    MainThread:680831 [wandb_init.py:init():970] updated telemetry
+2025-12-29 08:16:35,123 INFO    MainThread:680831 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 08:16:35,554 INFO    MainThread:680831 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 08:16:35,679 INFO    MainThread:680831 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 08:16:35,679 INFO    MainThread:680831 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 08:16:35,679 INFO    MainThread:680831 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 08:16:35,679 INFO    MainThread:680831 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 08:16:35,681 INFO    MainThread:680831 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 08:16:35,682 INFO    MainThread:680831 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None}
+2025-12-29 08:16:38,409 INFO    wandb-AsyncioManager-main:680831 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-29 08:16:38,409 INFO    wandb-AsyncioManager-main:680831 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.

Meissonic/wandb/run-20251229_081752-78ojckdj/files/output.log ADDED Viewed

	@@ -0,0 +1,17 @@

+12/29/2025 08:17:53 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:17:53 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.25it/s]
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+12/29/2025 08:17:55 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096)
+Traceback (most recent call last):
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1891, in <module>
+    main(parse_args())
+  File "/mnt/Meissonic/train/train_mei_video.py", line 553, in main
+    dataset.tokenizer = tokenizer
+UnboundLocalError: local variable 'dataset' referenced before assignment
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1891, in <module>
+[rank0]:     main(parse_args())
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 553, in main
+[rank0]:     dataset.tokenizer = tokenizer
+[rank0]: UnboundLocalError: local variable 'dataset' referenced before assignment

Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-12-29T08:17:52.415361788Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpoautak8q/port-681864.txt","pid":681864,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:17:52.415911531Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":681864}
+{"time":"2025-12-29T08:17:52.415892317Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-681864-682101-615016650/socket","Net":"unix"}}
+{"time":"2025-12-29T08:17:52.600038892Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:17:52.605938403Z","level":"INFO","msg":"handleInformInit: received","streamId":"78ojckdj","id":"1(@)"}
+{"time":"2025-12-29T08:17:52.775428685Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"78ojckdj","id":"1(@)"}
+{"time":"2025-12-29T08:17:55.715872394Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-29T08:17:55.715918634Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-29T08:17:55.715913241Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-29T08:17:55.71601316Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-681864-682101-615016650/socket","Net":"unix"}}
+{"time":"2025-12-29T08:17:55.716036224Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-29T08:17:56.359848916Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-29T08:17:56.359873934Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-29T08:17:56.359888804Z","level":"INFO","msg":"server is closed"}

Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-12-29T08:17:52.606062282Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T08:17:52.77520788Z","level":"INFO","msg":"stream: created new stream","id":"78ojckdj"}
+{"time":"2025-12-29T08:17:52.775295249Z","level":"INFO","msg":"handler: started","stream_id":"78ojckdj"}
+{"time":"2025-12-29T08:17:52.775420221Z","level":"INFO","msg":"stream: started","id":"78ojckdj"}
+{"time":"2025-12-29T08:17:52.775434881Z","level":"INFO","msg":"writer: started","stream_id":"78ojckdj"}
+{"time":"2025-12-29T08:17:52.775434899Z","level":"INFO","msg":"sender: started","stream_id":"78ojckdj"}
+{"time":"2025-12-29T08:17:55.715926892Z","level":"INFO","msg":"stream: closing","id":"78ojckdj"}
+{"time":"2025-12-29T08:17:56.25572227Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-29T08:17:56.355939724Z","level":"INFO","msg":"handler: closed","stream_id":"78ojckdj"}
+{"time":"2025-12-29T08:17:56.35603204Z","level":"INFO","msg":"sender: closed","stream_id":"78ojckdj"}
+{"time":"2025-12-29T08:17:56.356037202Z","level":"INFO","msg":"stream: closed","id":"78ojckdj"}

Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-12-29 08:17:52,347 INFO    MainThread:681864 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_setup.py:_flush():80] Configure stats pid to 681864
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_init.py:init():841] calling init triggers
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 08:17:52,348 INFO    MainThread:681864 [wandb_init.py:init():889] starting backend
+2025-12-29 08:17:52,600 INFO    MainThread:681864 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 08:17:52,604 INFO    MainThread:681864 [wandb_init.py:init():900] backend started and connected
+2025-12-29 08:17:52,605 INFO    MainThread:681864 [wandb_init.py:init():970] updated telemetry
+2025-12-29 08:17:52,609 INFO    MainThread:681864 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 08:17:52,979 INFO    MainThread:681864 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 08:17:53,102 INFO    MainThread:681864 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 08:17:53,102 INFO    MainThread:681864 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 08:17:53,102 INFO    MainThread:681864 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 08:17:53,103 INFO    MainThread:681864 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 08:17:53,105 INFO    MainThread:681864 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 08:17:53,106 INFO    MainThread:681864 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None}
+2025-12-29 08:17:55,715 INFO    wandb-AsyncioManager-main:681864 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-29 08:17:55,716 INFO    wandb-AsyncioManager-main:681864 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.

Meissonic/wandb/run-20251229_081959-tvb7bjux/files/output.log ADDED Viewed

	@@ -0,0 +1,8 @@

+12/29/2025 08:20:00 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:20:00 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.27it/s]
+'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 3ca70188-ebaa-40b0-a3ff-1473c60ab7d9)')' thrown while requesting HEAD https://huggingface.co/google/umt5-xxl/resolve/main/tokenizer_config.json
+12/29/2025 08:20:10 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 3ca70188-ebaa-40b0-a3ff-1473c60ab7d9)')' thrown while requesting HEAD https://huggingface.co/google/umt5-xxl/resolve/main/tokenizer_config.json
+Retrying in 1s [Retry 1/5].
+12/29/2025 08:20:10 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5].
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565

Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2025-12-29T08:19:59.444483356Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpk2lbu65l/port-683325.txt","pid":683325,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:19:59.445159843Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":683325}
+{"time":"2025-12-29T08:19:59.445163741Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-683325-683564-770336178/socket","Net":"unix"}}
+{"time":"2025-12-29T08:19:59.630747774Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:19:59.636523927Z","level":"INFO","msg":"handleInformInit: received","streamId":"tvb7bjux","id":"1(@)"}
+{"time":"2025-12-29T08:19:59.807596347Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tvb7bjux","id":"1(@)"}
+{"time":"2025-12-29T08:20:13.475754205Z","level":"INFO","msg":"server: parent process exited, terminating service process"}

Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-12-29T08:19:59.636615677Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T08:19:59.807402325Z","level":"INFO","msg":"stream: created new stream","id":"tvb7bjux"}
+{"time":"2025-12-29T08:19:59.807478253Z","level":"INFO","msg":"handler: started","stream_id":"tvb7bjux"}
+{"time":"2025-12-29T08:19:59.807589456Z","level":"INFO","msg":"stream: started","id":"tvb7bjux"}
+{"time":"2025-12-29T08:19:59.807608334Z","level":"INFO","msg":"sender: started","stream_id":"tvb7bjux"}
+{"time":"2025-12-29T08:19:59.807611249Z","level":"INFO","msg":"writer: started","stream_id":"tvb7bjux"}

Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_setup.py:_flush():80] Configure stats pid to 683325
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log
+2025-12-29 08:19:59,377 INFO    MainThread:683325 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log
+2025-12-29 08:19:59,378 INFO    MainThread:683325 [wandb_init.py:init():841] calling init triggers
+2025-12-29 08:19:59,378 INFO    MainThread:683325 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 08:19:59,378 INFO    MainThread:683325 [wandb_init.py:init():889] starting backend
+2025-12-29 08:19:59,630 INFO    MainThread:683325 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 08:19:59,635 INFO    MainThread:683325 [wandb_init.py:init():900] backend started and connected
+2025-12-29 08:19:59,636 INFO    MainThread:683325 [wandb_init.py:init():970] updated telemetry
+2025-12-29 08:19:59,640 INFO    MainThread:683325 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 08:20:00,091 INFO    MainThread:683325 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 08:20:00,220 INFO    MainThread:683325 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 08:20:00,220 INFO    MainThread:683325 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 08:20:00,220 INFO    MainThread:683325 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 08:20:00,220 INFO    MainThread:683325 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 08:20:00,223 INFO    MainThread:683325 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 08:20:00,224 INFO    MainThread:683325 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None}

Meissonic/wandb/run-20251229_082208-d5bens3y/files/output.log ADDED Viewed

	@@ -0,0 +1,68 @@

+12/29/2025 08:22:09 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:22:09 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 69.71it/s]
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+12/29/2025 08:22:12 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096)
+12/29/2025 08:22:20 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000
+12/29/2025 08:22:20 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000
+12/29/2025 08:22:20 - INFO - __main__ - Getting compressed dimensions from precomputed features...
+12/29/2025 08:22:29 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=32, W'=32
+12/29/2025 08:22:29 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096
+12/29/2025 08:22:29 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:22:29 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12
+12/29/2025 08:22:46 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:22:46 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors
+12/29/2025 08:22:48 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding)
+12/29/2025 08:22:49 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833
+12/29/2025 08:22:49 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2)
+12/29/2025 08:22:49 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000
+12/29/2025 08:22:49 - INFO - __main__ - Creating dataloaders and lr_scheduler
+12/29/2025 08:22:49 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set
+12/29/2025 08:22:49 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime
+12/29/2025 08:22:58 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set/metadata.json
+12/29/2025 08:22:58 - INFO - train.dataset_utils -   Total samples in metadata: unknown
+12/29/2025 08:22:58 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 1019957 samples available
+12/29/2025 08:22:58 - INFO - train.dataset_utils -   Index range: 0 to 1019956
+12/29/2025 08:22:58 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True
+12/29/2025 08:22:58 - INFO - __main__ - Dataloader configuration:
+12/29/2025 08:22:58 - INFO - __main__ -   - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video)
+12/29/2025 08:22:58 - INFO - __main__ -   - prefetch_factor: 2
+12/29/2025 08:22:58 - INFO - __main__ -   - persistent_workers: True
+12/29/2025 08:22:58 - INFO - __main__ -   - pin_memory: True
+12/29/2025 08:22:58 - INFO - __main__ - Preparing model, optimizer and dataloaders
+Traceback (most recent call last):
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in <module>
+    main(parse_args())
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+    model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+    result = tuple(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+    self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+    return self.prepare_model(obj, device_placement=device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+    model = torch.nn.parallel.DistributedDataParallel(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+    self._ddp_init_helper(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+    self.reducer = dist.Reducer(
+torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in <module>
+[rank0]:     main(parse_args())
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+[rank0]:     model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+[rank0]:     result = tuple(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+[rank0]:     self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+[rank0]:     return self.prepare_model(obj, device_placement=device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+[rank0]:     model = torch.nn.parallel.DistributedDataParallel(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+[rank0]:     self._ddp_init_helper(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+[rank0]:     self.reducer = dist.Reducer(
+[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-12-29T08:22:08.633253613Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpfee4vdgx/port-684910.txt","pid":684910,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:22:08.633786607Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":684910}
+{"time":"2025-12-29T08:22:08.633765139Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-684910-685159-1258026704/socket","Net":"unix"}}
+{"time":"2025-12-29T08:22:08.819292223Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:22:08.826487265Z","level":"INFO","msg":"handleInformInit: received","streamId":"d5bens3y","id":"1(@)"}
+{"time":"2025-12-29T08:22:08.995050977Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d5bens3y","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.182467655Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.182531417Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-29T08:23:10.182519187Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.182572054Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.182609016Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-684910-685159-1258026704/socket","Net":"unix"}}
+{"time":"2025-12-29T08:23:10.552208267Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.552231257Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-29T08:23:10.552243636Z","level":"INFO","msg":"server is closed"}

Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-12-29T08:22:08.826738283Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T08:22:08.994805914Z","level":"INFO","msg":"stream: created new stream","id":"d5bens3y"}
+{"time":"2025-12-29T08:22:08.9949314Z","level":"INFO","msg":"handler: started","stream_id":"d5bens3y"}
+{"time":"2025-12-29T08:22:08.995043335Z","level":"INFO","msg":"stream: started","id":"d5bens3y"}
+{"time":"2025-12-29T08:22:08.995063351Z","level":"INFO","msg":"sender: started","stream_id":"d5bens3y"}
+{"time":"2025-12-29T08:22:08.995066887Z","level":"INFO","msg":"writer: started","stream_id":"d5bens3y"}
+{"time":"2025-12-29T08:23:10.182529884Z","level":"INFO","msg":"stream: closing","id":"d5bens3y"}
+{"time":"2025-12-29T08:23:10.451151782Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-29T08:23:10.548788578Z","level":"INFO","msg":"handler: closed","stream_id":"d5bens3y"}
+{"time":"2025-12-29T08:23:10.548905656Z","level":"INFO","msg":"sender: closed","stream_id":"d5bens3y"}
+{"time":"2025-12-29T08:23:10.548914674Z","level":"INFO","msg":"stream: closed","id":"d5bens3y"}

Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_setup.py:_flush():80] Configure stats pid to 684910
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_init.py:init():841] calling init triggers
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 08:22:08,565 INFO    MainThread:684910 [wandb_init.py:init():889] starting backend
+2025-12-29 08:22:08,819 INFO    MainThread:684910 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 08:22:08,825 INFO    MainThread:684910 [wandb_init.py:init():900] backend started and connected
+2025-12-29 08:22:08,826 INFO    MainThread:684910 [wandb_init.py:init():970] updated telemetry
+2025-12-29 08:22:08,832 INFO    MainThread:684910 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 08:22:09,324 INFO    MainThread:684910 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 08:22:09,503 INFO    MainThread:684910 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 08:22:09,503 INFO    MainThread:684910 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 08:22:09,503 INFO    MainThread:684910 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 08:22:09,503 INFO    MainThread:684910 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 08:22:09,506 INFO    MainThread:684910 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 08:22:09,507 INFO    MainThread:684910 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None}
+2025-12-29 08:23:10,182 INFO    wandb-AsyncioManager-main:684910 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-29 08:23:10,182 INFO    wandb-AsyncioManager-main:684910 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.

Meissonic/wandb/run-20251229_082348-xdcob8vv/files/output.log ADDED Viewed

	@@ -0,0 +1,68 @@

+12/29/2025 08:23:49 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:23:49 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 64.61it/s]
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+12/29/2025 08:23:51 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096)
+12/29/2025 08:24:00 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000
+12/29/2025 08:24:00 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000
+12/29/2025 08:24:00 - INFO - __main__ - Getting compressed dimensions from precomputed features...
+12/29/2025 08:24:09 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=32, W'=32
+12/29/2025 08:24:09 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096
+12/29/2025 08:24:09 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:24:09 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12
+12/29/2025 08:24:26 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:24:26 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors
+12/29/2025 08:24:28 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding)
+12/29/2025 08:24:29 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833
+12/29/2025 08:24:29 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2)
+12/29/2025 08:24:29 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000
+12/29/2025 08:24:29 - INFO - __main__ - Creating dataloaders and lr_scheduler
+12/29/2025 08:24:29 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set
+12/29/2025 08:24:29 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime
+12/29/2025 08:24:38 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set/metadata.json
+12/29/2025 08:24:38 - INFO - train.dataset_utils -   Total samples in metadata: unknown
+12/29/2025 08:24:38 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 1019957 samples available
+12/29/2025 08:24:38 - INFO - train.dataset_utils -   Index range: 0 to 1019956
+12/29/2025 08:24:38 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True
+12/29/2025 08:24:38 - INFO - __main__ - Dataloader configuration:
+12/29/2025 08:24:38 - INFO - __main__ -   - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video)
+12/29/2025 08:24:38 - INFO - __main__ -   - prefetch_factor: 2
+12/29/2025 08:24:38 - INFO - __main__ -   - persistent_workers: True
+12/29/2025 08:24:38 - INFO - __main__ -   - pin_memory: True
+12/29/2025 08:24:38 - INFO - __main__ - Preparing model, optimizer and dataloaders
+Traceback (most recent call last):
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in <module>
+    main(parse_args())
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+    model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+    result = tuple(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+    self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+    return self.prepare_model(obj, device_placement=device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+    model = torch.nn.parallel.DistributedDataParallel(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+    self._ddp_init_helper(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+    self.reducer = dist.Reducer(
+torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in <module>
+[rank0]:     main(parse_args())
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+[rank0]:     model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+[rank0]:     result = tuple(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+[rank0]:     self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+[rank0]:     return self.prepare_model(obj, device_placement=device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+[rank0]:     model = torch.nn.parallel.DistributedDataParallel(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+[rank0]:     self._ddp_init_helper(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+[rank0]:     self.reducer = dist.Reducer(
+[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-12-29T08:23:48.48214766Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp_kibi_k0/port-687239.txt","pid":687239,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:23:48.48261065Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":687239}
+{"time":"2025-12-29T08:23:48.482606464Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-687239-687548-1521909327/socket","Net":"unix"}}
+{"time":"2025-12-29T08:23:48.668396575Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:23:48.67438215Z","level":"INFO","msg":"handleInformInit: received","streamId":"xdcob8vv","id":"1(@)"}
+{"time":"2025-12-29T08:23:48.838417506Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xdcob8vv","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.064143118Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.064216028Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.064271092Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.064228351Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-29T08:24:51.064361726Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-687239-687548-1521909327/socket","Net":"unix"}}
+{"time":"2025-12-29T08:24:51.603614002Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.603644195Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-29T08:24:51.603660135Z","level":"INFO","msg":"server is closed"}

Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,11 @@

+{"time":"2025-12-29T08:23:48.674533717Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-29T08:23:48.83819002Z","level":"INFO","msg":"stream: created new stream","id":"xdcob8vv"}
+{"time":"2025-12-29T08:23:48.838277887Z","level":"INFO","msg":"handler: started","stream_id":"xdcob8vv"}
+{"time":"2025-12-29T08:23:48.838409545Z","level":"INFO","msg":"stream: started","id":"xdcob8vv"}
+{"time":"2025-12-29T08:23:48.838424189Z","level":"INFO","msg":"writer: started","stream_id":"xdcob8vv"}
+{"time":"2025-12-29T08:23:48.838433456Z","level":"INFO","msg":"sender: started","stream_id":"xdcob8vv"}
+{"time":"2025-12-29T08:24:51.064239479Z","level":"INFO","msg":"stream: closing","id":"xdcob8vv"}
+{"time":"2025-12-29T08:24:51.333940412Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-29T08:24:51.600325077Z","level":"INFO","msg":"handler: closed","stream_id":"xdcob8vv"}
+{"time":"2025-12-29T08:24:51.600456594Z","level":"INFO","msg":"sender: closed","stream_id":"xdcob8vv"}
+{"time":"2025-12-29T08:24:51.600464276Z","level":"INFO","msg":"stream: closed","id":"xdcob8vv"}

Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_setup.py:_flush():80] Configure stats pid to 687239
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_init.py:init():841] calling init triggers
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-29 08:23:48,415 INFO    MainThread:687239 [wandb_init.py:init():889] starting backend
+2025-12-29 08:23:48,668 INFO    MainThread:687239 [wandb_init.py:init():892] sending inform_init request
+2025-12-29 08:23:48,672 INFO    MainThread:687239 [wandb_init.py:init():900] backend started and connected
+2025-12-29 08:23:48,674 INFO    MainThread:687239 [wandb_init.py:init():970] updated telemetry
+2025-12-29 08:23:48,678 INFO    MainThread:687239 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-29 08:23:49,038 INFO    MainThread:687239 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-29 08:23:49,163 INFO    MainThread:687239 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-29 08:23:49,163 INFO    MainThread:687239 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-29 08:23:49,163 INFO    MainThread:687239 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-29 08:23:49,163 INFO    MainThread:687239 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-29 08:23:49,166 INFO    MainThread:687239 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-29 08:23:49,167 INFO    MainThread:687239 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. 
The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None}
+2025-12-29 08:24:51,064 INFO    wandb-AsyncioManager-main:687239 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-29 08:24:51,064 INFO    wandb-AsyncioManager-main:687239 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.

Meissonic/wandb/run-20251229_082735-s2rbngfj/files/output.log ADDED Viewed

	@@ -0,0 +1,68 @@

+12/29/2025 08:27:36 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime
+12/29/2025 08:27:36 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference
+Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.71it/s]
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+12/29/2025 08:27:38 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096)
+12/29/2025 08:27:39 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000
+12/29/2025 08:27:39 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000
+12/29/2025 08:27:39 - INFO - __main__ - Getting compressed dimensions from precomputed features...
+12/29/2025 08:27:40 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16
+12/29/2025 08:27:40 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096
+12/29/2025 08:27:40 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:27:40 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12
+12/29/2025 08:27:57 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B
+12/29/2025 08:27:57 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors
+12/29/2025 08:27:59 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding)
+12/29/2025 08:28:01 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833
+12/29/2025 08:28:01 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2)
+12/29/2025 08:28:01 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000
+12/29/2025 08:28:01 - INFO - __main__ - Creating dataloaders and lr_scheduler
+12/29/2025 08:28:01 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128
+12/29/2025 08:28:01 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime
+12/29/2025 08:28:02 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json
+12/29/2025 08:28:02 - INFO - train.dataset_utils -   Total samples in metadata: 1019957
+12/29/2025 08:28:02 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available
+12/29/2025 08:28:02 - INFO - train.dataset_utils -   Index range: 0 to 127999
+12/29/2025 08:28:02 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True
+12/29/2025 08:28:02 - INFO - __main__ - Dataloader configuration:
+12/29/2025 08:28:02 - INFO - __main__ -   - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video)
+12/29/2025 08:28:02 - INFO - __main__ -   - prefetch_factor: 2
+12/29/2025 08:28:02 - INFO - __main__ -   - persistent_workers: True
+12/29/2025 08:28:02 - INFO - __main__ -   - pin_memory: True
+12/29/2025 08:28:02 - INFO - __main__ - Preparing model, optimizer and dataloaders
+Traceback (most recent call last):
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in <module>
+    main(parse_args())
+  File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+    model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+    result = tuple(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+    self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+    return self.prepare_model(obj, device_placement=device_placement)
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+    model = torch.nn.parallel.DistributedDataParallel(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+    self._ddp_init_helper(
+  File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+    self.reducer = dist.Reducer(
+torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in <module>
+[rank0]:     main(parse_args())
+[rank0]:   File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main
+[rank0]:     model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare
+[rank0]:     result = tuple(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in <genexpr>
+[rank0]:     self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one
+[rank0]:     return self.prepare_model(obj, device_placement=device_placement)
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model
+[rank0]:     model = torch.nn.parallel.DistributedDataParallel(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__
+[rank0]:     self._ddp_init_helper(
+[rank0]:   File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper
+[rank0]:     self.reducer = dist.Reducer(
+[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-12-29T08:27:35.245761137Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpdee2lffa/port-691754.txt","pid":691754,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-29T08:27:35.24620287Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":691754}
+{"time":"2025-12-29T08:27:35.246206125Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-691754-692079-1616011383/socket","Net":"unix"}}
+{"time":"2025-12-29T08:27:35.432196378Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-29T08:27:35.439108806Z","level":"INFO","msg":"handleInformInit: received","streamId":"s2rbngfj","id":"1(@)"}
+{"time":"2025-12-29T08:27:35.609262249Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"s2rbngfj","id":"1(@)"}
+{"time":"2025-12-29T08:28:12.961224171Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-29T08:28:12.961266225Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-29T08:28:12.961261491Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-29T08:28:12.96133838Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-29T08:28:12.961339854Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-691754-692079-1616011383/socket","Net":"unix"}}
+{"time":"2025-12-29T08:28:13.298524802Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-29T08:28:13.298553535Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-29T08:28:13.298566342Z","level":"INFO","msg":"server is closed"}