diff --git a/.gitattributes b/.gitattributes index 817308a7e16bb8713b878d8b42e60139e5d5eb5c..7583c0d57d2dc3e40153b62335e922346c00c86c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -876,3 +876,6 @@ Meissonic/wandb/run-20251229_093500-yyrdgepk/run-yyrdgepk.wandb filter=lfs diff= OpenVid1M_reorganized.csv filter=lfs diff=lfs merge=lfs -text Wan2.1-T2V-1.3B/examples/i2v_input.JPG filter=lfs diff=lfs merge=lfs -text Wan2.1-T2V-1.3B/google/umt5-xxl/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meissonic/cosmos_test_output/comparison_video_1.mp4 filter=lfs diff=lfs merge=lfs -text +Meissonic/cosmos_test_output/comparison_video_2.mp4 filter=lfs diff=lfs merge=lfs -text +Meissonic/cosmos_test_output/comparison_video_3.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/128_128_17/video_codes.tar.zst b/128_128_17/video_codes.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..6061fbc8bd2a528e92e4d5a3d60d552c82e0cbb1 --- /dev/null +++ b/128_128_17/video_codes.tar.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f283283fcbaa8e88678c39ffaf7b37d14c2f234798403f77fbda59ea65b5e0 +size 2966606624 diff --git a/256_256_17/video_codes.tar.zst b/256_256_17/video_codes.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..4ac16ae77f583f769afc29b915ae3d7a0714517a --- /dev/null +++ b/256_256_17/video_codes.tar.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb5b8c632876b41b319e069c021e517fe3ab6477b49e4ad5ed950646d58bcd5 +size 11880937045 diff --git a/Meissonic/cosmos_test_output/comparison_video_0.mp4 b/Meissonic/cosmos_test_output/comparison_video_0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e899108ec77f5f3cf6d22bf1e6d9ccf63564a281 Binary files /dev/null and b/Meissonic/cosmos_test_output/comparison_video_0.mp4 differ diff --git a/Meissonic/cosmos_test_output/comparison_video_1.mp4 b/Meissonic/cosmos_test_output/comparison_video_1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9f5bf55a28c9849abdbd0794d22ef2df7d418f48 --- /dev/null +++ b/Meissonic/cosmos_test_output/comparison_video_1.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7311b27e36333219d20c8d835432ecadf9ebe5977bcf760bc6706a85a95cabd +size 1089113 diff --git a/Meissonic/cosmos_test_output/comparison_video_2.mp4 b/Meissonic/cosmos_test_output/comparison_video_2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e9a86b65067e996f43a9b6a994e27b62a92cff24 --- /dev/null +++ b/Meissonic/cosmos_test_output/comparison_video_2.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e02445cac3531ab68bda4ba1bc90ac570a7b423f78b9493471acb4d6e5f9a28 +size 1618316 diff --git a/Meissonic/cosmos_test_output/comparison_video_3.mp4 b/Meissonic/cosmos_test_output/comparison_video_3.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..34b9bf25d163f76d849fbe42ef41262a8bd9acfc --- /dev/null +++ b/Meissonic/cosmos_test_output/comparison_video_3.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:017fcf1133dc553228724625c5ad6ec7f58f97ddc27c91201aa88a07423a76e2 +size 931953 diff --git a/Meissonic/model/diffusion_pytorch_model.safetensors b/Meissonic/model/diffusion_pytorch_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d41b39ee6498ce3b18c1d6b01dfde531a97d5047 --- /dev/null +++ b/Meissonic/model/diffusion_pytorch_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b6b242ca1c2f24e9d02cd6596066fab6d310e2d7538f33ae267cb18d957e8f +size 5676070424 diff --git a/Meissonic/src/__pycache__/pipeline.cpython-310.pyc b/Meissonic/src/__pycache__/pipeline.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0159a7b5f0d121615aae9c29abe1c8fc2dbee8c3 Binary files /dev/null and b/Meissonic/src/__pycache__/pipeline.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/pipeline_video.cpython-310.pyc b/Meissonic/src/__pycache__/pipeline_video.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2078a12229bd0b9978109cd5c62a1d1c5ddcc2f Binary files /dev/null and b/Meissonic/src/__pycache__/pipeline_video.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/pipeline_video.cpython-313.pyc b/Meissonic/src/__pycache__/pipeline_video.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c46ca4bfdfe6c2df9f0340b76533997bd3dbbc76 Binary files /dev/null and b/Meissonic/src/__pycache__/pipeline_video.cpython-313.pyc differ diff --git a/Meissonic/src/__pycache__/pipeline_video.cpython-314.pyc b/Meissonic/src/__pycache__/pipeline_video.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6765ea05253811065bca9d8f951a181c500af22 Binary files /dev/null and b/Meissonic/src/__pycache__/pipeline_video.cpython-314.pyc differ diff --git a/Meissonic/src/__pycache__/scheduler.cpython-310.pyc b/Meissonic/src/__pycache__/scheduler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20bac6dbfaad2214f74b7135857ae40dbf76517e Binary files /dev/null and b/Meissonic/src/__pycache__/scheduler.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/scheduler.cpython-313.pyc b/Meissonic/src/__pycache__/scheduler.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31b67a17a1df5e3439c50e3970700d2ad89cff08 Binary files /dev/null and b/Meissonic/src/__pycache__/scheduler.cpython-313.pyc differ diff --git a/Meissonic/src/__pycache__/scheduler_video.cpython-310.pyc b/Meissonic/src/__pycache__/scheduler_video.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6dbdac1ae3bdfdc5fcbf7c9b75500e3dd4394c3 Binary files /dev/null and b/Meissonic/src/__pycache__/scheduler_video.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/scheduler_video.cpython-313.pyc b/Meissonic/src/__pycache__/scheduler_video.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e8e7723c9479bcef8a9ceeda4c135df588dcd091 Binary files /dev/null and b/Meissonic/src/__pycache__/scheduler_video.cpython-313.pyc differ diff --git a/Meissonic/src/__pycache__/scheduler_video.cpython-314.pyc b/Meissonic/src/__pycache__/scheduler_video.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c14ad2d213934d2a63348c5b51db51b15d46d716 Binary files /dev/null and b/Meissonic/src/__pycache__/scheduler_video.cpython-314.pyc differ diff --git a/Meissonic/src/__pycache__/transformer.cpython-310.pyc b/Meissonic/src/__pycache__/transformer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34642b80cc313d964a27573c39c233114b05e99b Binary files /dev/null and b/Meissonic/src/__pycache__/transformer.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/transformer.cpython-313.pyc b/Meissonic/src/__pycache__/transformer.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6bb8b2a63399d12fdaf6df3b35da6494b7a6514d Binary files /dev/null and b/Meissonic/src/__pycache__/transformer.cpython-313.pyc differ diff --git a/Meissonic/src/__pycache__/transformer_video.cpython-310.pyc b/Meissonic/src/__pycache__/transformer_video.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5d812636c40b5f310d526ccea908762540670cd Binary files /dev/null and b/Meissonic/src/__pycache__/transformer_video.cpython-310.pyc differ diff --git a/Meissonic/src/__pycache__/transformer_video.cpython-313.pyc b/Meissonic/src/__pycache__/transformer_video.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8a68cee88dc73d0f2d060c95b499b6265958675 Binary files /dev/null and b/Meissonic/src/__pycache__/transformer_video.cpython-313.pyc differ diff --git a/Meissonic/src/__pycache__/transformer_video.cpython-314.pyc b/Meissonic/src/__pycache__/transformer_video.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f16fdf1a752f73b689c814c0ec50e3e4b9e450ad Binary files /dev/null and b/Meissonic/src/__pycache__/transformer_video.cpython-314.pyc differ diff --git a/Meissonic/train/__pycache__/dataset_utils.cpython-310.pyc b/Meissonic/train/__pycache__/dataset_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18e118e21717e84db55c6e4a88e09d440b6d0000 Binary files /dev/null and b/Meissonic/train/__pycache__/dataset_utils.cpython-310.pyc differ diff --git a/Meissonic/train/__pycache__/dataset_utils.cpython-313.pyc b/Meissonic/train/__pycache__/dataset_utils.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f78f905cf6fb8cc023fb5ab27aaadaf0701c525 Binary files /dev/null and b/Meissonic/train/__pycache__/dataset_utils.cpython-313.pyc differ diff --git a/Meissonic/train/__pycache__/trainer_utils.cpython-310.pyc b/Meissonic/train/__pycache__/trainer_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbe917b20ca139d7ec602b67b9089e1d3e83eeb7 Binary files /dev/null and b/Meissonic/train/__pycache__/trainer_utils.cpython-310.pyc differ diff --git a/Meissonic/train/__pycache__/trainer_utils.cpython-313.pyc b/Meissonic/train/__pycache__/trainer_utils.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7842410521abb09c0c50ac7720d0b275d2d48c0c Binary files /dev/null and b/Meissonic/train/__pycache__/trainer_utils.cpython-313.pyc differ diff --git a/Meissonic/wandb/debug-internal.log b/Meissonic/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..054e3b930dc46ca1ce02f918b36ec3aff363945e --- /dev/null +++ b/Meissonic/wandb/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:35:00.674748488Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:35:00.840745763Z","level":"INFO","msg":"stream: created new stream","id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.840887309Z","level":"INFO","msg":"handler: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.840989877Z","level":"INFO","msg":"stream: started","id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.841004187Z","level":"INFO","msg":"writer: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.841006253Z","level":"INFO","msg":"sender: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.535940574Z","level":"INFO","msg":"stream: closing","id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.752587654Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:42:02.857589578Z","level":"INFO","msg":"handler: closed","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.857716241Z","level":"INFO","msg":"sender: closed","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.857727173Z","level":"INFO","msg":"stream: closed","id":"yyrdgepk"} diff --git a/Meissonic/wandb/debug.log b/Meissonic/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..13cc1ea77ef489a9526479f5f9151bb49a69eef3 --- /dev/null +++ b/Meissonic/wandb/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Configure stats pid to 843534 +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():889] starting backend +2025-12-29 09:35:00,668 INFO MainThread:843534 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:35:00,673 INFO MainThread:843534 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:35:00,674 INFO MainThread:843534 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:35:00,678 INFO MainThread:843534 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:35:01,041 INFO MainThread:843534 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:35:01,128 INFO MainThread:843534 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:35:01,130 INFO MainThread:843534 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:42:02,535 INFO wandb-AsyncioManager-main:843534 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:42:02,535 INFO wandb-AsyncioManager-main:843534 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_081634-hjn0m6c2/files/output.log b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..704dd27c7ef4a7a0042dcfd1bcd21365ce89ef50 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/files/output.log @@ -0,0 +1,17 @@ +12/29/2025 08:16:35 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:16:35 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 62.15it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:16:38 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1892, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 554, in main + dataset.tokenizer = tokenizer +UnboundLocalError: local variable 'dataset' referenced before assignment +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1892, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 554, in main +[rank0]: dataset.tokenizer = tokenizer +[rank0]: UnboundLocalError: local variable 'dataset' referenced before assignment diff --git a/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-core.log b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..bf71f42bad1123ca9f9d731f2ec834aa14d30a40 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:16:34.925791368Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpjvxtgfwa/port-680831.txt","pid":680831,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:16:34.92651504Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":680831} +{"time":"2025-12-29T08:16:34.926493614Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-680831-681084-3226924194/socket","Net":"unix"}} +{"time":"2025-12-29T08:16:35.112196944Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:16:35.118201645Z","level":"INFO","msg":"handleInformInit: received","streamId":"hjn0m6c2","id":"1(@)"} +{"time":"2025-12-29T08:16:35.284535005Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"hjn0m6c2","id":"1(@)"} +{"time":"2025-12-29T08:16:38.409050659Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:16:38.409094413Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:16:38.409089535Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:16:38.409131426Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:16:38.409243761Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-680831-681084-3226924194/socket","Net":"unix"}} +{"time":"2025-12-29T08:16:38.912785622Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:16:38.912803973Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:16:38.912818214Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ccc0f95ccf38cc339fe1813851bb285e35b8b592 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:16:35.118294642Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:16:35.284331327Z","level":"INFO","msg":"stream: created new stream","id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:35.28441448Z","level":"INFO","msg":"handler: started","stream_id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:35.284528509Z","level":"INFO","msg":"stream: started","id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:35.284552699Z","level":"INFO","msg":"sender: started","stream_id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:35.284556048Z","level":"INFO","msg":"writer: started","stream_id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:38.40910837Z","level":"INFO","msg":"stream: closing","id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:38.726721311Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:16:38.907987768Z","level":"INFO","msg":"handler: closed","stream_id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:38.908080631Z","level":"INFO","msg":"sender: closed","stream_id":"hjn0m6c2"} +{"time":"2025-12-29T08:16:38.908087916Z","level":"INFO","msg":"stream: closed","id":"hjn0m6c2"} diff --git a/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..057542aef5d5784318d0be02b2f7a75f344a79c5 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_setup.py:_flush():80] Configure stats pid to 680831 +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug.log +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081634-hjn0m6c2/logs/debug-internal.log +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:16:34,856 INFO MainThread:680831 [wandb_init.py:init():889] starting backend +2025-12-29 08:16:35,112 INFO MainThread:680831 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:16:35,116 INFO MainThread:680831 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:16:35,118 INFO MainThread:680831 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:16:35,123 INFO MainThread:680831 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:16:35,554 INFO MainThread:680831 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:16:35,679 INFO MainThread:680831 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:16:35,679 INFO MainThread:680831 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:16:35,679 INFO MainThread:680831 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:16:35,679 INFO MainThread:680831 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:16:35,681 INFO MainThread:680831 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:16:35,682 INFO MainThread:680831 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None} +2025-12-29 08:16:38,409 INFO wandb-AsyncioManager-main:680831 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:16:38,409 INFO wandb-AsyncioManager-main:680831 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_081752-78ojckdj/files/output.log b/Meissonic/wandb/run-20251229_081752-78ojckdj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..db1d138b2d77763a6b19dd0d5587d403c55f0484 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081752-78ojckdj/files/output.log @@ -0,0 +1,17 @@ +12/29/2025 08:17:53 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:17:53 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.25it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:17:55 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1891, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 553, in main + dataset.tokenizer = tokenizer +UnboundLocalError: local variable 'dataset' referenced before assignment +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1891, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 553, in main +[rank0]: dataset.tokenizer = tokenizer +[rank0]: UnboundLocalError: local variable 'dataset' referenced before assignment diff --git a/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-core.log b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..9af574feb708bafe74273dea95ddf8cc9d5625f7 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:17:52.415361788Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpoautak8q/port-681864.txt","pid":681864,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:17:52.415911531Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":681864} +{"time":"2025-12-29T08:17:52.415892317Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-681864-682101-615016650/socket","Net":"unix"}} +{"time":"2025-12-29T08:17:52.600038892Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:17:52.605938403Z","level":"INFO","msg":"handleInformInit: received","streamId":"78ojckdj","id":"1(@)"} +{"time":"2025-12-29T08:17:52.775428685Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"78ojckdj","id":"1(@)"} +{"time":"2025-12-29T08:17:55.715872394Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:17:55.715918634Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:17:55.715913241Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:17:55.71601316Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-681864-682101-615016650/socket","Net":"unix"}} +{"time":"2025-12-29T08:17:55.716036224Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:17:56.359848916Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:17:56.359873934Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:17:56.359888804Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..10b48ec5226e19911ddee43d8b0e968adc20b285 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:17:52.606062282Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:17:52.77520788Z","level":"INFO","msg":"stream: created new stream","id":"78ojckdj"} +{"time":"2025-12-29T08:17:52.775295249Z","level":"INFO","msg":"handler: started","stream_id":"78ojckdj"} +{"time":"2025-12-29T08:17:52.775420221Z","level":"INFO","msg":"stream: started","id":"78ojckdj"} +{"time":"2025-12-29T08:17:52.775434881Z","level":"INFO","msg":"writer: started","stream_id":"78ojckdj"} +{"time":"2025-12-29T08:17:52.775434899Z","level":"INFO","msg":"sender: started","stream_id":"78ojckdj"} +{"time":"2025-12-29T08:17:55.715926892Z","level":"INFO","msg":"stream: closing","id":"78ojckdj"} +{"time":"2025-12-29T08:17:56.25572227Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:17:56.355939724Z","level":"INFO","msg":"handler: closed","stream_id":"78ojckdj"} +{"time":"2025-12-29T08:17:56.35603204Z","level":"INFO","msg":"sender: closed","stream_id":"78ojckdj"} +{"time":"2025-12-29T08:17:56.356037202Z","level":"INFO","msg":"stream: closed","id":"78ojckdj"} diff --git a/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2eab0c0ec67dc28d848b2216cf8d43a7f6a98a52 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:17:52,347 INFO MainThread:681864 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_setup.py:_flush():80] Configure stats pid to 681864 +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug.log +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081752-78ojckdj/logs/debug-internal.log +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:17:52,348 INFO MainThread:681864 [wandb_init.py:init():889] starting backend +2025-12-29 08:17:52,600 INFO MainThread:681864 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:17:52,604 INFO MainThread:681864 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:17:52,605 INFO MainThread:681864 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:17:52,609 INFO MainThread:681864 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:17:52,979 INFO MainThread:681864 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:17:53,102 INFO MainThread:681864 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:17:53,102 INFO MainThread:681864 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:17:53,102 INFO MainThread:681864 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:17:53,103 INFO MainThread:681864 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:17:53,105 INFO MainThread:681864 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:17:53,106 INFO MainThread:681864 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None} +2025-12-29 08:17:55,715 INFO wandb-AsyncioManager-main:681864 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:17:55,716 INFO wandb-AsyncioManager-main:681864 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_081959-tvb7bjux/files/output.log b/Meissonic/wandb/run-20251229_081959-tvb7bjux/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..8ae8bc38a482fd85173d65acafd37eb01f9a2199 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081959-tvb7bjux/files/output.log @@ -0,0 +1,8 @@ +12/29/2025 08:20:00 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:20:00 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.27it/s] +'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 3ca70188-ebaa-40b0-a3ff-1473c60ab7d9)')' thrown while requesting HEAD https://huggingface.co/google/umt5-xxl/resolve/main/tokenizer_config.json +12/29/2025 08:20:10 - WARNING - huggingface_hub.utils._http - '(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 3ca70188-ebaa-40b0-a3ff-1473c60ab7d9)')' thrown while requesting HEAD https://huggingface.co/google/umt5-xxl/resolve/main/tokenizer_config.json +Retrying in 1s [Retry 1/5]. +12/29/2025 08:20:10 - WARNING - huggingface_hub.utils._http - Retrying in 1s [Retry 1/5]. +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 diff --git a/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-core.log b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a73b3d1f862e6f7f5e60eb2f28ce0ca4ca08fbb2 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2025-12-29T08:19:59.444483356Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpk2lbu65l/port-683325.txt","pid":683325,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:19:59.445159843Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":683325} +{"time":"2025-12-29T08:19:59.445163741Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-683325-683564-770336178/socket","Net":"unix"}} +{"time":"2025-12-29T08:19:59.630747774Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:19:59.636523927Z","level":"INFO","msg":"handleInformInit: received","streamId":"tvb7bjux","id":"1(@)"} +{"time":"2025-12-29T08:19:59.807596347Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tvb7bjux","id":"1(@)"} +{"time":"2025-12-29T08:20:13.475754205Z","level":"INFO","msg":"server: parent process exited, terminating service process"} diff --git a/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7c8b99e7de4c09070076f06c3df98055002b0550 --- /dev/null +++ b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-12-29T08:19:59.636615677Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:19:59.807402325Z","level":"INFO","msg":"stream: created new stream","id":"tvb7bjux"} +{"time":"2025-12-29T08:19:59.807478253Z","level":"INFO","msg":"handler: started","stream_id":"tvb7bjux"} +{"time":"2025-12-29T08:19:59.807589456Z","level":"INFO","msg":"stream: started","id":"tvb7bjux"} +{"time":"2025-12-29T08:19:59.807608334Z","level":"INFO","msg":"sender: started","stream_id":"tvb7bjux"} +{"time":"2025-12-29T08:19:59.807611249Z","level":"INFO","msg":"writer: started","stream_id":"tvb7bjux"} diff --git a/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..aa9b9f44b99adf27d6303622e4ee5c65252dc99a --- /dev/null +++ b/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log @@ -0,0 +1,22 @@ +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_setup.py:_flush():80] Configure stats pid to 683325 +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug.log +2025-12-29 08:19:59,377 INFO MainThread:683325 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_081959-tvb7bjux/logs/debug-internal.log +2025-12-29 08:19:59,378 INFO MainThread:683325 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:19:59,378 INFO MainThread:683325 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:19:59,378 INFO MainThread:683325 [wandb_init.py:init():889] starting backend +2025-12-29 08:19:59,630 INFO MainThread:683325 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:19:59,635 INFO MainThread:683325 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:19:59,636 INFO MainThread:683325 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:19:59,640 INFO MainThread:683325 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:20:00,091 INFO MainThread:683325 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:20:00,220 INFO MainThread:683325 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:20:00,220 INFO MainThread:683325 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:20:00,220 INFO MainThread:683325 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:20:00,220 INFO MainThread:683325 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:20:00,223 INFO MainThread:683325 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:20:00,224 INFO MainThread:683325 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None} diff --git a/Meissonic/wandb/run-20251229_082208-d5bens3y/files/output.log b/Meissonic/wandb/run-20251229_082208-d5bens3y/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..39982a97e869dead9fde95587284151411fb0921 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082208-d5bens3y/files/output.log @@ -0,0 +1,68 @@ +12/29/2025 08:22:09 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:22:09 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 69.71it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:22:12 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:22:20 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:22:20 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:22:20 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:22:29 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=32, W'=32 +12/29/2025 08:22:29 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:22:29 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:22:29 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:22:46 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:22:46 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:22:48 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:22:49 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:22:49 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:22:49 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:22:49 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:22:49 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set +12/29/2025 08:22:49 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:22:58 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set/metadata.json +12/29/2025 08:22:58 - INFO - train.dataset_utils - Total samples in metadata: unknown +12/29/2025 08:22:58 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 1019957 samples available +12/29/2025 08:22:58 - INFO - train.dataset_utils - Index range: 0 to 1019956 +12/29/2025 08:22:58 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:22:58 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:22:58 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:22:58 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:22:58 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:22:58 - INFO - __main__ - - pin_memory: True +12/29/2025 08:22:58 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-core.log b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..90f51d8a861fbe6415f862c7a5b8eceae842a20a --- /dev/null +++ b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:22:08.633253613Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpfee4vdgx/port-684910.txt","pid":684910,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:22:08.633786607Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":684910} +{"time":"2025-12-29T08:22:08.633765139Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-684910-685159-1258026704/socket","Net":"unix"}} +{"time":"2025-12-29T08:22:08.819292223Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:22:08.826487265Z","level":"INFO","msg":"handleInformInit: received","streamId":"d5bens3y","id":"1(@)"} +{"time":"2025-12-29T08:22:08.995050977Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d5bens3y","id":"1(@)"} +{"time":"2025-12-29T08:23:10.182467655Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:23:10.182531417Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:23:10.182519187Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:23:10.182572054Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:23:10.182609016Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-684910-685159-1258026704/socket","Net":"unix"}} +{"time":"2025-12-29T08:23:10.552208267Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:23:10.552231257Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:23:10.552243636Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cd5442b32dc9f71e208c7471f24a0677a4892a0b --- /dev/null +++ b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:22:08.826738283Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:22:08.994805914Z","level":"INFO","msg":"stream: created new stream","id":"d5bens3y"} +{"time":"2025-12-29T08:22:08.9949314Z","level":"INFO","msg":"handler: started","stream_id":"d5bens3y"} +{"time":"2025-12-29T08:22:08.995043335Z","level":"INFO","msg":"stream: started","id":"d5bens3y"} +{"time":"2025-12-29T08:22:08.995063351Z","level":"INFO","msg":"sender: started","stream_id":"d5bens3y"} +{"time":"2025-12-29T08:22:08.995066887Z","level":"INFO","msg":"writer: started","stream_id":"d5bens3y"} +{"time":"2025-12-29T08:23:10.182529884Z","level":"INFO","msg":"stream: closing","id":"d5bens3y"} +{"time":"2025-12-29T08:23:10.451151782Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:23:10.548788578Z","level":"INFO","msg":"handler: closed","stream_id":"d5bens3y"} +{"time":"2025-12-29T08:23:10.548905656Z","level":"INFO","msg":"sender: closed","stream_id":"d5bens3y"} +{"time":"2025-12-29T08:23:10.548914674Z","level":"INFO","msg":"stream: closed","id":"d5bens3y"} diff --git a/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ab46686e39f1515c1a26d11987e338ef560130eb --- /dev/null +++ b/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_setup.py:_flush():80] Configure stats pid to 684910 +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug.log +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_082208-d5bens3y/logs/debug-internal.log +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:22:08,565 INFO MainThread:684910 [wandb_init.py:init():889] starting backend +2025-12-29 08:22:08,819 INFO MainThread:684910 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:22:08,825 INFO MainThread:684910 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:22:08,826 INFO MainThread:684910 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:22:08,832 INFO MainThread:684910 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:22:09,324 INFO MainThread:684910 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:22:09,503 INFO MainThread:684910 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:22:09,503 INFO MainThread:684910 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:22:09,503 INFO MainThread:684910 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:22:09,503 INFO MainThread:684910 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:22:09,506 INFO MainThread:684910 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:22:09,507 INFO MainThread:684910 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 2, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None} +2025-12-29 08:23:10,182 INFO wandb-AsyncioManager-main:684910 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:23:10,182 INFO wandb-AsyncioManager-main:684910 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_082348-xdcob8vv/files/output.log b/Meissonic/wandb/run-20251229_082348-xdcob8vv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..323627522ed29e7c270525b6b499ffd18202e896 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082348-xdcob8vv/files/output.log @@ -0,0 +1,68 @@ +12/29/2025 08:23:49 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:23:49 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 64.61it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:23:51 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:24:00 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:24:00 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:24:00 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:24:09 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=32, W'=32 +12/29/2025 08:24:09 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:24:09 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:24:09 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:24:26 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:24:26 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:24:28 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:24:29 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:24:29 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:24:29 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:24:29 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:24:29 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set +12/29/2025 08:24:29 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:24:38 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set/metadata.json +12/29/2025 08:24:38 - INFO - train.dataset_utils - Total samples in metadata: unknown +12/29/2025 08:24:38 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 1019957 samples available +12/29/2025 08:24:38 - INFO - train.dataset_utils - Index range: 0 to 1019956 +12/29/2025 08:24:38 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:24:38 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:24:38 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:24:38 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:24:38 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:24:38 - INFO - __main__ - - pin_memory: True +12/29/2025 08:24:38 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1894, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-core.log b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..6d232dc077018fe3497813b0322c761c69cb7d42 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:23:48.48214766Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp_kibi_k0/port-687239.txt","pid":687239,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:23:48.48261065Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":687239} +{"time":"2025-12-29T08:23:48.482606464Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-687239-687548-1521909327/socket","Net":"unix"}} +{"time":"2025-12-29T08:23:48.668396575Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:23:48.67438215Z","level":"INFO","msg":"handleInformInit: received","streamId":"xdcob8vv","id":"1(@)"} +{"time":"2025-12-29T08:23:48.838417506Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xdcob8vv","id":"1(@)"} +{"time":"2025-12-29T08:24:51.064143118Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:24:51.064216028Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:24:51.064271092Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:24:51.064228351Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:24:51.064361726Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-687239-687548-1521909327/socket","Net":"unix"}} +{"time":"2025-12-29T08:24:51.603614002Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:24:51.603644195Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:24:51.603660135Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..6243248beb823cdb5dabab308373c92882bfe60d --- /dev/null +++ b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:23:48.674533717Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:23:48.83819002Z","level":"INFO","msg":"stream: created new stream","id":"xdcob8vv"} +{"time":"2025-12-29T08:23:48.838277887Z","level":"INFO","msg":"handler: started","stream_id":"xdcob8vv"} +{"time":"2025-12-29T08:23:48.838409545Z","level":"INFO","msg":"stream: started","id":"xdcob8vv"} +{"time":"2025-12-29T08:23:48.838424189Z","level":"INFO","msg":"writer: started","stream_id":"xdcob8vv"} +{"time":"2025-12-29T08:23:48.838433456Z","level":"INFO","msg":"sender: started","stream_id":"xdcob8vv"} +{"time":"2025-12-29T08:24:51.064239479Z","level":"INFO","msg":"stream: closing","id":"xdcob8vv"} +{"time":"2025-12-29T08:24:51.333940412Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:24:51.600325077Z","level":"INFO","msg":"handler: closed","stream_id":"xdcob8vv"} +{"time":"2025-12-29T08:24:51.600456594Z","level":"INFO","msg":"sender: closed","stream_id":"xdcob8vv"} +{"time":"2025-12-29T08:24:51.600464276Z","level":"INFO","msg":"stream: closed","id":"xdcob8vv"} diff --git a/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7ce571136b0cd3430cf5bdd549ef51b43f5e2565 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_setup.py:_flush():80] Configure stats pid to 687239 +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug.log +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_082348-xdcob8vv/logs/debug-internal.log +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:23:48,415 INFO MainThread:687239 [wandb_init.py:init():889] starting backend +2025-12-29 08:23:48,668 INFO MainThread:687239 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:23:48,672 INFO MainThread:687239 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:23:48,674 INFO MainThread:687239 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:23:48,678 INFO MainThread:687239 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:23:49,038 INFO MainThread:687239 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:23:49,163 INFO MainThread:687239 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:23:49,163 INFO MainThread:687239 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:23:49,163 INFO MainThread:687239 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:23:49,163 INFO MainThread:687239 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:23:49,166 INFO MainThread:687239 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:23:49,167 INFO MainThread:687239 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 256, 'video_width': 256, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_256_256_full_set', 'empty_embeds_path': None} +2025-12-29 08:24:51,064 INFO wandb-AsyncioManager-main:687239 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:24:51,064 INFO wandb-AsyncioManager-main:687239 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_082735-s2rbngfj/files/output.log b/Meissonic/wandb/run-20251229_082735-s2rbngfj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1ff13263ff5cc1e3b6ed078668555e29ef80488e --- /dev/null +++ b/Meissonic/wandb/run-20251229_082735-s2rbngfj/files/output.log @@ -0,0 +1,68 @@ +12/29/2025 08:27:36 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:27:36 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.71it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:27:38 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:27:39 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:27:39 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:27:39 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:27:40 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:27:40 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:27:40 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:27:40 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:27:57 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:27:57 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:27:59 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:28:01 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:28:01 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:28:01 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:28:01 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:28:01 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:28:01 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:28:02 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:28:02 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:28:02 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:28:02 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:28:02 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:28:02 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:28:02 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:28:02 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:28:02 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:28:02 - INFO - __main__ - - pin_memory: True +12/29/2025 08:28:02 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-core.log b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..71a3bdcbd2227e5db88f0caa0b91c3700782c701 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:27:35.245761137Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpdee2lffa/port-691754.txt","pid":691754,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:27:35.24620287Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":691754} +{"time":"2025-12-29T08:27:35.246206125Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-691754-692079-1616011383/socket","Net":"unix"}} +{"time":"2025-12-29T08:27:35.432196378Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:27:35.439108806Z","level":"INFO","msg":"handleInformInit: received","streamId":"s2rbngfj","id":"1(@)"} +{"time":"2025-12-29T08:27:35.609262249Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"s2rbngfj","id":"1(@)"} +{"time":"2025-12-29T08:28:12.961224171Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:28:12.961266225Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:28:12.961261491Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:28:12.96133838Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:28:12.961339854Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-691754-692079-1616011383/socket","Net":"unix"}} +{"time":"2025-12-29T08:28:13.298524802Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:28:13.298553535Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:28:13.298566342Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-internal.log b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c4849cedead1379d4f95179261c3e920fc221b95 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:27:35.439205158Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:27:35.609062835Z","level":"INFO","msg":"stream: created new stream","id":"s2rbngfj"} +{"time":"2025-12-29T08:27:35.609147625Z","level":"INFO","msg":"handler: started","stream_id":"s2rbngfj"} +{"time":"2025-12-29T08:27:35.609255565Z","level":"INFO","msg":"stream: started","id":"s2rbngfj"} +{"time":"2025-12-29T08:27:35.609272925Z","level":"INFO","msg":"sender: started","stream_id":"s2rbngfj"} +{"time":"2025-12-29T08:27:35.609272978Z","level":"INFO","msg":"writer: started","stream_id":"s2rbngfj"} +{"time":"2025-12-29T08:28:12.961275825Z","level":"INFO","msg":"stream: closing","id":"s2rbngfj"} +{"time":"2025-12-29T08:28:13.200189708Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:28:13.295236948Z","level":"INFO","msg":"handler: closed","stream_id":"s2rbngfj"} +{"time":"2025-12-29T08:28:13.295341299Z","level":"INFO","msg":"sender: closed","stream_id":"s2rbngfj"} +{"time":"2025-12-29T08:28:13.295354628Z","level":"INFO","msg":"stream: closed","id":"s2rbngfj"} diff --git a/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug.log b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8ce6cbb8da07d04a1823722fbc4945c739fbb850 --- /dev/null +++ b/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_setup.py:_flush():80] Configure stats pid to 691754 +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug.log +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_082735-s2rbngfj/logs/debug-internal.log +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:27:35,179 INFO MainThread:691754 [wandb_init.py:init():889] starting backend +2025-12-29 08:27:35,432 INFO MainThread:691754 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:27:35,436 INFO MainThread:691754 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:27:35,438 INFO MainThread:691754 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:27:35,442 INFO MainThread:691754 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:27:35,917 INFO MainThread:691754 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:27:36,041 INFO MainThread:691754 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:27:36,041 INFO MainThread:691754 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:27:36,041 INFO MainThread:691754 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:27:36,041 INFO MainThread:691754 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:27:36,044 INFO MainThread:691754 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:27:36,045 INFO MainThread:691754 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:28:12,961 INFO wandb-AsyncioManager-main:691754 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:28:12,961 INFO wandb-AsyncioManager-main:691754 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_083018-js6dhqj8/files/output.log b/Meissonic/wandb/run-20251229_083018-js6dhqj8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3eed617e95f3d055c59856bc4d58feab5acfcab3 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083018-js6dhqj8/files/output.log @@ -0,0 +1,68 @@ +12/29/2025 08:30:18 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:30:18 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 69.77it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:30:21 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:30:22 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:30:22 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:30:22 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:30:23 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:30:23 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:30:23 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:30:23 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:30:39 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:30:39 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:30:41 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:30:42 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:30:42 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:30:42 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:30:42 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:30:42 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:30:42 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:30:43 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:30:43 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:30:43 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:30:43 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:30:43 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=False +12/29/2025 08:30:43 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:30:43 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:30:43 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:30:43 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:30:43 - INFO - __main__ - - pin_memory: True +12/29/2025 08:30:43 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1909, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1323, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-core.log b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..5075f5c822416f3a0d84fab3d4252c198a28f732 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:30:18.19383107Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmprpotuioa/port-695162.txt","pid":695162,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:30:18.194378395Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":695162} +{"time":"2025-12-29T08:30:18.194358104Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-695162-695475-3659479336/socket","Net":"unix"}} +{"time":"2025-12-29T08:30:18.379200582Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:30:18.385679058Z","level":"INFO","msg":"handleInformInit: received","streamId":"js6dhqj8","id":"1(@)"} +{"time":"2025-12-29T08:30:18.551204248Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"js6dhqj8","id":"1(@)"} +{"time":"2025-12-29T08:30:54.957774704Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:30:54.957835146Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:30:54.957828184Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:30:54.957925611Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:30:54.957928769Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-695162-695475-3659479336/socket","Net":"unix"}} +{"time":"2025-12-29T08:30:55.341249263Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:30:55.341274333Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:30:55.341287876Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-internal.log b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..5d25ef15001e75e9f1875a713128525828223dc2 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:30:18.385774672Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:30:18.550993921Z","level":"INFO","msg":"stream: created new stream","id":"js6dhqj8"} +{"time":"2025-12-29T08:30:18.551083173Z","level":"INFO","msg":"handler: started","stream_id":"js6dhqj8"} +{"time":"2025-12-29T08:30:18.551197295Z","level":"INFO","msg":"stream: started","id":"js6dhqj8"} +{"time":"2025-12-29T08:30:18.551220048Z","level":"INFO","msg":"sender: started","stream_id":"js6dhqj8"} +{"time":"2025-12-29T08:30:18.551221222Z","level":"INFO","msg":"writer: started","stream_id":"js6dhqj8"} +{"time":"2025-12-29T08:30:54.95784194Z","level":"INFO","msg":"stream: closing","id":"js6dhqj8"} +{"time":"2025-12-29T08:30:55.242974044Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:30:55.338199862Z","level":"INFO","msg":"handler: closed","stream_id":"js6dhqj8"} +{"time":"2025-12-29T08:30:55.338318233Z","level":"INFO","msg":"sender: closed","stream_id":"js6dhqj8"} +{"time":"2025-12-29T08:30:55.338325476Z","level":"INFO","msg":"stream: closed","id":"js6dhqj8"} diff --git a/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug.log b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..de4fa96dd9e9a0a2401d272389479fe7291e966d --- /dev/null +++ b/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_setup.py:_flush():80] Configure stats pid to 695162 +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug.log +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_083018-js6dhqj8/logs/debug-internal.log +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:30:18,124 INFO MainThread:695162 [wandb_init.py:init():889] starting backend +2025-12-29 08:30:18,379 INFO MainThread:695162 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:30:18,384 INFO MainThread:695162 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:30:18,385 INFO MainThread:695162 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:30:18,390 INFO MainThread:695162 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:30:18,721 INFO MainThread:695162 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:30:18,845 INFO MainThread:695162 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:30:18,845 INFO MainThread:695162 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:30:18,845 INFO MainThread:695162 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:30:18,845 INFO MainThread:695162 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:30:18,848 INFO MainThread:695162 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:30:18,849 INFO MainThread:695162 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:30:54,958 INFO wandb-AsyncioManager-main:695162 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:30:54,958 INFO wandb-AsyncioManager-main:695162 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_083518-un2j6o0e/files/output.log b/Meissonic/wandb/run-20251229_083518-un2j6o0e/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..12fc6947a55849c2028f7064706968359b1ed53c --- /dev/null +++ b/Meissonic/wandb/run-20251229_083518-un2j6o0e/files/output.log @@ -0,0 +1,69 @@ +12/29/2025 08:35:19 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:35:19 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 67.92it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:35:21 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:35:22 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:35:22 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:35:22 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:35:23 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:35:23 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:35:23 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:35:23 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:35:40 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:35:40 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:35:43 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:35:45 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 08:35:45 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:35:45 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:35:45 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:35:45 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:35:45 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:35:45 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:35:46 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:35:46 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:35:46 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:35:46 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:35:46 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:35:46 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:35:46 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:35:46 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:35:46 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:35:46 - INFO - __main__ - - pin_memory: True +12/29/2025 08:35:46 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1327, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1327, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-core.log b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..1baed14754f54fa528d9d773c21bfbab904ee291 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:35:18.669804537Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpmjlxkgmy/port-700229.txt","pid":700229,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:35:18.671072978Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":700229} +{"time":"2025-12-29T08:35:18.671028609Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-700229-700557-798898693/socket","Net":"unix"}} +{"time":"2025-12-29T08:35:18.855985989Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:35:18.862380161Z","level":"INFO","msg":"handleInformInit: received","streamId":"un2j6o0e","id":"1(@)"} +{"time":"2025-12-29T08:35:19.032749634Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"un2j6o0e","id":"1(@)"} +{"time":"2025-12-29T08:35:57.8066095Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:35:57.80665294Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:35:57.806712552Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:35:57.806670814Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:35:57.806780256Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-700229-700557-798898693/socket","Net":"unix"}} +{"time":"2025-12-29T08:35:58.176314955Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:35:58.17634724Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:35:58.176362505Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-internal.log b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..309a8678528063901d3f0dda2201bcbdc37da441 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:35:18.862471385Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:35:19.032576744Z","level":"INFO","msg":"stream: created new stream","id":"un2j6o0e"} +{"time":"2025-12-29T08:35:19.032657922Z","level":"INFO","msg":"handler: started","stream_id":"un2j6o0e"} +{"time":"2025-12-29T08:35:19.032743048Z","level":"INFO","msg":"stream: started","id":"un2j6o0e"} +{"time":"2025-12-29T08:35:19.032756959Z","level":"INFO","msg":"writer: started","stream_id":"un2j6o0e"} +{"time":"2025-12-29T08:35:19.032757402Z","level":"INFO","msg":"sender: started","stream_id":"un2j6o0e"} +{"time":"2025-12-29T08:35:57.806676199Z","level":"INFO","msg":"stream: closing","id":"un2j6o0e"} +{"time":"2025-12-29T08:35:58.061669625Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:35:58.173047721Z","level":"INFO","msg":"handler: closed","stream_id":"un2j6o0e"} +{"time":"2025-12-29T08:35:58.1731724Z","level":"INFO","msg":"sender: closed","stream_id":"un2j6o0e"} +{"time":"2025-12-29T08:35:58.173185017Z","level":"INFO","msg":"stream: closed","id":"un2j6o0e"} diff --git a/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug.log b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..28098a346ee1e4e8ddca4766c53f57378f331508 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_setup.py:_flush():80] Configure stats pid to 700229 +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug.log +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_083518-un2j6o0e/logs/debug-internal.log +2025-12-29 08:35:18,602 INFO MainThread:700229 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:35:18,603 INFO MainThread:700229 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:35:18,603 INFO MainThread:700229 [wandb_init.py:init():889] starting backend +2025-12-29 08:35:18,856 INFO MainThread:700229 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:35:18,860 INFO MainThread:700229 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:35:18,862 INFO MainThread:700229 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:35:18,867 INFO MainThread:700229 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:35:19,215 INFO MainThread:700229 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:35:19,339 INFO MainThread:700229 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:35:19,339 INFO MainThread:700229 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:35:19,339 INFO MainThread:700229 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:35:19,339 INFO MainThread:700229 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:35:19,342 INFO MainThread:700229 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:35:19,343 INFO MainThread:700229 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 4, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:35:57,806 INFO wandb-AsyncioManager-main:700229 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:35:57,807 INFO wandb-AsyncioManager-main:700229 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_083628-ef52280g/files/output.log b/Meissonic/wandb/run-20251229_083628-ef52280g/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f4e0c0034174350bc78622b2348700746b8af21c --- /dev/null +++ b/Meissonic/wandb/run-20251229_083628-ef52280g/files/output.log @@ -0,0 +1,69 @@ +12/29/2025 08:36:29 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:36:29 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.79it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:36:31 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:36:32 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:36:32 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:36:32 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:36:33 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:36:33 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:36:33 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:36:33 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:36:50 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:36:50 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:36:52 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:36:53 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 08:36:53 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:36:53 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:36:53 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:36:53 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:36:53 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:36:53 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:36:54 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:36:54 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:36:54 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:36:54 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:36:54 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:36:54 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:36:54 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:36:54 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:36:54 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:36:54 - INFO - __main__ - - pin_memory: True +12/29/2025 08:36:54 - INFO - __main__ - Preparing model, optimizer and dataloaders +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1327, in main + model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare + result = tuple( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one + return self.prepare_model(obj, device_placement=device_placement) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model + model = torch.nn.parallel.DistributedDataParallel( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ + self._ddp_init_helper( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper + self.reducer = dist.Reducer( +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1327, in main +[rank0]: model, optimizer, lr_scheduler, train_dataloader, text_encoder = accelerator.prepare( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1559, in prepare +[rank0]: result = tuple( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1560, in +[rank0]: self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1402, in _prepare_one +[rank0]: return self.prepare_model(obj, device_placement=device_placement) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/accelerate/accelerator.py", line 1847, in prepare_model +[rank0]: model = torch.nn.parallel.DistributedDataParallel( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 873, in __init__ +[rank0]: self._ddp_init_helper( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1222, in _ddp_init_helper +[rank0]: self.reducer = dist.Reducer( +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 21.16 GiB. GPU 0 has a total capacity of 39.49 GiB of which 11.55 GiB is free. Including non-PyTorch memory, this process has 27.93 GiB memory in use. Of the allocated memory 26.46 GiB is allocated by PyTorch, and 411.03 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-core.log b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..b41d5e856d7168be59b19465350115e8ccf2eb16 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:36:28.536877643Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp9u9wg_8z/port-701994.txt","pid":701994,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:36:28.537375389Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":701994} +{"time":"2025-12-29T08:36:28.537388627Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-701994-702315-1281163488/socket","Net":"unix"}} +{"time":"2025-12-29T08:36:28.723772999Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:36:28.731298642Z","level":"INFO","msg":"handleInformInit: received","streamId":"ef52280g","id":"1(@)"} +{"time":"2025-12-29T08:36:28.900400727Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ef52280g","id":"1(@)"} +{"time":"2025-12-29T08:37:06.33833385Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:37:06.338376643Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:37:06.338425258Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:37:06.338389105Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:37:06.338489111Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-701994-702315-1281163488/socket","Net":"unix"}} +{"time":"2025-12-29T08:37:06.748139561Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:37:06.748167538Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:37:06.748179385Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-internal.log b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0458f1ce2ab9276c24f861bd9ee91289a085b299 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:36:28.73138977Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:36:28.900166033Z","level":"INFO","msg":"stream: created new stream","id":"ef52280g"} +{"time":"2025-12-29T08:36:28.900260047Z","level":"INFO","msg":"handler: started","stream_id":"ef52280g"} +{"time":"2025-12-29T08:36:28.900392164Z","level":"INFO","msg":"stream: started","id":"ef52280g"} +{"time":"2025-12-29T08:36:28.900398395Z","level":"INFO","msg":"writer: started","stream_id":"ef52280g"} +{"time":"2025-12-29T08:36:28.900414836Z","level":"INFO","msg":"sender: started","stream_id":"ef52280g"} +{"time":"2025-12-29T08:37:06.338388699Z","level":"INFO","msg":"stream: closing","id":"ef52280g"} +{"time":"2025-12-29T08:37:06.629527075Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:37:06.744404731Z","level":"INFO","msg":"handler: closed","stream_id":"ef52280g"} +{"time":"2025-12-29T08:37:06.744665555Z","level":"INFO","msg":"sender: closed","stream_id":"ef52280g"} +{"time":"2025-12-29T08:37:06.744677521Z","level":"INFO","msg":"stream: closed","id":"ef52280g"} diff --git a/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug.log b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..bc7303afdb69474ac35265d530d59e8ff474cbe9 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_setup.py:_flush():80] Configure stats pid to 701994 +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug.log +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_083628-ef52280g/logs/debug-internal.log +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:36:28,444 INFO MainThread:701994 [wandb_init.py:init():889] starting backend +2025-12-29 08:36:28,724 INFO MainThread:701994 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:36:28,729 INFO MainThread:701994 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:36:28,732 INFO MainThread:701994 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:36:28,738 INFO MainThread:701994 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:36:29,098 INFO MainThread:701994 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:36:29,289 INFO MainThread:701994 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:36:29,289 INFO MainThread:701994 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:36:29,289 INFO MainThread:701994 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:36:29,289 INFO MainThread:701994 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:36:29,292 INFO MainThread:701994 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:36:29,293 INFO MainThread:701994 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:37:06,338 INFO wandb-AsyncioManager-main:701994 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:37:06,338 INFO wandb-AsyncioManager-main:701994 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_083809-sx4rkgm3/files/output.log b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c65bb5cbfb74f75f11fc07da0a33cffba14d6061 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/files/output.log @@ -0,0 +1,90 @@ +12/29/2025 08:38:10 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:38:10 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 67.01it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:38:12 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:38:13 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:38:13 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:38:13 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:38:14 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:38:14 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:38:14 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:38:14 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:38:30 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:38:30 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:38:32 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:38:35 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 08:38:35 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:38:35 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:38:35 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:38:35 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:38:35 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:38:35 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:38:36 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:38:36 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:38:36 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:38:36 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:38:36 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:38:36 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:38:36 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:38:36 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:38:36 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:38:36 - INFO - __main__ - - pin_memory: True +12/29/2025 08:38:36 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 08:38:37 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1364, in main + empty_embeds, _, _ = encode_prompt( + File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt + outputs = text_encoder(input_ids=input_ids, return_dict=True) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward + encoder_outputs = self.encoder( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward + inputs_embeds = self.embed_tokens(input_ids) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1364, in main +[rank0]: empty_embeds, _, _ = encode_prompt( +[rank0]: File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt +[rank0]: outputs = text_encoder(input_ids=input_ids, return_dict=True) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward +[rank0]: encoder_outputs = self.encoder( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward +[rank0]: inputs_embeds = self.embed_tokens(input_ids) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward +[rank0]: return F.embedding( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding +[rank0]: return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +[rank0]: RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) diff --git a/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-core.log b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..ae7e4080d12e6bbfda1d9cadee2111d02469e631 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:38:09.236037439Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmps8z2auub/port-704541.txt","pid":704541,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:38:09.236506322Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":704541} +{"time":"2025-12-29T08:38:09.236524415Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-704541-704800-3705438608/socket","Net":"unix"}} +{"time":"2025-12-29T08:38:09.423135533Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:38:09.428824813Z","level":"INFO","msg":"handleInformInit: received","streamId":"sx4rkgm3","id":"1(@)"} +{"time":"2025-12-29T08:38:09.895698754Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"sx4rkgm3","id":"1(@)"} +{"time":"2025-12-29T08:38:37.913794586Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:38:37.913836926Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:38:37.913834141Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:38:37.913915304Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:38:37.913910854Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-704541-704800-3705438608/socket","Net":"unix"}} +{"time":"2025-12-29T08:38:38.35356139Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:38:38.353594589Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:38:38.353606603Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-internal.log b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1ddd70aa15376cf7967ff38a9fc587f09e21ee54 --- /dev/null +++ b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:38:09.42892184Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:38:09.895427866Z","level":"INFO","msg":"stream: created new stream","id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:09.895510771Z","level":"INFO","msg":"handler: started","stream_id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:09.895692268Z","level":"INFO","msg":"stream: started","id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:09.895715655Z","level":"INFO","msg":"sender: started","stream_id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:09.895717039Z","level":"INFO","msg":"writer: started","stream_id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:37.913847974Z","level":"INFO","msg":"stream: closing","id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:38.200326036Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:38:38.349952553Z","level":"INFO","msg":"handler: closed","stream_id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:38.350079149Z","level":"INFO","msg":"sender: closed","stream_id":"sx4rkgm3"} +{"time":"2025-12-29T08:38:38.350088198Z","level":"INFO","msg":"stream: closed","id":"sx4rkgm3"} diff --git a/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug.log b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5bd21ab6e8d3fa13b759e1b31c85886949bcb82a --- /dev/null +++ b/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_setup.py:_flush():80] Configure stats pid to 704541 +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:38:09,170 INFO MainThread:704541 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug.log +2025-12-29 08:38:09,171 INFO MainThread:704541 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_083809-sx4rkgm3/logs/debug-internal.log +2025-12-29 08:38:09,171 INFO MainThread:704541 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:38:09,171 INFO MainThread:704541 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:38:09,171 INFO MainThread:704541 [wandb_init.py:init():889] starting backend +2025-12-29 08:38:09,423 INFO MainThread:704541 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:38:09,427 INFO MainThread:704541 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:38:09,428 INFO MainThread:704541 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:38:09,433 INFO MainThread:704541 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:38:10,152 INFO MainThread:704541 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:38:10,278 INFO MainThread:704541 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:38:10,279 INFO MainThread:704541 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:38:10,279 INFO MainThread:704541 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:38:10,279 INFO MainThread:704541 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:38:10,283 INFO MainThread:704541 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:38:10,284 INFO MainThread:704541 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:38:37,913 INFO wandb-AsyncioManager-main:704541 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:38:37,913 INFO wandb-AsyncioManager-main:704541 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_084520-fzi541le/files/output.log b/Meissonic/wandb/run-20251229_084520-fzi541le/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2d5260c0ad109d4f39106a8a667343932a53252d --- /dev/null +++ b/Meissonic/wandb/run-20251229_084520-fzi541le/files/output.log @@ -0,0 +1,14 @@ +12/29/2025 08:45:21 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:45:21 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 547, in main + text_encoder = T5EncoderModel.from_pretrained(model_id, device_map="auto", torch_dtype=weight_dtype) +UnboundLocalError: local variable 'weight_dtype' referenced before assignment +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1917, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 547, in main +[rank0]: text_encoder = T5EncoderModel.from_pretrained(model_id, device_map="auto", torch_dtype=weight_dtype) +[rank0]: UnboundLocalError: local variable 'weight_dtype' referenced before assignment diff --git a/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-core.log b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..87c788df0849d844288e8a35fc92ef4917c3236a --- /dev/null +++ b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:45:20.687331485Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp8cdws4p7/port-711311.txt","pid":711311,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:45:20.687843645Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":711311} +{"time":"2025-12-29T08:45:20.687837529Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-711311-711617-3743846482/socket","Net":"unix"}} +{"time":"2025-12-29T08:45:20.874027867Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:45:20.880383207Z","level":"INFO","msg":"handleInformInit: received","streamId":"fzi541le","id":"1(@)"} +{"time":"2025-12-29T08:45:21.0483966Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fzi541le","id":"1(@)"} +{"time":"2025-12-29T08:45:21.432148425Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:45:21.432196222Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:45:21.432184954Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:45:21.432256701Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:45:21.432299933Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-711311-711617-3743846482/socket","Net":"unix"}} +{"time":"2025-12-29T08:45:21.921126786Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:45:21.921151565Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:45:21.921166978Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-internal.log b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3a62ae9b393c3889664b9680102b86ff430fd2e3 --- /dev/null +++ b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:45:20.880473832Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:45:21.048211618Z","level":"INFO","msg":"stream: created new stream","id":"fzi541le"} +{"time":"2025-12-29T08:45:21.048297098Z","level":"INFO","msg":"handler: started","stream_id":"fzi541le"} +{"time":"2025-12-29T08:45:21.048388634Z","level":"INFO","msg":"stream: started","id":"fzi541le"} +{"time":"2025-12-29T08:45:21.048404229Z","level":"INFO","msg":"writer: started","stream_id":"fzi541le"} +{"time":"2025-12-29T08:45:21.048405125Z","level":"INFO","msg":"sender: started","stream_id":"fzi541le"} +{"time":"2025-12-29T08:45:21.432187704Z","level":"INFO","msg":"stream: closing","id":"fzi541le"} +{"time":"2025-12-29T08:45:21.795260371Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:45:21.916946792Z","level":"INFO","msg":"handler: closed","stream_id":"fzi541le"} +{"time":"2025-12-29T08:45:21.91703998Z","level":"INFO","msg":"sender: closed","stream_id":"fzi541le"} +{"time":"2025-12-29T08:45:21.917046398Z","level":"INFO","msg":"stream: closed","id":"fzi541le"} diff --git a/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug.log b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..653262a380e800ce2ef76beeefe07d845b05a8a9 --- /dev/null +++ b/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_setup.py:_flush():80] Configure stats pid to 711311 +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug.log +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_084520-fzi541le/logs/debug-internal.log +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:45:20,619 INFO MainThread:711311 [wandb_init.py:init():889] starting backend +2025-12-29 08:45:20,874 INFO MainThread:711311 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:45:20,878 INFO MainThread:711311 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:45:20,880 INFO MainThread:711311 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:45:20,884 INFO MainThread:711311 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:45:21,294 INFO MainThread:711311 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:45:21,421 INFO MainThread:711311 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:45:21,421 INFO MainThread:711311 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:45:21,421 INFO MainThread:711311 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:45:21,421 INFO MainThread:711311 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:45:21,424 INFO MainThread:711311 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:45:21,425 INFO MainThread:711311 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:45:21,432 INFO wandb-AsyncioManager-main:711311 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:45:21,432 INFO wandb-AsyncioManager-main:711311 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_084618-2l6k4nad/files/output.log b/Meissonic/wandb/run-20251229_084618-2l6k4nad/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3590a0c80a72bcb8fc2d29e214ce8d8cc05b16d0 --- /dev/null +++ b/Meissonic/wandb/run-20251229_084618-2l6k4nad/files/output.log @@ -0,0 +1,15 @@ +12/29/2025 08:46:20 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:46:20 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +12/29/2025 08:46:27 - INFO - accelerate.utils.modeling - Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory: + - 0: 23495245824 bytes required + - 1: 22723452928 bytes required + - 2: 22723452928 bytes required + - 3: 22723452928 bytes required + - 4: 22723452928 bytes required + - 5: 22723452928 bytes required + - 6: 22723452928 bytes required + - 7: 22723452928 bytes required +These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config. +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 66.85it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:46:29 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) diff --git a/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-core.log b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..858fb1b83d99a25c6a8538ffc2ac3680cf349910 --- /dev/null +++ b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2025-12-29T08:46:19.020019382Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp16p2v6i5/port-712532.txt","pid":712532,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:46:19.020510031Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":712532} +{"time":"2025-12-29T08:46:19.020505669Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-712532-712837-879851373/socket","Net":"unix"}} +{"time":"2025-12-29T08:46:19.207722341Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:46:19.213723921Z","level":"INFO","msg":"handleInformInit: received","streamId":"2l6k4nad","id":"1(@)"} +{"time":"2025-12-29T08:46:20.200266224Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"2l6k4nad","id":"1(@)"} +{"time":"2025-12-29T08:46:34.656595059Z","level":"INFO","msg":"server: parent process exited, terminating service process"} diff --git a/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-internal.log b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1151bdfee2ffc6d578265b53b4db942155f8d8f8 --- /dev/null +++ b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-12-29T08:46:19.213812841Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:46:20.200060528Z","level":"INFO","msg":"stream: created new stream","id":"2l6k4nad"} +{"time":"2025-12-29T08:46:20.200140489Z","level":"INFO","msg":"handler: started","stream_id":"2l6k4nad"} +{"time":"2025-12-29T08:46:20.200259398Z","level":"INFO","msg":"stream: started","id":"2l6k4nad"} +{"time":"2025-12-29T08:46:20.200280025Z","level":"INFO","msg":"sender: started","stream_id":"2l6k4nad"} +{"time":"2025-12-29T08:46:20.200284369Z","level":"INFO","msg":"writer: started","stream_id":"2l6k4nad"} diff --git a/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug.log b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..accce1cc61c5eaafb2c0ff315197e7d51783989e --- /dev/null +++ b/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug.log @@ -0,0 +1,22 @@ +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_setup.py:_flush():80] Configure stats pid to 712532 +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug.log +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_084618-2l6k4nad/logs/debug-internal.log +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:46:18,955 INFO MainThread:712532 [wandb_init.py:init():889] starting backend +2025-12-29 08:46:19,207 INFO MainThread:712532 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:46:19,212 INFO MainThread:712532 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:46:19,213 INFO MainThread:712532 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:46:19,218 INFO MainThread:712532 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:46:20,427 INFO MainThread:712532 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:46:20,552 INFO MainThread:712532 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:46:20,552 INFO MainThread:712532 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:46:20,552 INFO MainThread:712532 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:46:20,552 INFO MainThread:712532 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:46:20,555 INFO MainThread:712532 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:46:20,556 INFO MainThread:712532 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} diff --git a/Meissonic/wandb/run-20251229_085306-rfncwmtb/files/output.log b/Meissonic/wandb/run-20251229_085306-rfncwmtb/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e3b2205bcb5f4620c1f11a124fe45e313ed32243 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085306-rfncwmtb/files/output.log @@ -0,0 +1,15 @@ +12/29/2025 08:53:07 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:53:07 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +12/29/2025 08:53:14 - INFO - accelerate.utils.modeling - Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory: + - 0: 23495245824 bytes required + - 1: 22723452928 bytes required + - 2: 22723452928 bytes required + - 3: 22723452928 bytes required + - 4: 22723452928 bytes required + - 5: 22723452928 bytes required + - 6: 22723452928 bytes required + - 7: 22723452928 bytes required +These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config. +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 70.90it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:53:17 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) diff --git a/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-core.log b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c4279641cb0aac50451325086c3bc2898b650449 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2025-12-29T08:53:06.413986452Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpjmw009bo/port-719769.txt","pid":719769,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:53:06.414591904Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":719769} +{"time":"2025-12-29T08:53:06.414574148Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-719769-720130-1976255648/socket","Net":"unix"}} +{"time":"2025-12-29T08:53:06.60155787Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:53:06.60754625Z","level":"INFO","msg":"handleInformInit: received","streamId":"rfncwmtb","id":"1(@)"} +{"time":"2025-12-29T08:53:07.276887536Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"rfncwmtb","id":"1(@)"} +{"time":"2025-12-29T08:53:20.547342746Z","level":"INFO","msg":"server: parent process exited, terminating service process"} diff --git a/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-internal.log b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4a952b822c9bc16d46218b9e8ab2c0b21b6ed542 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2025-12-29T08:53:06.607635319Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:53:07.276673747Z","level":"INFO","msg":"stream: created new stream","id":"rfncwmtb"} +{"time":"2025-12-29T08:53:07.276751985Z","level":"INFO","msg":"handler: started","stream_id":"rfncwmtb"} +{"time":"2025-12-29T08:53:07.276879493Z","level":"INFO","msg":"stream: started","id":"rfncwmtb"} +{"time":"2025-12-29T08:53:07.276894876Z","level":"INFO","msg":"writer: started","stream_id":"rfncwmtb"} +{"time":"2025-12-29T08:53:07.276904004Z","level":"INFO","msg":"sender: started","stream_id":"rfncwmtb"} diff --git a/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug.log b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..469bc7f8107e8d3559f85d7571b7344d866b20fc --- /dev/null +++ b/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug.log @@ -0,0 +1,22 @@ +2025-12-29 08:53:06,347 INFO MainThread:719769 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_setup.py:_flush():80] Configure stats pid to 719769 +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug.log +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_085306-rfncwmtb/logs/debug-internal.log +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:53:06,348 INFO MainThread:719769 [wandb_init.py:init():889] starting backend +2025-12-29 08:53:06,601 INFO MainThread:719769 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:53:06,606 INFO MainThread:719769 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:53:06,607 INFO MainThread:719769 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:53:06,611 INFO MainThread:719769 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:53:07,453 INFO MainThread:719769 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:53:07,576 INFO MainThread:719769 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:53:07,576 INFO MainThread:719769 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:53:07,577 INFO MainThread:719769 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:53:07,577 INFO MainThread:719769 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:53:07,579 INFO MainThread:719769 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:53:07,580 INFO MainThread:719769 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} diff --git a/Meissonic/wandb/run-20251229_085719-9ezk0nqn/files/output.log b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d9a447a67ac4837e7fbd6438190a3263218d3052 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/files/output.log @@ -0,0 +1,90 @@ +12/29/2025 08:57:19 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 08:57:19 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 67.64it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 08:57:22 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 08:57:23 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 08:57:23 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 08:57:23 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 08:57:24 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 08:57:24 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 08:57:24 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:57:24 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 08:57:40 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 08:57:40 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 08:57:42 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 08:57:44 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 08:57:44 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 08:57:44 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 08:57:44 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 08:57:44 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 08:57:44 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 08:57:44 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 08:57:45 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 08:57:45 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 08:57:45 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 08:57:45 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 08:57:45 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 08:57:45 - INFO - __main__ - Dataloader configuration: +12/29/2025 08:57:45 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 08:57:45 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 08:57:45 - INFO - __main__ - - persistent_workers: True +12/29/2025 08:57:45 - INFO - __main__ - - pin_memory: True +12/29/2025 08:57:45 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 08:57:47 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1919, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1360, in main + empty_embeds, _, _ = encode_prompt( + File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt + outputs = text_encoder(input_ids=input_ids, return_dict=True) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward + encoder_outputs = self.encoder( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward + inputs_embeds = self.embed_tokens(input_ids) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1919, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1360, in main +[rank0]: empty_embeds, _, _ = encode_prompt( +[rank0]: File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt +[rank0]: outputs = text_encoder(input_ids=input_ids, return_dict=True) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward +[rank0]: encoder_outputs = self.encoder( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward +[rank0]: inputs_embeds = self.embed_tokens(input_ids) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward +[rank0]: return F.embedding( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding +[rank0]: return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +[rank0]: RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) diff --git a/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-core.log b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..ee15eeeae544fd84b8355b15fe9717295d2af9f6 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T08:57:19.235790771Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmprvixo680/port-724045.txt","pid":724045,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T08:57:19.236323841Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":724045} +{"time":"2025-12-29T08:57:19.236281607Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-724045-724356-142602013/socket","Net":"unix"}} +{"time":"2025-12-29T08:57:19.423198425Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T08:57:19.429361633Z","level":"INFO","msg":"handleInformInit: received","streamId":"9ezk0nqn","id":"1(@)"} +{"time":"2025-12-29T08:57:19.600195554Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"9ezk0nqn","id":"1(@)"} +{"time":"2025-12-29T08:57:47.993839332Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T08:57:47.99391296Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T08:57:47.993903668Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T08:57:47.994874875Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T08:57:47.994765402Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-724045-724356-142602013/socket","Net":"unix"}} +{"time":"2025-12-29T08:57:48.354882161Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T08:57:48.354902195Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T08:57:48.354910234Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-internal.log b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..06f985ace1b134b755cb5977dedf5adabd2437a0 --- /dev/null +++ b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T08:57:19.429447645Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T08:57:19.600020546Z","level":"INFO","msg":"stream: created new stream","id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:19.600096046Z","level":"INFO","msg":"handler: started","stream_id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:19.600186404Z","level":"INFO","msg":"stream: started","id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:19.600202081Z","level":"INFO","msg":"writer: started","stream_id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:19.60020169Z","level":"INFO","msg":"sender: started","stream_id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:47.99391733Z","level":"INFO","msg":"stream: closing","id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:48.23929645Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T08:57:48.351448233Z","level":"INFO","msg":"handler: closed","stream_id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:48.351541906Z","level":"INFO","msg":"sender: closed","stream_id":"9ezk0nqn"} +{"time":"2025-12-29T08:57:48.351548009Z","level":"INFO","msg":"stream: closed","id":"9ezk0nqn"} diff --git a/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug.log b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..45a1608e6083435739981e3e8918340855386caf --- /dev/null +++ b/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_setup.py:_flush():80] Configure stats pid to 724045 +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug.log +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_085719-9ezk0nqn/logs/debug-internal.log +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_init.py:init():841] calling init triggers +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 08:57:19,170 INFO MainThread:724045 [wandb_init.py:init():889] starting backend +2025-12-29 08:57:19,423 INFO MainThread:724045 [wandb_init.py:init():892] sending inform_init request +2025-12-29 08:57:19,427 INFO MainThread:724045 [wandb_init.py:init():900] backend started and connected +2025-12-29 08:57:19,430 INFO MainThread:724045 [wandb_init.py:init():970] updated telemetry +2025-12-29 08:57:19,436 INFO MainThread:724045 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 08:57:19,802 INFO MainThread:724045 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 08:57:19,925 INFO MainThread:724045 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 08:57:19,925 INFO MainThread:724045 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 08:57:19,925 INFO MainThread:724045 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 08:57:19,925 INFO MainThread:724045 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 08:57:19,928 INFO MainThread:724045 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 08:57:19,929 INFO MainThread:724045 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 08:57:47,993 INFO wandb-AsyncioManager-main:724045 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 08:57:47,994 INFO wandb-AsyncioManager-main:724045 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_090331-alguiic1/files/output.log b/Meissonic/wandb/run-20251229_090331-alguiic1/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b26c9a9df7bf36eab1c3d6794f7858abda092b3e --- /dev/null +++ b/Meissonic/wandb/run-20251229_090331-alguiic1/files/output.log @@ -0,0 +1,90 @@ +12/29/2025 09:03:32 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:03:32 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 69.13it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:03:34 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:03:35 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:03:35 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:03:35 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:03:36 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:03:36 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:03:36 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:03:36 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:03:52 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:03:52 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:03:53 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:03:55 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:03:55 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:03:55 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:03:55 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:03:55 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:03:55 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:03:55 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:03:56 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:03:56 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:03:56 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:03:56 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:03:56 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:03:56 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:03:56 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:03:56 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:03:56 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:03:56 - INFO - __main__ - - pin_memory: True +12/29/2025 09:03:56 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:03:59 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1921, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1362, in main + empty_embeds, _, _ = encode_prompt( + File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt + outputs = text_encoder(input_ids=input_ids, return_dict=True) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward + encoder_outputs = self.encoder( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward + inputs_embeds = self.embed_tokens(input_ids) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1921, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1362, in main +[rank0]: empty_embeds, _, _ = encode_prompt( +[rank0]: File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt +[rank0]: outputs = text_encoder(input_ids=input_ids, return_dict=True) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward +[rank0]: encoder_outputs = self.encoder( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward +[rank0]: inputs_embeds = self.embed_tokens(input_ids) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward +[rank0]: return F.embedding( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding +[rank0]: return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +[rank0]: RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) diff --git a/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-core.log b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..68205ab142c76a8feca5c551424651de02ddcf4a --- /dev/null +++ b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:03:31.34237597Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpglfm_ac2/port-729967.txt","pid":729967,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:03:31.342891627Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":729967} +{"time":"2025-12-29T09:03:31.342895938Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-729967-730329-2911911574/socket","Net":"unix"}} +{"time":"2025-12-29T09:03:31.528714122Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:03:31.534974689Z","level":"INFO","msg":"handleInformInit: received","streamId":"alguiic1","id":"1(@)"} +{"time":"2025-12-29T09:03:31.707664855Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"alguiic1","id":"1(@)"} +{"time":"2025-12-29T09:03:59.248522484Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:03:59.248568523Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:03:59.248568692Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:03:59.248638466Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-729967-730329-2911911574/socket","Net":"unix"}} +{"time":"2025-12-29T09:03:59.248765013Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:03:59.650140191Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:03:59.650165043Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:03:59.650177107Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-internal.log b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..792f2dbb63baba217a80e3b2b106f47d000d165c --- /dev/null +++ b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:03:31.535124097Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:03:31.707434361Z","level":"INFO","msg":"stream: created new stream","id":"alguiic1"} +{"time":"2025-12-29T09:03:31.707521014Z","level":"INFO","msg":"handler: started","stream_id":"alguiic1"} +{"time":"2025-12-29T09:03:31.70765758Z","level":"INFO","msg":"stream: started","id":"alguiic1"} +{"time":"2025-12-29T09:03:31.707673551Z","level":"INFO","msg":"writer: started","stream_id":"alguiic1"} +{"time":"2025-12-29T09:03:31.70767689Z","level":"INFO","msg":"sender: started","stream_id":"alguiic1"} +{"time":"2025-12-29T09:03:59.248581722Z","level":"INFO","msg":"stream: closing","id":"alguiic1"} +{"time":"2025-12-29T09:03:59.517091928Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:03:59.646799742Z","level":"INFO","msg":"handler: closed","stream_id":"alguiic1"} +{"time":"2025-12-29T09:03:59.646934487Z","level":"INFO","msg":"sender: closed","stream_id":"alguiic1"} +{"time":"2025-12-29T09:03:59.646942679Z","level":"INFO","msg":"stream: closed","id":"alguiic1"} diff --git a/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug.log b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..2d739da0d4d0d09eb2684bc5d95cd2a5fb8a41b2 --- /dev/null +++ b/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_setup.py:_flush():80] Configure stats pid to 729967 +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug.log +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_090331-alguiic1/logs/debug-internal.log +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:03:31,276 INFO MainThread:729967 [wandb_init.py:init():889] starting backend +2025-12-29 09:03:31,528 INFO MainThread:729967 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:03:31,533 INFO MainThread:729967 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:03:31,534 INFO MainThread:729967 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:03:31,539 INFO MainThread:729967 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:03:31,901 INFO MainThread:729967 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:03:32,024 INFO MainThread:729967 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:03:32,024 INFO MainThread:729967 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:03:32,024 INFO MainThread:729967 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:03:32,024 INFO MainThread:729967 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:03:32,027 INFO MainThread:729967 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:03:32,027 INFO MainThread:729967 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:03:59,248 INFO wandb-AsyncioManager-main:729967 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:03:59,248 INFO wandb-AsyncioManager-main:729967 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_090648-6beufw2w/files/output.log b/Meissonic/wandb/run-20251229_090648-6beufw2w/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..123a3f12faeb6ba8a5f9900ec41d98f70706af5a --- /dev/null +++ b/Meissonic/wandb/run-20251229_090648-6beufw2w/files/output.log @@ -0,0 +1,90 @@ +12/29/2025 09:06:49 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:06:49 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 69.63it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:06:51 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:06:52 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:06:52 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:06:52 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:06:53 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:06:53 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:06:53 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:06:53 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:07:09 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:07:09 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:07:10 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:07:12 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:07:12 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:07:12 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:07:12 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:07:12 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:07:12 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:07:12 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:07:13 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:07:13 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:07:13 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:07:13 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:07:13 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:07:13 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:07:13 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:07:13 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:07:13 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:07:13 - INFO - __main__ - - pin_memory: True +12/29/2025 09:07:13 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:07:16 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1921, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1372, in main + empty_embeds, _, _ = encode_prompt( + File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt + outputs = text_encoder(input_ids=input_ids, return_dict=True) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward + encoder_outputs = self.encoder( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward + inputs_embeds = self.embed_tokens(input_ids) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward + return F.embedding( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding + return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1921, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1372, in main +[rank0]: empty_embeds, _, _ = encode_prompt( +[rank0]: File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt +[rank0]: outputs = text_encoder(input_ids=input_ids, return_dict=True) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward +[rank0]: encoder_outputs = self.encoder( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1003, in forward +[rank0]: inputs_embeds = self.embed_tokens(input_ids) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 192, in forward +[rank0]: return F.embedding( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/functional.py", line 2542, in embedding +[rank0]: return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) +[rank0]: RuntimeError: Expected all tensors to be on the same device, but got index is on cuda:0, different from other tensors on cpu (when checking argument in method wrapper_CUDA__index_select) diff --git a/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-core.log b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..70b5139ea5a479db2afd5ce42b10a9542528a625 --- /dev/null +++ b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:06:48.322744491Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmptovya5jz/port-733510.txt","pid":733510,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:06:48.323222671Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":733510} +{"time":"2025-12-29T09:06:48.323226469Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-733510-733874-1899236426/socket","Net":"unix"}} +{"time":"2025-12-29T09:06:48.509280549Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:06:48.515225868Z","level":"INFO","msg":"handleInformInit: received","streamId":"6beufw2w","id":"1(@)"} +{"time":"2025-12-29T09:06:48.680615139Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"6beufw2w","id":"1(@)"} +{"time":"2025-12-29T09:07:16.927152145Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:07:16.927221495Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:07:16.927235437Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:07:16.927262801Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:07:16.927332658Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-733510-733874-1899236426/socket","Net":"unix"}} +{"time":"2025-12-29T09:07:17.287256648Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:07:17.287280462Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:07:17.287293068Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-internal.log b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ad2def016c171c062455e907fb9e232918dd237a --- /dev/null +++ b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:06:48.515337726Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:06:48.680392277Z","level":"INFO","msg":"stream: created new stream","id":"6beufw2w"} +{"time":"2025-12-29T09:06:48.680472423Z","level":"INFO","msg":"handler: started","stream_id":"6beufw2w"} +{"time":"2025-12-29T09:06:48.680606649Z","level":"INFO","msg":"stream: started","id":"6beufw2w"} +{"time":"2025-12-29T09:06:48.680606453Z","level":"INFO","msg":"writer: started","stream_id":"6beufw2w"} +{"time":"2025-12-29T09:06:48.680614685Z","level":"INFO","msg":"sender: started","stream_id":"6beufw2w"} +{"time":"2025-12-29T09:07:16.927233764Z","level":"INFO","msg":"stream: closing","id":"6beufw2w"} +{"time":"2025-12-29T09:07:17.162462897Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:07:17.283815654Z","level":"INFO","msg":"handler: closed","stream_id":"6beufw2w"} +{"time":"2025-12-29T09:07:17.283941325Z","level":"INFO","msg":"sender: closed","stream_id":"6beufw2w"} +{"time":"2025-12-29T09:07:17.28394878Z","level":"INFO","msg":"stream: closed","id":"6beufw2w"} diff --git a/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug.log b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b6ff4e60f848c4ce48ad2a62959738b9abea569a --- /dev/null +++ b/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_setup.py:_flush():80] Configure stats pid to 733510 +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug.log +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_090648-6beufw2w/logs/debug-internal.log +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:06:48,257 INFO MainThread:733510 [wandb_init.py:init():889] starting backend +2025-12-29 09:06:48,509 INFO MainThread:733510 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:06:48,513 INFO MainThread:733510 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:06:48,515 INFO MainThread:733510 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:06:48,519 INFO MainThread:733510 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:06:48,895 INFO MainThread:733510 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:06:49,017 INFO MainThread:733510 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:06:49,017 INFO MainThread:733510 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:06:49,017 INFO MainThread:733510 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:06:49,017 INFO MainThread:733510 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:06:49,020 INFO MainThread:733510 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:06:49,021 INFO MainThread:733510 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:07:16,927 INFO wandb-AsyncioManager-main:733510 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:07:16,927 INFO wandb-AsyncioManager-main:733510 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_091141-ka0jd7f5/files/output.log b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0f679c60f03e115a72fc72b85fb642a336a4fccf --- /dev/null +++ b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/files/output.log @@ -0,0 +1,80 @@ +12/29/2025 09:11:42 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:11:42 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 68.85it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:11:52 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:11:53 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:11:53 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:11:53 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:11:54 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:11:54 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:11:54 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:11:54 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:12:10 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:12:10 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:12:12 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:12:14 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:12:14 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:12:14 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:12:14 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:12:14 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:12:14 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:12:14 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:12:15 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:12:15 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:12:15 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:12:15 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:12:15 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:12:15 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:12:15 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:12:15 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:12:15 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:12:15 - INFO - __main__ - - pin_memory: True +12/29/2025 09:12:15 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:12:16 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:12:17 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:12:17 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:12:17 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:12:17 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:12:17 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:12:17 - INFO - __main__ - Gradient Accumulation steps = 1 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1505, in main + encoder_hidden_states, cond_embeds, context_lens = encode_prompt( + File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt + outputs = text_encoder(input_ids=input_ids, return_dict=True) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward + encoder_outputs = self.encoder( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1005, in forward + batch_size, seq_length = input_shape +ValueError: too many values to unpack (expected 2) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1505, in main +[rank0]: encoder_hidden_states, cond_embeds, context_lens = encode_prompt( +[rank0]: File "/mnt/Meissonic/train/dataset_utils.py", line 81, in encode_prompt +[rank0]: outputs = text_encoder(input_ids=input_ids, return_dict=True) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1932, in forward +[rank0]: encoder_outputs = self.encoder( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1005, in forward +[rank0]: batch_size, seq_length = input_shape +[rank0]: ValueError: too many values to unpack (expected 2) diff --git a/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-core.log b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..deada89eb5fccc9fd90d004362f0b77e9cd65e18 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:11:41.46495449Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp60jzgf4k/port-738522.txt","pid":738522,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:11:41.465425003Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":738522} +{"time":"2025-12-29T09:11:41.465429212Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-738522-738841-2481864578/socket","Net":"unix"}} +{"time":"2025-12-29T09:11:41.65000237Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:11:41.656138897Z","level":"INFO","msg":"handleInformInit: received","streamId":"ka0jd7f5","id":"1(@)"} +{"time":"2025-12-29T09:11:41.824154034Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ka0jd7f5","id":"1(@)"} +{"time":"2025-12-29T09:12:58.044495582Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:12:58.044567297Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:12:58.044641805Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:12:58.044581488Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:12:58.044691294Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-738522-738841-2481864578/socket","Net":"unix"}} +{"time":"2025-12-29T09:12:58.380987195Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:12:58.381007786Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:12:58.381016688Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-internal.log b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..190db5c4e8ae4e7f82b702fcd088a70c34e64a58 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:11:41.656266516Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:11:41.823874816Z","level":"INFO","msg":"stream: created new stream","id":"ka0jd7f5"} +{"time":"2025-12-29T09:11:41.823974149Z","level":"INFO","msg":"handler: started","stream_id":"ka0jd7f5"} +{"time":"2025-12-29T09:11:41.824118611Z","level":"INFO","msg":"stream: started","id":"ka0jd7f5"} +{"time":"2025-12-29T09:11:41.824161087Z","level":"INFO","msg":"writer: started","stream_id":"ka0jd7f5"} +{"time":"2025-12-29T09:11:41.824185193Z","level":"INFO","msg":"sender: started","stream_id":"ka0jd7f5"} +{"time":"2025-12-29T09:12:58.044579207Z","level":"INFO","msg":"stream: closing","id":"ka0jd7f5"} +{"time":"2025-12-29T09:12:58.302865226Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:12:58.377920712Z","level":"INFO","msg":"handler: closed","stream_id":"ka0jd7f5"} +{"time":"2025-12-29T09:12:58.378009069Z","level":"INFO","msg":"sender: closed","stream_id":"ka0jd7f5"} +{"time":"2025-12-29T09:12:58.378017239Z","level":"INFO","msg":"stream: closed","id":"ka0jd7f5"} diff --git a/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug.log b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d801474315cfc5e6fdd5a3f75f69c82d027c9e8a --- /dev/null +++ b/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_setup.py:_flush():80] Configure stats pid to 738522 +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug.log +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_091141-ka0jd7f5/logs/debug-internal.log +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:11:41,397 INFO MainThread:738522 [wandb_init.py:init():889] starting backend +2025-12-29 09:11:41,650 INFO MainThread:738522 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:11:41,654 INFO MainThread:738522 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:11:41,656 INFO MainThread:738522 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:11:41,660 INFO MainThread:738522 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:11:41,964 INFO MainThread:738522 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:11:42,090 INFO MainThread:738522 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:11:42,090 INFO MainThread:738522 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:11:42,090 INFO MainThread:738522 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:11:42,090 INFO MainThread:738522 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:11:42,092 INFO MainThread:738522 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:11:42,093 INFO MainThread:738522 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:12:58,044 INFO wandb-AsyncioManager-main:738522 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:12:58,045 INFO wandb-AsyncioManager-main:738522 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_091548-1gbb0o27/files/output.log b/Meissonic/wandb/run-20251229_091548-1gbb0o27/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..073dd912185e1e91c78bb3d1537697c6d242abee --- /dev/null +++ b/Meissonic/wandb/run-20251229_091548-1gbb0o27/files/output.log @@ -0,0 +1,54 @@ +12/29/2025 09:15:49 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:15:49 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 66.84it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:15:59 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:15:59 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:15:59 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:15:59 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:16:00 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:16:00 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:16:00 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:16:00 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:16:17 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:16:17 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:16:18 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:16:21 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:16:21 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:16:21 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:16:21 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:16:21 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:16:21 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:16:21 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:16:22 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:16:22 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:16:22 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:16:22 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:16:22 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:16:22 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:16:22 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:16:22 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:16:22 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:16:22 - INFO - __main__ - - pin_memory: True +12/29/2025 09:16:22 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:16:24 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:16:24 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:16:24 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:16:24 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:16:24 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:16:24 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:16:24 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1608, in main + if not args.use_precomputed_features and "prompt_input_ids" in batch: +UnboundLocalError: local variable 'batch' referenced before assignment +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1608, in main +[rank0]: if not args.use_precomputed_features and "prompt_input_ids" in batch: +[rank0]: UnboundLocalError: local variable 'batch' referenced before assignment diff --git a/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-core.log b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..7f28d486508497e037be936de41b4ed415a8b183 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:15:48.744381528Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp0a738r9c/port-743676.txt","pid":743676,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:15:48.744831185Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":743676} +{"time":"2025-12-29T09:15:48.744843527Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-743676-744053-1953551874/socket","Net":"unix"}} +{"time":"2025-12-29T09:15:48.931439722Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:15:48.937635622Z","level":"INFO","msg":"handleInformInit: received","streamId":"1gbb0o27","id":"1(@)"} +{"time":"2025-12-29T09:15:49.109405194Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"1gbb0o27","id":"1(@)"} +{"time":"2025-12-29T09:17:05.540022248Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:17:05.540106186Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:17:05.540095322Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:17:05.540175809Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:17:05.540189393Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-743676-744053-1953551874/socket","Net":"unix"}} +{"time":"2025-12-29T09:17:05.85207215Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:17:05.8520996Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:17:05.852112434Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-internal.log b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..b85e2b390c07c268e0125ae89e6bc6a200259d2e --- /dev/null +++ b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:15:48.937735594Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:15:49.109185909Z","level":"INFO","msg":"stream: created new stream","id":"1gbb0o27"} +{"time":"2025-12-29T09:15:49.109272226Z","level":"INFO","msg":"handler: started","stream_id":"1gbb0o27"} +{"time":"2025-12-29T09:15:49.109398371Z","level":"INFO","msg":"stream: started","id":"1gbb0o27"} +{"time":"2025-12-29T09:15:49.109418382Z","level":"INFO","msg":"sender: started","stream_id":"1gbb0o27"} +{"time":"2025-12-29T09:15:49.109439792Z","level":"INFO","msg":"writer: started","stream_id":"1gbb0o27"} +{"time":"2025-12-29T09:17:05.540109887Z","level":"INFO","msg":"stream: closing","id":"1gbb0o27"} +{"time":"2025-12-29T09:17:05.763375687Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:17:05.848791774Z","level":"INFO","msg":"handler: closed","stream_id":"1gbb0o27"} +{"time":"2025-12-29T09:17:05.848885178Z","level":"INFO","msg":"sender: closed","stream_id":"1gbb0o27"} +{"time":"2025-12-29T09:17:05.848893752Z","level":"INFO","msg":"stream: closed","id":"1gbb0o27"} diff --git a/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug.log b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0ff6def4b9c5e278c104602acc195f70d7b8a34b --- /dev/null +++ b/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_setup.py:_flush():80] Configure stats pid to 743676 +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug.log +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_091548-1gbb0o27/logs/debug-internal.log +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:15:48,675 INFO MainThread:743676 [wandb_init.py:init():889] starting backend +2025-12-29 09:15:48,931 INFO MainThread:743676 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:15:48,936 INFO MainThread:743676 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:15:48,937 INFO MainThread:743676 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:15:48,942 INFO MainThread:743676 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:15:49,303 INFO MainThread:743676 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:15:49,426 INFO MainThread:743676 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:15:49,426 INFO MainThread:743676 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:15:49,427 INFO MainThread:743676 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:15:49,427 INFO MainThread:743676 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:15:49,429 INFO MainThread:743676 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:15:49,430 INFO MainThread:743676 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:17:05,540 INFO wandb-AsyncioManager-main:743676 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:17:05,540 INFO wandb-AsyncioManager-main:743676 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_091749-rjwft5vo/files/output.log b/Meissonic/wandb/run-20251229_091749-rjwft5vo/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1bd7d4eabd576a61c5b7c52cf1afff7a93ff5c08 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091749-rjwft5vo/files/output.log @@ -0,0 +1,216 @@ +12/29/2025 09:17:50 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:17:50 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 60.90it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:18:00 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:18:01 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:18:01 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:18:01 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:18:02 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:18:02 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:18:02 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:18:02 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:18:18 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:18:18 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:18:19 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:18:21 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:18:21 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:18:21 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:18:21 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:18:21 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:18:21 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:18:21 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:18:22 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:18:22 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:18:22 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:18:22 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:18:22 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:18:22 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:18:22 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:18:22 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:18:22 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:18:22 - INFO - __main__ - - pin_memory: True +12/29/2025 09:18:22 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:18:25 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:18:26 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:18:26 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:18:26 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:18:26 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:18:26 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:18:26 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +12/29/2025 09:18:40 - INFO - __main__ - Step: 10 Loss: 11.0643 LR: 0.000600 +12/29/2025 09:18:49 - INFO - __main__ - Step: 20 Loss: 11.0104 LR: 0.000600 +12/29/2025 09:18:58 - INFO - __main__ - Step: 30 Loss: 10.9004 LR: 0.000600 +12/29/2025 09:19:07 - INFO - __main__ - Step: 40 Loss: 10.7376 LR: 0.000600 +12/29/2025 09:19:16 - INFO - __main__ - Step: 50 Loss: 10.6118 LR: 0.000600 +12/29/2025 09:19:25 - INFO - __main__ - Step: 60 Loss: 10.5061 LR: 0.000600 +12/29/2025 09:19:33 - INFO - __main__ - Step: 70 Loss: 10.5375 LR: 0.000600 +12/29/2025 09:19:42 - INFO - __main__ - Step: 80 Loss: 10.5029 LR: 0.000600 +12/29/2025 09:19:51 - INFO - __main__ - Step: 90 Loss: 10.4589 LR: 0.000600 +12/29/2025 09:20:00 - INFO - __main__ - Step: 100 Loss: 10.4480 LR: 0.000600 +12/29/2025 09:20:00 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:20:00 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:20:01 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:20:01 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:08<00:00, 5.51it/s] +12/29/2025 09:20:11 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:20:11 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:20:20 - INFO - __main__ - Step: 110 Loss: 10.4370 LR: 0.000600 +12/29/2025 09:20:29 - INFO - __main__ - Step: 120 Loss: 10.3973 LR: 0.000600 +12/29/2025 09:20:37 - INFO - __main__ - Step: 130 Loss: 10.4707 LR: 0.000600 +12/29/2025 09:20:46 - INFO - __main__ - Step: 140 Loss: 10.5013 LR: 0.000600 +12/29/2025 09:20:55 - INFO - __main__ - Step: 150 Loss: 10.4578 LR: 0.000600 +12/29/2025 09:21:04 - INFO - __main__ - Step: 160 Loss: 10.4520 LR: 0.000600 +12/29/2025 09:21:13 - INFO - __main__ - Step: 170 Loss: 10.4825 LR: 0.000600 +12/29/2025 09:21:22 - INFO - __main__ - Step: 180 Loss: 10.4314 LR: 0.000600 +12/29/2025 09:21:30 - INFO - __main__ - Step: 190 Loss: 10.4015 LR: 0.000600 +12/29/2025 09:21:39 - INFO - __main__ - Step: 200 Loss: 10.4898 LR: 0.000600 +12/29/2025 09:21:39 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:21:39 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:21:40 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:21:40 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:08<00:00, 5.55it/s] +12/29/2025 09:21:50 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:21:50 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:21:58 - INFO - __main__ - Step: 210 Loss: 10.4768 LR: 0.000600 +12/29/2025 09:22:07 - INFO - __main__ - Step: 220 Loss: 10.4190 LR: 0.000600 +12/29/2025 09:22:16 - INFO - __main__ - Step: 230 Loss: 10.3139 LR: 0.000600 +12/29/2025 09:22:25 - INFO - __main__ - Step: 240 Loss: 10.3959 LR: 0.000600 +12/29/2025 09:22:33 - INFO - __main__ - Step: 250 Loss: 10.4780 LR: 0.000600 +12/29/2025 09:22:43 - INFO - __main__ - Step: 260 Loss: 10.3735 LR: 0.000600 +12/29/2025 09:22:51 - INFO - __main__ - Step: 270 Loss: 10.4155 LR: 0.000600 +12/29/2025 09:23:00 - INFO - __main__ - Step: 280 Loss: 10.4003 LR: 0.000600 +12/29/2025 09:23:09 - INFO - __main__ - Step: 290 Loss: 10.3447 LR: 0.000600 +12/29/2025 09:23:18 - INFO - __main__ - Step: 300 Loss: 10.4349 LR: 0.000600 +12/29/2025 09:23:18 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:23:18 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:23:19 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:23:19 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:08<00:00, 5.59it/s] +12/29/2025 09:23:29 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:23:29 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:23:38 - INFO - __main__ - Step: 310 Loss: 10.3702 LR: 0.000600 +12/29/2025 09:23:48 - INFO - __main__ - Step: 320 Loss: 10.3423 LR: 0.000600 +12/29/2025 09:23:57 - INFO - __main__ - Step: 330 Loss: 10.3801 LR: 0.000600 +12/29/2025 09:24:05 - INFO - __main__ - Step: 340 Loss: 10.3961 LR: 0.000600 +12/29/2025 09:24:14 - INFO - __main__ - Step: 350 Loss: 10.4539 LR: 0.000600 +12/29/2025 09:24:23 - INFO - __main__ - Step: 360 Loss: 10.4541 LR: 0.000600 +12/29/2025 09:24:32 - INFO - __main__ - Step: 370 Loss: 10.3918 LR: 0.000600 +12/29/2025 09:24:41 - INFO - __main__ - Step: 380 Loss: 10.6276 LR: 0.000600 +12/29/2025 09:24:51 - INFO - __main__ - Step: 390 Loss: 10.4246 LR: 0.000600 +12/29/2025 09:24:59 - INFO - __main__ - Step: 400 Loss: 10.6309 LR: 0.000600 +12/29/2025 09:24:59 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:24:59 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:25:00 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:25:00 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:09<00:00, 5.29it/s] +12/29/2025 09:25:11 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:25:11 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:25:20 - INFO - __main__ - Step: 410 Loss: 10.4519 LR: 0.000600 +12/29/2025 09:25:29 - INFO - __main__ - Step: 420 Loss: 10.5135 LR: 0.000600 +12/29/2025 09:25:39 - INFO - __main__ - Step: 430 Loss: 10.4289 LR: 0.000600 +12/29/2025 09:25:47 - INFO - __main__ - Step: 440 Loss: 10.3606 LR: 0.000600 +12/29/2025 09:25:56 - INFO - __main__ - Step: 450 Loss: 10.4412 LR: 0.000600 +12/29/2025 09:26:05 - INFO - __main__ - Step: 460 Loss: 10.5095 LR: 0.000600 +12/29/2025 09:26:13 - INFO - __main__ - Step: 470 Loss: 10.4414 LR: 0.000600 +12/29/2025 09:26:22 - INFO - __main__ - Step: 480 Loss: 10.4971 LR: 0.000600 +12/29/2025 09:26:31 - INFO - __main__ - Step: 490 Loss: 10.4062 LR: 0.000600 +12/29/2025 09:26:39 - INFO - __main__ - Step: 500 Loss: 10.3370 LR: 0.000600 +12/29/2025 09:26:39 - INFO - accelerate.accelerator - Saving current state to output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500 +12/29/2025 09:27:28 - INFO - accelerate.checkpointing - Optimizer state saved in output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500/optimizer.bin +12/29/2025 09:27:28 - INFO - accelerate.checkpointing - Scheduler state saved in output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500/scheduler.bin +12/29/2025 09:27:28 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500/sampler.bin +12/29/2025 09:27:28 - INFO - accelerate.checkpointing - Random states saved in output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500/random_states_0.pkl +12/29/2025 09:27:28 - INFO - __main__ - Saved state to output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3/checkpoint-500 +12/29/2025 09:27:28 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:27:28 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:27:29 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:27:29 - INFO - __main__ - Generating videos for validation... + 73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 35/48 [00:06<00:02, 5.53it/s] +12/29/2025 09:27:35 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + File "/mnt/Meissonic/train/train_mei_video.py", line 1804, in main + videos = pipe( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context + return func(*args, **kwargs) + File "/mnt/Meissonic/src/pipeline_video.py", line 656, in __call__ + logits = self.transformer( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1037, in forward + out_list = self.backbone( + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 740, in forward + x = block(x, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 489, in forward + x = cross_attn_ffn(x, context, context_lens, e) + File "/mnt/Meissonic/src/transformer_video.py", line 477, in cross_attn_ffn + x = x + self.cross_attn(self.norm3(x), context, context_lens) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 389, in forward + k = self.norm_k(self.k(context)).view(b, -1, n, d) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 281, in forward + return self._norm(x.float()).type_as(x) * self.weight.type_as(x) +KeyboardInterrupt +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1804, in main +[rank0]: videos = pipe( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context +[rank0]: return func(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/pipeline_video.py", line 656, in __call__ +[rank0]: logits = self.transformer( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1037, in forward +[rank0]: out_list = self.backbone( +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 740, in forward +[rank0]: x = block(x, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 489, in forward +[rank0]: x = cross_attn_ffn(x, context, context_lens, e) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 477, in cross_attn_ffn +[rank0]: x = x + self.cross_attn(self.norm3(x), context, context_lens) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 389, in forward +[rank0]: k = self.norm_k(self.k(context)).view(b, -1, n, d) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/matrix-game2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 281, in forward +[rank0]: return self._norm(x.float()).type_as(x) * self.weight.type_as(x) +[rank0]: KeyboardInterrupt diff --git a/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-core.log b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..7ede93003d8be0d5f1c8914f5a782b34c3a19273 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:17:49.97703184Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpdp7cb4ii/port-746756.txt","pid":746756,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:17:49.977539409Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":746756} +{"time":"2025-12-29T09:17:49.977534174Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-746756-747080-873596851/socket","Net":"unix"}} +{"time":"2025-12-29T09:17:50.163761402Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:17:50.17136986Z","level":"INFO","msg":"handleInformInit: received","streamId":"rjwft5vo","id":"1(@)"} +{"time":"2025-12-29T09:17:50.347111593Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"rjwft5vo","id":"1(@)"} +{"time":"2025-12-29T09:27:36.194629779Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:27:36.194688147Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:27:36.194744633Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:27:36.194700243Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:27:36.194833749Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-746756-747080-873596851/socket","Net":"unix"}} +{"time":"2025-12-29T09:27:36.562716195Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:27:36.562745027Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:27:36.56275455Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-internal.log b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..56bd483a446d5b03b0aebe849f78a1078b7bb900 --- /dev/null +++ b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:17:50.171459083Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:17:50.346899108Z","level":"INFO","msg":"stream: created new stream","id":"rjwft5vo"} +{"time":"2025-12-29T09:17:50.346985842Z","level":"INFO","msg":"handler: started","stream_id":"rjwft5vo"} +{"time":"2025-12-29T09:17:50.347104316Z","level":"INFO","msg":"stream: started","id":"rjwft5vo"} +{"time":"2025-12-29T09:17:50.347120094Z","level":"INFO","msg":"sender: started","stream_id":"rjwft5vo"} +{"time":"2025-12-29T09:17:50.34712111Z","level":"INFO","msg":"writer: started","stream_id":"rjwft5vo"} +{"time":"2025-12-29T09:27:36.194706638Z","level":"INFO","msg":"stream: closing","id":"rjwft5vo"} +{"time":"2025-12-29T09:27:36.446064012Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:27:36.558384562Z","level":"INFO","msg":"handler: closed","stream_id":"rjwft5vo"} +{"time":"2025-12-29T09:27:36.558720788Z","level":"INFO","msg":"sender: closed","stream_id":"rjwft5vo"} +{"time":"2025-12-29T09:27:36.558745005Z","level":"INFO","msg":"stream: closed","id":"rjwft5vo"} diff --git a/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug.log b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..50dbc5e3a0ff7671a8c43b15ec6af9c68d9f97bd --- /dev/null +++ b/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_setup.py:_flush():80] Configure stats pid to 746756 +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug.log +2025-12-29 09:17:49,880 INFO MainThread:746756 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_091749-rjwft5vo/logs/debug-internal.log +2025-12-29 09:17:49,881 INFO MainThread:746756 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:17:49,881 INFO MainThread:746756 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:17:49,881 INFO MainThread:746756 [wandb_init.py:init():889] starting backend +2025-12-29 09:17:50,164 INFO MainThread:746756 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:17:50,169 INFO MainThread:746756 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:17:50,172 INFO MainThread:746756 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:17:50,178 INFO MainThread:746756 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:17:50,614 INFO MainThread:746756 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:17:50,804 INFO MainThread:746756 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:17:50,804 INFO MainThread:746756 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:17:50,804 INFO MainThread:746756 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:17:50,804 INFO MainThread:746756 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:17:50,808 INFO MainThread:746756 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:17:50,809 INFO MainThread:746756 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:27:36,194 INFO wandb-AsyncioManager-main:746756 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:27:36,194 INFO wandb-AsyncioManager-main:746756 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_092338-9aiqswtw/files/output.log b/Meissonic/wandb/run-20251229_092338-9aiqswtw/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7c497f0c0b7a24979aee1fe871a303b6e2ed6e8e --- /dev/null +++ b/Meissonic/wandb/run-20251229_092338-9aiqswtw/files/output.log @@ -0,0 +1,78 @@ +12/29/2025 09:23:39 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:23:39 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 71.04it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + ~~~~^^^^^^^^^^^^^^ + File "/mnt/Meissonic/train/train_mei_video.py", line 549, in main + text_encoder.to(accelerator.device) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/transformers/modeling_utils.py", line 4343, in to + return super().to(*args, **kwargs) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1371, in to + return self._apply(convert) + ~~~~~~~~~~~^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply + module._apply(fn) + ~~~~~~~~~~~~~^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply + module._apply(fn) + ~~~~~~~~~~~~~^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply + module._apply(fn) + ~~~~~~~~~~~~~^^^^ + [Previous line repeated 4 more times] + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 957, in _apply + param_applied = fn(param) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1357, in convert + return t.to( + ~~~~^ + device, + ^^^^^^^ + dtype if t.is_floating_point() or t.is_complex() else None, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + non_blocking, + ^^^^^^^^^^^^^ + ) + ^ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 39.49 GiB of which 32.00 MiB is free. Process 746756 has 35.01 GiB memory in use. Including non-PyTorch memory, this process has 4.44 GiB memory in use. Of the allocated memory 4.04 GiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: ~~~~^^^^^^^^^^^^^^ +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 549, in main +[rank0]: text_encoder.to(accelerator.device) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/transformers/modeling_utils.py", line 4343, in to +[rank0]: return super().to(*args, **kwargs) +[rank0]: ~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1371, in to +[rank0]: return self._apply(convert) +[rank0]: ~~~~~~~~~~~^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply +[rank0]: module._apply(fn) +[rank0]: ~~~~~~~~~~~~~^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply +[rank0]: module._apply(fn) +[rank0]: ~~~~~~~~~~~~~^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 930, in _apply +[rank0]: module._apply(fn) +[rank0]: ~~~~~~~~~~~~~^^^^ +[rank0]: [Previous line repeated 4 more times] +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 957, in _apply +[rank0]: param_applied = fn(param) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1357, in convert +[rank0]: return t.to( +[rank0]: ~~~~^ +[rank0]: device, +[rank0]: ^^^^^^^ +[rank0]: dtype if t.is_floating_point() or t.is_complex() else None, +[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +[rank0]: non_blocking, +[rank0]: ^^^^^^^^^^^^^ +[rank0]: ) +[rank0]: ^ +[rank0]: torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 39.49 GiB of which 32.00 MiB is free. Process 746756 has 35.01 GiB memory in use. Including non-PyTorch memory, this process has 4.44 GiB memory in use. Of the allocated memory 4.04 GiB is allocated by PyTorch, and 0 bytes is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-core.log b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..66d5fa36822fbef082a6fcb3e12e1f1a18da4f7d --- /dev/null +++ b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:23:38.666936685Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp66bkmdom/port-793712.txt","pid":793712,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:23:38.667452965Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":793712} +{"time":"2025-12-29T09:23:38.667424999Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-793712-796118-562382933/socket","Net":"unix"}} +{"time":"2025-12-29T09:23:38.854071782Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:23:38.860821733Z","level":"INFO","msg":"handleInformInit: received","streamId":"9aiqswtw","id":"1(@)"} +{"time":"2025-12-29T09:23:39.030391585Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"9aiqswtw","id":"1(@)"} +{"time":"2025-12-29T09:23:43.124152534Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:23:43.124211972Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:23:43.124202435Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:23:43.124273391Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:23:43.124306372Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-793712-796118-562382933/socket","Net":"unix"}} +{"time":"2025-12-29T09:23:43.659931735Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:23:43.659952424Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:23:43.6599602Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-internal.log b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..87ffe99081002e59065e3e24ce9e43bdb40255ed --- /dev/null +++ b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:23:38.860953258Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:23:39.030147233Z","level":"INFO","msg":"stream: created new stream","id":"9aiqswtw"} +{"time":"2025-12-29T09:23:39.030228827Z","level":"INFO","msg":"handler: started","stream_id":"9aiqswtw"} +{"time":"2025-12-29T09:23:39.030383219Z","level":"INFO","msg":"stream: started","id":"9aiqswtw"} +{"time":"2025-12-29T09:23:39.030399033Z","level":"INFO","msg":"writer: started","stream_id":"9aiqswtw"} +{"time":"2025-12-29T09:23:39.030403687Z","level":"INFO","msg":"sender: started","stream_id":"9aiqswtw"} +{"time":"2025-12-29T09:23:43.124213482Z","level":"INFO","msg":"stream: closing","id":"9aiqswtw"} +{"time":"2025-12-29T09:23:43.540705034Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:23:43.654220606Z","level":"INFO","msg":"handler: closed","stream_id":"9aiqswtw"} +{"time":"2025-12-29T09:23:43.654314466Z","level":"INFO","msg":"sender: closed","stream_id":"9aiqswtw"} +{"time":"2025-12-29T09:23:43.654320922Z","level":"INFO","msg":"stream: closed","id":"9aiqswtw"} diff --git a/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug.log b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..ae9c3609305626340e683f1afc6105eeea29e671 --- /dev/null +++ b/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:23:38,592 INFO MainThread:793712 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:23:38,592 INFO MainThread:793712 [wandb_setup.py:_flush():80] Configure stats pid to 793712 +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug.log +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_092338-9aiqswtw/logs/debug-internal.log +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:23:38,593 INFO MainThread:793712 [wandb_init.py:init():889] starting backend +2025-12-29 09:23:38,854 INFO MainThread:793712 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:23:38,859 INFO MainThread:793712 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:23:38,860 INFO MainThread:793712 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:23:38,865 INFO MainThread:793712 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:23:39,289 INFO MainThread:793712 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:23:39,373 INFO MainThread:793712 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:23:39,373 INFO MainThread:793712 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:23:39,373 INFO MainThread:793712 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:23:39,373 INFO MainThread:793712 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:23:39,376 INFO MainThread:793712 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:23:39,377 INFO MainThread:793712 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:23:43,124 INFO wandb-AsyncioManager-main:793712 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:23:43,124 INFO wandb-AsyncioManager-main:793712 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_092754-hkolswde/files/output.log b/Meissonic/wandb/run-20251229_092754-hkolswde/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..10c34cc827d118d5a371af40fd06f25935d41bc9 --- /dev/null +++ b/Meissonic/wandb/run-20251229_092754-hkolswde/files/output.log @@ -0,0 +1,227 @@ +12/29/2025 09:27:55 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:27:55 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 88.41it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:28:05 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:28:05 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:28:05 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:28:05 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:28:06 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:28:06 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:28:06 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:28:06 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:28:22 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:28:22 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:28:24 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:28:25 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:28:25 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:28:25 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:28:25 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:28:25 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:28:25 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:28:25 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:28:26 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:28:26 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:28:26 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:28:26 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:28:26 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:28:26 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:28:26 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:28:26 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:28:26 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:28:26 - INFO - __main__ - - pin_memory: True +12/29/2025 09:28:26 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:28:27 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:28:28 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:28:28 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:28:28 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:28:28 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:28:28 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:28:28 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + ~~~~^^^^^^^^^^^^^^ + File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main + logits = model( + tokens=input_ids, # [B, F', H', W'] + ...<3 lines>... + y=None, + ) # Returns [B, vocab_size, F', H', W'] + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward + else self._run_ddp_forward(*inputs, **kwargs) + ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward + return self.module(*inputs, **kwargs) # type: ignore[index] + ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ + return super().__call__(*args, **kwargs) + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1026, in forward + out_list = torch.utils.checkpoint.checkpoint( + create_custom_forward(self.backbone), + ...<6 lines>... + **ckpt_kwargs, + ) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner + return disable_fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint + ret = function(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1021, in custom_forward + return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 740, in forward + x = block(x, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 471, in forward + y = self.self_attn(attn_input, seq_lens, grid_sizes, freqs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 357, in forward + x = flash_attention( + q=rope_apply(q, grid_sizes, freqs), + ...<2 lines>... + k_lens=seq_lens, + window_size=self.window_size) + File "/mnt/Meissonic/src/transformer_video.py", line 124, in flash_attention + assert FLASH_ATTN_2_AVAILABLE + ^^^^^^^^^^^^^^^^^^^^^^ +AssertionError +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: ~~~~^^^^^^^^^^^^^^ +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main +[rank0]: logits = model( +[rank0]: tokens=input_ids, # [B, F', H', W'] +[rank0]: ...<3 lines>... +[rank0]: y=None, +[rank0]: ) # Returns [B, vocab_size, F', H', W'] +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward +[rank0]: else self._run_ddp_forward(*inputs, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward +[rank0]: return self.module(*inputs, **kwargs) # type: ignore[index] +[rank0]: ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ +[rank0]: return super().__call__(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward +[rank0]: return model_forward(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ +[rank0]: return convert_to_fp32(self.model_forward(*args, **kwargs)) +[rank0]: ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast +[rank0]: return func(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1026, in forward +[rank0]: out_list = torch.utils.checkpoint.checkpoint( +[rank0]: create_custom_forward(self.backbone), +[rank0]: ...<6 lines>... +[rank0]: **ckpt_kwargs, +[rank0]: ) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner +[rank0]: return disable_fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint +[rank0]: ret = function(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1021, in custom_forward +[rank0]: return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 740, in forward +[rank0]: x = block(x, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 471, in forward +[rank0]: y = self.self_attn(attn_input, seq_lens, grid_sizes, freqs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 357, in forward +[rank0]: x = flash_attention( +[rank0]: q=rope_apply(q, grid_sizes, freqs), +[rank0]: ...<2 lines>... +[rank0]: k_lens=seq_lens, +[rank0]: window_size=self.window_size) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 124, in flash_attention +[rank0]: assert FLASH_ATTN_2_AVAILABLE +[rank0]: ^^^^^^^^^^^^^^^^^^^^^^ +[rank0]: AssertionError +Exception ignored in atexit callback : +Traceback (most recent call last): + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1648, in _clean_up_worker + w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/process.py", line 149, in join + res = self._popen.wait(timeout) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/popen_fork.py", line 41, in wait + if not wait([self.sentinel], timeout): + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/connection.py", line 1148, in wait + ready = selector.select(timeout) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/selectors.py", line 398, in select + fd_event_list = self._selector.poll(timeout) +KeyboardInterrupt: diff --git a/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-core.log b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..f8e159bf45c1acc743f133fd52528d184d87daa0 --- /dev/null +++ b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:27:54.459958748Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpvbf85v7z/port-827001.txt","pid":827001,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:27:54.461298115Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-827001-827350-3157483810/socket","Net":"unix"}} +{"time":"2025-12-29T09:27:54.461318371Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":827001} +{"time":"2025-12-29T09:27:54.64708403Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:27:54.653447558Z","level":"INFO","msg":"handleInformInit: received","streamId":"hkolswde","id":"1(@)"} +{"time":"2025-12-29T09:27:54.822121408Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"hkolswde","id":"1(@)"} +{"time":"2025-12-29T09:28:40.467398849Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:28:40.467613202Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:28:40.467666788Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:28:40.467674612Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:28:40.467735782Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-827001-827350-3157483810/socket","Net":"unix"}} +{"time":"2025-12-29T09:28:40.865539623Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:28:40.865583319Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:28:40.865601347Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-internal.log b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..9de91eb9b56244f43d5b13891a1988e38a9accaa --- /dev/null +++ b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:27:54.65355505Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:27:54.821869274Z","level":"INFO","msg":"stream: created new stream","id":"hkolswde"} +{"time":"2025-12-29T09:27:54.821939345Z","level":"INFO","msg":"handler: started","stream_id":"hkolswde"} +{"time":"2025-12-29T09:27:54.822114514Z","level":"INFO","msg":"stream: started","id":"hkolswde"} +{"time":"2025-12-29T09:27:54.822128192Z","level":"INFO","msg":"writer: started","stream_id":"hkolswde"} +{"time":"2025-12-29T09:27:54.822129272Z","level":"INFO","msg":"sender: started","stream_id":"hkolswde"} +{"time":"2025-12-29T09:28:40.467486229Z","level":"INFO","msg":"stream: closing","id":"hkolswde"} +{"time":"2025-12-29T09:28:40.738928906Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:28:40.861746732Z","level":"INFO","msg":"handler: closed","stream_id":"hkolswde"} +{"time":"2025-12-29T09:28:40.861886565Z","level":"INFO","msg":"sender: closed","stream_id":"hkolswde"} +{"time":"2025-12-29T09:28:40.861903039Z","level":"INFO","msg":"stream: closed","id":"hkolswde"} diff --git a/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug.log b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d6fe75e5915b40012b32ee07198f3842deb756e6 --- /dev/null +++ b/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_setup.py:_flush():80] Configure stats pid to 827001 +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug.log +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_092754-hkolswde/logs/debug-internal.log +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:27:54,388 INFO MainThread:827001 [wandb_init.py:init():889] starting backend +2025-12-29 09:27:54,647 INFO MainThread:827001 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:27:54,651 INFO MainThread:827001 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:27:54,653 INFO MainThread:827001 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:27:54,657 INFO MainThread:827001 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:27:54,952 INFO MainThread:827001 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:27:55,040 INFO MainThread:827001 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:27:55,040 INFO MainThread:827001 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:27:55,040 INFO MainThread:827001 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:27:55,040 INFO MainThread:827001 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:27:55,043 INFO MainThread:827001 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:27:55,044 INFO MainThread:827001 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:28:40,467 INFO wandb-AsyncioManager-main:827001 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:28:40,467 INFO wandb-AsyncioManager-main:827001 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_093047-tjwhycdm/files/output.log b/Meissonic/wandb/run-20251229_093047-tjwhycdm/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..3b684ec5d859563c9cd6598551b272e0b7aa058a --- /dev/null +++ b/Meissonic/wandb/run-20251229_093047-tjwhycdm/files/output.log @@ -0,0 +1,226 @@ +12/29/2025 09:30:48 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:30:48 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 87.94it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:30:58 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:30:59 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:30:59 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:30:59 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:30:59 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:30:59 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:30:59 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:30:59 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:31:17 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:31:17 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:31:19 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:31:21 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:31:21 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:31:21 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:31:21 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:31:21 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:31:21 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:31:21 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:31:21 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:31:21 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:31:21 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:31:21 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:31:21 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:31:21 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:31:21 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:31:21 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:31:21 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:31:21 - INFO - __main__ - - pin_memory: True +12/29/2025 09:31:21 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:31:23 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:31:23 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:31:23 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:31:23 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:31:23 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:31:23 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:31:23 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + ~~~~^^^^^^^^^^^^^^ + File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main + logits = model( + tokens=input_ids, # [B, F', H', W'] + ...<3 lines>... + y=None, + ) # Returns [B, vocab_size, F', H', W'] + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward + else self._run_ddp_forward(*inputs, **kwargs) + ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward + return self.module(*inputs, **kwargs) # type: ignore[index] + ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ + return super().__call__(*args, **kwargs) + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1180, in forward + out_list = torch.utils.checkpoint.checkpoint( + create_custom_forward(self.backbone), + ...<6 lines>... + **ckpt_kwargs, + ) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner + return disable_fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint + ret = function(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1175, in custom_forward + return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 894, in forward + x = block(x, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 625, in forward + y = self.self_attn(attn_input, seq_lens, grid_sizes, freqs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 511, in forward + x = flash_attention( + q=rope_apply(q, grid_sizes, freqs), + ...<2 lines>... + k_lens=seq_lens, + window_size=self.window_size) + File "/mnt/Meissonic/src/transformer_video.py", line 115, in flash_attention + return _sdpa_attention( + q=q, k=k, v=v, + ...<5 lines>... + dtype=dtype, + ) + File "/mnt/Meissonic/src/transformer_video.py", line 74, in _sdpa_attention + invalid = kpos >= k_lens.view(B, 1, 1, 1) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: ~~~~^^^^^^^^^^^^^^ +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main +[rank0]: logits = model( +[rank0]: tokens=input_ids, # [B, F', H', W'] +[rank0]: ...<3 lines>... +[rank0]: y=None, +[rank0]: ) # Returns [B, vocab_size, F', H', W'] +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward +[rank0]: else self._run_ddp_forward(*inputs, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward +[rank0]: return self.module(*inputs, **kwargs) # type: ignore[index] +[rank0]: ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ +[rank0]: return super().__call__(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward +[rank0]: return model_forward(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ +[rank0]: return convert_to_fp32(self.model_forward(*args, **kwargs)) +[rank0]: ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast +[rank0]: return func(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1180, in forward +[rank0]: out_list = torch.utils.checkpoint.checkpoint( +[rank0]: create_custom_forward(self.backbone), +[rank0]: ...<6 lines>... +[rank0]: **ckpt_kwargs, +[rank0]: ) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner +[rank0]: return disable_fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint +[rank0]: ret = function(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1175, in custom_forward +[rank0]: return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 894, in forward +[rank0]: x = block(x, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 625, in forward +[rank0]: y = self.self_attn(attn_input, seq_lens, grid_sizes, freqs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 511, in forward +[rank0]: x = flash_attention( +[rank0]: q=rope_apply(q, grid_sizes, freqs), +[rank0]: ...<2 lines>... +[rank0]: k_lens=seq_lens, +[rank0]: window_size=self.window_size) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 115, in flash_attention +[rank0]: return _sdpa_attention( +[rank0]: q=q, k=k, v=v, +[rank0]: ...<5 lines>... +[rank0]: dtype=dtype, +[rank0]: ) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 74, in _sdpa_attention +[rank0]: invalid = kpos >= k_lens.view(B, 1, 1, 1) +[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +[rank0]: RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! diff --git a/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-core.log b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..020c63870a6e9162570841c1265e330d1b093ca1 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:30:47.752645275Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmplo4d2h55/port-832705.txt","pid":832705,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:30:47.753239183Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":832705} +{"time":"2025-12-29T09:30:47.753235442Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-832705-832994-3816301294/socket","Net":"unix"}} +{"time":"2025-12-29T09:30:47.938881791Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:30:47.945214153Z","level":"INFO","msg":"handleInformInit: received","streamId":"tjwhycdm","id":"1(@)"} +{"time":"2025-12-29T09:30:48.112200364Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tjwhycdm","id":"1(@)"} +{"time":"2025-12-29T09:32:07.259814336Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:32:07.259960133Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:32:07.259949779Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:32:07.260095761Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:32:07.260090084Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-832705-832994-3816301294/socket","Net":"unix"}} +{"time":"2025-12-29T09:32:07.862442605Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:32:07.862468233Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:32:07.862480823Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-internal.log b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..23a66ab05bdf987d281f28a1973cab4ba0da0f13 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:30:47.94537373Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:30:48.111948149Z","level":"INFO","msg":"stream: created new stream","id":"tjwhycdm"} +{"time":"2025-12-29T09:30:48.11203355Z","level":"INFO","msg":"handler: started","stream_id":"tjwhycdm"} +{"time":"2025-12-29T09:30:48.112193421Z","level":"INFO","msg":"stream: started","id":"tjwhycdm"} +{"time":"2025-12-29T09:30:48.112213948Z","level":"INFO","msg":"sender: started","stream_id":"tjwhycdm"} +{"time":"2025-12-29T09:30:48.112213535Z","level":"INFO","msg":"writer: started","stream_id":"tjwhycdm"} +{"time":"2025-12-29T09:32:07.259951095Z","level":"INFO","msg":"stream: closing","id":"tjwhycdm"} +{"time":"2025-12-29T09:32:07.520258144Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:32:07.859134275Z","level":"INFO","msg":"handler: closed","stream_id":"tjwhycdm"} +{"time":"2025-12-29T09:32:07.859249008Z","level":"INFO","msg":"sender: closed","stream_id":"tjwhycdm"} +{"time":"2025-12-29T09:32:07.859256911Z","level":"INFO","msg":"stream: closed","id":"tjwhycdm"} diff --git a/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug.log b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8b20738c77f376b907e5d98aef8874004aee2edc --- /dev/null +++ b/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_setup.py:_flush():80] Configure stats pid to 832705 +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug.log +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_093047-tjwhycdm/logs/debug-internal.log +2025-12-29 09:30:47,680 INFO MainThread:832705 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:30:47,681 INFO MainThread:832705 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:30:47,681 INFO MainThread:832705 [wandb_init.py:init():889] starting backend +2025-12-29 09:30:47,939 INFO MainThread:832705 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:30:47,943 INFO MainThread:832705 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:30:47,944 INFO MainThread:832705 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:30:47,949 INFO MainThread:832705 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:30:48,412 INFO MainThread:832705 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:30:48,496 INFO MainThread:832705 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:30:48,496 INFO MainThread:832705 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:30:48,496 INFO MainThread:832705 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:30:48,496 INFO MainThread:832705 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:30:48,498 INFO MainThread:832705 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:30:48,500 INFO MainThread:832705 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:32:07,260 INFO wandb-AsyncioManager-main:832705 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:32:07,260 INFO wandb-AsyncioManager-main:832705 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_093332-4lgcq9jf/files/output.log b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9e89fa0f25e7732f8335535c0ce4ba2383ca5ecc --- /dev/null +++ b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/files/output.log @@ -0,0 +1,187 @@ +12/29/2025 09:33:33 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:33:33 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 92.03it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:33:43 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:33:44 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:33:44 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:33:44 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:33:44 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:33:44 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:33:44 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:33:44 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:34:00 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:34:00 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:34:02 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:34:03 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:34:03 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:34:03 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:34:03 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:34:03 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:34:03 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:34:03 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:34:04 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:34:04 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:34:04 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:34:04 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:34:04 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:34:04 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:34:04 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:34:04 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:34:04 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:34:04 - INFO - __main__ - - pin_memory: True +12/29/2025 09:34:04 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:34:08 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:34:09 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:34:09 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:34:09 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:34:09 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:34:09 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:34:09 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + ~~~~^^^^^^^^^^^^^^ + File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main + logits = model( + tokens=input_ids, # [B, F', H', W'] + ...<3 lines>... + y=None, + ) # Returns [B, vocab_size, F', H', W'] + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward + else self._run_ddp_forward(*inputs, **kwargs) + ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward + return self.module(*inputs, **kwargs) # type: ignore[index] + ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ + return super().__call__(*args, **kwargs) + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1189, in forward + out_list = torch.utils.checkpoint.checkpoint( + create_custom_forward(self.backbone), + ...<6 lines>... + **ckpt_kwargs, + ) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner + return disable_fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn + return fn(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint + ret = function(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 1184, in custom_forward + return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl + return forward_call(*args, **kwargs) + File "/mnt/Meissonic/src/transformer_video.py", line 852, in forward + seq_lens = torch.tensor([u.size(1) for u in x], dtype=torch.long, device=x.device) + ^^^^^^^^ +AttributeError: 'list' object has no attribute 'device' +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: ~~~~^^^^^^^^^^^^^^ +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1638, in main +[rank0]: logits = model( +[rank0]: tokens=input_ids, # [B, F', H', W'] +[rank0]: ...<3 lines>... +[rank0]: y=None, +[rank0]: ) # Returns [B, vocab_size, F', H', W'] +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1661, in forward +[rank0]: else self._run_ddp_forward(*inputs, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/parallel/distributed.py", line 1487, in _run_ddp_forward +[rank0]: return self.module(*inputs, **kwargs) # type: ignore[index] +[rank0]: ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 414, in __call__ +[rank0]: return super().__call__(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 819, in forward +[rank0]: return model_forward(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/utils/operations.py", line 807, in __call__ +[rank0]: return convert_to_fp32(self.model_forward(*args, **kwargs)) +[rank0]: ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast +[rank0]: return func(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 832, in compile_wrapper +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1189, in forward +[rank0]: out_list = torch.utils.checkpoint.checkpoint( +[rank0]: create_custom_forward(self.backbone), +[rank0]: ...<6 lines>... +[rank0]: **ckpt_kwargs, +[rank0]: ) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_compile.py", line 53, in inner +[rank0]: return disable_fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/_dynamo/eval_frame.py", line 1044, in _fn +[rank0]: return fn(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/checkpoint.py", line 503, in checkpoint +[rank0]: ret = function(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 1184, in custom_forward +[rank0]: return module(x=x_in, t=t_in, context=context_in, seq_len=seq_len_in, y=y_in, context_lens=context_lens_in) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl +[rank0]: return self._call_impl(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl +[rank0]: return forward_call(*args, **kwargs) +[rank0]: File "/mnt/Meissonic/src/transformer_video.py", line 852, in forward +[rank0]: seq_lens = torch.tensor([u.size(1) for u in x], dtype=torch.long, device=x.device) +[rank0]: ^^^^^^^^ +[rank0]: AttributeError: 'list' object has no attribute 'device' +Exception ignored in atexit callback : +Traceback (most recent call last): + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/data/dataloader.py", line 1648, in _clean_up_worker + w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/process.py", line 149, in join + res = self._popen.wait(timeout) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/popen_fork.py", line 41, in wait + if not wait([self.sentinel], timeout): + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/multiprocessing/connection.py", line 1148, in wait + ready = selector.select(timeout) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/selectors.py", line 398, in select + fd_event_list = self._selector.poll(timeout) +KeyboardInterrupt: diff --git a/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-core.log b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..0c806be9152b9a6e14bd66b30f3ceeb9f02a71b9 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:33:33.063032865Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmpvbs2zumc/port-838173.txt","pid":838173,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:33:33.063491111Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":838173} +{"time":"2025-12-29T09:33:33.063503274Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-838173-838480-620292740/socket","Net":"unix"}} +{"time":"2025-12-29T09:33:33.249800648Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:33:33.257263258Z","level":"INFO","msg":"handleInformInit: received","streamId":"4lgcq9jf","id":"1(@)"} +{"time":"2025-12-29T09:33:33.428012956Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4lgcq9jf","id":"1(@)"} +{"time":"2025-12-29T09:34:44.795889453Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:34:44.795957341Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:34:44.795959781Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:34:44.796029114Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:34:44.796081623Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-838173-838480-620292740/socket","Net":"unix"}} +{"time":"2025-12-29T09:34:45.362789001Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:34:45.362817247Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:34:45.362828104Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-internal.log b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7c6b5bc0f86924f0e3dbbbb43517640bfa8a3e96 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:33:33.257361663Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:33:33.427811791Z","level":"INFO","msg":"stream: created new stream","id":"4lgcq9jf"} +{"time":"2025-12-29T09:33:33.427887456Z","level":"INFO","msg":"handler: started","stream_id":"4lgcq9jf"} +{"time":"2025-12-29T09:33:33.428005727Z","level":"INFO","msg":"stream: started","id":"4lgcq9jf"} +{"time":"2025-12-29T09:33:33.428020488Z","level":"INFO","msg":"writer: started","stream_id":"4lgcq9jf"} +{"time":"2025-12-29T09:33:33.428027925Z","level":"INFO","msg":"sender: started","stream_id":"4lgcq9jf"} +{"time":"2025-12-29T09:34:44.795958991Z","level":"INFO","msg":"stream: closing","id":"4lgcq9jf"} +{"time":"2025-12-29T09:34:45.039653144Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:34:45.358231699Z","level":"INFO","msg":"handler: closed","stream_id":"4lgcq9jf"} +{"time":"2025-12-29T09:34:45.359034053Z","level":"INFO","msg":"sender: closed","stream_id":"4lgcq9jf"} +{"time":"2025-12-29T09:34:45.359045788Z","level":"INFO","msg":"stream: closed","id":"4lgcq9jf"} diff --git a/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug.log b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..22125ae6df32057ac9fe911b97db79e7e5041fe5 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:33:32,971 INFO MainThread:838173 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:33:32,971 INFO MainThread:838173 [wandb_setup.py:_flush():80] Configure stats pid to 838173 +2025-12-29 09:33:32,971 INFO MainThread:838173 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:33:32,971 INFO MainThread:838173 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug.log +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_093332-4lgcq9jf/logs/debug-internal.log +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:33:32,972 INFO MainThread:838173 [wandb_init.py:init():889] starting backend +2025-12-29 09:33:33,250 INFO MainThread:838173 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:33:33,255 INFO MainThread:838173 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:33:33,257 INFO MainThread:838173 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:33:33,263 INFO MainThread:838173 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:33:33,632 INFO MainThread:838173 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:33:33,751 INFO MainThread:838173 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:33:33,751 INFO MainThread:838173 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:33:33,751 INFO MainThread:838173 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:33:33,751 INFO MainThread:838173 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:33:33,754 INFO MainThread:838173 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:33:33,755 INFO MainThread:838173 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:34:44,795 INFO wandb-AsyncioManager-main:838173 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:34:44,796 INFO wandb-AsyncioManager-main:838173 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Meissonic/wandb/run-20251229_093500-yyrdgepk/files/output.log b/Meissonic/wandb/run-20251229_093500-yyrdgepk/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..55b3b0f6b1ee75594b9ee4d38716e0953ce08cb6 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093500-yyrdgepk/files/output.log @@ -0,0 +1,166 @@ +12/29/2025 09:35:01 - INFO - __main__ - Using precomputed video codes only - will encode text with UMT5-XXL at runtime +12/29/2025 09:35:01 - INFO - __main__ - Video tokenizer will be loaded only during validation/inference +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 90.54it/s] +You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 +12/29/2025 09:35:11 - INFO - __main__ - Loaded text encoder: google/umt5-xxl (d_model=4096) +12/29/2025 09:35:11 - INFO - __main__ - Loaded from metadata: codebook_size=64000, mask_token_id=64000 +12/29/2025 09:35:11 - INFO - __main__ - Minimal tokenizer created: mask_token_id=64000, codebook_size=64000 +12/29/2025 09:35:11 - INFO - __main__ - Getting compressed dimensions from precomputed features... +12/29/2025 09:35:12 - INFO - __main__ - Got dimensions from metadata: F'=5, H'=16, W'=16 +12/29/2025 09:35:12 - INFO - __main__ - Using actual text encoder dimension for umt5-xxl: 4096 +12/29/2025 09:35:12 - INFO - __main__ - Loading Wan config from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:35:12 - INFO - __main__ - Loaded Wan config: dim=1536, ffn_dim=8960, num_layers=30, num_heads=12 +12/29/2025 09:35:29 - INFO - __main__ - Loading Wan pretrained weights from: /mnt/Wan2.1-T2V-1.3B +12/29/2025 09:35:29 - INFO - __main__ - Loading weights from local path: /mnt/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors +12/29/2025 09:35:32 - INFO - __main__ - ✓ Successfully loaded Wan pretrained weights into backbone (excluding text_embedding) +12/29/2025 09:35:33 - INFO - __main__ - Enabled gradient checkpointing for text encoder to save memory +12/29/2025 09:35:33 - INFO - __main__ - Parameter counts: backbone=1,418,996,800, other=2,112,033, total=1,421,108,833 +12/29/2025 09:35:33 - INFO - __main__ - Wan backbone lr = 0.000600 (base_lr * 0.2) +12/29/2025 09:35:33 - INFO - __main__ - Other parts (token_embedding, logits_head) lr = 0.003000 +12/29/2025 09:35:33 - INFO - __main__ - Creating dataloaders and lr_scheduler +12/29/2025 09:35:33 - INFO - __main__ - Using pre-extracted video codes from: /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128 +12/29/2025 09:35:33 - INFO - __main__ - Text will be encoded with UMT5-XXL at runtime +12/29/2025 09:35:34 - INFO - train.dataset_utils - Loaded metadata from /mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128/metadata.json +12/29/2025 09:35:34 - INFO - train.dataset_utils - Total samples in metadata: 1019957 +12/29/2025 09:35:34 - INFO - train.dataset_utils - PrecomputedVideoOnlyDataset: 128000 samples available +12/29/2025 09:35:34 - INFO - train.dataset_utils - Index range: 0 to 127999 +12/29/2025 09:35:34 - INFO - __main__ - Using precomputed features - DataLoader settings: prefetch_factor=1, pin_memory=True +12/29/2025 09:35:34 - INFO - __main__ - Dataloader configuration: +12/29/2025 09:35:34 - INFO - __main__ - - num_workers: 8 (0 = single-threaded, recommended: 4-8 for video) +12/29/2025 09:35:34 - INFO - __main__ - - prefetch_factor: 2 +12/29/2025 09:35:34 - INFO - __main__ - - persistent_workers: True +12/29/2025 09:35:34 - INFO - __main__ - - pin_memory: True +12/29/2025 09:35:34 - INFO - __main__ - Preparing model, optimizer and dataloaders +12/29/2025 09:35:36 - INFO - __main__ - Text encoder prepared by accelerator for video-only precomputed mode +12/29/2025 09:35:36 - INFO - __main__ - Generated empty_embeds at runtime: shape=torch.Size([1, 512, 4096]), dtype=torch.float32 +12/29/2025 09:35:36 - INFO - __main__ - ***** Running training ***** +12/29/2025 09:35:36 - INFO - __main__ - Num training steps = 100000 +12/29/2025 09:35:36 - INFO - __main__ - Instantaneous batch size per device = 1 +12/29/2025 09:35:36 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 8 +12/29/2025 09:35:36 - INFO - __main__ - Gradient Accumulation steps = 1 +[DEBUG] video_tokens: shape=torch.Size([1, 5, 16, 16]), dtype=torch.int32, device=cuda:0 +[DEBUG] encoder_hidden_states: shape=torch.Size([1, 512, 4096]), dtype=torch.float32, device=cuda:0 +12/29/2025 09:35:50 - INFO - __main__ - Step: 10 Loss: 11.0644 LR: 0.000600 +12/29/2025 09:35:58 - INFO - __main__ - Step: 20 Loss: 11.0102 LR: 0.000600 +12/29/2025 09:36:05 - INFO - __main__ - Step: 30 Loss: 10.9001 LR: 0.000600 +12/29/2025 09:36:13 - INFO - __main__ - Step: 40 Loss: 10.7454 LR: 0.000600 +12/29/2025 09:36:22 - INFO - __main__ - Step: 50 Loss: 10.6203 LR: 0.000600 +12/29/2025 09:36:30 - INFO - __main__ - Step: 60 Loss: 10.5053 LR: 0.000600 +12/29/2025 09:36:37 - INFO - __main__ - Step: 70 Loss: 10.5456 LR: 0.000600 +12/29/2025 09:36:46 - INFO - __main__ - Step: 80 Loss: 10.6296 LR: 0.000600 +12/29/2025 09:36:54 - INFO - __main__ - Step: 90 Loss: 10.4796 LR: 0.000600 +12/29/2025 09:37:01 - INFO - __main__ - Step: 100 Loss: 10.4853 LR: 0.000600 +12/29/2025 09:37:01 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:37:01 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:37:02 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:37:02 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:07<00:00, 6.69it/s] +12/29/2025 09:37:11 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:37:11 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:37:19 - INFO - __main__ - Step: 110 Loss: 10.4851 LR: 0.000600 +12/29/2025 09:37:28 - INFO - __main__ - Step: 120 Loss: 10.4616 LR: 0.000600 +12/29/2025 09:37:35 - INFO - __main__ - Step: 130 Loss: 10.4942 LR: 0.000600 +12/29/2025 09:37:43 - INFO - __main__ - Step: 140 Loss: 10.5260 LR: 0.000600 +12/29/2025 09:37:51 - INFO - __main__ - Step: 150 Loss: 10.4625 LR: 0.000600 +12/29/2025 09:38:00 - INFO - __main__ - Step: 160 Loss: 10.4609 LR: 0.000600 +12/29/2025 09:38:08 - INFO - __main__ - Step: 170 Loss: 10.4898 LR: 0.000600 +12/29/2025 09:38:16 - INFO - __main__ - Step: 180 Loss: 10.4359 LR: 0.000600 +12/29/2025 09:38:24 - INFO - __main__ - Step: 190 Loss: 10.4036 LR: 0.000600 +12/29/2025 09:38:33 - INFO - __main__ - Step: 200 Loss: 10.4902 LR: 0.000600 +12/29/2025 09:38:33 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:38:33 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:38:34 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:38:34 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:09<00:00, 5.24it/s] +12/29/2025 09:38:44 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:38:44 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:38:52 - INFO - __main__ - Step: 210 Loss: 10.4971 LR: 0.000600 +12/29/2025 09:38:59 - INFO - __main__ - Step: 220 Loss: 10.4203 LR: 0.000600 +12/29/2025 09:39:08 - INFO - __main__ - Step: 230 Loss: 10.3090 LR: 0.000600 +12/29/2025 09:39:16 - INFO - __main__ - Step: 240 Loss: 10.4365 LR: 0.000600 +12/29/2025 09:39:24 - INFO - __main__ - Step: 250 Loss: 10.4716 LR: 0.000600 +12/29/2025 09:39:32 - INFO - __main__ - Step: 260 Loss: 10.3486 LR: 0.000600 +12/29/2025 09:39:40 - INFO - __main__ - Step: 270 Loss: 10.4206 LR: 0.000600 +12/29/2025 09:39:48 - INFO - __main__ - Step: 280 Loss: 10.4038 LR: 0.000600 +12/29/2025 09:39:55 - INFO - __main__ - Step: 290 Loss: 10.3498 LR: 0.000600 +12/29/2025 09:40:03 - INFO - __main__ - Step: 300 Loss: 10.4410 LR: 0.000600 +12/29/2025 09:40:03 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:40:03 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:40:03 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:40:03 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:07<00:00, 6.70it/s] +12/29/2025 09:40:12 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:40:12 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:40:21 - INFO - __main__ - Step: 310 Loss: 10.3740 LR: 0.000600 +12/29/2025 09:40:29 - INFO - __main__ - Step: 320 Loss: 10.3390 LR: 0.000600 +12/29/2025 09:40:36 - INFO - __main__ - Step: 330 Loss: 10.4014 LR: 0.000600 +12/29/2025 09:40:44 - INFO - __main__ - Step: 340 Loss: 10.3798 LR: 0.000600 +12/29/2025 09:40:53 - INFO - __main__ - Step: 350 Loss: 10.4827 LR: 0.000600 +12/29/2025 09:41:01 - INFO - __main__ - Step: 360 Loss: 10.4953 LR: 0.000600 +12/29/2025 09:41:08 - INFO - __main__ - Step: 370 Loss: 10.3940 LR: 0.000600 +12/29/2025 09:41:15 - INFO - __main__ - Step: 380 Loss: 10.4910 LR: 0.000600 +12/29/2025 09:41:24 - INFO - __main__ - Step: 390 Loss: 10.5649 LR: 0.000600 +12/29/2025 09:41:32 - INFO - __main__ - Step: 400 Loss: 10.6136 LR: 0.000600 +12/29/2025 09:41:32 - INFO - __main__ - Generating videos for validation... +12/29/2025 09:41:32 - INFO - __main__ - Reusing already loaded text encoder for validation... +12/29/2025 09:41:33 - INFO - __main__ - Text encoder and video tokenizer loaded for validation +12/29/2025 09:41:33 - INFO - __main__ - Generating videos for validation... +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [00:07<00:00, 6.76it/s] +12/29/2025 09:41:41 - INFO - __main__ - Validation videos saved to ./output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3 +12/29/2025 09:41:41 - INFO - __main__ - Cleaned up validation models (kept text_encoder for reuse) +12/29/2025 09:41:48 - INFO - __main__ - Step: 410 Loss: 10.6054 LR: 0.000600 +12/29/2025 09:41:56 - INFO - __main__ - Step: 420 Loss: 11.1269 LR: 0.000600 +Traceback (most recent call last): + File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in + main(parse_args()) + ~~~~^^^^^^^^^^^^^^ + File "/mnt/Meissonic/train/train_mei_video.py", line 1690, in main + optimizer.step() + ~~~~~~~~~~~~~~^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/optimizer.py", line 179, in step + self.optimizer.step(closure) + ~~~~~~~~~~~~~~~~~~~^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/optim/lr_scheduler.py", line 133, in wrapper + return func.__get__(opt, opt.__class__)(*args, **kwargs) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/optim/optimizer.py", line 517, in wrapper + out = func(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context + return func(*args, **kwargs) + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/bitsandbytes/optim/optimizer.py", line 292, in step + sync_gpu(p) + ~~~~~~~~^^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/bitsandbytes/utils.py", line 203, in sync_gpu + torch.cuda.synchronize() + ~~~~~~~~~~~~~~~~~~~~~~^^ + File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/cuda/__init__.py", line 1083, in synchronize + return torch._C._cuda_synchronize() + ~~~~~~~~~~~~~~~~~~~~~~~~~~^^ +KeyboardInterrupt +[rank0]: Traceback (most recent call last): +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1926, in +[rank0]: main(parse_args()) +[rank0]: ~~~~^^^^^^^^^^^^^^ +[rank0]: File "/mnt/Meissonic/train/train_mei_video.py", line 1690, in main +[rank0]: optimizer.step() +[rank0]: ~~~~~~~~~~~~~~^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/accelerate/optimizer.py", line 179, in step +[rank0]: self.optimizer.step(closure) +[rank0]: ~~~~~~~~~~~~~~~~~~~^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/optim/lr_scheduler.py", line 133, in wrapper +[rank0]: return func.__get__(opt, opt.__class__)(*args, **kwargs) +[rank0]: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/optim/optimizer.py", line 517, in wrapper +[rank0]: out = func(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context +[rank0]: return func(*args, **kwargs) +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/bitsandbytes/optim/optimizer.py", line 292, in step +[rank0]: sync_gpu(p) +[rank0]: ~~~~~~~~^^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/bitsandbytes/utils.py", line 203, in sync_gpu +[rank0]: torch.cuda.synchronize() +[rank0]: ~~~~~~~~~~~~~~~~~~~~~~^^ +[rank0]: File "/home/ubuntu/miniconda3/envs/mei-video/lib/python3.13/site-packages/torch/cuda/__init__.py", line 1083, in synchronize +[rank0]: return torch._C._cuda_synchronize() +[rank0]: ~~~~~~~~~~~~~~~~~~~~~~~~~~^^ +[rank0]: KeyboardInterrupt diff --git a/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-core.log b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..594a1141d28479a507d66e26f20dde5482fcf5e0 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-29T09:35:00.481975395Z","level":"INFO","msg":"main: starting server","port-filename":"/opt/dlami/nvme/tmp_user/tmp_02kheoc/port-843534.txt","pid":843534,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-29T09:35:00.482487685Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":843534} +{"time":"2025-12-29T09:35:00.482469218Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-843534-843827-2447796162/socket","Net":"unix"}} +{"time":"2025-12-29T09:35:00.667923434Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-29T09:35:00.67463222Z","level":"INFO","msg":"handleInformInit: received","streamId":"yyrdgepk","id":"1(@)"} +{"time":"2025-12-29T09:35:00.840996559Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"yyrdgepk","id":"1(@)"} +{"time":"2025-12-29T09:42:02.535819497Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-29T09:42:02.535930029Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-29T09:42:02.536178421Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-29T09:42:02.536077783Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-29T09:42:02.536275457Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/opt/dlami/nvme/tmp_user/wandb-843534-843827-2447796162/socket","Net":"unix"}} +{"time":"2025-12-29T09:42:02.861197441Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-29T09:42:02.861225314Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-29T09:42:02.861239121Z","level":"INFO","msg":"server is closed"} diff --git a/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..054e3b930dc46ca1ce02f918b36ec3aff363945e --- /dev/null +++ b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-29T09:35:00.674748488Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-29T09:35:00.840745763Z","level":"INFO","msg":"stream: created new stream","id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.840887309Z","level":"INFO","msg":"handler: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.840989877Z","level":"INFO","msg":"stream: started","id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.841004187Z","level":"INFO","msg":"writer: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:35:00.841006253Z","level":"INFO","msg":"sender: started","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.535940574Z","level":"INFO","msg":"stream: closing","id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.752587654Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-29T09:42:02.857589578Z","level":"INFO","msg":"handler: closed","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.857716241Z","level":"INFO","msg":"sender: closed","stream_id":"yyrdgepk"} +{"time":"2025-12-29T09:42:02.857727173Z","level":"INFO","msg":"stream: closed","id":"yyrdgepk"} diff --git a/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..13cc1ea77ef489a9526479f5f9151bb49a69eef3 --- /dev/null +++ b/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log @@ -0,0 +1,24 @@ +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Configure stats pid to 843534 +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from /mnt/Meissonic/wandb/settings +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug.log +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /mnt/Meissonic/wandb/run-20251229_093500-yyrdgepk/logs/debug-internal.log +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():841] calling init triggers +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'_wandb': {}} +2025-12-29 09:35:00,410 INFO MainThread:843534 [wandb_init.py:init():889] starting backend +2025-12-29 09:35:00,668 INFO MainThread:843534 [wandb_init.py:init():892] sending inform_init request +2025-12-29 09:35:00,673 INFO MainThread:843534 [wandb_init.py:init():900] backend started and connected +2025-12-29 09:35:00,674 INFO MainThread:843534 [wandb_init.py:init():970] updated telemetry +2025-12-29 09:35:00,678 INFO MainThread:843534 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-29 09:35:01,041 INFO MainThread:843534 [wandb_init.py:init():1041] starting run threads in backend +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_console_start():2521] atexit reg +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-29 09:35:01,126 INFO MainThread:843534 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-29 09:35:01,128 INFO MainThread:843534 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-29 09:35:01,130 INFO MainThread:843534 [wandb_run.py:_config_callback():1396] config_cb None None {'text_encoder_architecture': 'umt5-xxl', 'instance_dataset': 'OpenVid1MDataset', 'training_from_scratch': True, 'pretrained_model_name_or_path': 'dummy', 'revision': None, 'variant': None, 'instance_data_dataset': None, 'instance_data_dir': '/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv', 'instance_data_image': None, 'use_8bit_adam': True, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': 2, 'allow_tf32': True, 'use_ema': False, 'ema_decay': 0.9999, 'ema_update_after_step': 0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_weight_decay': 0.01, 'adam_epsilon': 1e-08, 'output_dir': './output_256x256_17f_2*4bs_4*8*8vqvae_0_2_ratio_lr3e-3', 'seed': 42, 'logging_dir': 'logs', 'max_train_steps': 100000, 'checkpointing_steps': 500, 'logging_steps': 10, 'checkpoints_total_limit': None, 'resume_from_checkpoint': None, 'train_batch_size': 1, 'gradient_accumulation_steps': 1, 'learning_rate': 0.003, 'scale_lr': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 0, 'validation_steps': 100, 'mixed_precision': 'bf16', 'report_to': 'wandb', 'validation_prompts': ['a cat playing', 'a girl walking', "The video features a man named David Schultz from Hamline University. He is dressed in a suit and tie, standing in front of a building with a tree in the background. The man appears to be speaking or presenting, as suggested by the context of the image. The style of the video is likely informative or educational, given the context of the man's attire and the setting. The video may be part of a news segment or a lecture series, as indicated by the man's professional appearance and the presence of a building that could be a university or academic institution.", "The video captures the interior of a car at a car show. The car features a striking orange and black color scheme, with the seats upholstered in orange leather and the door panels in black leather. The car's interior is well-lit, highlighting the details of the upholstery and the design of the door panels. The car is on display, with people walking around and observing it. The car show setting is bustling with activity, with other cars and people visible in the background. The video is a close-up shot of the car's interior, focusing on the details of the upholstery and the design of the door panels. The style of the video is realistic, capturing the car's interior in a clear and detailed manner."], 'resolution': 512, 'split_vae_encode': 1, 'min_masking_rate': 0.0, 'cond_dropout_prob': 0.0, 'max_grad_norm': 50.0, 'use_lora': False, 'text_encoder_use_lora': False, 'lora_r': 16, 'lora_alpha': 32, 'lora_target_modules': ['to_q', 'to_k', 'to_v'], 'text_encoder_lora_r': 16, 'text_encoder_lora_alpha': 32, 'text_encoder_lora_target_modules': ['to_q', 'to_k', 'to_v'], 'train_text_encoder': False, 'image_key': None, 'prompt_key': None, 'gradient_checkpointing': True, 'prompt_prefix': None, 'num_frames': 17, 'video_height': 128, 'video_width': 128, 'video_tokenizer_model_id': 'Cosmos-0.1-Tokenizer-DV4x8x8', 'wan_pretrained_path': '/mnt/Wan2.1-T2V-1.3B', 'freeze_wan_backbone': False, 'wan_backbone_lr_ratio': 0.2, 'use_precomputed_features': False, 'use_precomputed_video_only': True, 'features_dir': '/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128', 'empty_embeds_path': None} +2025-12-29 09:42:02,535 INFO wandb-AsyncioManager-main:843534 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-29 09:42:02,535 INFO wandb-AsyncioManager-main:843534 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth b/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth new file mode 100644 index 0000000000000000000000000000000000000000..5897fba405232a6b07a947d6188d19a8e050ccfb --- /dev/null +++ b/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38071ab59bd94681c686fa51d75a1968f64e470262043be31f7a094e442fd981 +size 507609880 diff --git a/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors b/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d41b39ee6498ce3b18c1d6b01dfde531a97d5047 --- /dev/null +++ b/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b6b242ca1c2f24e9d02cd6596066fab6d310e2d7538f33ae267cb18d957e8f +size 5676070424 diff --git a/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth b/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth new file mode 100644 index 0000000000000000000000000000000000000000..d5dad910304ab4b909a2c8a225a71840606e6de4 --- /dev/null +++ b/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cace0da2b446bbbbc57d031ab6cf163a3d59b366da94e5afe36745b746fd81d +size 11361920418