diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9f13f35c8bec428b22dc035cf5ea5ad3b9726954 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +wandb/run-20260209_084451-csy2m2pr/run-csy2m2pr.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoints/030000/pretrained_model/config.json b/checkpoints/030000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e151580a32cd2eb74c7381458e62fb506bf7d950 --- /dev/null +++ b/checkpoints/030000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/model.safetensors b/checkpoints/030000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cfa9f6db99c3d385f4b3d6860076a641f12354c --- /dev/null +++ b/checkpoints/030000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f91688a2ff0eb5c524271c8ae10992a04914bcca77075bc3c35e4493ceb284 +size 934390312 diff --git a/checkpoints/030000/pretrained_model/train_config.json b/checkpoints/030000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/030000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_param_groups.json b/checkpoints/030000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..489214594c844190846ceeb18394541edc953463 --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 7.990453104925807e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_state.safetensors b/checkpoints/030000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acf3dbc9faa7a603df91cba2b4da868559928b86 --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2288b662f76ead05007385d3d39e8505be98724f91821386dcf328c1721840 +size 1333741212 diff --git a/checkpoints/030000/training_state/rng_state.safetensors b/checkpoints/030000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..312b074790df85307cca17ef0cbc0ba4903a58e4 --- /dev/null +++ b/checkpoints/030000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be17f320562bccd54138b82f80538c13f80dc76f3843dc1eff13f5f6e7ea7a77 +size 15708 diff --git a/checkpoints/030000/training_state/scheduler_state.json b/checkpoints/030000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b2d1373d96f2d2f89d94dbbdc99917c29d17342a --- /dev/null +++ b/checkpoints/030000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 30000, + "_step_count": 30001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 7.990453104925807e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/training_step.json b/checkpoints/030000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..f4945f660f45b332883dccfccf18d8b8815d916a --- /dev/null +++ b/checkpoints/030000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 30000 +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/config.json b/checkpoints/040000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e151580a32cd2eb74c7381458e62fb506bf7d950 --- /dev/null +++ b/checkpoints/040000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/model.safetensors b/checkpoints/040000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4c3e58265c8d54d6656f5b76110d383f057b812 --- /dev/null +++ b/checkpoints/040000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b29d135d4b54bcba25a6f80c723924f2afc9ba6e12187f5d88245452b88540b +size 934390312 diff --git a/checkpoints/040000/pretrained_model/train_config.json b/checkpoints/040000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/040000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_param_groups.json b/checkpoints/040000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..02786f0fb770f41e233e55a7dc42b42d34824c5a --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 6.631457847577869e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_state.safetensors b/checkpoints/040000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f984529898ec65f1a4fcca8d4f8ab5cd2629027 --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ae446d6304d02f3a629b352c5aa2627167d065b87d84745aff47045ec1537b +size 1333741212 diff --git a/checkpoints/040000/training_state/rng_state.safetensors b/checkpoints/040000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23201236a608dd31a6c1d95c262c3175a0bbab10 --- /dev/null +++ b/checkpoints/040000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6487bf2426da7edc131a3d9fea3224ecb849d4291485f68a0b46c37faebbc6 +size 15708 diff --git a/checkpoints/040000/training_state/scheduler_state.json b/checkpoints/040000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1de247450c27c8307261d405ab1764634cd737fa --- /dev/null +++ b/checkpoints/040000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 40000, + "_step_count": 40001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.631457847577869e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/training_step.json b/checkpoints/040000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..fe40d8ff4312c7e5a8fba9bcc932a43a1384ba77 --- /dev/null +++ b/checkpoints/040000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 40000 +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/config.json b/checkpoints/050000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e151580a32cd2eb74c7381458e62fb506bf7d950 --- /dev/null +++ b/checkpoints/050000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/model.safetensors b/checkpoints/050000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bf2bab6aa30054294c236bd34488f7f17c3bf46 --- /dev/null +++ b/checkpoints/050000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2814b340ce751353a290944bbcad5d698d8a5c75ef2a21f2cabeade7a4832fa4 +size 934390312 diff --git a/checkpoints/050000/pretrained_model/train_config.json b/checkpoints/050000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/050000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/050000/training_state/optimizer_param_groups.json b/checkpoints/050000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..0466b49b36c241f5c83dd0f4a67bd29b19e66897 --- /dev/null +++ b/checkpoints/050000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 5.125e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/050000/training_state/optimizer_state.safetensors b/checkpoints/050000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..130835116ceed820c08caa3d227a294bbd90461d --- /dev/null +++ b/checkpoints/050000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed1cac5afe1a074deda41156828ab2feffea6a08a03ce7a83daeadecec06e29 +size 1333741212 diff --git a/checkpoints/050000/training_state/rng_state.safetensors b/checkpoints/050000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f205dc711a41bfda03c73164f2554e94941b32a0 --- /dev/null +++ b/checkpoints/050000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25533ed902cf4bf1581fbde3cc9df950eb5505d4e55fe12b1768c6fd0cd4675f +size 15708 diff --git a/checkpoints/050000/training_state/scheduler_state.json b/checkpoints/050000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..18ffc7bd95d4dc7d457046e09b44381ff499e308 --- /dev/null +++ b/checkpoints/050000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 50000, + "_step_count": 50001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 5.125e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/050000/training_state/training_step.json b/checkpoints/050000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..b3c03563f17d6d98552117ccf00705b693aa3fbd --- /dev/null +++ b/checkpoints/050000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 50000 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/config.json b/checkpoints/060000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ecd77243a17bd23115755ce8c6948edb61cb8a9 --- /dev/null +++ b/checkpoints/060000/pretrained_model/config.json @@ -0,0 +1,81 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/model.safetensors b/checkpoints/060000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..443926de247d621f8ab9d5e5e6d02221332d0353 --- /dev/null +++ b/checkpoints/060000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd6b94a71f1b3793b6cc21242b198da46937c7e7a276736d252b8597eba0fa5 +size 934390312 diff --git a/checkpoints/060000/pretrained_model/train_config.json b/checkpoints/060000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/060000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model_migrated/README.md b/checkpoints/060000/pretrained_model_migrated/README.md new file mode 100644 index 0000000000000000000000000000000000000000..54cbfa08202b78e1a84c19dc62559279173de9ce --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/README.md @@ -0,0 +1,63 @@ +--- +base_model: lerobot/smolvla_base +datasets: unknown +library_name: lerobot +license: apache-2.0 +model_name: smolvla +pipeline_tag: robotics +tags: +- smolvla +- robotics +- lerobot +--- + +# Model Card for smolvla + + + + +[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware. + + +This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot). +See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index). + +--- + +## How to Get Started with the Model + +For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy). +Below is the short version on how to train and run inference/eval: + +### Train from scratch + +```bash +lerobot-train \ + --dataset.repo_id=${HF_USER}/ \ + --policy.type=act \ + --output_dir=outputs/train/ \ + --job_name=lerobot_training \ + --policy.device=cuda \ + --policy.repo_id=${HF_USER}/ + --wandb.enable=true +``` + +_Writes checkpoints to `outputs/train//checkpoints/`._ + +### Evaluate the policy/run inference + +```bash +lerobot-record \ + --robot.type=so100_follower \ + --dataset.repo_id=/eval_ \ + --policy.path=/ \ + --episodes=10 +``` + +Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint. + +--- + +## Model Details + +- **License:** apache-2.0 \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model_migrated/config.json b/checkpoints/060000/pretrained_model_migrated/config.json new file mode 100644 index 0000000000000000000000000000000000000000..966a009b081a47a01e2467c52d7a14c79e0cdf73 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/config.json @@ -0,0 +1,88 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "chunk_size": 50, + "n_action_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model_migrated/model.safetensors b/checkpoints/060000/pretrained_model_migrated/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..666e121268a8c37b0a202477cdf427682cbe8fe5 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e2268f9f67327eeabe9fdf407cf2b7d07d2a96cc169200311dc06b0306ba57 +size 1800257992 diff --git a/checkpoints/060000/pretrained_model_migrated/policy_postprocessor.json b/checkpoints/060000/pretrained_model_migrated/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..69ad283b2943245f657419699542552c7adba027 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/060000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/060000/pretrained_model_migrated/policy_preprocessor.json b/checkpoints/060000/pretrained_model_migrated/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..324afbc484cec86ce5f75225ecb8ad145e0d9047 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/policy_preprocessor.json @@ -0,0 +1,77 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/060000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/060000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/060000/training_state/optimizer_param_groups.json b/checkpoints/060000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..be364e05768f1d1b06094fff1fc0025ae813601b --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 3.618542152422132e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/060000/training_state/optimizer_state.safetensors b/checkpoints/060000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dab5f5289d5dfaa5139c0b03a9397fc7064f3b3a --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2574e3763c77d9f80c4dd0b46fe9afef3c16091606e4e3dcb1ba3a70ec562d +size 1333741212 diff --git a/checkpoints/060000/training_state/rng_state.safetensors b/checkpoints/060000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a7dfd423604a218ef7b620558e5b6e828981fce --- /dev/null +++ b/checkpoints/060000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60372e80cab5665fd3f1cb8517c951204775532cde1f0bda1a616e76bac27cdf +size 15708 diff --git a/checkpoints/060000/training_state/scheduler_state.json b/checkpoints/060000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e149eff8571711b2560219cbdc3498f5d6a18f4 --- /dev/null +++ b/checkpoints/060000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 60000, + "_step_count": 60001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.618542152422132e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/060000/training_state/training_step.json b/checkpoints/060000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d360b6037f1c8c2c85d38e951160eb02ace507dd --- /dev/null +++ b/checkpoints/060000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 60000 +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/config.json b/checkpoints/070000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e151580a32cd2eb74c7381458e62fb506bf7d950 --- /dev/null +++ b/checkpoints/070000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/model.safetensors b/checkpoints/070000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9e868f94d4fdbe3c79605f018ef2227b938178c --- /dev/null +++ b/checkpoints/070000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37cb00007cf6ac091eff0dc4c1ce71f4f291a5eca19b5b21fec4323c507fd2d +size 934390312 diff --git a/checkpoints/070000/pretrained_model/train_config.json b/checkpoints/070000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/070000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/070000/training_state/optimizer_param_groups.json b/checkpoints/070000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..3159ff53404dbfa70d7d8a2e426047bbbe35f166 --- /dev/null +++ b/checkpoints/070000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 2.259546895074194e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/070000/training_state/optimizer_state.safetensors b/checkpoints/070000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf406cd71e9490e411439576702485dc15b59ef7 --- /dev/null +++ b/checkpoints/070000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:195fb0a22415dc179470b7cad1b4e4790862cf49530bc56ccc9242fb9b0c8b04 +size 1333741212 diff --git a/checkpoints/070000/training_state/rng_state.safetensors b/checkpoints/070000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95cf6e737930062aba7d99dec42c832318ad97df --- /dev/null +++ b/checkpoints/070000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db075e66e8192ed5ce15fd42d6cd690707219740c3dad7980a57ca2e2975fa9 +size 15708 diff --git a/checkpoints/070000/training_state/scheduler_state.json b/checkpoints/070000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..de2fa20e85c657c75e18ee898062c2762b482ad0 --- /dev/null +++ b/checkpoints/070000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 70000, + "_step_count": 70001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.259546895074194e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/070000/training_state/training_step.json b/checkpoints/070000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0c1eb0f525e34fa3a57743f750b2b34b5b77b --- /dev/null +++ b/checkpoints/070000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 70000 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/config.json b/checkpoints/080000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ecd77243a17bd23115755ce8c6948edb61cb8a9 --- /dev/null +++ b/checkpoints/080000/pretrained_model/config.json @@ -0,0 +1,81 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/model.safetensors b/checkpoints/080000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..987bd84c81fe27830d804a44dd00bb9baf6d7108 --- /dev/null +++ b/checkpoints/080000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81eea4a01f04ff2aaa08e33a891802d496ddb5e281b3cdf4873dfd3368cd3e3f +size 934390312 diff --git a/checkpoints/080000/pretrained_model/train_config.json b/checkpoints/080000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/080000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model_migrated/README.md b/checkpoints/080000/pretrained_model_migrated/README.md new file mode 100644 index 0000000000000000000000000000000000000000..54cbfa08202b78e1a84c19dc62559279173de9ce --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/README.md @@ -0,0 +1,63 @@ +--- +base_model: lerobot/smolvla_base +datasets: unknown +library_name: lerobot +license: apache-2.0 +model_name: smolvla +pipeline_tag: robotics +tags: +- smolvla +- robotics +- lerobot +--- + +# Model Card for smolvla + + + + +[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware. + + +This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot). +See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index). + +--- + +## How to Get Started with the Model + +For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy). +Below is the short version on how to train and run inference/eval: + +### Train from scratch + +```bash +lerobot-train \ + --dataset.repo_id=${HF_USER}/ \ + --policy.type=act \ + --output_dir=outputs/train/ \ + --job_name=lerobot_training \ + --policy.device=cuda \ + --policy.repo_id=${HF_USER}/ + --wandb.enable=true +``` + +_Writes checkpoints to `outputs/train//checkpoints/`._ + +### Evaluate the policy/run inference + +```bash +lerobot-record \ + --robot.type=so100_follower \ + --dataset.repo_id=/eval_ \ + --policy.path=/ \ + --episodes=10 +``` + +Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint. + +--- + +## Model Details + +- **License:** apache-2.0 \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model_migrated/config.json b/checkpoints/080000/pretrained_model_migrated/config.json new file mode 100644 index 0000000000000000000000000000000000000000..966a009b081a47a01e2467c52d7a14c79e0cdf73 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/config.json @@ -0,0 +1,88 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "chunk_size": 50, + "n_action_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model_migrated/model.safetensors b/checkpoints/080000/pretrained_model_migrated/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5aaabe8add1976a9f2cce2ed9c8ed5fdea637e62 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa092fd1ef17b09cc0136d41d7a42a9ef490dba1a3ba3d24d12bd142c70d030 +size 1800257992 diff --git a/checkpoints/080000/pretrained_model_migrated/policy_postprocessor.json b/checkpoints/080000/pretrained_model_migrated/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..69ad283b2943245f657419699542552c7adba027 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/080000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/080000/pretrained_model_migrated/policy_preprocessor.json b/checkpoints/080000/pretrained_model_migrated/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..324afbc484cec86ce5f75225ecb8ad145e0d9047 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/policy_preprocessor.json @@ -0,0 +1,77 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/080000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/080000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/080000/training_state/optimizer_param_groups.json b/checkpoints/080000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..ca0ca01fe5442577f3c6976a7fc854f95bca0c94 --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 1.1810421524221319e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/080000/training_state/optimizer_state.safetensors b/checkpoints/080000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7676b6f2a4217c476d475ecebabb153e81416b32 --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6295f3cafa7187252ee4c94e24622aa4e4bfd822f2294e3c036ea0d9550c9f +size 1333741212 diff --git a/checkpoints/080000/training_state/rng_state.safetensors b/checkpoints/080000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ae54682d12a8abbf93a2929b7984bab390a5c08 --- /dev/null +++ b/checkpoints/080000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40cca97a03de4aae8bf15c12a5997dd787d622fdd158ec16cada332e3835144 +size 15708 diff --git a/checkpoints/080000/training_state/scheduler_state.json b/checkpoints/080000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c8f051978161c44df0ad9975cb5fddd905a858d --- /dev/null +++ b/checkpoints/080000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 80000, + "_step_count": 80001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.1810421524221319e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/080000/training_state/training_step.json b/checkpoints/080000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..384eace4ecc2a6cba352aa7cf27f04405b7319c3 --- /dev/null +++ b/checkpoints/080000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 80000 +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/config.json b/checkpoints/090000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e151580a32cd2eb74c7381458e62fb506bf7d950 --- /dev/null +++ b/checkpoints/090000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/model.safetensors b/checkpoints/090000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1d45e3515ad6437beda1ae79fdb10b2596cef7a --- /dev/null +++ b/checkpoints/090000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88088159ad176ee3ef29b1b4755e84872e948629dabb3d5f805b7fb9359577a5 +size 934390312 diff --git a/checkpoints/090000/pretrained_model/train_config.json b/checkpoints/090000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/090000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/090000/training_state/optimizer_param_groups.json b/checkpoints/090000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..0480fb4c7e26c72a1873247b353d5bed01f3c286 --- /dev/null +++ b/checkpoints/090000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 4.885994830611265e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/090000/training_state/optimizer_state.safetensors b/checkpoints/090000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfa1f0206e9bb6afeee75dca1b7c7fb7ff413589 --- /dev/null +++ b/checkpoints/090000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82942a185cf9dabfbb66f2e16df7f93aeab8c4aa8c2676995920fb2959a9e537 +size 1333741212 diff --git a/checkpoints/090000/training_state/rng_state.safetensors b/checkpoints/090000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7fb7985214554b9a327074ffcac5ac703c5af0e --- /dev/null +++ b/checkpoints/090000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e2b3af2550a3c9011a618a5a227a8b26561a359f61a262cd90fb5455879dec +size 15708 diff --git a/checkpoints/090000/training_state/scheduler_state.json b/checkpoints/090000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3bb69192bd52a057a3f4b80832a3522066f5275c --- /dev/null +++ b/checkpoints/090000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 90000, + "_step_count": 90001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.885994830611265e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/090000/training_state/training_step.json b/checkpoints/090000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..cec965b83b7f817ac99559792f84cbb69a721bfd --- /dev/null +++ b/checkpoints/090000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 90000 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/config.json b/checkpoints/100000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ecd77243a17bd23115755ce8c6948edb61cb8a9 --- /dev/null +++ b/checkpoints/100000/pretrained_model/config.json @@ -0,0 +1,81 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/model.safetensors b/checkpoints/100000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26d161d22e6600a53f5c0ca9bfb1590e6c032b4f --- /dev/null +++ b/checkpoints/100000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c62592ccd91bcdbc0981428aeb04fa74251cdd0071044bd756b8af1fe7255e +size 934390312 diff --git a/checkpoints/100000/pretrained_model/train_config.json b/checkpoints/100000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8844c9616aeeda20c2dfec96c7d1c68624a9ce --- /dev/null +++ b/checkpoints/100000/pretrained_model/train_config.json @@ -0,0 +1,276 @@ +{ + "dataset": { + "repo_id": ".", + "root": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": false, + "image_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "crop_resize": { + "weight": 1.0, + "type": "RandomResizedCrop", + "kwargs": { + "size": [ + 256, + 256 + ], + "ratio": [ + 1, + 1 + ], + "scale": [ + 0.9, + 0.95 + ] + } + }, + "rotate": { + "weight": 1.0, + "type": "RandomRotate", + "kwargs": { + "degrees": [ + -5, + 5 + ] + } + } + }, + "wrist_tfs": { + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "vqa_data_path": null + }, + "env": null, + "policy": { + "type": "litevla-ms", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "ENV", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "gradient_accumulation_steps": 1, + "chunk_size": 50, + "n_action_steps": 1, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "of_path": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5" + }, + "output_dir": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "job_name": "MW_100%_scratch_litevla-ms_lastlayer", + "resume": false, + "seed": 42, + "num_workers": 8, + "batch_size": 64, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 100000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "LiteVLA-MS", + "entity": "Robotics_VLA", + "notes": null, + "run_id": null, + "mode": "online" + } +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model_migrated/README.md b/checkpoints/100000/pretrained_model_migrated/README.md new file mode 100644 index 0000000000000000000000000000000000000000..54cbfa08202b78e1a84c19dc62559279173de9ce --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/README.md @@ -0,0 +1,63 @@ +--- +base_model: lerobot/smolvla_base +datasets: unknown +library_name: lerobot +license: apache-2.0 +model_name: smolvla +pipeline_tag: robotics +tags: +- smolvla +- robotics +- lerobot +--- + +# Model Card for smolvla + + + + +[SmolVLA](https://huggingface.co/papers/2506.01844) is a compact, efficient vision-language-action model that achieves competitive performance at reduced computational costs and can be deployed on consumer-grade hardware. + + +This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot). +See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index). + +--- + +## How to Get Started with the Model + +For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy). +Below is the short version on how to train and run inference/eval: + +### Train from scratch + +```bash +lerobot-train \ + --dataset.repo_id=${HF_USER}/ \ + --policy.type=act \ + --output_dir=outputs/train/ \ + --job_name=lerobot_training \ + --policy.device=cuda \ + --policy.repo_id=${HF_USER}/ + --wandb.enable=true +``` + +_Writes checkpoints to `outputs/train//checkpoints/`._ + +### Evaluate the policy/run inference + +```bash +lerobot-record \ + --robot.type=so100_follower \ + --dataset.repo_id=/eval_ \ + --policy.path=/ \ + --episodes=10 +``` + +Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint. + +--- + +## Model Details + +- **License:** apache-2.0 \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model_migrated/config.json b/checkpoints/100000/pretrained_model_migrated/config.json new file mode 100644 index 0000000000000000000000000000000000000000..966a009b081a47a01e2467c52d7a14c79e0cdf73 --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/config.json @@ -0,0 +1,88 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "chunk_size": 50, + "n_action_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": false, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 100000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model_migrated/model.safetensors b/checkpoints/100000/pretrained_model_migrated/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..190758e30467a97fd80e2ed9a7d87adccd3a7c7f --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8ff32d3b8408dd081520a9e33131e2f9740c0546869c2d187d8fa915e97c98 +size 1800257992 diff --git a/checkpoints/100000/pretrained_model_migrated/policy_postprocessor.json b/checkpoints/100000/pretrained_model_migrated/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..69ad283b2943245f657419699542552c7adba027 --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/100000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/100000/pretrained_model_migrated/policy_preprocessor.json b/checkpoints/100000/pretrained_model_migrated/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..324afbc484cec86ce5f75225ecb8ad145e0d9047 --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/policy_preprocessor.json @@ -0,0 +1,77 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "observation.environment_state": { + "type": "STATE", + "shape": [ + 39 + ] + }, + "observation.image": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 480 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 4 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/100000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af29fccbf83e510a5d05092d18f077415e699e46 --- /dev/null +++ b/checkpoints/100000/pretrained_model_migrated/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43908d0b2a031735826c3fdd8ae685421f8880c66cd1c66c4fd71bd71be68f18 +size 352 diff --git a/checkpoints/100000/training_state/optimizer_param_groups.json b/checkpoints/100000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..dca6dfcd4e1acab0b8681a81f6b0d8d24cf8e71c --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_param_groups.json @@ -0,0 +1,529 @@ +[ + { + "lr": 2.5e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507 + ] + } +] \ No newline at end of file diff --git a/checkpoints/100000/training_state/optimizer_state.safetensors b/checkpoints/100000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f0f3a5e0567b9782954d517747d0b6b7846522a --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008ecb634fda164b8fc142593bca611990b8ef90a63fd1745aff8506f4800791 +size 1333741212 diff --git a/checkpoints/100000/training_state/rng_state.safetensors b/checkpoints/100000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93215ec4ebac2ee85bd20cc9f70a69f3ccf1f19c --- /dev/null +++ b/checkpoints/100000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b97e9966b300d69683a71b690a32f35acca5e808fbed2799ba0d4113f1870a70 +size 15708 diff --git a/checkpoints/100000/training_state/scheduler_state.json b/checkpoints/100000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d2f19c8ed30aeec91eab5ed595f2d77ec13c6a5f --- /dev/null +++ b/checkpoints/100000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 100000, + "_step_count": 100001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.5e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/100000/training_state/training_step.json b/checkpoints/100000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb73c13d28bca88058c08796abbf931c3f9b012 --- /dev/null +++ b/checkpoints/100000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 100000 +} \ No newline at end of file diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c19e51d64cbe029380ead3c2128dbe5778fd4d27 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-02-09T08:44:51.980035036Z","level":"INFO","msg":"stream: starting","core version":"0.24.2"} +{"time":"2026-02-09T08:44:52.250046733Z","level":"INFO","msg":"stream: created new stream","id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.250176074Z","level":"INFO","msg":"handler: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251106958Z","level":"INFO","msg":"stream: started","id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251174203Z","level":"INFO","msg":"sender: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251190143Z","level":"INFO","msg":"writer: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T18:47:07.815456874Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/Robotics_VLA/LiteVLA-MS/csy2m2pr/file_stream","body":"\n\n\n502 Server Error\n\n\n

Error: Server Error

\n

The server encountered a temporary error and could not complete your request.

Please try again in 30 seconds.

\n

\n\n"} +{"time":"2026-02-10T03:09:33.945729023Z","level":"INFO","msg":"stream: closing","id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.622097575Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-02-10T03:09:34.809342627Z","level":"INFO","msg":"handler: closed","stream_id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.811271683Z","level":"INFO","msg":"sender: closed","stream_id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.811285308Z","level":"INFO","msg":"stream: closed","id":"csy2m2pr"} diff --git a/wandb/debug.log b/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c0c6b26e336a0298a0d035a2b4cf3fdb48ea941a --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1,21 @@ +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Current SDK version is 0.24.2 +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Configure stats pid to 138391 +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer/wandb/run-20260209_084451-csy2m2pr/logs/debug.log +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer/wandb/run-20260209_084451-csy2m2pr/logs/debug-internal.log +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:init():844] calling init triggers +2026-02-09 08:44:51,658 INFO MainThread:138391 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': '.', 'root': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 3, 'random_order': False, 'image_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'crop_resize': {'weight': 1.0, 'type': 'RandomResizedCrop', 'kwargs': {'size': [256, 256], 'ratio': [1, 1], 'scale': [0.9, 0.95]}}, 'rotate': {'weight': 1.0, 'type': 'RandomRotate', 'kwargs': {'degrees': [-5, 5]}}}, 'wrist_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'vqa_data_path': None}, 'env': None, 'policy': {'type': 'litevla-ms', 'n_obs_steps': 1, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {}, 'output_features': {}, 'device': 'cuda', 'use_amp': False, 'gradient_accumulation_steps': 1, 'chunk_size': 50, 'n_action_steps': 1, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': False, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 100000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'of_path': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5'}, 'output_dir': 'outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer', 'job_name': 'MW_100%_scratch_litevla-ms_lastlayer', 'resume': False, 'seed': 42, 'num_workers': 8, 'batch_size': 64, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 10000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 100000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'LiteVLA-MS', 'entity': 'Robotics_VLA', 'notes': None, 'run_id': None, 'mode': 'online'}, '_wandb': {}} +2026-02-09 08:44:51,658 INFO MainThread:138391 [wandb_init.py:init():892] starting backend +2026-02-09 08:44:51,963 INFO MainThread:138391 [wandb_init.py:init():895] sending inform_init request +2026-02-09 08:44:51,976 INFO MainThread:138391 [wandb_init.py:init():903] backend started and connected +2026-02-09 08:44:51,978 INFO MainThread:138391 [wandb_init.py:init():973] updated telemetry +2026-02-09 08:44:51,985 INFO MainThread:138391 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-09 08:44:52,532 INFO MainThread:138391 [wandb_init.py:init():1042] starting run threads in backend +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_console_start():2529] atexit reg +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-09 08:44:52,685 INFO MainThread:138391 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-10 03:09:33,944 INFO wandb-AsyncioManager-main:138391 [service_client.py:_forward_responses():94] Reached EOF. +2026-02-10 03:09:33,945 INFO wandb-AsyncioManager-main:138391 [mailbox.py:close():154] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260209_084451-csy2m2pr/files/config.yaml b/wandb/run-20260209_084451-csy2m2pr/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c627e596b17b3e9c85091007aebd2dc1cc96a3d --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/files/config.yaml @@ -0,0 +1,302 @@ +_wandb: + value: + cli_version: 0.24.2 + e: + pqkhfzolauly8lhncay8a0oavkuhy83f: + args: + - --policy.type=litevla-ms + - --policy.vlm_model_name=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct + - --policy.of_path=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5 + - --dataset.root=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50 + - --output_dir=outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer + - --job_name=MW_100%_scratch_litevla-ms_lastlayer + - --config_path=configs/default.json + - --batch_size=64 + - --wandb.mode=online + codePath: lerobot/scripts/train_accelerate.py + codePathLocal: lerobot/scripts/train_accelerate.py + cpu_count: 32 + cpu_count_logical: 32 + cudaVersion: "12.5" + disk: + /: + total: "10737418240" + used: "78622720" + email: ducido.w@gmail.com + executable: /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/LiteVLA-MS/.venv/bin/python3 + git: + commit: a7e835bc70780ed105414dd3bb743c62137f6a8b + remote: https://github.com/ducido/LiteVLA-MS.git + gpu: NVIDIA A100-SXM4-80GB + gpu_count: 4 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-a6489813-5309-3432-09e4-c663845f9cef + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-fc32315d-9871-1ebf-235f-c6f211db2e03 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-2f913744-29a0-03ae-59d3-c19fd3d11eb6 + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-23e52388-87b5-c416-3205-eae06c6942f2 + host: aed594a1dd20 + memory: + total: "2163816673280" + os: Linux-4.18.0-513.5.1.el8_9.x86_64-x86_64-with-glibc2.35 + program: /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/LiteVLA-MS/lerobot/scripts/train_accelerate.py + python: CPython 3.11.14 + root: outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer + startedAt: "2026-02-09T08:44:51.654399Z" + writerId: pqkhfzolauly8lhncay8a0oavkuhy83f + m: [] + python_version: 3.11.14 + t: + "1": + - 1 + - 41 + - 49 + - 51 + - 71 + "2": + - 1 + - 11 + - 41 + - 49 + - 51 + - 63 + - 71 + - 74 + "3": + - 13 + - 15 + - 16 + - 61 + "4": 3.11.14 + "5": 0.24.2 + "8": + - 2 + "12": 0.24.2 + "13": linux-x86_64 +batch_size: + value: 64 +dataset: + value: + episodes: null + image_transforms: + enable: true + image_tfs: + brightness: + kwargs: + brightness: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + contrast: + kwargs: + contrast: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + crop_resize: + kwargs: + ratio: + - 1 + - 1 + scale: + - 0.9 + - 0.95 + size: + - 256 + - 256 + type: RandomResizedCrop + weight: 1 + hue: + kwargs: + hue: + - -0.05 + - 0.05 + type: ColorJitter + weight: 1 + rotate: + kwargs: + degrees: + - -5 + - 5 + type: RandomRotate + weight: 1 + saturation: + kwargs: + saturation: + - 0.5 + - 1.5 + type: ColorJitter + weight: 1 + sharpness: + kwargs: + sharpness: + - 0.5 + - 1.5 + type: SharpnessJitter + weight: 1 + max_num_transforms: 3 + random_order: false + wrist_tfs: + brightness: + kwargs: + brightness: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + contrast: + kwargs: + contrast: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + hue: + kwargs: + hue: + - -0.05 + - 0.05 + type: ColorJitter + weight: 1 + saturation: + kwargs: + saturation: + - 0.5 + - 1.5 + type: ColorJitter + weight: 1 + sharpness: + kwargs: + sharpness: + - 0.5 + - 1.5 + type: SharpnessJitter + weight: 1 + repo_id: . + revision: null + root: /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50 + use_imagenet_stats: true + video_backend: torchcodec + vqa_data_path: null +env: + value: null +eval: + value: + batch_size: 50 + n_episodes: 50 + use_async_envs: false +eval_freq: + value: 20000 +job_name: + value: MW_100%_scratch_litevla-ms_lastlayer +log_freq: + value: 200 +num_workers: + value: 8 +optimizer: + value: + betas: + - 0.9 + - 0.95 + eps: 1e-08 + grad_clip_norm: 10 + lr: 0.0001 + type: adamw + weight_decay: 1e-10 +output_dir: + value: outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer +policy: + value: + adapt_to_pi_aloha: false + add_image_special_tokens: false + attention_mode: cross_attn + chunk_size: 50 + device: cuda + empty_cameras: 0 + expert_width_multiplier: 0.75 + freeze_vision_encoder: true + gradient_accumulation_steps: 1 + load_vlm_weights: true + max_action_dim: 32 + max_period: 4 + max_state_dim: 32 + min_period: 0.004 + n_action_steps: 1 + n_obs_steps: 1 + normalization_mapping: + ACTION: MEAN_STD + STATE: MEAN_STD + VISUAL: IDENTITY + num_expert_layers: 0 + num_steps: 10 + num_vlm_layers: 16 + of_path: /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5 + optimizer_betas: + - 0.9 + - 0.95 + optimizer_eps: 1e-08 + optimizer_grad_clip_norm: 10 + optimizer_lr: 0.0001 + optimizer_weight_decay: 1e-10 + pad_language_to: max_length + prefix_length: 0 + resize_imgs_with_padding: + - 512 + - 512 + scheduler_decay_lr: 2.5e-06 + scheduler_decay_steps: 100000 + scheduler_warmup_steps: 1000 + self_attn_every_n_layers: 2 + tokenizer_max_length: 48 + train_expert_only: false + train_state_proj: true + type: litevla-ms + use_amp: false + use_cache: true + use_delta_joint_actions_aloha: false + vlm_model_name: /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct +resume: + value: false +save_checkpoint: + value: true +save_freq: + value: 10000 +scheduler: + value: + decay_lr: 2.5e-06 + num_decay_steps: 100000 + num_warmup_steps: 1000 + peak_lr: 0.0001 + type: cosine_decay_with_warmup +seed: + value: 42 +steps: + value: 100000 +use_policy_training_preset: + value: true +wandb: + value: + disable_artifact: true + enable: true + entity: Robotics_VLA + mode: online + notes: null + project: LiteVLA-MS + run_id: null diff --git a/wandb/run-20260209_084451-csy2m2pr/files/output.log b/wandb/run-20260209_084451-csy2m2pr/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9c996a0af75a68979e31bd6cd0be1ec9fe4a9aae --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/files/output.log @@ -0,0 +1,532 @@ +INFO 2026-02-09 08:44:52 celerate.py:161 Creating dataset +Resolving data files: 100%|██████████████████████████████████████████████████████| 2500/2500 [00:00<00:00, 17722.67it/s] +Loading dataset shards: 100%|███████████████████████████████████████████████████████████| 91/91 [00:01<00:00, 74.79it/s] +INFO 2026-02-09 08:45:00 celerate.py:172 Creating policy +load fresh policy +Loading /pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct weights ... +`torch_dtype` is deprecated! Use `dtype` instead! +Loading weights: 100%|█| 489/489 [00:01<00:00, 298.79it/s, Materializing param=model.vision_model.post_layernorm.weight] +Load pretrained VLM successfully! +Reducing the number of VLM layers to 16 ... +INFO 2026-02-09 08:45:09 celerate.py:183 Creating optimizer and scheduler +INFO 2026-02-09 08:45:09 celerate.py:223 Output dir: outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer +INFO 2026-02-09 08:45:09 celerate.py:226 cfg.steps=100000 (100K) +INFO 2026-02-09 08:45:09 celerate.py:227 dataset.num_frames=204806 (205K) +INFO 2026-02-09 08:45:09 celerate.py:228 dataset.num_episodes=2500 +INFO 2026-02-09 08:45:09 celerate.py:229 num_learnable_params=323223552 (323M) +INFO 2026-02-09 08:45:09 celerate.py:230 num_total_params=456965400 (457M) +INFO 2026-02-09 08:45:09 celerate.py:231 Number of processes: 1 +INFO 2026-02-09 08:45:09 celerate.py:232 Device: cuda +INFO 2026-02-09 08:45:09 celerate.py:233 Mixed precision: no +INFO 2026-02-09 08:45:09 celerate.py:255 Start offline training on a fixed dataset +INFO 2026-02-09 08:47:23 celerate.py:305 step:200 smpl:13K ep:156 epch:0.06 loss:3292.964 grdn:4390.980 lr:1.0e-05 updt_s:0.593 data_s:0.075 +INFO 2026-02-09 08:49:31 celerate.py:305 step:400 smpl:26K ep:312 epch:0.12 loss:298.485 grdn:157.205 lr:3.0e-05 updt_s:0.580 data_s:0.052 +INFO 2026-02-09 08:51:42 celerate.py:305 step:600 smpl:38K ep:469 epch:0.19 loss:117.069 grdn:70.645 lr:5.0e-05 updt_s:0.584 data_s:0.069 +INFO 2026-02-09 08:54:13 celerate.py:305 step:800 smpl:51K ep:625 epch:0.25 loss:50.286 grdn:36.198 lr:7.0e-05 updt_s:0.612 data_s:0.139 +INFO 2026-02-09 08:56:48 celerate.py:305 step:1K smpl:64K ep:781 epch:0.31 loss:21.632 grdn:19.475 lr:9.0e-05 updt_s:0.610 data_s:0.157 +INFO 2026-02-09 08:59:37 celerate.py:305 step:1K smpl:77K ep:937 epch:0.37 loss:10.486 grdn:12.593 lr:1.0e-04 updt_s:0.621 data_s:0.218 +INFO 2026-02-09 09:02:24 celerate.py:305 step:1K smpl:90K ep:1K epch:0.44 loss:6.281 grdn:10.301 lr:1.0e-04 updt_s:0.625 data_s:0.208 +INFO 2026-02-09 09:05:21 celerate.py:305 step:2K smpl:102K ep:1K epch:0.50 loss:4.527 grdn:12.841 lr:1.0e-04 updt_s:0.629 data_s:0.249 +INFO 2026-02-09 09:08:19 celerate.py:305 step:2K smpl:115K ep:1K epch:0.56 loss:3.584 grdn:18.320 lr:1.0e-04 updt_s:0.637 data_s:0.245 +INFO 2026-02-09 09:11:16 celerate.py:305 step:2K smpl:128K ep:2K epch:0.62 loss:3.034 grdn:19.230 lr:1.0e-04 updt_s:0.635 data_s:0.246 +INFO 2026-02-09 09:14:16 celerate.py:305 step:2K smpl:141K ep:2K epch:0.69 loss:2.667 grdn:20.335 lr:1.0e-04 updt_s:0.634 data_s:0.262 +INFO 2026-02-09 09:17:10 celerate.py:305 step:2K smpl:154K ep:2K epch:0.75 loss:2.365 grdn:18.961 lr:1.0e-04 updt_s:0.629 data_s:0.238 +INFO 2026-02-09 09:20:08 celerate.py:305 step:3K smpl:166K ep:2K epch:0.81 loss:2.082 grdn:17.609 lr:1.0e-04 updt_s:0.627 data_s:0.257 +INFO 2026-02-09 09:23:07 celerate.py:305 step:3K smpl:179K ep:2K epch:0.87 loss:1.910 grdn:20.429 lr:1.0e-04 updt_s:0.627 data_s:0.260 +INFO 2026-02-09 09:26:01 celerate.py:305 step:3K smpl:192K ep:2K epch:0.94 loss:1.713 grdn:18.244 lr:1.0e-04 updt_s:0.625 data_s:0.241 +INFO 2026-02-09 09:28:57 celerate.py:305 step:3K smpl:205K ep:2K epch:1.00 loss:1.654 grdn:19.459 lr:1.0e-04 updt_s:0.627 data_s:0.249 +INFO 2026-02-09 09:31:50 celerate.py:305 step:3K smpl:218K ep:3K epch:1.06 loss:1.543 grdn:20.011 lr:1.0e-04 updt_s:0.628 data_s:0.232 +INFO 2026-02-09 09:34:37 celerate.py:305 step:4K smpl:230K ep:3K epch:1.12 loss:1.434 grdn:16.980 lr:1.0e-04 updt_s:0.626 data_s:0.205 +INFO 2026-02-09 09:37:26 celerate.py:305 step:4K smpl:243K ep:3K epch:1.19 loss:1.394 grdn:17.192 lr:1.0e-04 updt_s:0.631 data_s:0.208 +INFO 2026-02-09 09:40:10 celerate.py:305 step:4K smpl:256K ep:3K epch:1.25 loss:1.335 grdn:16.410 lr:1.0e-04 updt_s:0.629 data_s:0.185 +INFO 2026-02-09 09:42:56 celerate.py:305 step:4K smpl:269K ep:3K epch:1.31 loss:1.284 grdn:15.971 lr:1.0e-04 updt_s:0.627 data_s:0.196 +INFO 2026-02-09 09:45:42 celerate.py:305 step:4K smpl:282K ep:3K epch:1.37 loss:1.268 grdn:15.471 lr:1.0e-04 updt_s:0.628 data_s:0.198 +INFO 2026-02-09 09:48:29 celerate.py:305 step:5K smpl:294K ep:4K epch:1.44 loss:1.197 grdn:14.137 lr:1.0e-04 updt_s:0.629 data_s:0.197 +INFO 2026-02-09 09:51:13 celerate.py:305 step:5K smpl:307K ep:4K epch:1.50 loss:1.155 grdn:14.092 lr:9.9e-05 updt_s:0.625 data_s:0.192 +INFO 2026-02-09 09:53:55 celerate.py:305 step:5K smpl:320K ep:4K epch:1.56 loss:1.145 grdn:13.698 lr:9.9e-05 updt_s:0.626 data_s:0.176 +INFO 2026-02-09 09:56:37 celerate.py:305 step:5K smpl:333K ep:4K epch:1.62 loss:1.181 grdn:15.076 lr:9.9e-05 updt_s:0.623 data_s:0.186 +INFO 2026-02-09 09:59:21 celerate.py:305 step:5K smpl:346K ep:4K epch:1.69 loss:1.136 grdn:14.339 lr:9.9e-05 updt_s:0.623 data_s:0.189 +INFO 2026-02-09 10:02:03 celerate.py:305 step:6K smpl:358K ep:4K epch:1.75 loss:1.114 grdn:13.201 lr:9.9e-05 updt_s:0.621 data_s:0.185 +INFO 2026-02-09 10:04:48 celerate.py:305 step:6K smpl:371K ep:5K epch:1.81 loss:1.078 grdn:12.415 lr:9.9e-05 updt_s:0.627 data_s:0.190 +INFO 2026-02-09 10:07:30 celerate.py:305 step:6K smpl:384K ep:5K epch:1.87 loss:1.070 grdn:12.009 lr:9.9e-05 updt_s:0.628 data_s:0.181 +INFO 2026-02-09 10:10:12 celerate.py:305 step:6K smpl:397K ep:5K epch:1.94 loss:1.077 grdn:12.564 lr:9.9e-05 updt_s:0.619 data_s:0.182 +INFO 2026-02-09 10:12:55 celerate.py:305 step:6K smpl:410K ep:5K epch:2.00 loss:1.047 grdn:12.666 lr:9.9e-05 updt_s:0.623 data_s:0.188 +INFO 2026-02-09 10:15:50 celerate.py:305 step:7K smpl:422K ep:5K epch:2.06 loss:1.024 grdn:11.858 lr:9.9e-05 updt_s:0.623 data_s:0.246 +INFO 2026-02-09 10:18:34 celerate.py:305 step:7K smpl:435K ep:5K epch:2.12 loss:1.035 grdn:12.027 lr:9.9e-05 updt_s:0.621 data_s:0.197 +INFO 2026-02-09 10:21:19 celerate.py:305 step:7K smpl:448K ep:5K epch:2.19 loss:1.002 grdn:11.739 lr:9.9e-05 updt_s:0.620 data_s:0.197 +INFO 2026-02-09 10:24:04 celerate.py:305 step:7K smpl:461K ep:6K epch:2.25 loss:1.006 grdn:11.478 lr:9.9e-05 updt_s:0.623 data_s:0.200 +INFO 2026-02-09 10:26:47 celerate.py:305 step:7K smpl:474K ep:6K epch:2.31 loss:0.999 grdn:11.411 lr:9.9e-05 updt_s:0.623 data_s:0.183 +INFO 2026-02-09 10:29:30 celerate.py:305 step:8K smpl:486K ep:6K epch:2.37 loss:0.970 grdn:10.952 lr:9.9e-05 updt_s:0.622 data_s:0.187 +INFO 2026-02-09 10:32:14 celerate.py:305 step:8K smpl:499K ep:6K epch:2.44 loss:0.957 grdn:10.947 lr:9.9e-05 updt_s:0.626 data_s:0.189 +INFO 2026-02-09 10:34:57 celerate.py:305 step:8K smpl:512K ep:6K epch:2.50 loss:0.959 grdn:11.097 lr:9.9e-05 updt_s:0.625 data_s:0.184 +INFO 2026-02-09 10:37:41 celerate.py:305 step:8K smpl:525K ep:6K epch:2.56 loss:0.948 grdn:10.099 lr:9.8e-05 updt_s:0.623 data_s:0.191 +INFO 2026-02-09 10:40:21 celerate.py:305 step:8K smpl:538K ep:7K epch:2.62 loss:0.930 grdn:10.678 lr:9.8e-05 updt_s:0.622 data_s:0.175 +INFO 2026-02-09 10:43:00 celerate.py:305 step:9K smpl:550K ep:7K epch:2.69 loss:0.909 grdn:9.836 lr:9.8e-05 updt_s:0.621 data_s:0.167 +INFO 2026-02-09 10:45:40 celerate.py:305 step:9K smpl:563K ep:7K epch:2.75 loss:0.924 grdn:10.388 lr:9.8e-05 updt_s:0.619 data_s:0.175 +INFO 2026-02-09 10:48:16 celerate.py:305 step:9K smpl:576K ep:7K epch:2.81 loss:0.920 grdn:10.526 lr:9.8e-05 updt_s:0.616 data_s:0.161 +INFO 2026-02-09 10:50:54 celerate.py:305 step:9K smpl:589K ep:7K epch:2.87 loss:0.910 grdn:9.857 lr:9.8e-05 updt_s:0.616 data_s:0.167 +INFO 2026-02-09 10:53:28 celerate.py:305 step:9K smpl:602K ep:7K epch:2.94 loss:0.907 grdn:10.436 lr:9.8e-05 updt_s:0.617 data_s:0.151 +INFO 2026-02-09 10:56:05 celerate.py:305 step:10K smpl:614K ep:7K epch:3.00 loss:0.892 grdn:10.266 lr:9.8e-05 updt_s:0.613 data_s:0.164 +INFO 2026-02-09 10:58:49 celerate.py:305 step:10K smpl:627K ep:8K epch:3.06 loss:0.887 grdn:10.101 lr:9.8e-05 updt_s:0.611 data_s:0.205 +INFO 2026-02-09 11:01:25 celerate.py:305 step:10K smpl:640K ep:8K epch:3.12 loss:0.866 grdn:9.343 lr:9.8e-05 updt_s:0.614 data_s:0.159 +INFO 2026-02-09 11:01:25 celerate.py:330 Checkpoint policy after step 10000 +INFO 2026-02-09 11:04:00 celerate.py:305 step:10K smpl:653K ep:8K epch:3.19 loss:0.900 grdn:10.545 lr:9.8e-05 updt_s:0.610 data_s:0.148 +INFO 2026-02-09 11:06:31 celerate.py:305 step:10K smpl:666K ep:8K epch:3.25 loss:0.886 grdn:9.971 lr:9.7e-05 updt_s:0.610 data_s:0.137 +INFO 2026-02-09 11:09:02 celerate.py:305 step:11K smpl:678K ep:8K epch:3.31 loss:0.855 grdn:9.077 lr:9.7e-05 updt_s:0.609 data_s:0.142 +INFO 2026-02-09 11:11:30 celerate.py:305 step:11K smpl:691K ep:8K epch:3.37 loss:0.869 grdn:9.581 lr:9.7e-05 updt_s:0.605 data_s:0.129 +INFO 2026-02-09 11:13:52 celerate.py:305 step:11K smpl:704K ep:9K epch:3.44 loss:0.873 grdn:9.330 lr:9.7e-05 updt_s:0.602 data_s:0.104 +INFO 2026-02-09 11:16:15 celerate.py:305 step:11K smpl:717K ep:9K epch:3.50 loss:0.853 grdn:9.457 lr:9.7e-05 updt_s:0.599 data_s:0.114 +INFO 2026-02-09 11:18:37 celerate.py:305 step:11K smpl:730K ep:9K epch:3.56 loss:0.849 grdn:9.110 lr:9.7e-05 updt_s:0.601 data_s:0.101 +INFO 2026-02-09 11:20:57 celerate.py:305 step:12K smpl:742K ep:9K epch:3.62 loss:0.839 grdn:9.436 lr:9.7e-05 updt_s:0.600 data_s:0.097 +INFO 2026-02-09 11:23:18 celerate.py:305 step:12K smpl:755K ep:9K epch:3.69 loss:0.826 grdn:9.287 lr:9.7e-05 updt_s:0.601 data_s:0.100 +INFO 2026-02-09 11:25:40 celerate.py:305 step:12K smpl:768K ep:9K epch:3.75 loss:0.828 grdn:9.258 lr:9.7e-05 updt_s:0.597 data_s:0.110 +INFO 2026-02-09 11:28:02 celerate.py:305 step:12K smpl:781K ep:10K epch:3.81 loss:0.819 grdn:8.663 lr:9.7e-05 updt_s:0.600 data_s:0.105 +INFO 2026-02-09 11:30:23 celerate.py:305 step:12K smpl:794K ep:10K epch:3.87 loss:0.834 grdn:8.889 lr:9.6e-05 updt_s:0.598 data_s:0.099 +INFO 2026-02-09 11:32:43 celerate.py:305 step:13K smpl:806K ep:10K epch:3.94 loss:0.812 grdn:8.215 lr:9.6e-05 updt_s:0.600 data_s:0.095 +INFO 2026-02-09 11:35:04 celerate.py:305 step:13K smpl:819K ep:10K epch:4.00 loss:0.820 grdn:8.781 lr:9.6e-05 updt_s:0.601 data_s:0.104 +INFO 2026-02-09 11:37:30 celerate.py:305 step:13K smpl:832K ep:10K epch:4.06 loss:0.815 grdn:9.100 lr:9.6e-05 updt_s:0.598 data_s:0.125 +INFO 2026-02-09 11:39:48 celerate.py:305 step:13K smpl:845K ep:10K epch:4.12 loss:0.810 grdn:8.679 lr:9.6e-05 updt_s:0.596 data_s:0.092 +INFO 2026-02-09 11:42:08 celerate.py:305 step:13K smpl:858K ep:10K epch:4.19 loss:0.808 grdn:9.156 lr:9.6e-05 updt_s:0.601 data_s:0.090 +INFO 2026-02-09 11:44:31 celerate.py:305 step:14K smpl:870K ep:11K epch:4.25 loss:0.791 grdn:8.491 lr:9.6e-05 updt_s:0.604 data_s:0.107 +INFO 2026-02-09 11:46:55 celerate.py:305 step:14K smpl:883K ep:11K epch:4.31 loss:0.796 grdn:8.894 lr:9.6e-05 updt_s:0.604 data_s:0.113 +INFO 2026-02-09 11:49:21 celerate.py:305 step:14K smpl:896K ep:11K epch:4.37 loss:0.791 grdn:8.444 lr:9.5e-05 updt_s:0.604 data_s:0.118 +INFO 2026-02-09 11:51:45 celerate.py:305 step:14K smpl:909K ep:11K epch:4.44 loss:0.788 grdn:8.458 lr:9.5e-05 updt_s:0.601 data_s:0.116 +INFO 2026-02-09 11:54:11 celerate.py:305 step:14K smpl:922K ep:11K epch:4.50 loss:0.778 grdn:8.824 lr:9.5e-05 updt_s:0.600 data_s:0.126 +INFO 2026-02-09 11:56:40 celerate.py:305 step:15K smpl:934K ep:11K epch:4.56 loss:0.783 grdn:8.735 lr:9.5e-05 updt_s:0.613 data_s:0.127 +INFO 2026-02-09 11:59:05 celerate.py:305 step:15K smpl:947K ep:12K epch:4.62 loss:0.764 grdn:8.485 lr:9.5e-05 updt_s:0.601 data_s:0.116 +INFO 2026-02-09 12:01:28 celerate.py:305 step:15K smpl:960K ep:12K epch:4.69 loss:0.778 grdn:8.067 lr:9.5e-05 updt_s:0.601 data_s:0.112 +INFO 2026-02-09 12:03:49 celerate.py:305 step:15K smpl:973K ep:12K epch:4.75 loss:0.757 grdn:8.068 lr:9.5e-05 updt_s:0.601 data_s:0.097 +INFO 2026-02-09 12:06:10 celerate.py:305 step:15K smpl:986K ep:12K epch:4.81 loss:0.769 grdn:8.099 lr:9.4e-05 updt_s:0.601 data_s:0.098 +INFO 2026-02-09 12:08:33 celerate.py:305 step:16K smpl:998K ep:12K epch:4.87 loss:0.779 grdn:8.144 lr:9.4e-05 updt_s:0.601 data_s:0.111 +INFO 2026-02-09 12:10:54 celerate.py:305 step:16K smpl:1M ep:12K epch:4.94 loss:0.778 grdn:8.332 lr:9.4e-05 updt_s:0.601 data_s:0.098 +INFO 2026-02-09 12:13:13 celerate.py:305 step:16K smpl:1M ep:12K epch:5.00 loss:0.759 grdn:7.759 lr:9.4e-05 updt_s:0.595 data_s:0.095 +INFO 2026-02-09 12:15:45 celerate.py:305 step:16K smpl:1M ep:13K epch:5.06 loss:0.736 grdn:7.637 lr:9.4e-05 updt_s:0.601 data_s:0.151 +INFO 2026-02-09 12:18:05 celerate.py:305 step:16K smpl:1M ep:13K epch:5.12 loss:0.761 grdn:8.520 lr:9.4e-05 updt_s:0.600 data_s:0.098 +INFO 2026-02-09 12:20:25 celerate.py:305 step:17K smpl:1M ep:13K epch:5.19 loss:0.746 grdn:7.730 lr:9.4e-05 updt_s:0.600 data_s:0.096 +INFO 2026-02-09 12:22:53 celerate.py:305 step:17K smpl:1M ep:13K epch:5.25 loss:0.756 grdn:8.179 lr:9.3e-05 updt_s:0.601 data_s:0.135 +INFO 2026-02-09 12:25:19 celerate.py:305 step:17K smpl:1M ep:13K epch:5.31 loss:0.752 grdn:7.976 lr:9.3e-05 updt_s:0.604 data_s:0.119 +INFO 2026-02-09 12:27:44 celerate.py:305 step:17K smpl:1M ep:13K epch:5.37 loss:0.736 grdn:8.127 lr:9.3e-05 updt_s:0.597 data_s:0.120 +INFO 2026-02-09 12:30:08 celerate.py:305 step:17K smpl:1M ep:14K epch:5.44 loss:0.743 grdn:8.161 lr:9.3e-05 updt_s:0.590 data_s:0.127 +INFO 2026-02-09 12:32:33 celerate.py:305 step:18K smpl:1M ep:14K epch:5.50 loss:0.741 grdn:8.110 lr:9.3e-05 updt_s:0.598 data_s:0.122 +INFO 2026-02-09 12:34:54 celerate.py:305 step:18K smpl:1M ep:14K epch:5.56 loss:0.736 grdn:8.000 lr:9.3e-05 updt_s:0.590 data_s:0.111 +INFO 2026-02-09 12:37:14 celerate.py:305 step:18K smpl:1M ep:14K epch:5.62 loss:0.741 grdn:7.925 lr:9.2e-05 updt_s:0.594 data_s:0.105 +INFO 2026-02-09 12:39:18 celerate.py:305 step:18K smpl:1M ep:14K epch:5.69 loss:0.731 grdn:8.119 lr:9.2e-05 updt_s:0.593 data_s:0.021 +INFO 2026-02-09 12:41:28 celerate.py:305 step:18K smpl:1M ep:14K epch:5.75 loss:0.729 grdn:7.654 lr:9.2e-05 updt_s:0.590 data_s:0.056 +INFO 2026-02-09 12:43:38 celerate.py:305 step:19K smpl:1M ep:15K epch:5.81 loss:0.723 grdn:7.772 lr:9.2e-05 updt_s:0.588 data_s:0.055 +INFO 2026-02-09 12:45:43 celerate.py:305 step:19K smpl:1M ep:15K epch:5.87 loss:0.720 grdn:7.503 lr:9.2e-05 updt_s:0.589 data_s:0.029 +INFO 2026-02-09 12:47:52 celerate.py:305 step:19K smpl:1M ep:15K epch:5.94 loss:0.732 grdn:7.782 lr:9.2e-05 updt_s:0.587 data_s:0.055 +INFO 2026-02-09 12:50:01 celerate.py:305 step:19K smpl:1M ep:15K epch:6.00 loss:0.717 grdn:7.995 lr:9.1e-05 updt_s:0.584 data_s:0.057 +INFO 2026-02-09 12:52:10 celerate.py:305 step:19K smpl:1M ep:15K epch:6.06 loss:0.720 grdn:7.799 lr:9.1e-05 updt_s:0.586 data_s:0.056 +INFO 2026-02-09 12:54:21 celerate.py:305 step:20K smpl:1M ep:15K epch:6.12 loss:0.709 grdn:7.207 lr:9.1e-05 updt_s:0.579 data_s:0.067 +INFO 2026-02-09 12:56:30 celerate.py:305 step:20K smpl:1M ep:15K epch:6.19 loss:0.706 grdn:7.980 lr:9.1e-05 updt_s:0.580 data_s:0.061 +INFO 2026-02-09 12:58:36 celerate.py:305 step:20K smpl:1M ep:16K epch:6.25 loss:0.713 grdn:7.731 lr:9.1e-05 updt_s:0.586 data_s:0.040 +INFO 2026-02-09 12:58:36 celerate.py:330 Checkpoint policy after step 20000 +INFO 2026-02-09 13:00:42 celerate.py:305 step:20K smpl:1M ep:16K epch:6.31 loss:0.700 grdn:7.431 lr:9.1e-05 updt_s:0.584 data_s:0.032 +INFO 2026-02-09 13:02:47 celerate.py:305 step:20K smpl:1M ep:16K epch:6.37 loss:0.695 grdn:7.701 lr:9.0e-05 updt_s:0.587 data_s:0.030 +INFO 2026-02-09 13:04:52 celerate.py:305 step:21K smpl:1M ep:16K epch:6.44 loss:0.693 grdn:7.174 lr:9.0e-05 updt_s:0.587 data_s:0.034 +INFO 2026-02-09 13:07:01 celerate.py:305 step:21K smpl:1M ep:16K epch:6.50 loss:0.713 grdn:7.916 lr:9.0e-05 updt_s:0.587 data_s:0.056 +INFO 2026-02-09 13:09:10 celerate.py:305 step:21K smpl:1M ep:16K epch:6.56 loss:0.697 grdn:7.610 lr:9.0e-05 updt_s:0.584 data_s:0.055 +INFO 2026-02-09 13:11:17 celerate.py:305 step:21K smpl:1M ep:17K epch:6.62 loss:0.701 grdn:7.959 lr:9.0e-05 updt_s:0.582 data_s:0.046 +INFO 2026-02-09 13:13:24 celerate.py:305 step:21K smpl:1M ep:17K epch:6.69 loss:0.695 grdn:7.502 lr:8.9e-05 updt_s:0.584 data_s:0.049 +INFO 2026-02-09 13:15:38 celerate.py:305 step:22K smpl:1M ep:17K epch:6.75 loss:0.701 grdn:7.409 lr:8.9e-05 updt_s:0.585 data_s:0.081 +INFO 2026-02-09 13:17:48 celerate.py:305 step:22K smpl:1M ep:17K epch:6.81 loss:0.691 grdn:7.667 lr:8.9e-05 updt_s:0.586 data_s:0.055 +INFO 2026-02-09 13:19:52 celerate.py:305 step:22K smpl:1M ep:17K epch:6.87 loss:0.700 grdn:7.640 lr:8.9e-05 updt_s:0.585 data_s:0.035 +INFO 2026-02-09 13:21:56 celerate.py:305 step:22K smpl:1M ep:17K epch:6.94 loss:0.682 grdn:7.069 lr:8.9e-05 updt_s:0.588 data_s:0.026 +INFO 2026-02-09 13:24:05 celerate.py:305 step:22K smpl:1M ep:17K epch:7.00 loss:0.689 grdn:7.884 lr:8.9e-05 updt_s:0.585 data_s:0.056 +INFO 2026-02-09 13:26:13 celerate.py:305 step:23K smpl:1M ep:18K epch:7.06 loss:0.679 grdn:7.188 lr:8.8e-05 updt_s:0.582 data_s:0.054 +INFO 2026-02-09 13:28:17 celerate.py:305 step:23K smpl:1M ep:18K epch:7.12 loss:0.682 grdn:7.287 lr:8.8e-05 updt_s:0.586 data_s:0.029 +INFO 2026-02-09 13:30:21 celerate.py:305 step:23K smpl:1M ep:18K epch:7.19 loss:0.691 grdn:7.662 lr:8.8e-05 updt_s:0.585 data_s:0.031 +INFO 2026-02-09 13:32:32 celerate.py:305 step:23K smpl:1M ep:18K epch:7.25 loss:0.662 grdn:6.967 lr:8.8e-05 updt_s:0.581 data_s:0.065 +INFO 2026-02-09 13:34:41 celerate.py:305 step:23K smpl:1M ep:18K epch:7.31 loss:0.687 grdn:7.400 lr:8.8e-05 updt_s:0.584 data_s:0.057 +INFO 2026-02-09 13:36:49 celerate.py:305 step:24K smpl:2M ep:18K epch:7.37 loss:0.687 grdn:7.457 lr:8.7e-05 updt_s:0.581 data_s:0.055 +INFO 2026-02-09 13:38:56 celerate.py:305 step:24K smpl:2M ep:19K epch:7.44 loss:0.662 grdn:7.443 lr:8.7e-05 updt_s:0.583 data_s:0.047 +INFO 2026-02-09 13:40:58 celerate.py:305 step:24K smpl:2M ep:19K epch:7.50 loss:0.667 grdn:7.186 lr:8.7e-05 updt_s:0.587 data_s:0.023 +INFO 2026-02-09 13:43:04 celerate.py:305 step:24K smpl:2M ep:19K epch:7.56 loss:0.669 grdn:7.482 lr:8.7e-05 updt_s:0.587 data_s:0.037 +INFO 2026-02-09 13:45:13 celerate.py:305 step:24K smpl:2M ep:19K epch:7.62 loss:0.658 grdn:7.291 lr:8.6e-05 updt_s:0.587 data_s:0.050 +INFO 2026-02-09 13:47:19 celerate.py:305 step:25K smpl:2M ep:19K epch:7.69 loss:0.656 grdn:7.174 lr:8.6e-05 updt_s:0.582 data_s:0.045 +INFO 2026-02-09 13:49:29 celerate.py:305 step:25K smpl:2M ep:19K epch:7.75 loss:0.657 grdn:7.341 lr:8.6e-05 updt_s:0.587 data_s:0.058 +INFO 2026-02-09 13:51:36 celerate.py:305 step:25K smpl:2M ep:20K epch:7.81 loss:0.667 grdn:7.089 lr:8.6e-05 updt_s:0.590 data_s:0.043 +INFO 2026-02-09 13:53:43 celerate.py:305 step:25K smpl:2M ep:20K epch:7.87 loss:0.677 grdn:7.260 lr:8.6e-05 updt_s:0.583 data_s:0.047 +INFO 2026-02-09 13:55:59 celerate.py:305 step:25K smpl:2M ep:20K epch:7.94 loss:0.657 grdn:7.135 lr:8.5e-05 updt_s:0.587 data_s:0.087 +INFO 2026-02-09 13:58:07 celerate.py:305 step:26K smpl:2M ep:20K epch:8.00 loss:0.651 grdn:7.034 lr:8.5e-05 updt_s:0.588 data_s:0.046 +INFO 2026-02-09 14:00:13 celerate.py:305 step:26K smpl:2M ep:20K epch:8.06 loss:0.659 grdn:7.212 lr:8.5e-05 updt_s:0.587 data_s:0.039 +INFO 2026-02-09 14:02:19 celerate.py:305 step:26K smpl:2M ep:20K epch:8.12 loss:0.647 grdn:7.119 lr:8.5e-05 updt_s:0.580 data_s:0.045 +INFO 2026-02-09 14:04:30 celerate.py:305 step:26K smpl:2M ep:20K epch:8.19 loss:0.642 grdn:6.756 lr:8.5e-05 updt_s:0.576 data_s:0.076 +INFO 2026-02-09 14:06:36 celerate.py:305 step:26K smpl:2M ep:21K epch:8.25 loss:0.642 grdn:6.903 lr:8.4e-05 updt_s:0.579 data_s:0.046 +INFO 2026-02-09 14:08:41 celerate.py:305 step:27K smpl:2M ep:21K epch:8.31 loss:0.640 grdn:7.082 lr:8.4e-05 updt_s:0.582 data_s:0.038 +INFO 2026-02-09 14:10:48 celerate.py:305 step:27K smpl:2M ep:21K epch:8.37 loss:0.653 grdn:7.068 lr:8.4e-05 updt_s:0.586 data_s:0.045 +INFO 2026-02-09 14:12:57 celerate.py:305 step:27K smpl:2M ep:21K epch:8.44 loss:0.637 grdn:7.012 lr:8.4e-05 updt_s:0.588 data_s:0.054 +INFO 2026-02-09 14:15:01 celerate.py:305 step:27K smpl:2M ep:21K epch:8.50 loss:0.641 grdn:7.171 lr:8.3e-05 updt_s:0.588 data_s:0.025 +INFO 2026-02-09 14:17:10 celerate.py:305 step:27K smpl:2M ep:21K epch:8.56 loss:0.639 grdn:7.147 lr:8.3e-05 updt_s:0.581 data_s:0.059 +INFO 2026-02-09 14:19:23 celerate.py:305 step:28K smpl:2M ep:22K epch:8.62 loss:0.645 grdn:7.212 lr:8.3e-05 updt_s:0.585 data_s:0.078 +INFO 2026-02-09 14:21:34 celerate.py:305 step:28K smpl:2M ep:22K epch:8.69 loss:0.644 grdn:6.981 lr:8.3e-05 updt_s:0.584 data_s:0.067 +INFO 2026-02-09 14:23:40 celerate.py:305 step:28K smpl:2M ep:22K epch:8.75 loss:0.644 grdn:7.174 lr:8.2e-05 updt_s:0.583 data_s:0.043 +INFO 2026-02-09 14:25:48 celerate.py:305 step:28K smpl:2M ep:22K epch:8.81 loss:0.640 grdn:7.126 lr:8.2e-05 updt_s:0.584 data_s:0.047 +INFO 2026-02-09 14:28:02 celerate.py:305 step:28K smpl:2M ep:22K epch:8.87 loss:0.628 grdn:6.649 lr:8.2e-05 updt_s:0.583 data_s:0.083 +INFO 2026-02-09 14:30:17 celerate.py:305 step:29K smpl:2M ep:22K epch:8.94 loss:0.631 grdn:6.742 lr:8.2e-05 updt_s:0.586 data_s:0.084 +INFO 2026-02-09 14:32:26 celerate.py:305 step:29K smpl:2M ep:22K epch:9.00 loss:0.629 grdn:7.107 lr:8.1e-05 updt_s:0.586 data_s:0.056 +INFO 2026-02-09 14:34:34 celerate.py:305 step:29K smpl:2M ep:23K epch:9.06 loss:0.635 grdn:7.010 lr:8.1e-05 updt_s:0.585 data_s:0.047 +INFO 2026-02-09 14:36:37 celerate.py:305 step:29K smpl:2M ep:23K epch:9.12 loss:0.623 grdn:7.030 lr:8.1e-05 updt_s:0.580 data_s:0.032 +INFO 2026-02-09 14:38:49 celerate.py:305 step:29K smpl:2M ep:23K epch:9.19 loss:0.623 grdn:7.004 lr:8.1e-05 updt_s:0.584 data_s:0.069 +INFO 2026-02-09 14:40:54 celerate.py:305 step:30K smpl:2M ep:23K epch:9.25 loss:0.619 grdn:6.904 lr:8.1e-05 updt_s:0.580 data_s:0.046 +INFO 2026-02-09 14:43:02 celerate.py:305 step:30K smpl:2M ep:23K epch:9.31 loss:0.612 grdn:6.459 lr:8.0e-05 updt_s:0.586 data_s:0.049 +INFO 2026-02-09 14:45:12 celerate.py:305 step:30K smpl:2M ep:23K epch:9.37 loss:0.621 grdn:7.055 lr:8.0e-05 updt_s:0.581 data_s:0.065 +INFO 2026-02-09 14:45:12 celerate.py:330 Checkpoint policy after step 30000 +INFO 2026-02-09 14:47:17 celerate.py:305 step:30K smpl:2M ep:24K epch:9.44 loss:0.617 grdn:6.827 lr:8.0e-05 updt_s:0.584 data_s:0.028 +INFO 2026-02-09 14:49:28 celerate.py:305 step:30K smpl:2M ep:24K epch:9.50 loss:0.624 grdn:6.797 lr:8.0e-05 updt_s:0.587 data_s:0.063 +INFO 2026-02-09 14:51:41 celerate.py:305 step:31K smpl:2M ep:24K epch:9.56 loss:0.611 grdn:6.996 lr:7.9e-05 updt_s:0.586 data_s:0.071 +INFO 2026-02-09 14:53:46 celerate.py:305 step:31K smpl:2M ep:24K epch:9.62 loss:0.621 grdn:6.816 lr:7.9e-05 updt_s:0.592 data_s:0.029 +INFO 2026-02-09 14:55:52 celerate.py:305 step:31K smpl:2M ep:24K epch:9.69 loss:0.619 grdn:6.875 lr:7.9e-05 updt_s:0.587 data_s:0.037 +INFO 2026-02-09 14:57:58 celerate.py:305 step:31K smpl:2M ep:24K epch:9.75 loss:0.627 grdn:7.087 lr:7.9e-05 updt_s:0.583 data_s:0.042 +INFO 2026-02-09 15:00:08 celerate.py:305 step:31K smpl:2M ep:25K epch:9.81 loss:0.612 grdn:7.201 lr:7.8e-05 updt_s:0.589 data_s:0.058 +INFO 2026-02-09 15:02:18 celerate.py:305 step:32K smpl:2M ep:25K epch:9.87 loss:0.614 grdn:6.740 lr:7.8e-05 updt_s:0.586 data_s:0.060 +INFO 2026-02-09 15:04:23 celerate.py:305 step:32K smpl:2M ep:25K epch:9.94 loss:0.613 grdn:7.062 lr:7.8e-05 updt_s:0.581 data_s:0.038 +INFO 2026-02-09 15:06:32 celerate.py:305 step:32K smpl:2M ep:25K epch:10.00 loss:0.605 grdn:6.879 lr:7.8e-05 updt_s:0.580 data_s:0.063 +INFO 2026-02-09 15:08:40 celerate.py:305 step:32K smpl:2M ep:25K epch:10.06 loss:0.611 grdn:7.068 lr:7.7e-05 updt_s:0.585 data_s:0.049 +INFO 2026-02-09 15:10:44 celerate.py:305 step:32K smpl:2M ep:25K epch:10.12 loss:0.610 grdn:7.101 lr:7.7e-05 updt_s:0.582 data_s:0.035 +INFO 2026-02-09 15:12:49 celerate.py:305 step:33K smpl:2M ep:25K epch:10.19 loss:0.604 grdn:6.806 lr:7.7e-05 updt_s:0.584 data_s:0.034 +INFO 2026-02-09 15:14:58 celerate.py:305 step:33K smpl:2M ep:26K epch:10.25 loss:0.602 grdn:6.857 lr:7.6e-05 updt_s:0.580 data_s:0.063 +INFO 2026-02-09 15:17:10 celerate.py:305 step:33K smpl:2M ep:26K epch:10.31 loss:0.596 grdn:6.652 lr:7.6e-05 updt_s:0.577 data_s:0.078 +INFO 2026-02-09 15:19:16 celerate.py:305 step:33K smpl:2M ep:26K epch:10.37 loss:0.589 grdn:6.629 lr:7.6e-05 updt_s:0.581 data_s:0.045 +INFO 2026-02-09 15:21:22 celerate.py:305 step:33K smpl:2M ep:26K epch:10.44 loss:0.594 grdn:6.482 lr:7.6e-05 updt_s:0.581 data_s:0.044 +INFO 2026-02-09 15:23:26 celerate.py:305 step:34K smpl:2M ep:26K epch:10.50 loss:0.608 grdn:6.857 lr:7.5e-05 updt_s:0.586 data_s:0.029 +INFO 2026-02-09 15:25:33 celerate.py:305 step:34K smpl:2M ep:26K epch:10.56 loss:0.588 grdn:6.665 lr:7.5e-05 updt_s:0.583 data_s:0.047 +INFO 2026-02-09 15:27:37 celerate.py:305 step:34K smpl:2M ep:27K epch:10.62 loss:0.597 grdn:6.582 lr:7.5e-05 updt_s:0.582 data_s:0.030 +INFO 2026-02-09 15:29:42 celerate.py:305 step:34K smpl:2M ep:27K epch:10.69 loss:0.586 grdn:6.491 lr:7.5e-05 updt_s:0.588 data_s:0.035 +INFO 2026-02-09 15:31:47 celerate.py:305 step:34K smpl:2M ep:27K epch:10.75 loss:0.585 grdn:6.481 lr:7.4e-05 updt_s:0.588 data_s:0.033 +INFO 2026-02-09 15:33:52 celerate.py:305 step:35K smpl:2M ep:27K epch:10.81 loss:0.591 grdn:6.391 lr:7.4e-05 updt_s:0.586 data_s:0.033 +INFO 2026-02-09 15:35:59 celerate.py:305 step:35K smpl:2M ep:27K epch:10.87 loss:0.582 grdn:6.599 lr:7.4e-05 updt_s:0.587 data_s:0.044 +INFO 2026-02-09 15:38:05 celerate.py:305 step:35K smpl:2M ep:27K epch:10.94 loss:0.594 grdn:6.736 lr:7.4e-05 updt_s:0.585 data_s:0.040 +INFO 2026-02-09 15:40:19 celerate.py:305 step:35K smpl:2M ep:27K epch:11.00 loss:0.593 grdn:6.831 lr:7.3e-05 updt_s:0.582 data_s:0.085 +INFO 2026-02-09 15:42:29 celerate.py:305 step:35K smpl:2M ep:28K epch:11.06 loss:0.579 grdn:6.343 lr:7.3e-05 updt_s:0.579 data_s:0.063 +INFO 2026-02-09 15:44:33 celerate.py:305 step:36K smpl:2M ep:28K epch:11.12 loss:0.582 grdn:6.487 lr:7.3e-05 updt_s:0.586 data_s:0.034 +INFO 2026-02-09 15:46:43 celerate.py:305 step:36K smpl:2M ep:28K epch:11.19 loss:0.579 grdn:6.512 lr:7.2e-05 updt_s:0.582 data_s:0.059 +INFO 2026-02-09 15:48:54 celerate.py:305 step:36K smpl:2M ep:28K epch:11.25 loss:0.572 grdn:6.401 lr:7.2e-05 updt_s:0.584 data_s:0.066 +INFO 2026-02-09 15:50:59 celerate.py:305 step:36K smpl:2M ep:28K epch:11.31 loss:0.582 grdn:6.560 lr:7.2e-05 updt_s:0.582 data_s:0.039 +INFO 2026-02-09 15:53:06 celerate.py:305 step:36K smpl:2M ep:28K epch:11.37 loss:0.572 grdn:6.462 lr:7.2e-05 updt_s:0.587 data_s:0.044 +INFO 2026-02-09 15:55:14 celerate.py:305 step:37K smpl:2M ep:29K epch:11.44 loss:0.581 grdn:6.731 lr:7.1e-05 updt_s:0.584 data_s:0.051 +INFO 2026-02-09 15:57:20 celerate.py:305 step:37K smpl:2M ep:29K epch:11.50 loss:0.568 grdn:6.474 lr:7.1e-05 updt_s:0.583 data_s:0.040 +INFO 2026-02-09 15:59:24 celerate.py:305 step:37K smpl:2M ep:29K epch:11.56 loss:0.570 grdn:6.246 lr:7.1e-05 updt_s:0.586 data_s:0.032 +INFO 2026-02-09 16:01:33 celerate.py:305 step:37K smpl:2M ep:29K epch:11.62 loss:0.579 grdn:6.570 lr:7.0e-05 updt_s:0.584 data_s:0.054 +INFO 2026-02-09 16:03:46 celerate.py:305 step:37K smpl:2M ep:29K epch:11.69 loss:0.583 grdn:6.633 lr:7.0e-05 updt_s:0.584 data_s:0.075 +INFO 2026-02-09 16:05:52 celerate.py:305 step:38K smpl:2M ep:29K epch:11.75 loss:0.579 grdn:6.821 lr:7.0e-05 updt_s:0.580 data_s:0.050 +INFO 2026-02-09 16:07:59 celerate.py:305 step:38K smpl:2M ep:30K epch:11.81 loss:0.581 grdn:6.753 lr:7.0e-05 updt_s:0.583 data_s:0.047 +INFO 2026-02-09 16:10:05 celerate.py:305 step:38K smpl:2M ep:30K epch:11.87 loss:0.574 grdn:6.428 lr:6.9e-05 updt_s:0.584 data_s:0.042 +INFO 2026-02-09 16:12:17 celerate.py:305 step:38K smpl:2M ep:30K epch:11.94 loss:0.577 grdn:6.796 lr:6.9e-05 updt_s:0.582 data_s:0.069 +INFO 2026-02-09 16:14:19 celerate.py:305 step:38K smpl:2M ep:30K epch:12.00 loss:0.565 grdn:6.700 lr:6.9e-05 updt_s:0.577 data_s:0.031 +INFO 2026-02-09 16:16:28 celerate.py:305 step:39K smpl:2M ep:30K epch:12.06 loss:0.565 grdn:6.539 lr:6.8e-05 updt_s:0.587 data_s:0.054 +INFO 2026-02-09 16:18:34 celerate.py:305 step:39K smpl:2M ep:30K epch:12.12 loss:0.559 grdn:6.644 lr:6.8e-05 updt_s:0.586 data_s:0.041 +INFO 2026-02-09 16:20:37 celerate.py:305 step:39K smpl:2M ep:30K epch:12.19 loss:0.557 grdn:6.215 lr:6.8e-05 updt_s:0.584 data_s:0.027 +INFO 2026-02-09 16:22:48 celerate.py:305 step:39K smpl:3M ep:31K epch:12.25 loss:0.562 grdn:6.414 lr:6.8e-05 updt_s:0.581 data_s:0.066 +INFO 2026-02-09 16:24:59 celerate.py:305 step:39K smpl:3M ep:31K epch:12.31 loss:0.567 grdn:6.812 lr:6.7e-05 updt_s:0.578 data_s:0.073 +INFO 2026-02-09 16:27:09 celerate.py:305 step:40K smpl:3M ep:31K epch:12.37 loss:0.559 grdn:6.221 lr:6.7e-05 updt_s:0.583 data_s:0.061 +INFO 2026-02-09 16:29:16 celerate.py:305 step:40K smpl:3M ep:31K epch:12.44 loss:0.560 grdn:6.821 lr:6.7e-05 updt_s:0.583 data_s:0.049 +INFO 2026-02-09 16:31:25 celerate.py:305 step:40K smpl:3M ep:31K epch:12.50 loss:0.565 grdn:6.727 lr:6.6e-05 updt_s:0.585 data_s:0.056 +INFO 2026-02-09 16:31:25 celerate.py:330 Checkpoint policy after step 40000 +INFO 2026-02-09 16:33:31 celerate.py:305 step:40K smpl:3M ep:31K epch:12.56 loss:0.557 grdn:6.624 lr:6.6e-05 updt_s:0.583 data_s:0.033 +INFO 2026-02-09 16:35:39 celerate.py:305 step:40K smpl:3M ep:32K epch:12.62 loss:0.557 grdn:6.428 lr:6.6e-05 updt_s:0.582 data_s:0.052 +INFO 2026-02-09 16:37:49 celerate.py:305 step:41K smpl:3M ep:32K epch:12.69 loss:0.548 grdn:6.253 lr:6.6e-05 updt_s:0.586 data_s:0.059 +INFO 2026-02-09 16:39:59 celerate.py:305 step:41K smpl:3M ep:32K epch:12.75 loss:0.551 grdn:6.452 lr:6.5e-05 updt_s:0.580 data_s:0.069 +INFO 2026-02-09 16:42:09 celerate.py:305 step:41K smpl:3M ep:32K epch:12.81 loss:0.563 grdn:6.877 lr:6.5e-05 updt_s:0.587 data_s:0.057 +INFO 2026-02-09 16:44:18 celerate.py:305 step:41K smpl:3M ep:32K epch:12.87 loss:0.554 grdn:6.568 lr:6.5e-05 updt_s:0.586 data_s:0.052 +INFO 2026-02-09 16:46:29 celerate.py:305 step:41K smpl:3M ep:32K epch:12.94 loss:0.554 grdn:6.430 lr:6.4e-05 updt_s:0.584 data_s:0.068 +INFO 2026-02-09 16:48:37 celerate.py:305 step:42K smpl:3M ep:32K epch:13.00 loss:0.558 grdn:6.470 lr:6.4e-05 updt_s:0.582 data_s:0.054 +INFO 2026-02-09 16:50:43 celerate.py:305 step:42K smpl:3M ep:33K epch:13.06 loss:0.546 grdn:6.406 lr:6.4e-05 updt_s:0.582 data_s:0.043 +INFO 2026-02-09 16:52:48 celerate.py:305 step:42K smpl:3M ep:33K epch:13.12 loss:0.550 grdn:6.830 lr:6.4e-05 updt_s:0.587 data_s:0.031 +INFO 2026-02-09 16:54:53 celerate.py:305 step:42K smpl:3M ep:33K epch:13.19 loss:0.544 grdn:6.377 lr:6.3e-05 updt_s:0.585 data_s:0.038 +INFO 2026-02-09 16:57:01 celerate.py:305 step:42K smpl:3M ep:33K epch:13.25 loss:0.539 grdn:6.273 lr:6.3e-05 updt_s:0.578 data_s:0.058 +INFO 2026-02-09 16:59:14 celerate.py:305 step:43K smpl:3M ep:33K epch:13.31 loss:0.555 grdn:6.599 lr:6.3e-05 updt_s:0.581 data_s:0.079 +INFO 2026-02-09 17:01:26 celerate.py:305 step:43K smpl:3M ep:33K epch:13.37 loss:0.555 grdn:6.585 lr:6.2e-05 updt_s:0.584 data_s:0.070 +INFO 2026-02-09 17:03:37 celerate.py:305 step:43K smpl:3M ep:34K epch:13.44 loss:0.544 grdn:6.606 lr:6.2e-05 updt_s:0.578 data_s:0.077 +INFO 2026-02-09 17:05:50 celerate.py:305 step:43K smpl:3M ep:34K epch:13.50 loss:0.545 grdn:6.456 lr:6.2e-05 updt_s:0.586 data_s:0.070 +INFO 2026-02-09 17:08:01 celerate.py:305 step:43K smpl:3M ep:34K epch:13.56 loss:0.547 grdn:6.348 lr:6.1e-05 updt_s:0.582 data_s:0.067 +INFO 2026-02-09 17:10:11 celerate.py:305 step:44K smpl:3M ep:34K epch:13.62 loss:0.545 grdn:6.460 lr:6.1e-05 updt_s:0.581 data_s:0.067 +INFO 2026-02-09 17:12:22 celerate.py:305 step:44K smpl:3M ep:34K epch:13.69 loss:0.540 grdn:6.412 lr:6.1e-05 updt_s:0.588 data_s:0.059 +INFO 2026-02-09 17:14:30 celerate.py:305 step:44K smpl:3M ep:34K epch:13.75 loss:0.535 grdn:6.289 lr:6.1e-05 updt_s:0.586 data_s:0.051 +INFO 2026-02-09 17:16:32 celerate.py:305 step:44K smpl:3M ep:35K epch:13.81 loss:0.544 grdn:6.138 lr:6.0e-05 updt_s:0.587 data_s:0.018 +INFO 2026-02-09 17:18:40 celerate.py:305 step:44K smpl:3M ep:35K epch:13.87 loss:0.539 grdn:6.219 lr:6.0e-05 updt_s:0.578 data_s:0.058 +INFO 2026-02-09 17:20:48 celerate.py:305 step:45K smpl:3M ep:35K epch:13.94 loss:0.524 grdn:6.173 lr:6.0e-05 updt_s:0.579 data_s:0.055 +INFO 2026-02-09 17:23:02 celerate.py:305 step:45K smpl:3M ep:35K epch:14.00 loss:0.537 grdn:6.431 lr:5.9e-05 updt_s:0.587 data_s:0.079 +INFO 2026-02-09 17:25:07 celerate.py:305 step:45K smpl:3M ep:35K epch:14.06 loss:0.528 grdn:6.380 lr:5.9e-05 updt_s:0.580 data_s:0.039 +INFO 2026-02-09 17:27:09 celerate.py:305 step:45K smpl:3M ep:35K epch:14.12 loss:0.540 grdn:6.529 lr:5.9e-05 updt_s:0.584 data_s:0.025 +INFO 2026-02-09 17:29:17 celerate.py:305 step:45K smpl:3M ep:35K epch:14.19 loss:0.535 grdn:6.316 lr:5.8e-05 updt_s:0.578 data_s:0.053 +INFO 2026-02-09 17:31:26 celerate.py:305 step:46K smpl:3M ep:36K epch:14.25 loss:0.525 grdn:6.271 lr:5.8e-05 updt_s:0.583 data_s:0.059 +INFO 2026-02-09 17:33:38 celerate.py:305 step:46K smpl:3M ep:36K epch:14.31 loss:0.536 grdn:6.396 lr:5.8e-05 updt_s:0.585 data_s:0.071 +INFO 2026-02-09 17:35:41 celerate.py:305 step:46K smpl:3M ep:36K epch:14.37 loss:0.536 grdn:6.495 lr:5.8e-05 updt_s:0.586 data_s:0.023 +INFO 2026-02-09 17:37:52 celerate.py:305 step:46K smpl:3M ep:36K epch:14.44 loss:0.521 grdn:6.303 lr:5.7e-05 updt_s:0.580 data_s:0.070 +INFO 2026-02-09 17:40:01 celerate.py:305 step:46K smpl:3M ep:36K epch:14.50 loss:0.520 grdn:6.226 lr:5.7e-05 updt_s:0.587 data_s:0.052 +INFO 2026-02-09 17:42:06 celerate.py:305 step:47K smpl:3M ep:36K epch:14.56 loss:0.524 grdn:6.206 lr:5.7e-05 updt_s:0.586 data_s:0.038 +INFO 2026-02-09 17:44:20 celerate.py:305 step:47K smpl:3M ep:37K epch:14.62 loss:0.534 grdn:6.556 lr:5.6e-05 updt_s:0.584 data_s:0.082 +INFO 2026-02-09 17:46:31 celerate.py:305 step:47K smpl:3M ep:37K epch:14.69 loss:0.531 grdn:6.369 lr:5.6e-05 updt_s:0.588 data_s:0.059 +INFO 2026-02-09 17:48:40 celerate.py:305 step:47K smpl:3M ep:37K epch:14.75 loss:0.516 grdn:6.134 lr:5.6e-05 updt_s:0.587 data_s:0.055 +INFO 2026-02-09 17:50:46 celerate.py:305 step:47K smpl:3M ep:37K epch:14.81 loss:0.514 grdn:6.039 lr:5.5e-05 updt_s:0.582 data_s:0.046 +INFO 2026-02-09 17:52:59 celerate.py:305 step:48K smpl:3M ep:37K epch:14.87 loss:0.525 grdn:6.263 lr:5.5e-05 updt_s:0.587 data_s:0.073 +INFO 2026-02-09 17:55:14 celerate.py:305 step:48K smpl:3M ep:37K epch:14.94 loss:0.520 grdn:6.231 lr:5.5e-05 updt_s:0.586 data_s:0.081 +INFO 2026-02-09 17:57:25 celerate.py:305 step:48K smpl:3M ep:37K epch:15.00 loss:0.522 grdn:6.296 lr:5.4e-05 updt_s:0.581 data_s:0.070 +INFO 2026-02-09 17:59:34 celerate.py:305 step:48K smpl:3M ep:38K epch:15.06 loss:0.518 grdn:6.405 lr:5.4e-05 updt_s:0.583 data_s:0.056 +INFO 2026-02-09 18:01:40 celerate.py:305 step:48K smpl:3M ep:38K epch:15.12 loss:0.516 grdn:6.421 lr:5.4e-05 updt_s:0.582 data_s:0.046 +INFO 2026-02-09 18:03:40 celerate.py:305 step:49K smpl:3M ep:38K epch:15.19 loss:0.525 grdn:6.163 lr:5.4e-05 updt_s:0.583 data_s:0.015 +INFO 2026-02-09 18:05:49 celerate.py:305 step:49K smpl:3M ep:38K epch:15.25 loss:0.523 grdn:6.440 lr:5.3e-05 updt_s:0.586 data_s:0.053 +INFO 2026-02-09 18:07:53 celerate.py:305 step:49K smpl:3M ep:38K epch:15.31 loss:0.511 grdn:6.059 lr:5.3e-05 updt_s:0.580 data_s:0.037 +INFO 2026-02-09 18:10:05 celerate.py:305 step:49K smpl:3M ep:38K epch:15.37 loss:0.527 grdn:6.524 lr:5.3e-05 updt_s:0.579 data_s:0.073 +INFO 2026-02-09 18:12:10 celerate.py:305 step:49K smpl:3M ep:39K epch:15.44 loss:0.521 grdn:6.364 lr:5.2e-05 updt_s:0.581 data_s:0.039 +INFO 2026-02-09 18:14:18 celerate.py:305 step:50K smpl:3M ep:39K epch:15.50 loss:0.513 grdn:6.149 lr:5.2e-05 updt_s:0.582 data_s:0.058 +INFO 2026-02-09 18:16:27 celerate.py:305 step:50K smpl:3M ep:39K epch:15.56 loss:0.514 grdn:6.350 lr:5.2e-05 updt_s:0.583 data_s:0.054 +INFO 2026-02-09 18:18:39 celerate.py:305 step:50K smpl:3M ep:39K epch:15.62 loss:0.506 grdn:6.109 lr:5.1e-05 updt_s:0.584 data_s:0.075 +INFO 2026-02-09 18:18:39 celerate.py:330 Checkpoint policy after step 50000 +INFO 2026-02-09 18:20:49 celerate.py:305 step:50K smpl:3M ep:39K epch:15.69 loss:0.518 grdn:6.238 lr:5.1e-05 updt_s:0.583 data_s:0.052 +INFO 2026-02-09 18:22:56 celerate.py:305 step:50K smpl:3M ep:39K epch:15.75 loss:0.519 grdn:6.985 lr:5.1e-05 updt_s:0.588 data_s:0.041 +INFO 2026-02-09 18:25:05 celerate.py:305 step:51K smpl:3M ep:40K epch:15.81 loss:0.505 grdn:6.137 lr:5.0e-05 updt_s:0.582 data_s:0.059 +INFO 2026-02-09 18:27:12 celerate.py:305 step:51K smpl:3M ep:40K epch:15.87 loss:0.503 grdn:6.198 lr:5.0e-05 updt_s:0.583 data_s:0.045 +INFO 2026-02-09 18:29:19 celerate.py:305 step:51K smpl:3M ep:40K epch:15.94 loss:0.506 grdn:6.044 lr:5.0e-05 updt_s:0.583 data_s:0.048 +INFO 2026-02-09 18:31:25 celerate.py:305 step:51K smpl:3M ep:40K epch:16.00 loss:0.506 grdn:6.201 lr:5.0e-05 updt_s:0.582 data_s:0.045 +INFO 2026-02-09 18:33:35 celerate.py:305 step:51K smpl:3M ep:40K epch:16.06 loss:0.503 grdn:6.108 lr:4.9e-05 updt_s:0.582 data_s:0.063 +INFO 2026-02-09 18:35:43 celerate.py:305 step:52K smpl:3M ep:40K epch:16.12 loss:0.509 grdn:6.113 lr:4.9e-05 updt_s:0.587 data_s:0.046 +INFO 2026-02-09 18:37:47 celerate.py:305 step:52K smpl:3M ep:40K epch:16.19 loss:0.506 grdn:6.112 lr:4.9e-05 updt_s:0.586 data_s:0.032 +INFO 2026-02-09 18:39:53 celerate.py:305 step:52K smpl:3M ep:41K epch:16.25 loss:0.515 grdn:6.322 lr:4.8e-05 updt_s:0.582 data_s:0.044 +INFO 2026-02-09 18:42:00 celerate.py:305 step:52K smpl:3M ep:41K epch:16.31 loss:0.506 grdn:6.231 lr:4.8e-05 updt_s:0.577 data_s:0.054 +INFO 2026-02-09 18:44:09 celerate.py:305 step:52K smpl:3M ep:41K epch:16.37 loss:0.507 grdn:6.204 lr:4.8e-05 updt_s:0.587 data_s:0.054 +INFO 2026-02-09 18:46:20 celerate.py:305 step:53K smpl:3M ep:41K epch:16.44 loss:0.507 grdn:5.972 lr:4.7e-05 updt_s:0.579 data_s:0.068 +INFO 2026-02-09 18:48:32 celerate.py:305 step:53K smpl:3M ep:41K epch:16.50 loss:0.507 grdn:6.138 lr:4.7e-05 updt_s:0.572 data_s:0.088 +INFO 2026-02-09 18:50:47 celerate.py:305 step:53K smpl:3M ep:41K epch:16.56 loss:0.514 grdn:6.758 lr:4.7e-05 updt_s:0.575 data_s:0.094 +INFO 2026-02-09 18:52:57 celerate.py:305 step:53K smpl:3M ep:42K epch:16.62 loss:0.508 grdn:6.227 lr:4.7e-05 updt_s:0.587 data_s:0.058 +INFO 2026-02-09 18:55:00 celerate.py:305 step:53K smpl:3M ep:42K epch:16.69 loss:0.506 grdn:6.357 lr:4.6e-05 updt_s:0.586 data_s:0.028 +INFO 2026-02-09 18:57:06 celerate.py:305 step:54K smpl:3M ep:42K epch:16.75 loss:0.500 grdn:6.155 lr:4.6e-05 updt_s:0.583 data_s:0.043 +INFO 2026-02-09 18:59:16 celerate.py:305 step:54K smpl:3M ep:42K epch:16.81 loss:0.497 grdn:6.010 lr:4.6e-05 updt_s:0.587 data_s:0.058 +INFO 2026-02-09 19:01:30 celerate.py:305 step:54K smpl:3M ep:42K epch:16.87 loss:0.505 grdn:6.638 lr:4.5e-05 updt_s:0.585 data_s:0.078 +INFO 2026-02-09 19:03:41 celerate.py:305 step:54K smpl:3M ep:42K epch:16.94 loss:0.502 grdn:6.318 lr:4.5e-05 updt_s:0.582 data_s:0.070 +INFO 2026-02-09 19:05:47 celerate.py:305 step:54K smpl:3M ep:42K epch:17.00 loss:0.502 grdn:6.368 lr:4.5e-05 updt_s:0.584 data_s:0.041 +INFO 2026-02-09 19:07:54 celerate.py:305 step:55K smpl:3M ep:43K epch:17.06 loss:0.496 grdn:6.283 lr:4.4e-05 updt_s:0.582 data_s:0.048 +INFO 2026-02-09 19:10:03 celerate.py:305 step:55K smpl:4M ep:43K epch:17.12 loss:0.496 grdn:6.380 lr:4.4e-05 updt_s:0.585 data_s:0.056 +INFO 2026-02-09 19:12:06 celerate.py:305 step:55K smpl:4M ep:43K epch:17.19 loss:0.501 grdn:6.040 lr:4.4e-05 updt_s:0.591 data_s:0.019 +INFO 2026-02-09 19:14:06 celerate.py:305 step:55K smpl:4M ep:43K epch:17.25 loss:0.489 grdn:6.117 lr:4.3e-05 updt_s:0.582 data_s:0.015 +INFO 2026-02-09 19:16:11 celerate.py:305 step:55K smpl:4M ep:43K epch:17.31 loss:0.501 grdn:6.167 lr:4.3e-05 updt_s:0.587 data_s:0.031 +INFO 2026-02-09 19:18:21 celerate.py:305 step:56K smpl:4M ep:43K epch:17.37 loss:0.491 grdn:6.274 lr:4.3e-05 updt_s:0.585 data_s:0.061 +INFO 2026-02-09 19:20:28 celerate.py:305 step:56K smpl:4M ep:44K epch:17.44 loss:0.485 grdn:5.999 lr:4.3e-05 updt_s:0.583 data_s:0.045 +INFO 2026-02-09 19:22:35 celerate.py:305 step:56K smpl:4M ep:44K epch:17.50 loss:0.496 grdn:5.916 lr:4.2e-05 updt_s:0.586 data_s:0.046 +INFO 2026-02-09 19:24:44 celerate.py:305 step:56K smpl:4M ep:44K epch:17.56 loss:0.490 grdn:6.036 lr:4.2e-05 updt_s:0.580 data_s:0.062 +INFO 2026-02-09 19:26:51 celerate.py:305 step:56K smpl:4M ep:44K epch:17.62 loss:0.509 grdn:6.388 lr:4.2e-05 updt_s:0.585 data_s:0.045 +INFO 2026-02-09 19:28:56 celerate.py:305 step:57K smpl:4M ep:44K epch:17.69 loss:0.490 grdn:6.162 lr:4.1e-05 updt_s:0.584 data_s:0.036 +INFO 2026-02-09 19:31:01 celerate.py:305 step:57K smpl:4M ep:44K epch:17.75 loss:0.495 grdn:6.319 lr:4.1e-05 updt_s:0.584 data_s:0.036 +INFO 2026-02-09 19:33:07 celerate.py:305 step:57K smpl:4M ep:45K epch:17.81 loss:0.489 grdn:5.918 lr:4.1e-05 updt_s:0.583 data_s:0.046 +INFO 2026-02-09 19:35:11 celerate.py:305 step:57K smpl:4M ep:45K epch:17.87 loss:0.496 grdn:6.209 lr:4.0e-05 updt_s:0.590 data_s:0.022 +INFO 2026-02-09 19:37:21 celerate.py:305 step:57K smpl:4M ep:45K epch:17.94 loss:0.492 grdn:5.993 lr:4.0e-05 updt_s:0.585 data_s:0.059 +INFO 2026-02-09 19:39:30 celerate.py:305 step:58K smpl:4M ep:45K epch:18.00 loss:0.490 grdn:6.114 lr:4.0e-05 updt_s:0.587 data_s:0.056 +INFO 2026-02-09 19:41:36 celerate.py:305 step:58K smpl:4M ep:45K epch:18.06 loss:0.491 grdn:6.238 lr:4.0e-05 updt_s:0.579 data_s:0.045 +INFO 2026-02-09 19:43:40 celerate.py:305 step:58K smpl:4M ep:45K epch:18.12 loss:0.490 grdn:6.089 lr:3.9e-05 updt_s:0.584 data_s:0.033 +INFO 2026-02-09 19:45:46 celerate.py:305 step:58K smpl:4M ep:45K epch:18.19 loss:0.492 grdn:5.956 lr:3.9e-05 updt_s:0.586 data_s:0.039 +INFO 2026-02-09 19:47:51 celerate.py:305 step:58K smpl:4M ep:46K epch:18.25 loss:0.482 grdn:6.034 lr:3.9e-05 updt_s:0.584 data_s:0.033 +INFO 2026-02-09 19:49:56 celerate.py:305 step:59K smpl:4M ep:46K epch:18.31 loss:0.474 grdn:5.801 lr:3.8e-05 updt_s:0.588 data_s:0.036 +INFO 2026-02-09 19:52:01 celerate.py:305 step:59K smpl:4M ep:46K epch:18.37 loss:0.487 grdn:6.071 lr:3.8e-05 updt_s:0.580 data_s:0.038 +INFO 2026-02-09 19:54:07 celerate.py:305 step:59K smpl:4M ep:46K epch:18.44 loss:0.485 grdn:6.163 lr:3.8e-05 updt_s:0.582 data_s:0.042 +INFO 2026-02-09 19:56:18 celerate.py:305 step:59K smpl:4M ep:46K epch:18.50 loss:0.496 grdn:6.220 lr:3.8e-05 updt_s:0.586 data_s:0.062 +INFO 2026-02-09 19:58:23 celerate.py:305 step:59K smpl:4M ep:46K epch:18.56 loss:0.483 grdn:5.975 lr:3.7e-05 updt_s:0.580 data_s:0.042 +INFO 2026-02-09 20:00:29 celerate.py:305 step:60K smpl:4M ep:47K epch:18.62 loss:0.490 grdn:6.002 lr:3.7e-05 updt_s:0.578 data_s:0.048 +INFO 2026-02-09 20:02:36 celerate.py:305 step:60K smpl:4M ep:47K epch:18.69 loss:0.499 grdn:6.132 lr:3.7e-05 updt_s:0.587 data_s:0.042 +INFO 2026-02-09 20:04:43 celerate.py:305 step:60K smpl:4M ep:47K epch:18.75 loss:0.483 grdn:6.066 lr:3.6e-05 updt_s:0.586 data_s:0.046 +INFO 2026-02-09 20:04:43 celerate.py:330 Checkpoint policy after step 60000 +INFO 2026-02-09 20:06:53 celerate.py:305 step:60K smpl:4M ep:47K epch:18.81 loss:0.487 grdn:6.142 lr:3.6e-05 updt_s:0.581 data_s:0.057 +INFO 2026-02-09 20:08:59 celerate.py:305 step:60K smpl:4M ep:47K epch:18.87 loss:0.481 grdn:5.677 lr:3.6e-05 updt_s:0.588 data_s:0.037 +INFO 2026-02-09 20:11:02 celerate.py:305 step:61K smpl:4M ep:47K epch:18.94 loss:0.482 grdn:6.002 lr:3.5e-05 updt_s:0.578 data_s:0.029 +INFO 2026-02-09 20:13:16 celerate.py:305 step:61K smpl:4M ep:47K epch:19.00 loss:0.476 grdn:5.649 lr:3.5e-05 updt_s:0.585 data_s:0.082 +INFO 2026-02-09 20:15:24 celerate.py:305 step:61K smpl:4M ep:48K epch:19.06 loss:0.477 grdn:5.839 lr:3.5e-05 updt_s:0.582 data_s:0.054 +INFO 2026-02-09 20:17:33 celerate.py:305 step:61K smpl:4M ep:48K epch:19.12 loss:0.469 grdn:5.722 lr:3.5e-05 updt_s:0.581 data_s:0.058 +INFO 2026-02-09 20:19:42 celerate.py:305 step:61K smpl:4M ep:48K epch:19.19 loss:0.497 grdn:6.194 lr:3.4e-05 updt_s:0.578 data_s:0.066 +INFO 2026-02-09 20:21:52 celerate.py:305 step:62K smpl:4M ep:48K epch:19.25 loss:0.479 grdn:6.119 lr:3.4e-05 updt_s:0.584 data_s:0.061 +INFO 2026-02-09 20:24:03 celerate.py:305 step:62K smpl:4M ep:48K epch:19.31 loss:0.482 grdn:5.954 lr:3.4e-05 updt_s:0.577 data_s:0.073 +INFO 2026-02-09 20:26:14 celerate.py:305 step:62K smpl:4M ep:48K epch:19.37 loss:0.475 grdn:6.044 lr:3.3e-05 updt_s:0.578 data_s:0.072 +INFO 2026-02-09 20:28:24 celerate.py:305 step:62K smpl:4M ep:49K epch:19.44 loss:0.487 grdn:6.037 lr:3.3e-05 updt_s:0.580 data_s:0.063 +INFO 2026-02-09 20:30:34 celerate.py:305 step:62K smpl:4M ep:49K epch:19.50 loss:0.480 grdn:5.873 lr:3.3e-05 updt_s:0.580 data_s:0.065 +INFO 2026-02-09 20:32:42 celerate.py:305 step:63K smpl:4M ep:49K epch:19.56 loss:0.475 grdn:5.784 lr:3.3e-05 updt_s:0.583 data_s:0.052 +INFO 2026-02-09 20:34:47 celerate.py:305 step:63K smpl:4M ep:49K epch:19.62 loss:0.475 grdn:6.125 lr:3.2e-05 updt_s:0.590 data_s:0.030 +INFO 2026-02-09 20:36:54 celerate.py:305 step:63K smpl:4M ep:49K epch:19.69 loss:0.484 grdn:6.146 lr:3.2e-05 updt_s:0.582 data_s:0.048 +INFO 2026-02-09 20:38:59 celerate.py:305 step:63K smpl:4M ep:49K epch:19.75 loss:0.477 grdn:6.386 lr:3.2e-05 updt_s:0.587 data_s:0.035 +INFO 2026-02-09 20:41:02 celerate.py:305 step:63K smpl:4M ep:50K epch:19.81 loss:0.482 grdn:6.085 lr:3.1e-05 updt_s:0.585 data_s:0.025 +INFO 2026-02-09 20:43:07 celerate.py:305 step:64K smpl:4M ep:50K epch:19.87 loss:0.487 grdn:6.094 lr:3.1e-05 updt_s:0.584 data_s:0.037 +INFO 2026-02-09 20:45:14 celerate.py:305 step:64K smpl:4M ep:50K epch:19.94 loss:0.476 grdn:5.830 lr:3.1e-05 updt_s:0.582 data_s:0.047 +INFO 2026-02-09 20:47:24 celerate.py:305 step:64K smpl:4M ep:50K epch:20.00 loss:0.482 grdn:5.915 lr:3.1e-05 updt_s:0.578 data_s:0.071 +INFO 2026-02-09 20:49:30 celerate.py:305 step:64K smpl:4M ep:50K epch:20.06 loss:0.478 grdn:6.071 lr:3.0e-05 updt_s:0.586 data_s:0.040 +INFO 2026-02-09 20:51:34 celerate.py:305 step:64K smpl:4M ep:50K epch:20.12 loss:0.480 grdn:5.771 lr:3.0e-05 updt_s:0.581 data_s:0.032 +INFO 2026-02-09 20:53:36 celerate.py:305 step:65K smpl:4M ep:50K epch:20.19 loss:0.477 grdn:5.829 lr:3.0e-05 updt_s:0.585 data_s:0.022 +INFO 2026-02-09 20:55:40 celerate.py:305 step:65K smpl:4M ep:51K epch:20.25 loss:0.473 grdn:5.998 lr:3.0e-05 updt_s:0.586 data_s:0.031 +INFO 2026-02-09 20:57:46 celerate.py:305 step:65K smpl:4M ep:51K epch:20.31 loss:0.468 grdn:6.002 lr:2.9e-05 updt_s:0.585 data_s:0.040 +INFO 2026-02-09 20:59:51 celerate.py:305 step:65K smpl:4M ep:51K epch:20.37 loss:0.472 grdn:5.859 lr:2.9e-05 updt_s:0.582 data_s:0.036 +INFO 2026-02-09 21:01:59 celerate.py:305 step:65K smpl:4M ep:51K epch:20.44 loss:0.483 grdn:5.935 lr:2.9e-05 updt_s:0.580 data_s:0.054 +INFO 2026-02-09 21:04:01 celerate.py:305 step:66K smpl:4M ep:51K epch:20.50 loss:0.477 grdn:6.145 lr:2.8e-05 updt_s:0.586 data_s:0.021 +INFO 2026-02-09 21:06:11 celerate.py:305 step:66K smpl:4M ep:51K epch:20.56 loss:0.474 grdn:6.041 lr:2.8e-05 updt_s:0.585 data_s:0.058 +INFO 2026-02-09 21:08:16 celerate.py:305 step:66K smpl:4M ep:52K epch:20.62 loss:0.477 grdn:6.020 lr:2.8e-05 updt_s:0.587 data_s:0.034 +INFO 2026-02-09 21:10:23 celerate.py:305 step:66K smpl:4M ep:52K epch:20.69 loss:0.472 grdn:5.955 lr:2.8e-05 updt_s:0.581 data_s:0.054 +INFO 2026-02-09 21:12:33 celerate.py:305 step:66K smpl:4M ep:52K epch:20.75 loss:0.467 grdn:5.719 lr:2.7e-05 updt_s:0.579 data_s:0.065 +INFO 2026-02-09 21:14:38 celerate.py:305 step:67K smpl:4M ep:52K epch:20.81 loss:0.478 grdn:6.250 lr:2.7e-05 updt_s:0.588 data_s:0.032 +INFO 2026-02-09 21:16:43 celerate.py:305 step:67K smpl:4M ep:52K epch:20.87 loss:0.477 grdn:6.004 lr:2.7e-05 updt_s:0.584 data_s:0.038 +INFO 2026-02-09 21:18:54 celerate.py:305 step:67K smpl:4M ep:52K epch:20.94 loss:0.459 grdn:5.567 lr:2.7e-05 updt_s:0.588 data_s:0.057 +INFO 2026-02-09 21:20:59 celerate.py:305 step:67K smpl:4M ep:52K epch:21.00 loss:0.478 grdn:6.103 lr:2.6e-05 updt_s:0.584 data_s:0.040 +INFO 2026-02-09 21:23:05 celerate.py:305 step:67K smpl:4M ep:53K epch:21.06 loss:0.470 grdn:5.756 lr:2.6e-05 updt_s:0.582 data_s:0.040 +INFO 2026-02-09 21:25:11 celerate.py:305 step:68K smpl:4M ep:53K epch:21.12 loss:0.478 grdn:6.003 lr:2.6e-05 updt_s:0.585 data_s:0.043 +INFO 2026-02-09 21:27:19 celerate.py:305 step:68K smpl:4M ep:53K epch:21.19 loss:0.465 grdn:5.599 lr:2.6e-05 updt_s:0.585 data_s:0.049 +INFO 2026-02-09 21:29:25 celerate.py:305 step:68K smpl:4M ep:53K epch:21.25 loss:0.473 grdn:5.767 lr:2.5e-05 updt_s:0.583 data_s:0.045 +INFO 2026-02-09 21:31:30 celerate.py:305 step:68K smpl:4M ep:53K epch:21.31 loss:0.469 grdn:5.909 lr:2.5e-05 updt_s:0.584 data_s:0.035 +INFO 2026-02-09 21:33:38 celerate.py:305 step:68K smpl:4M ep:53K epch:21.37 loss:0.469 grdn:5.791 lr:2.5e-05 updt_s:0.581 data_s:0.057 +INFO 2026-02-09 21:35:42 celerate.py:305 step:69K smpl:4M ep:54K epch:21.44 loss:0.476 grdn:6.004 lr:2.4e-05 updt_s:0.583 data_s:0.032 +INFO 2026-02-09 21:37:45 celerate.py:305 step:69K smpl:4M ep:54K epch:21.50 loss:0.468 grdn:5.745 lr:2.4e-05 updt_s:0.583 data_s:0.025 +INFO 2026-02-09 21:39:52 celerate.py:305 step:69K smpl:4M ep:54K epch:21.56 loss:0.466 grdn:5.767 lr:2.4e-05 updt_s:0.587 data_s:0.044 +INFO 2026-02-09 21:41:54 celerate.py:305 step:69K smpl:4M ep:54K epch:21.62 loss:0.466 grdn:5.550 lr:2.4e-05 updt_s:0.586 data_s:0.020 +INFO 2026-02-09 21:44:00 celerate.py:305 step:69K smpl:4M ep:54K epch:21.69 loss:0.465 grdn:5.640 lr:2.3e-05 updt_s:0.585 data_s:0.042 +INFO 2026-02-09 21:46:09 celerate.py:305 step:70K smpl:4M ep:54K epch:21.75 loss:0.470 grdn:5.928 lr:2.3e-05 updt_s:0.584 data_s:0.054 +INFO 2026-02-09 21:48:20 celerate.py:305 step:70K smpl:4M ep:55K epch:21.81 loss:0.472 grdn:5.924 lr:2.3e-05 updt_s:0.581 data_s:0.072 +INFO 2026-02-09 21:50:25 celerate.py:305 step:70K smpl:4M ep:55K epch:21.87 loss:0.472 grdn:5.802 lr:2.3e-05 updt_s:0.581 data_s:0.039 +INFO 2026-02-09 21:50:25 celerate.py:330 Checkpoint policy after step 70000 +INFO 2026-02-09 21:52:35 celerate.py:305 step:70K smpl:4M ep:55K epch:21.94 loss:0.461 grdn:5.830 lr:2.2e-05 updt_s:0.586 data_s:0.050 +INFO 2026-02-09 21:54:42 celerate.py:305 step:70K smpl:5M ep:55K epch:22.00 loss:0.476 grdn:6.275 lr:2.2e-05 updt_s:0.588 data_s:0.044 +INFO 2026-02-09 21:56:53 celerate.py:305 step:71K smpl:5M ep:55K epch:22.06 loss:0.466 grdn:5.700 lr:2.2e-05 updt_s:0.581 data_s:0.067 +INFO 2026-02-09 21:58:55 celerate.py:305 step:71K smpl:5M ep:55K epch:22.12 loss:0.463 grdn:5.656 lr:2.2e-05 updt_s:0.578 data_s:0.030 +INFO 2026-02-09 22:00:57 celerate.py:305 step:71K smpl:5M ep:55K epch:22.19 loss:0.469 grdn:5.820 lr:2.1e-05 updt_s:0.584 data_s:0.023 +INFO 2026-02-09 22:03:04 celerate.py:305 step:71K smpl:5M ep:56K epch:22.25 loss:0.462 grdn:5.688 lr:2.1e-05 updt_s:0.584 data_s:0.047 +INFO 2026-02-09 22:05:13 celerate.py:305 step:71K smpl:5M ep:56K epch:22.31 loss:0.465 grdn:5.831 lr:2.1e-05 updt_s:0.582 data_s:0.054 +INFO 2026-02-09 22:07:21 celerate.py:305 step:72K smpl:5M ep:56K epch:22.37 loss:0.465 grdn:5.782 lr:2.1e-05 updt_s:0.580 data_s:0.057 +INFO 2026-02-09 22:09:23 celerate.py:305 step:72K smpl:5M ep:56K epch:22.44 loss:0.466 grdn:5.824 lr:2.1e-05 updt_s:0.585 data_s:0.022 +INFO 2026-02-09 22:11:28 celerate.py:305 step:72K smpl:5M ep:56K epch:22.50 loss:0.470 grdn:5.958 lr:2.0e-05 updt_s:0.581 data_s:0.038 +INFO 2026-02-09 22:13:36 celerate.py:305 step:72K smpl:5M ep:56K epch:22.56 loss:0.471 grdn:6.077 lr:2.0e-05 updt_s:0.584 data_s:0.055 +INFO 2026-02-09 22:15:42 celerate.py:305 step:72K smpl:5M ep:57K epch:22.62 loss:0.470 grdn:5.768 lr:2.0e-05 updt_s:0.580 data_s:0.045 +INFO 2026-02-09 22:17:48 celerate.py:305 step:73K smpl:5M ep:57K epch:22.69 loss:0.463 grdn:5.769 lr:2.0e-05 updt_s:0.581 data_s:0.042 +INFO 2026-02-09 22:19:56 celerate.py:305 step:73K smpl:5M ep:57K epch:22.75 loss:0.457 grdn:5.365 lr:1.9e-05 updt_s:0.584 data_s:0.053 +INFO 2026-02-09 22:22:07 celerate.py:305 step:73K smpl:5M ep:57K epch:22.81 loss:0.468 grdn:5.811 lr:1.9e-05 updt_s:0.586 data_s:0.065 +INFO 2026-02-09 22:24:17 celerate.py:305 step:73K smpl:5M ep:57K epch:22.87 loss:0.463 grdn:5.874 lr:1.9e-05 updt_s:0.586 data_s:0.059 +INFO 2026-02-09 22:26:23 celerate.py:305 step:73K smpl:5M ep:57K epch:22.94 loss:0.464 grdn:5.752 lr:1.9e-05 updt_s:0.583 data_s:0.043 +INFO 2026-02-09 22:28:34 celerate.py:305 step:74K smpl:5M ep:57K epch:23.00 loss:0.466 grdn:5.779 lr:1.8e-05 updt_s:0.587 data_s:0.059 +INFO 2026-02-09 22:30:47 celerate.py:305 step:74K smpl:5M ep:58K epch:23.06 loss:0.477 grdn:6.011 lr:1.8e-05 updt_s:0.581 data_s:0.082 +INFO 2026-02-09 22:32:58 celerate.py:305 step:74K smpl:5M ep:58K epch:23.12 loss:0.461 grdn:5.696 lr:1.8e-05 updt_s:0.577 data_s:0.076 +INFO 2026-02-09 22:35:08 celerate.py:305 step:74K smpl:5M ep:58K epch:23.19 loss:0.471 grdn:6.132 lr:1.8e-05 updt_s:0.583 data_s:0.060 +INFO 2026-02-09 22:37:17 celerate.py:305 step:74K smpl:5M ep:58K epch:23.25 loss:0.462 grdn:5.792 lr:1.8e-05 updt_s:0.589 data_s:0.051 +INFO 2026-02-09 22:39:26 celerate.py:305 step:75K smpl:5M ep:58K epch:23.31 loss:0.468 grdn:5.578 lr:1.7e-05 updt_s:0.580 data_s:0.063 +INFO 2026-02-09 22:41:35 celerate.py:305 step:75K smpl:5M ep:58K epch:23.37 loss:0.462 grdn:5.685 lr:1.7e-05 updt_s:0.579 data_s:0.059 +INFO 2026-02-09 22:43:40 celerate.py:305 step:75K smpl:5M ep:59K epch:23.44 loss:0.456 grdn:5.989 lr:1.7e-05 updt_s:0.584 data_s:0.035 +INFO 2026-02-09 22:45:44 celerate.py:305 step:75K smpl:5M ep:59K epch:23.50 loss:0.465 grdn:5.777 lr:1.7e-05 updt_s:0.584 data_s:0.034 +INFO 2026-02-09 22:47:57 celerate.py:305 step:75K smpl:5M ep:59K epch:23.56 loss:0.457 grdn:5.508 lr:1.6e-05 updt_s:0.587 data_s:0.071 +INFO 2026-02-09 22:50:03 celerate.py:305 step:76K smpl:5M ep:59K epch:23.62 loss:0.465 grdn:5.582 lr:1.6e-05 updt_s:0.588 data_s:0.037 +INFO 2026-02-09 22:52:07 celerate.py:305 step:76K smpl:5M ep:59K epch:23.69 loss:0.457 grdn:5.641 lr:1.6e-05 updt_s:0.587 data_s:0.030 +INFO 2026-02-09 22:54:15 celerate.py:305 step:76K smpl:5M ep:59K epch:23.75 loss:0.463 grdn:5.823 lr:1.6e-05 updt_s:0.585 data_s:0.051 +INFO 2026-02-09 22:56:23 celerate.py:305 step:76K smpl:5M ep:60K epch:23.81 loss:0.462 grdn:5.771 lr:1.6e-05 updt_s:0.587 data_s:0.048 +INFO 2026-02-09 22:58:33 celerate.py:305 step:76K smpl:5M ep:60K epch:23.87 loss:0.462 grdn:5.578 lr:1.5e-05 updt_s:0.584 data_s:0.060 +INFO 2026-02-09 23:00:42 celerate.py:305 step:77K smpl:5M ep:60K epch:23.94 loss:0.464 grdn:5.813 lr:1.5e-05 updt_s:0.576 data_s:0.062 +INFO 2026-02-09 23:02:54 celerate.py:305 step:77K smpl:5M ep:60K epch:24.00 loss:0.464 grdn:5.899 lr:1.5e-05 updt_s:0.583 data_s:0.076 +INFO 2026-02-09 23:05:01 celerate.py:305 step:77K smpl:5M ep:60K epch:24.06 loss:0.462 grdn:5.627 lr:1.5e-05 updt_s:0.582 data_s:0.046 +INFO 2026-02-09 23:07:05 celerate.py:305 step:77K smpl:5M ep:60K epch:24.12 loss:0.458 grdn:5.723 lr:1.5e-05 updt_s:0.582 data_s:0.032 +INFO 2026-02-09 23:09:11 celerate.py:305 step:77K smpl:5M ep:60K epch:24.19 loss:0.458 grdn:5.538 lr:1.4e-05 updt_s:0.585 data_s:0.043 +INFO 2026-02-09 23:11:17 celerate.py:305 step:78K smpl:5M ep:61K epch:24.25 loss:0.474 grdn:5.919 lr:1.4e-05 updt_s:0.581 data_s:0.041 +INFO 2026-02-09 23:13:21 celerate.py:305 step:78K smpl:5M ep:61K epch:24.31 loss:0.453 grdn:5.709 lr:1.4e-05 updt_s:0.587 data_s:0.028 +INFO 2026-02-09 23:15:33 celerate.py:305 step:78K smpl:5M ep:61K epch:24.37 loss:0.469 grdn:5.975 lr:1.4e-05 updt_s:0.583 data_s:0.074 +INFO 2026-02-09 23:17:46 celerate.py:305 step:78K smpl:5M ep:61K epch:24.44 loss:0.462 grdn:5.632 lr:1.4e-05 updt_s:0.582 data_s:0.077 +INFO 2026-02-09 23:19:58 celerate.py:305 step:78K smpl:5M ep:61K epch:24.50 loss:0.458 grdn:5.661 lr:1.3e-05 updt_s:0.584 data_s:0.072 +INFO 2026-02-09 23:22:07 celerate.py:305 step:79K smpl:5M ep:61K epch:24.56 loss:0.461 grdn:5.871 lr:1.3e-05 updt_s:0.577 data_s:0.065 +INFO 2026-02-09 23:24:12 celerate.py:305 step:79K smpl:5M ep:62K epch:24.62 loss:0.465 grdn:5.912 lr:1.3e-05 updt_s:0.584 data_s:0.037 +INFO 2026-02-09 23:26:17 celerate.py:305 step:79K smpl:5M ep:62K epch:24.69 loss:0.457 grdn:5.529 lr:1.3e-05 updt_s:0.580 data_s:0.037 +INFO 2026-02-09 23:28:24 celerate.py:305 step:79K smpl:5M ep:62K epch:24.75 loss:0.464 grdn:5.723 lr:1.3e-05 updt_s:0.588 data_s:0.044 +INFO 2026-02-09 23:30:34 celerate.py:305 step:79K smpl:5M ep:62K epch:24.81 loss:0.469 grdn:5.671 lr:1.2e-05 updt_s:0.580 data_s:0.062 +INFO 2026-02-09 23:32:39 celerate.py:305 step:80K smpl:5M ep:62K epch:24.87 loss:0.454 grdn:5.619 lr:1.2e-05 updt_s:0.585 data_s:0.037 +INFO 2026-02-09 23:34:47 celerate.py:305 step:80K smpl:5M ep:62K epch:24.94 loss:0.457 grdn:5.450 lr:1.2e-05 updt_s:0.588 data_s:0.046 +INFO 2026-02-09 23:37:00 celerate.py:305 step:80K smpl:5M ep:62K epch:25.00 loss:0.449 grdn:5.518 lr:1.2e-05 updt_s:0.580 data_s:0.080 +INFO 2026-02-09 23:37:00 celerate.py:330 Checkpoint policy after step 80000 +INFO 2026-02-09 23:39:08 celerate.py:305 step:80K smpl:5M ep:63K epch:25.06 loss:0.451 grdn:5.619 lr:1.2e-05 updt_s:0.584 data_s:0.042 +INFO 2026-02-09 23:41:10 celerate.py:305 step:80K smpl:5M ep:63K epch:25.12 loss:0.459 grdn:5.932 lr:1.2e-05 updt_s:0.583 data_s:0.021 +INFO 2026-02-09 23:43:21 celerate.py:305 step:81K smpl:5M ep:63K epch:25.19 loss:0.456 grdn:5.694 lr:1.1e-05 updt_s:0.581 data_s:0.067 +INFO 2026-02-09 23:45:26 celerate.py:305 step:81K smpl:5M ep:63K epch:25.25 loss:0.458 grdn:5.874 lr:1.1e-05 updt_s:0.584 data_s:0.039 +INFO 2026-02-09 23:47:35 celerate.py:305 step:81K smpl:5M ep:63K epch:25.31 loss:0.462 grdn:5.698 lr:1.1e-05 updt_s:0.579 data_s:0.060 +INFO 2026-02-09 23:49:42 celerate.py:305 step:81K smpl:5M ep:63K epch:25.37 loss:0.460 grdn:5.598 lr:1.1e-05 updt_s:0.590 data_s:0.040 +INFO 2026-02-09 23:51:51 celerate.py:305 step:81K smpl:5M ep:64K epch:25.44 loss:0.457 grdn:5.756 lr:1.1e-05 updt_s:0.581 data_s:0.058 +INFO 2026-02-09 23:53:55 celerate.py:305 step:82K smpl:5M ep:64K epch:25.50 loss:0.450 grdn:5.484 lr:1.1e-05 updt_s:0.586 data_s:0.032 +INFO 2026-02-09 23:56:07 celerate.py:305 step:82K smpl:5M ep:64K epch:25.56 loss:0.461 grdn:5.456 lr:1.0e-05 updt_s:0.585 data_s:0.066 +INFO 2026-02-09 23:58:17 celerate.py:305 step:82K smpl:5M ep:64K epch:25.62 loss:0.450 grdn:5.416 lr:1.0e-05 updt_s:0.581 data_s:0.068 +INFO 2026-02-10 00:00:26 celerate.py:305 step:82K smpl:5M ep:64K epch:25.69 loss:0.464 grdn:5.831 lr:1.0e-05 updt_s:0.583 data_s:0.056 +INFO 2026-02-10 00:02:36 celerate.py:305 step:82K smpl:5M ep:64K epch:25.75 loss:0.452 grdn:5.814 lr:9.8e-06 updt_s:0.584 data_s:0.061 +INFO 2026-02-10 00:04:47 celerate.py:305 step:83K smpl:5M ep:65K epch:25.81 loss:0.453 grdn:5.548 lr:9.7e-06 updt_s:0.587 data_s:0.061 +INFO 2026-02-10 00:06:52 celerate.py:305 step:83K smpl:5M ep:65K epch:25.87 loss:0.466 grdn:5.579 lr:9.5e-06 updt_s:0.585 data_s:0.038 +INFO 2026-02-10 00:08:59 celerate.py:305 step:83K smpl:5M ep:65K epch:25.94 loss:0.459 grdn:5.619 lr:9.4e-06 updt_s:0.589 data_s:0.040 +INFO 2026-02-10 00:11:03 celerate.py:305 step:83K smpl:5M ep:65K epch:26.00 loss:0.465 grdn:5.919 lr:9.2e-06 updt_s:0.582 data_s:0.033 +INFO 2026-02-10 00:13:13 celerate.py:305 step:83K smpl:5M ep:65K epch:26.06 loss:0.454 grdn:5.621 lr:9.1e-06 updt_s:0.589 data_s:0.061 +INFO 2026-02-10 00:15:22 celerate.py:305 step:84K smpl:5M ep:65K epch:26.12 loss:0.454 grdn:5.667 lr:8.9e-06 updt_s:0.582 data_s:0.056 +INFO 2026-02-10 00:17:29 celerate.py:305 step:84K smpl:5M ep:65K epch:26.19 loss:0.460 grdn:5.724 lr:8.8e-06 updt_s:0.583 data_s:0.050 +INFO 2026-02-10 00:19:42 celerate.py:305 step:84K smpl:5M ep:66K epch:26.25 loss:0.456 grdn:5.771 lr:8.6e-06 updt_s:0.579 data_s:0.080 +INFO 2026-02-10 00:21:52 celerate.py:305 step:84K smpl:5M ep:66K epch:26.31 loss:0.448 grdn:5.441 lr:8.5e-06 updt_s:0.583 data_s:0.066 +INFO 2026-02-10 00:24:00 celerate.py:305 step:84K smpl:5M ep:66K epch:26.37 loss:0.456 grdn:5.643 lr:8.3e-06 updt_s:0.583 data_s:0.050 +INFO 2026-02-10 00:26:06 celerate.py:305 step:85K smpl:5M ep:66K epch:26.44 loss:0.453 grdn:5.371 lr:8.2e-06 updt_s:0.579 data_s:0.047 +INFO 2026-02-10 00:28:16 celerate.py:305 step:85K smpl:5M ep:66K epch:26.50 loss:0.467 grdn:5.866 lr:8.0e-06 updt_s:0.582 data_s:0.065 +INFO 2026-02-10 00:30:31 celerate.py:305 step:85K smpl:5M ep:66K epch:26.56 loss:0.454 grdn:5.475 lr:7.9e-06 updt_s:0.583 data_s:0.086 +INFO 2026-02-10 00:32:40 celerate.py:305 step:85K smpl:5M ep:67K epch:26.62 loss:0.457 grdn:5.570 lr:7.7e-06 updt_s:0.585 data_s:0.053 +INFO 2026-02-10 00:34:44 celerate.py:305 step:85K smpl:5M ep:67K epch:26.69 loss:0.464 grdn:5.738 lr:7.6e-06 updt_s:0.581 data_s:0.036 +INFO 2026-02-10 00:36:51 celerate.py:305 step:86K smpl:5M ep:67K epch:26.75 loss:0.455 grdn:5.538 lr:7.5e-06 updt_s:0.584 data_s:0.046 +INFO 2026-02-10 00:39:00 celerate.py:305 step:86K smpl:5M ep:67K epch:26.81 loss:0.464 grdn:5.856 lr:7.3e-06 updt_s:0.582 data_s:0.057 +INFO 2026-02-10 00:41:04 celerate.py:305 step:86K smpl:6M ep:67K epch:26.87 loss:0.459 grdn:5.798 lr:7.2e-06 updt_s:0.583 data_s:0.034 +INFO 2026-02-10 00:43:10 celerate.py:305 step:86K smpl:6M ep:67K epch:26.94 loss:0.457 grdn:5.678 lr:7.1e-06 updt_s:0.585 data_s:0.039 +INFO 2026-02-10 00:45:16 celerate.py:305 step:86K smpl:6M ep:67K epch:27.00 loss:0.454 grdn:5.661 lr:6.9e-06 updt_s:0.584 data_s:0.043 +INFO 2026-02-10 00:47:25 celerate.py:305 step:87K smpl:6M ep:68K epch:27.06 loss:0.454 grdn:5.485 lr:6.8e-06 updt_s:0.584 data_s:0.059 +INFO 2026-02-10 00:49:32 celerate.py:305 step:87K smpl:6M ep:68K epch:27.12 loss:0.464 grdn:5.885 lr:6.7e-06 updt_s:0.580 data_s:0.049 +INFO 2026-02-10 00:51:34 celerate.py:305 step:87K smpl:6M ep:68K epch:27.19 loss:0.457 grdn:5.661 lr:6.6e-06 updt_s:0.587 data_s:0.020 +INFO 2026-02-10 00:53:39 celerate.py:305 step:87K smpl:6M ep:68K epch:27.25 loss:0.456 grdn:5.711 lr:6.4e-06 updt_s:0.584 data_s:0.033 +INFO 2026-02-10 00:55:45 celerate.py:305 step:87K smpl:6M ep:68K epch:27.31 loss:0.454 grdn:5.381 lr:6.3e-06 updt_s:0.583 data_s:0.045 +INFO 2026-02-10 00:57:50 celerate.py:305 step:88K smpl:6M ep:68K epch:27.37 loss:0.458 grdn:5.553 lr:6.2e-06 updt_s:0.583 data_s:0.037 +INFO 2026-02-10 00:59:57 celerate.py:305 step:88K smpl:6M ep:69K epch:27.44 loss:0.450 grdn:5.504 lr:6.1e-06 updt_s:0.585 data_s:0.043 +INFO 2026-02-10 01:02:01 celerate.py:305 step:88K smpl:6M ep:69K epch:27.50 loss:0.468 grdn:5.929 lr:6.0e-06 updt_s:0.584 data_s:0.032 +INFO 2026-02-10 01:04:10 celerate.py:305 step:88K smpl:6M ep:69K epch:27.56 loss:0.465 grdn:5.671 lr:5.9e-06 updt_s:0.581 data_s:0.059 +INFO 2026-02-10 01:06:18 celerate.py:305 step:88K smpl:6M ep:69K epch:27.62 loss:0.450 grdn:5.524 lr:5.8e-06 updt_s:0.580 data_s:0.055 +INFO 2026-02-10 01:08:23 celerate.py:305 step:89K smpl:6M ep:69K epch:27.69 loss:0.468 grdn:5.769 lr:5.6e-06 updt_s:0.582 data_s:0.042 +INFO 2026-02-10 01:10:33 celerate.py:305 step:89K smpl:6M ep:69K epch:27.75 loss:0.453 grdn:5.381 lr:5.5e-06 updt_s:0.582 data_s:0.059 +INFO 2026-02-10 01:12:39 celerate.py:305 step:89K smpl:6M ep:70K epch:27.81 loss:0.456 grdn:5.471 lr:5.4e-06 updt_s:0.587 data_s:0.039 +INFO 2026-02-10 01:14:45 celerate.py:305 step:89K smpl:6M ep:70K epch:27.87 loss:0.449 grdn:5.516 lr:5.3e-06 updt_s:0.583 data_s:0.043 +INFO 2026-02-10 01:16:53 celerate.py:305 step:89K smpl:6M ep:70K epch:27.94 loss:0.455 grdn:5.534 lr:5.2e-06 updt_s:0.585 data_s:0.052 +INFO 2026-02-10 01:19:06 celerate.py:305 step:90K smpl:6M ep:70K epch:28.00 loss:0.455 grdn:5.719 lr:5.1e-06 updt_s:0.585 data_s:0.073 +INFO 2026-02-10 01:21:14 celerate.py:305 step:90K smpl:6M ep:70K epch:28.06 loss:0.455 grdn:5.393 lr:5.0e-06 updt_s:0.586 data_s:0.048 +INFO 2026-02-10 01:23:19 celerate.py:305 step:90K smpl:6M ep:70K epch:28.12 loss:0.447 grdn:5.404 lr:4.9e-06 updt_s:0.584 data_s:0.037 +INFO 2026-02-10 01:23:19 celerate.py:330 Checkpoint policy after step 90000 +INFO 2026-02-10 01:25:26 celerate.py:305 step:90K smpl:6M ep:70K epch:28.19 loss:0.458 grdn:5.606 lr:4.8e-06 updt_s:0.580 data_s:0.042 +INFO 2026-02-10 01:27:30 celerate.py:305 step:90K smpl:6M ep:71K epch:28.25 loss:0.452 grdn:5.383 lr:4.7e-06 updt_s:0.580 data_s:0.034 +INFO 2026-02-10 01:29:36 celerate.py:305 step:91K smpl:6M ep:71K epch:28.31 loss:0.458 grdn:5.533 lr:4.7e-06 updt_s:0.583 data_s:0.042 +INFO 2026-02-10 01:31:44 celerate.py:305 step:91K smpl:6M ep:71K epch:28.37 loss:0.458 grdn:5.894 lr:4.6e-06 updt_s:0.583 data_s:0.051 +INFO 2026-02-10 01:33:54 celerate.py:305 step:91K smpl:6M ep:71K epch:28.44 loss:0.455 grdn:5.779 lr:4.5e-06 updt_s:0.580 data_s:0.064 +INFO 2026-02-10 01:35:59 celerate.py:305 step:91K smpl:6M ep:71K epch:28.50 loss:0.450 grdn:5.603 lr:4.4e-06 updt_s:0.584 data_s:0.036 +INFO 2026-02-10 01:38:04 celerate.py:305 step:91K smpl:6M ep:71K epch:28.56 loss:0.452 grdn:5.522 lr:4.3e-06 updt_s:0.586 data_s:0.038 +INFO 2026-02-10 01:40:07 celerate.py:305 step:92K smpl:6M ep:72K epch:28.62 loss:0.454 grdn:5.534 lr:4.2e-06 updt_s:0.591 data_s:0.019 +INFO 2026-02-10 01:42:16 celerate.py:305 step:92K smpl:6M ep:72K epch:28.69 loss:0.461 grdn:5.533 lr:4.1e-06 updt_s:0.585 data_s:0.052 +INFO 2026-02-10 01:44:23 celerate.py:305 step:92K smpl:6M ep:72K epch:28.75 loss:0.459 grdn:5.679 lr:4.1e-06 updt_s:0.584 data_s:0.046 +INFO 2026-02-10 01:46:27 celerate.py:305 step:92K smpl:6M ep:72K epch:28.81 loss:0.461 grdn:5.690 lr:4.0e-06 updt_s:0.590 data_s:0.027 +INFO 2026-02-10 01:48:33 celerate.py:305 step:92K smpl:6M ep:72K epch:28.87 loss:0.455 grdn:5.381 lr:3.9e-06 updt_s:0.581 data_s:0.042 +INFO 2026-02-10 01:50:43 celerate.py:305 step:93K smpl:6M ep:72K epch:28.94 loss:0.449 grdn:5.436 lr:3.8e-06 updt_s:0.586 data_s:0.058 +INFO 2026-02-10 01:52:53 celerate.py:305 step:93K smpl:6M ep:72K epch:29.00 loss:0.459 grdn:5.656 lr:3.8e-06 updt_s:0.584 data_s:0.064 +INFO 2026-02-10 01:54:59 celerate.py:305 step:93K smpl:6M ep:73K epch:29.06 loss:0.450 grdn:5.524 lr:3.7e-06 updt_s:0.583 data_s:0.043 +INFO 2026-02-10 01:57:04 celerate.py:305 step:93K smpl:6M ep:73K epch:29.12 loss:0.456 grdn:5.587 lr:3.6e-06 updt_s:0.580 data_s:0.038 +INFO 2026-02-10 01:59:06 celerate.py:305 step:93K smpl:6M ep:73K epch:29.19 loss:0.454 grdn:5.532 lr:3.6e-06 updt_s:0.589 data_s:0.016 +INFO 2026-02-10 02:01:19 celerate.py:305 step:94K smpl:6M ep:73K epch:29.25 loss:0.461 grdn:5.608 lr:3.5e-06 updt_s:0.585 data_s:0.077 +INFO 2026-02-10 02:03:27 celerate.py:305 step:94K smpl:6M ep:73K epch:29.31 loss:0.454 grdn:5.531 lr:3.5e-06 updt_s:0.581 data_s:0.052 +INFO 2026-02-10 02:05:35 celerate.py:305 step:94K smpl:6M ep:73K epch:29.37 loss:0.463 grdn:5.868 lr:3.4e-06 updt_s:0.583 data_s:0.053 +INFO 2026-02-10 02:07:41 celerate.py:305 step:94K smpl:6M ep:74K epch:29.44 loss:0.454 grdn:5.749 lr:3.3e-06 updt_s:0.587 data_s:0.040 +INFO 2026-02-10 02:09:48 celerate.py:305 step:94K smpl:6M ep:74K epch:29.50 loss:0.453 grdn:5.550 lr:3.3e-06 updt_s:0.590 data_s:0.041 +INFO 2026-02-10 02:12:00 celerate.py:305 step:95K smpl:6M ep:74K epch:29.56 loss:0.454 grdn:5.700 lr:3.2e-06 updt_s:0.586 data_s:0.068 +INFO 2026-02-10 02:14:11 celerate.py:305 step:95K smpl:6M ep:74K epch:29.62 loss:0.453 grdn:5.555 lr:3.2e-06 updt_s:0.580 data_s:0.069 +INFO 2026-02-10 02:16:22 celerate.py:305 step:95K smpl:6M ep:74K epch:29.69 loss:0.452 grdn:5.344 lr:3.1e-06 updt_s:0.585 data_s:0.064 +INFO 2026-02-10 02:18:27 celerate.py:305 step:95K smpl:6M ep:74K epch:29.75 loss:0.454 grdn:5.474 lr:3.1e-06 updt_s:0.589 data_s:0.034 +INFO 2026-02-10 02:20:35 celerate.py:305 step:95K smpl:6M ep:75K epch:29.81 loss:0.455 grdn:5.521 lr:3.0e-06 updt_s:0.588 data_s:0.047 +INFO 2026-02-10 02:22:46 celerate.py:305 step:96K smpl:6M ep:75K epch:29.87 loss:0.447 grdn:5.180 lr:3.0e-06 updt_s:0.581 data_s:0.070 +INFO 2026-02-10 02:24:51 celerate.py:305 step:96K smpl:6M ep:75K epch:29.94 loss:0.455 grdn:5.530 lr:2.9e-06 updt_s:0.583 data_s:0.038 +INFO 2026-02-10 02:26:59 celerate.py:305 step:96K smpl:6M ep:75K epch:30.00 loss:0.455 grdn:5.384 lr:2.9e-06 updt_s:0.584 data_s:0.052 +INFO 2026-02-10 02:29:08 celerate.py:305 step:96K smpl:6M ep:75K epch:30.06 loss:0.458 grdn:5.504 lr:2.9e-06 updt_s:0.581 data_s:0.057 +INFO 2026-02-10 02:31:14 celerate.py:305 step:96K smpl:6M ep:75K epch:30.12 loss:0.458 grdn:5.473 lr:2.8e-06 updt_s:0.586 data_s:0.039 +INFO 2026-02-10 02:33:20 celerate.py:305 step:97K smpl:6M ep:75K epch:30.19 loss:0.452 grdn:5.464 lr:2.8e-06 updt_s:0.584 data_s:0.040 +INFO 2026-02-10 02:35:23 celerate.py:305 step:97K smpl:6M ep:76K epch:30.25 loss:0.448 grdn:5.320 lr:2.8e-06 updt_s:0.585 data_s:0.026 +INFO 2026-02-10 02:37:28 celerate.py:305 step:97K smpl:6M ep:76K epch:30.31 loss:0.452 grdn:5.556 lr:2.7e-06 updt_s:0.588 data_s:0.034 +INFO 2026-02-10 02:39:34 celerate.py:305 step:97K smpl:6M ep:76K epch:30.37 loss:0.453 grdn:5.718 lr:2.7e-06 updt_s:0.590 data_s:0.034 +INFO 2026-02-10 02:41:38 celerate.py:305 step:97K smpl:6M ep:76K epch:30.44 loss:0.458 grdn:5.778 lr:2.7e-06 updt_s:0.583 data_s:0.034 +INFO 2026-02-10 02:43:49 celerate.py:305 step:98K smpl:6M ep:76K epch:30.50 loss:0.450 grdn:5.346 lr:2.7e-06 updt_s:0.582 data_s:0.064 +INFO 2026-02-10 02:46:00 celerate.py:305 step:98K smpl:6M ep:76K epch:30.56 loss:0.453 grdn:5.519 lr:2.6e-06 updt_s:0.584 data_s:0.065 +INFO 2026-02-10 02:48:10 celerate.py:305 step:98K smpl:6M ep:77K epch:30.62 loss:0.449 grdn:5.429 lr:2.6e-06 updt_s:0.581 data_s:0.067 +INFO 2026-02-10 02:50:21 celerate.py:305 step:98K smpl:6M ep:77K epch:30.69 loss:0.453 grdn:5.820 lr:2.6e-06 updt_s:0.589 data_s:0.059 +INFO 2026-02-10 02:52:29 celerate.py:305 step:98K smpl:6M ep:77K epch:30.75 loss:0.456 grdn:5.683 lr:2.6e-06 updt_s:0.583 data_s:0.053 +INFO 2026-02-10 02:54:41 celerate.py:305 step:99K smpl:6M ep:77K epch:30.81 loss:0.460 grdn:5.653 lr:2.6e-06 updt_s:0.581 data_s:0.074 +INFO 2026-02-10 02:56:47 celerate.py:305 step:99K smpl:6M ep:77K epch:30.87 loss:0.461 grdn:5.577 lr:2.5e-06 updt_s:0.593 data_s:0.033 +INFO 2026-02-10 02:58:53 celerate.py:305 step:99K smpl:6M ep:77K epch:30.94 loss:0.453 grdn:5.541 lr:2.5e-06 updt_s:0.590 data_s:0.035 +INFO 2026-02-10 03:01:00 celerate.py:305 step:99K smpl:6M ep:77K epch:31.00 loss:0.449 grdn:5.195 lr:2.5e-06 updt_s:0.587 data_s:0.044 +INFO 2026-02-10 03:03:08 celerate.py:305 step:99K smpl:6M ep:78K epch:31.06 loss:0.461 grdn:5.542 lr:2.5e-06 updt_s:0.584 data_s:0.049 +INFO 2026-02-10 03:05:18 celerate.py:305 step:100K smpl:6M ep:78K epch:31.12 loss:0.452 grdn:5.435 lr:2.5e-06 updt_s:0.580 data_s:0.069 +INFO 2026-02-10 03:07:22 celerate.py:305 step:100K smpl:6M ep:78K epch:31.19 loss:0.464 grdn:5.700 lr:2.5e-06 updt_s:0.580 data_s:0.032 +INFO 2026-02-10 03:09:27 celerate.py:305 step:100K smpl:6M ep:78K epch:31.25 loss:0.462 grdn:5.585 lr:2.5e-06 updt_s:0.587 data_s:0.033 +INFO 2026-02-10 03:09:27 celerate.py:330 Checkpoint policy after step 100000 +INFO 2026-02-10 03:09:28 celerate.py:387 End of training diff --git a/wandb/run-20260209_084451-csy2m2pr/files/requirements.txt b/wandb/run-20260209_084451-csy2m2pr/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9c03a866df5018491455e7e96ece81f991688719 --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/files/requirements.txt @@ -0,0 +1,138 @@ +google-crc32c==1.8.0 +triton==3.6.0 +tqdm==4.67.3 +llvmlite==0.46.0 +pandas==3.0.0 +Werkzeug==3.1.5 +nvidia-cuda-runtime-cu12==12.8.90 +typer-slim==0.21.1 +sympy==1.14.0 +av==16.1.0 +httpcore==1.0.9 +pynput==1.8.1 +Jinja2==3.1.6 +filelock==3.20.3 +sentry-sdk==2.52.0 +gitdb==4.0.12 +aiosignal==1.4.0 +nvidia-cufft-cu12==11.3.3.83 +Flask==3.1.2 +frozenlist==1.8.0 +jsonlines==4.0.0 +timm==1.0.24 +multiprocess==0.70.16 +psutil==7.2.2 +pyzmq==27.1.0 +nvidia-cublas-cu12==12.8.4.1 +importlib_metadata==8.7.1 +aiohttp==3.13.3 +GitPython==3.1.46 +annotated-types==0.7.0 +typing-inspect==0.9.0 +antlr4-python3-runtime==4.9.3 +nvidia-nvjitlink-cu12==12.8.93 +charset-normalizer==3.4.4 +PySocks==1.7.1 +future==1.0.0 +transformers==5.1.0 +python-dotenv==1.2.1 +num2words==0.5.14 +pyarrow==23.0.0 +six==1.17.0 +soupsieve==2.8.3 +nvidia-curand-cu12==10.3.9.90 +cmake==4.2.1 +h11==0.16.0 +zipp==3.23.0 +numpy==2.4.2 +typing-inspection==0.4.2 +lerobot==0.1.0 +propcache==0.4.1 +python-xlib==0.33 +wandb==0.24.2 +typing_extensions==4.15.0 +nvidia-cusparse-cu12==12.5.8.93 +pydantic_core==2.41.5 +attrs==25.4.0 +yarl==1.22.0 +torchvision==0.25.0 +beautifulsoup4==4.14.3 +cffi==2.0.0 +segmentation_models_pytorch==0.5.0 +gymnasium==0.29.1 +nvidia-cusolver-cu12==11.7.3.90 +huggingface_hub==1.4.1 +pymunk==6.11.1 +cuda-pathfinder==1.3.3 +numba==0.63.1 +datasets==3.6.0 +nvidia-nccl-cu12==2.27.5 +safetensors==0.7.0 +requests==2.32.5 +mypy_extensions==1.1.0 +omegaconf==2.3.0 +imageio-ffmpeg==0.6.0 +pydantic==2.12.5 +einops==0.8.2 +docopt==0.6.2 +pillow==12.1.0 +blinker==1.9.0 +gdown==5.2.1 +termcolor==3.3.0 +evdev==1.9.3 +xxhash==3.6.0 +dill==0.3.8 +PyYAML==6.0.3 +scipy==1.17.0 +accelerate==1.12.0 +deepdiff==8.6.1 +nvidia-cuda-cupti-cu12==12.8.90 +tokenizers==0.22.2 +rerun-sdk==0.29.1 +idna==3.11 +mpmath==1.3.0 +donfig==0.8.1.post1 +numcodecs==0.16.5 +urllib3==2.6.3 +networkx==3.6.1 +diffusers==0.36.0 +hf-xet==1.2.0 +nvidia-nvtx-cu12==12.8.90 +ImageIO==2.37.2 +serial==0.0.97 +click==8.3.1 +zarr==3.1.5 +nvidia-cufile-cu12==1.13.1.3 +iso8601==2.1.0 +cloudpickle==3.1.2 +h5py==3.15.1 +pyyaml-include==1.4.1 +opencv-python-headless==4.13.0.92 +packaging==26.0 +MarkupSafe==3.0.3 +Farama-Notifications==0.0.4 +torchcodec==0.10.0 +fsspec==2026.2.0 +multidict==6.7.1 +torch==2.10.0 +certifi==2026.1.4 +smmap==5.0.2 +draccus==0.10.0 +mergedeep==1.3.4 +nvidia-cuda-nvrtc-cu12==12.8.93 +regex==2026.1.15 +aiohappyeyeballs==2.6.1 +nvidia-nvshmem-cu12==3.4.5 +python-dateutil==2.9.0.post0 +itsdangerous==2.2.0 +toml==0.10.2 +platformdirs==4.5.1 +shellingham==1.5.4 +httpx==0.28.1 +pycparser==3.0 +cuda-bindings==12.9.4 +orderly-set==5.5.0 +nvidia-cusparselt-cu12==0.7.1 +nvidia-cudnn-cu12==9.10.2.21 +anyio==4.12.1 +protobuf==6.33.5 diff --git a/wandb/run-20260209_084451-csy2m2pr/files/wandb-metadata.json b/wandb/run-20260209_084451-csy2m2pr/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..122f07653f63770d9698c31ed4fcc16aa474d901 --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/files/wandb-metadata.json @@ -0,0 +1,72 @@ +{ + "os": "Linux-4.18.0-513.5.1.el8_9.x86_64-x86_64-with-glibc2.35", + "python": "CPython 3.11.14", + "startedAt": "2026-02-09T08:44:51.654399Z", + "args": [ + "--policy.type=litevla-ms", + "--policy.vlm_model_name=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct", + "--policy.of_path=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5", + "--dataset.root=/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50", + "--output_dir=outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "--job_name=MW_100%_scratch_litevla-ms_lastlayer", + "--config_path=configs/default.json", + "--batch_size=64", + "--wandb.mode=online" + ], + "program": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/LiteVLA-MS/lerobot/scripts/train_accelerate.py", + "codePath": "lerobot/scripts/train_accelerate.py", + "codePathLocal": "lerobot/scripts/train_accelerate.py", + "git": { + "remote": "https://github.com/ducido/LiteVLA-MS.git", + "commit": "a7e835bc70780ed105414dd3bb743c62137f6a8b" + }, + "email": "ducido.w@gmail.com", + "root": "outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer", + "host": "aed594a1dd20", + "executable": "/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/LiteVLA-MS/.venv/bin/python3", + "cpu_count": 32, + "cpu_count_logical": 32, + "gpu": "NVIDIA A100-SXM4-80GB", + "gpu_count": 4, + "disk": { + "/": { + "total": "10737418240", + "used": "78622720" + } + }, + "memory": { + "total": "2163816673280" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-a6489813-5309-3432-09e4-c663845f9cef" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-fc32315d-9871-1ebf-235f-c6f211db2e03" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-2f913744-29a0-03ae-59d3-c19fd3d11eb6" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-23e52388-87b5-c416-3205-eae06c6942f2" + } + ], + "cudaVersion": "12.5", + "writerId": "pqkhfzolauly8lhncay8a0oavkuhy83f" +} \ No newline at end of file diff --git a/wandb/run-20260209_084451-csy2m2pr/files/wandb-summary.json b/wandb/run-20260209_084451-csy2m2pr/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b4aab7229198ba3a5932f93ace7ebe91946b0ac7 --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/lr":2.500318359811219e-06,"train/steps":100000,"train/loss":0.46198040843009947,"_wandb":{"runtime":66281},"_timestamp":1.7706929672659686e+09,"train/static_loss_inside":0.204107865691185,"train/epochs":31.249084499477554,"train/l2_loss":0.36605414748191833,"train/static_loss_outside":0.1404591053724289,"train/samples":6400000,"train/losses_after_forward":{"values":[62897,14816,8402,5254,3484,2269,1648,1087,726,523,386,248,184,140,93,70,51,34,25,18,8,5,9,6,7,2,3,1,0,0,3,1],"bins":[1.4210854715202004e-12,0.8399303555488586,1.6798607110977173,2.5197911262512207,3.3597214221954346,4.199651718139648,5.039582252502441,5.879512310028076,6.719442844390869,7.559373378753662,8.399303436279297,9.23923397064209,10.079164505004883,10.919095039367676,11.759024620056152,12.598955154418945,13.438885688781738,14.278816223144531,15.118746757507324,15.9586763381958,16.798606872558594,17.638538360595703,18.47846794128418,19.318397521972656,20.158329010009766,20.998258590698242,21.83819007873535,22.678119659423828,23.518049240112305,24.357980728149414,25.19791030883789,26.037841796875,26.877771377563477],"_type":"histogram"},"_step":100000,"train/l1_infer_loss":0.2591104209423065,"train/grad_norm":5.58464741230011,"train/dataloading_s":0.03291126469150186,"train/episodes":78122.71124869389,"train/losses_after_rm_padding":{"bins":[1.1542766742422828e-11,0.7308922410011292,1.4617844820022583,2.1926767826080322,2.9235689640045166,3.654461145401001,4.3853535652160645,5.116245746612549,5.847137928009033,6.578030109405518,7.308922290802002,8.039814949035645,8.770707130432129,9.501599311828613,10.232491493225098,10.963383674621582,11.694275856018066,12.42516803741455,13.156060218811035,13.88695240020752,14.617844581604004,15.348736763000488,16.07962989807129,16.810522079467773,17.541414260864258,18.272306442260742,19.003198623657227,19.73409080505371,20.464982986450195,21.19587516784668,21.926767349243164,22.65765953063965,23.388551712036133],"_type":"histogram","values":[12738,33,8,5,3,2,0,2,1,2,3,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]},"_runtime":66281.413547654,"train/update_s":0.5869448679313064} \ No newline at end of file diff --git a/wandb/run-20260209_084451-csy2m2pr/logs/debug-core.log b/wandb/run-20260209_084451-csy2m2pr/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..e512b78d02b87dff004fa7c9efd2d7bb8a7450fa --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-02-09T08:44:51.855017444Z","level":"INFO","msg":"main: starting server","port-filename":"/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/tmpbbty6jd7/port-138391.txt","pid":138391,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-02-09T08:44:51.856550459Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138391} +{"time":"2026-02-09T08:44:51.856532987Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/wandb-138391-138401-1551997388/socket","Net":"unix"}} +{"time":"2026-02-09T08:44:51.963086721Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-02-09T08:44:51.979320585Z","level":"INFO","msg":"handleInformInit: received","streamId":"csy2m2pr","id":"1(@)"} +{"time":"2026-02-09T08:44:52.251127055Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"csy2m2pr","id":"1(@)"} +{"time":"2026-02-09T08:44:57.678904839Z","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"a2ipk0x1p3cc"} +{"time":"2026-02-10T03:09:33.94472901Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-02-10T03:09:33.946917466Z","level":"INFO","msg":"server is shutting down"} +{"time":"2026-02-10T03:09:33.946922205Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-02-10T03:09:33.947067103Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-02-10T03:09:33.949327994Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/wandb-138391-138401-1551997388/socket","Net":"unix"}} +{"time":"2026-02-10T03:09:34.811999001Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-02-10T03:09:34.812026913Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-02-10T03:09:34.812038594Z","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260209_084451-csy2m2pr/logs/debug-internal.log b/wandb/run-20260209_084451-csy2m2pr/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c19e51d64cbe029380ead3c2128dbe5778fd4d27 --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2026-02-09T08:44:51.980035036Z","level":"INFO","msg":"stream: starting","core version":"0.24.2"} +{"time":"2026-02-09T08:44:52.250046733Z","level":"INFO","msg":"stream: created new stream","id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.250176074Z","level":"INFO","msg":"handler: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251106958Z","level":"INFO","msg":"stream: started","id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251174203Z","level":"INFO","msg":"sender: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T08:44:52.251190143Z","level":"INFO","msg":"writer: started","stream_id":"csy2m2pr"} +{"time":"2026-02-09T18:47:07.815456874Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/Robotics_VLA/LiteVLA-MS/csy2m2pr/file_stream","body":"\n\n\n502 Server Error\n\n\n

Error: Server Error

\n

The server encountered a temporary error and could not complete your request.

Please try again in 30 seconds.

\n

\n\n"} +{"time":"2026-02-10T03:09:33.945729023Z","level":"INFO","msg":"stream: closing","id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.622097575Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-02-10T03:09:34.809342627Z","level":"INFO","msg":"handler: closed","stream_id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.811271683Z","level":"INFO","msg":"sender: closed","stream_id":"csy2m2pr"} +{"time":"2026-02-10T03:09:34.811285308Z","level":"INFO","msg":"stream: closed","id":"csy2m2pr"} diff --git a/wandb/run-20260209_084451-csy2m2pr/logs/debug.log b/wandb/run-20260209_084451-csy2m2pr/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c0c6b26e336a0298a0d035a2b4cf3fdb48ea941a --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/logs/debug.log @@ -0,0 +1,21 @@ +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Current SDK version is 0.24.2 +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Configure stats pid to 138391 +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer/wandb/run-20260209_084451-csy2m2pr/logs/debug.log +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer/wandb/run-20260209_084451-csy2m2pr/logs/debug-internal.log +2026-02-09 08:44:51,657 INFO MainThread:138391 [wandb_init.py:init():844] calling init triggers +2026-02-09 08:44:51,658 INFO MainThread:138391 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': '.', 'root': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/metaworld_mt50', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 3, 'random_order': False, 'image_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'crop_resize': {'weight': 1.0, 'type': 'RandomResizedCrop', 'kwargs': {'size': [256, 256], 'ratio': [1, 1], 'scale': [0.9, 0.95]}}, 'rotate': {'weight': 1.0, 'type': 'RandomRotate', 'kwargs': {'degrees': [-5, 5]}}}, 'wrist_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'vqa_data_path': None}, 'env': None, 'policy': {'type': 'litevla-ms', 'n_obs_steps': 1, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {}, 'output_features': {}, 'device': 'cuda', 'use_amp': False, 'gradient_accumulation_steps': 1, 'chunk_size': 50, 'n_action_steps': 1, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': False, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 100000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'of_path': '/pfss/mlde/workspaces/mlde_wsp_IAS_SAMMerge/VLA/duc/VLA-Humanoid-MW/ori_mw_if/ori_mw_100%_of.h5'}, 'output_dir': 'outputs/train/2026-02-09/08-44-30_MW_100%_scratch_litevla-ms_lastlayer', 'job_name': 'MW_100%_scratch_litevla-ms_lastlayer', 'resume': False, 'seed': 42, 'num_workers': 8, 'batch_size': 64, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 10000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 100000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'LiteVLA-MS', 'entity': 'Robotics_VLA', 'notes': None, 'run_id': None, 'mode': 'online'}, '_wandb': {}} +2026-02-09 08:44:51,658 INFO MainThread:138391 [wandb_init.py:init():892] starting backend +2026-02-09 08:44:51,963 INFO MainThread:138391 [wandb_init.py:init():895] sending inform_init request +2026-02-09 08:44:51,976 INFO MainThread:138391 [wandb_init.py:init():903] backend started and connected +2026-02-09 08:44:51,978 INFO MainThread:138391 [wandb_init.py:init():973] updated telemetry +2026-02-09 08:44:51,985 INFO MainThread:138391 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-02-09 08:44:52,532 INFO MainThread:138391 [wandb_init.py:init():1042] starting run threads in backend +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_console_start():2529] atexit reg +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-02-09 08:44:52,674 INFO MainThread:138391 [wandb_run.py:_redirect():2469] Redirects installed. +2026-02-09 08:44:52,685 INFO MainThread:138391 [wandb_init.py:init():1082] run started, returning control to user process +2026-02-10 03:09:33,944 INFO wandb-AsyncioManager-main:138391 [service_client.py:_forward_responses():94] Reached EOF. +2026-02-10 03:09:33,945 INFO wandb-AsyncioManager-main:138391 [mailbox.py:close():154] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260209_084451-csy2m2pr/run-csy2m2pr.wandb b/wandb/run-20260209_084451-csy2m2pr/run-csy2m2pr.wandb new file mode 100644 index 0000000000000000000000000000000000000000..118d75f6c4ea7ae7b03eaba87fd8b37a42f32549 --- /dev/null +++ b/wandb/run-20260209_084451-csy2m2pr/run-csy2m2pr.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601b47615fc09abdd741d4a335bc3e8dbf7c2e78814c4eebd3befd3b1f0c60d0 +size 11392669