diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..878106f3864d16e1d1a325549dff1517e1843264 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +wandb/run-20260408_063316-dzwcngqh/run-dzwcngqh.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoints/000500/pretrained_model/config.json b/checkpoints/000500/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/000500/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/000500/pretrained_model/model.safetensors b/checkpoints/000500/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce05446f428c299f8d12d42422a796fe65ccc9f0 --- /dev/null +++ b/checkpoints/000500/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa9caedd486124c8b83b0cd5fc29010e0aca445260aa0592c947bdf8f87df7ab +size 906712520 diff --git a/checkpoints/000500/pretrained_model/policy_postprocessor.json b/checkpoints/000500/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/000500/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/000500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/000500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/000500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/000500/pretrained_model/policy_preprocessor.json b/checkpoints/000500/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/000500/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/000500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/000500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/000500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/000500/pretrained_model/train_config.json b/checkpoints/000500/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/000500/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/000500/training_state/optimizer_param_groups.json b/checkpoints/000500/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..946b4cbe4e5b4d4cc2e9b5bb3a1984bb926a3c33 --- /dev/null +++ b/checkpoints/000500/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 1.882772228311384e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/000500/training_state/optimizer_state.safetensors b/checkpoints/000500/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d05ce4cf31542ed982395c825757abdfa47a7e2 --- /dev/null +++ b/checkpoints/000500/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0da8e0791ec25562f4af725cbfefbc18469aa167d94c13539e651708d3268f +size 412659164 diff --git a/checkpoints/000500/training_state/rng_state.safetensors b/checkpoints/000500/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3056b3707843ec841f312e4068a47eb34a273daa --- /dev/null +++ b/checkpoints/000500/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a58ea13e36cdc43c4a17a080bd9816c214cd5dba6850f376a15eec00df283a +size 15708 diff --git a/checkpoints/000500/training_state/scheduler_state.json b/checkpoints/000500/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..53425f04b861a6e9c20d98c7246e79420efec93e --- /dev/null +++ b/checkpoints/000500/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 500, + "_step_count": 501, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.882772228311384e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/000500/training_state/training_step.json b/checkpoints/000500/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..8df625539d7e10ddfbd1e2ac0daf2fdd64ff5c4b --- /dev/null +++ b/checkpoints/000500/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 500 +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/config.json b/checkpoints/001000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/001000/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/model.safetensors b/checkpoints/001000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44666bef0f606f774b7c0aa5d015ba8c2828fc88 --- /dev/null +++ b/checkpoints/001000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b007f46475f648e4d45c992b0860cd6b2c5f95e3bd19eec0740ba3d6d70a31 +size 906712520 diff --git a/checkpoints/001000/pretrained_model/policy_postprocessor.json b/checkpoints/001000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/001000/pretrained_model/policy_preprocessor.json b/checkpoints/001000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/001000/pretrained_model/train_config.json b/checkpoints/001000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/001000/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/001000/training_state/optimizer_param_groups.json b/checkpoints/001000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..04f9953526b52fe5aaae91e67daea9bc33cab95b --- /dev/null +++ b/checkpoints/001000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 1.5625e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/001000/training_state/optimizer_state.safetensors b/checkpoints/001000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..306dc449468c1007c10cbfb03f7aede0213bacea --- /dev/null +++ b/checkpoints/001000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27dd56675676c1c382ae1a3960c181dcd886af950cc681da8264b080dd030cbd +size 412659164 diff --git a/checkpoints/001000/training_state/rng_state.safetensors b/checkpoints/001000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..597505da28b24884ab1a7488ba6a30aafe7ebe31 --- /dev/null +++ b/checkpoints/001000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c1b85d3cd3fcdf8fc71df752f1b59f846a625c7b979b74fba93d474a70d65e +size 15708 diff --git a/checkpoints/001000/training_state/scheduler_state.json b/checkpoints/001000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54d7fec4474a14b69423ab602ae3859e6602b5d2 --- /dev/null +++ b/checkpoints/001000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 1000, + "_step_count": 1001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.5625e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/001000/training_state/training_step.json b/checkpoints/001000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d98c94f5b78238bf495ac68b9f9fb446cfac5c07 --- /dev/null +++ b/checkpoints/001000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 1000 +} \ No newline at end of file diff --git a/checkpoints/001500/pretrained_model/config.json b/checkpoints/001500/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/001500/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/001500/pretrained_model/model.safetensors b/checkpoints/001500/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eb6d498c33c47645f2f610909cf855e728672be --- /dev/null +++ b/checkpoints/001500/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487e7cf6dd816546f986d64627924fb259a87a7113c68b1784fd61a44713a8d0 +size 906712520 diff --git a/checkpoints/001500/pretrained_model/policy_postprocessor.json b/checkpoints/001500/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/001500/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/001500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/001500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/001500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/001500/pretrained_model/policy_preprocessor.json b/checkpoints/001500/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/001500/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/001500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/001500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/001500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/001500/pretrained_model/train_config.json b/checkpoints/001500/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/001500/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/001500/training_state/optimizer_param_groups.json b/checkpoints/001500/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..c35b4c3a983b93541d9b27d44b230ee725ffb3e8 --- /dev/null +++ b/checkpoints/001500/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 1.125e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/001500/training_state/optimizer_state.safetensors b/checkpoints/001500/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc171d6bb973a8208a3dbcba8718ad2ac46dae66 --- /dev/null +++ b/checkpoints/001500/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad5cc63401a54e2c847fdc0b9548589f97ec0168a3cdb02253abfe0ae35255b +size 412659164 diff --git a/checkpoints/001500/training_state/rng_state.safetensors b/checkpoints/001500/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cb6064f58e9e26e0aef0747d31f414946b682f3 --- /dev/null +++ b/checkpoints/001500/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ab98f9e37d5aab0724bcf44f043e03c1ed5ed121c133898aef18828ddaa030 +size 15708 diff --git a/checkpoints/001500/training_state/scheduler_state.json b/checkpoints/001500/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d5c891d0d6bf19fc0783d3df2afb10cee4e2e51c --- /dev/null +++ b/checkpoints/001500/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 1500, + "_step_count": 1501, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.125e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/001500/training_state/training_step.json b/checkpoints/001500/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..332c190c78659d9403699dab54afdd72e41d21b8 --- /dev/null +++ b/checkpoints/001500/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 1500 +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/config.json b/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/model.safetensors b/checkpoints/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eee9596b5d01f392387e8eb1afe87e8d6992a7d0 --- /dev/null +++ b/checkpoints/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1269c875bd02c5d4539dda387bb6b29c46766010754a773444fedd6435e90f0 +size 906712520 diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor.json b/checkpoints/002000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor.json b/checkpoints/002000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/002000/pretrained_model/train_config.json b/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_param_groups.json b/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..2c47e46147541f06ec2b940d4ab4a8e8a3d63c2f --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 6.875000000000003e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_state.safetensors b/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0bd4543196a5ab4607c03c8dda225d4eef5c760 --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:465ecb675f1fb7bc634da9b3a193b38134ce702b6249796d7d11b790670fcbae +size 412659164 diff --git a/checkpoints/002000/training_state/rng_state.safetensors b/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53f4a1f88bbeff933c9c73bdeec11100a625302e --- /dev/null +++ b/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90fb64faeba342b6fcd7f7c3e1ee2754e65d67a9a645d97e65c7d4a5cf0dce7 +size 15708 diff --git a/checkpoints/002000/training_state/scheduler_state.json b/checkpoints/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..24e195101ca89445e432632c9990a62c8b697a37 --- /dev/null +++ b/checkpoints/002000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 2000, + "_step_count": 2001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.875000000000003e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/training_step.json b/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/checkpoints/002500/pretrained_model/config.json b/checkpoints/002500/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/002500/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/002500/pretrained_model/model.safetensors b/checkpoints/002500/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b46177afb6eb92a805a286a8b29fb569c60c5709 --- /dev/null +++ b/checkpoints/002500/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9d99e78a06e4d2c24d22980cfd58c573c92ac796ae435886bd3ff005b2aed9 +size 906712520 diff --git a/checkpoints/002500/pretrained_model/policy_postprocessor.json b/checkpoints/002500/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/002500/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/002500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/002500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/002500/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/002500/pretrained_model/policy_preprocessor.json b/checkpoints/002500/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/002500/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/002500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/002500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/002500/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/002500/pretrained_model/train_config.json b/checkpoints/002500/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/002500/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/002500/training_state/optimizer_param_groups.json b/checkpoints/002500/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..6088944190cc469301ca82b90b718e0ccf8b8e25 --- /dev/null +++ b/checkpoints/002500/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 3.6722777168861617e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/002500/training_state/optimizer_state.safetensors b/checkpoints/002500/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61540cc7d7e1c71345c22c3faa6e68119810acc4 --- /dev/null +++ b/checkpoints/002500/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac44ceb78617482ae74ecddce94bf0a984ab62e28f00a16bff2a99ad19a3496 +size 412659164 diff --git a/checkpoints/002500/training_state/rng_state.safetensors b/checkpoints/002500/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1e3fe3e5b1616e3b892aa11c5571cb69aae238b --- /dev/null +++ b/checkpoints/002500/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77628103157194f3e8c07b59ed260df3021cca30e8661c02ee58d11c5353e53b +size 15708 diff --git a/checkpoints/002500/training_state/scheduler_state.json b/checkpoints/002500/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4931bf560369418987dc4217758025f619701dd3 --- /dev/null +++ b/checkpoints/002500/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 2500, + "_step_count": 2501, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.6722777168861617e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/002500/training_state/training_step.json b/checkpoints/002500/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d265f47a09c2910099ed59e197b57b34675d1ae0 --- /dev/null +++ b/checkpoints/002500/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2500 +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/config.json b/checkpoints/003000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d36e5cba45ee037f02f9f4e552d442c96905a6b5 --- /dev/null +++ b/checkpoints/003000/pretrained_model/config.json @@ -0,0 +1,111 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/model.safetensors b/checkpoints/003000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0809d3eb83d11ac18822740656ff0799721ea01 --- /dev/null +++ b/checkpoints/003000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6607fb070a38fd23fd1e45978fb9bdd892d911484d242361c4c3cf1be41ebd1 +size 906712520 diff --git a/checkpoints/003000/pretrained_model/policy_postprocessor.json b/checkpoints/003000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec8146a07f9b92bc5a3c02e2d6b7d49ba6d26e8 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/003000/pretrained_model/policy_preprocessor.json b/checkpoints/003000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..7527b68cb0a2d7c7a1a0f858ac44bec3df5ac0bf --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,97 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3b6bb870bf669cd89d6bc4600478c768d22459 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc5a6a77ada2e972696d0b4da9485e8ac2c2036629207a51cf587b2c434abb +size 5440 diff --git a/checkpoints/003000/pretrained_model/train_config.json b/checkpoints/003000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8291d78a108d5d18a1827440e8ab255a9b11cc9 --- /dev/null +++ b/checkpoints/003000/pretrained_model/train_config.json @@ -0,0 +1,394 @@ +{ + "dataset": { + "repo_id": "daniecraig/libero_90_no_noops_lerobot_v30", + "root": null, + "episodes": [ + 3319, + 3320, + 3321, + 3322, + 3323, + 3324, + 3325, + 3326, + 3327, + 3328, + 3329, + 3330, + 3331, + 3332, + 3333, + 3334, + 3335, + 3336, + 3337, + 3338, + 3339, + 3340, + 3341, + 3342, + 3343, + 3344, + 3345, + 3346, + 3347, + 3348, + 3349, + 3350, + 3351, + 3352, + 3353, + 3354, + 3355, + 3356, + 3357, + 3358, + 3359, + 3360, + 3361, + 3362, + 3363, + 3364, + 3365 + ], + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": "main", + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": { + "type": "libero", + "task": "libero_90", + "fps": 30, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + }, + "pixels/agentview_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "pixels/robot0_eye_in_hand_image": { + "type": "VISUAL", + "shape": [ + 360, + 360, + 3 + ] + }, + "robot_state/eef/pos": { + "type": "STATE", + "shape": [ + 3 + ] + }, + "robot_state/eef/quat": { + "type": "STATE", + "shape": [ + 4 + ] + }, + "robot_state/eef/mat": { + "type": "STATE", + "shape": [ + 3, + 3 + ] + }, + "robot_state/gripper/qpos": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/gripper/qvel": { + "type": "STATE", + "shape": [ + 2 + ] + }, + "robot_state/joints/pos": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "robot_state/joints/vel": { + "type": "STATE", + "shape": [ + 7 + ] + } + }, + "features_map": { + "action": "action", + "robot_state/eef/pos": "observation.state.eef_pos", + "robot_state/eef/quat": "observation.state.eef_quat", + "robot_state/eef/mat": "observation.state.eef_mat", + "robot_state/gripper/qpos": "observation.state.gripper_qpos", + "robot_state/gripper/qvel": "observation.state.gripper_qvel", + "robot_state/joints/pos": "observation.state.joint_pos", + "robot_state/joints/vel": "observation.state.joint_vel", + "pixels/agentview_image": "observation.images.image", + "pixels/robot0_eye_in_hand_image": "observation.images.image2" + }, + "max_parallel_tasks": 1, + "disable_env_checker": true, + "task_ids": [ + 18 + ], + "episode_length": null, + "obs_type": "pixels_agent_pos", + "render_mode": "rgb_array", + "camera_name": "agentview_image,robot0_eye_in_hand_image", + "init_states": true, + "camera_name_mapping": null, + "observation_height": 360, + "observation_width": 360, + "control_mode": "relative" + }, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.images.image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.images.wrist_image": { + "type": "VISUAL", + "shape": [ + 256, + 256, + 3 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 8 + ] + }, + "observation.states.ee_state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.states.joint_state": { + "type": "STATE", + "shape": [ + 7 + ] + }, + "observation.states.gripper_state": { + "type": "STATE", + "shape": [ + 2 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "ardalon/sequential_t1_t2_task4818_fft", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.01, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/sequential_t1_t2_task4818_fft", + "job_name": "sequential_t1_t2_task4818_fft", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 16, + "steps": 3000, + "eval_freq": 0, + "log_freq": 1, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 500, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 2e-05, + "weight_decay": 0.01, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 2e-05, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": true, + "project": "lerobot-smolvla", + "entity": null, + "notes": null, + "run_id": "dzwcngqh", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/003000/training_state/optimizer_param_groups.json b/checkpoints/003000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..a0609ea2fafd807fbd4faa8d694128c881a4b1e9 --- /dev/null +++ b/checkpoints/003000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 2.5e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 0.01, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/003000/training_state/optimizer_state.safetensors b/checkpoints/003000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88fb4cebbc1562386ca25635c9c8b597d0962c61 --- /dev/null +++ b/checkpoints/003000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7155df6da84c817be6497bbbc25cd0aef8d04f1bdbcf6fd9af7523fb93f7c61d +size 412659164 diff --git a/checkpoints/003000/training_state/rng_state.safetensors b/checkpoints/003000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1270f3b7ba58c27d75acb3566ef333903158f8bd --- /dev/null +++ b/checkpoints/003000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6f5bc4a0f7251539128fe05335e0ddd5c7db5da192ae9c95b407b28cac1e86 +size 15708 diff --git a/checkpoints/003000/training_state/scheduler_state.json b/checkpoints/003000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eadb41e983dfc81823cc44f7c06c900abb00bc99 --- /dev/null +++ b/checkpoints/003000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 2e-05 + ], + "last_epoch": 3000, + "_step_count": 3001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.5e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/003000/training_state/training_step.json b/checkpoints/003000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a44127b7dfea653fd776d529fa83c55d32081c --- /dev/null +++ b/checkpoints/003000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 3000 +} \ No newline at end of file diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..207b561d5019b589205bc9edddee6c7d20cf7a22 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2026-04-08T06:33:16.854663011Z","level":"INFO","msg":"stream: starting","core version":"0.24.2"} +{"time":"2026-04-08T06:33:17.199413993Z","level":"INFO","msg":"stream: created new stream","id":"dzwcngqh"} +{"time":"2026-04-08T06:33:17.199510437Z","level":"INFO","msg":"handler: started","stream_id":"dzwcngqh"} +{"time":"2026-04-08T06:33:17.199601366Z","level":"INFO","msg":"stream: started","id":"dzwcngqh"} +{"time":"2026-04-08T06:33:17.199631189Z","level":"INFO","msg":"writer: started","stream_id":"dzwcngqh"} +{"time":"2026-04-08T06:33:17.199635077Z","level":"INFO","msg":"sender: started","stream_id":"dzwcngqh"} +{"time":"2026-04-08T06:45:52.384959086Z","level":"INFO","msg":"stream: closing","id":"dzwcngqh"} +{"time":"2026-04-08T06:45:52.578550658Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-08T06:45:52.722010171Z","level":"INFO","msg":"handler: closed","stream_id":"dzwcngqh"} +{"time":"2026-04-08T06:45:52.722128029Z","level":"INFO","msg":"sender: closed","stream_id":"dzwcngqh"} +{"time":"2026-04-08T06:45:52.722155037Z","level":"INFO","msg":"stream: closed","id":"dzwcngqh"} diff --git a/wandb/debug.log b/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3ff1cbb5d12a75245100f9d17d96496c2c2e38b0 --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1,21 @@ +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_setup.py:_flush():81] Current SDK version is 0.24.2 +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_setup.py:_flush():81] Configure stats pid to 8586 +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/train/sequential_t1_t2_task4818_fft/wandb/run-20260408_063316-dzwcngqh/logs/debug.log +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/train/sequential_t1_t2_task4818_fft/wandb/run-20260408_063316-dzwcngqh/logs/debug-internal.log +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:init():844] calling init triggers +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'daniecraig/libero_90_no_noops_lerobot_v30', 'root': None, 'episodes': [3319, 3320, 3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365], 'image_transforms': {'enable': False, 'max_num_transforms': 3, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'affine': {'weight': 1.0, 'type': 'RandomAffine', 'kwargs': {'degrees': [-5.0, 5.0], 'translate': [0.05, 0.05]}}}}, 'revision': 'main', 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'streaming': False}, 'env': {'type': 'libero', 'task': 'libero_90', 'fps': 30, 'features': {'action': {'type': , 'shape': [7]}, 'pixels/agentview_image': {'type': , 'shape': [360, 360, 3]}, 'pixels/robot0_eye_in_hand_image': {'type': , 'shape': [360, 360, 3]}, 'robot_state/eef/pos': {'type': , 'shape': [3]}, 'robot_state/eef/quat': {'type': , 'shape': [4]}, 'robot_state/eef/mat': {'type': , 'shape': [3, 3]}, 'robot_state/gripper/qpos': {'type': , 'shape': [2]}, 'robot_state/gripper/qvel': {'type': , 'shape': [2]}, 'robot_state/joints/pos': {'type': , 'shape': [7]}, 'robot_state/joints/vel': {'type': , 'shape': [7]}}, 'features_map': {'action': 'action', 'robot_state/eef/pos': 'observation.state.eef_pos', 'robot_state/eef/quat': 'observation.state.eef_quat', 'robot_state/eef/mat': 'observation.state.eef_mat', 'robot_state/gripper/qpos': 'observation.state.gripper_qpos', 'robot_state/gripper/qvel': 'observation.state.gripper_qvel', 'robot_state/joints/pos': 'observation.state.joint_pos', 'robot_state/joints/vel': 'observation.state.joint_vel', 'pixels/agentview_image': 'observation.images.image', 'pixels/robot0_eye_in_hand_image': 'observation.images.image2'}, 'max_parallel_tasks': 1, 'disable_env_checker': True, 'task_ids': [18], 'episode_length': None, 'obs_type': 'pixels_agent_pos', 'render_mode': 'rgb_array', 'camera_name': 'agentview_image,robot0_eye_in_hand_image', 'init_states': True, 'camera_name_mapping': None, 'observation_height': 360, 'observation_width': 360, 'control_mode': 'relative'}, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'input_features': None, 'output_features': None, 'device': 'cuda', 'use_amp': False, 'use_peft': False, 'push_to_hub': True, 'repo_id': 'ardalon/sequential_t1_t2_task4818_fft', 'private': None, 'tags': None, 'license': None, 'pretrained_path': '/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model', 'chunk_size': 50, 'n_action_steps': 50, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': True, 'train_state_proj': True, 'optimizer_lr': 2e-05, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 0.01, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'rtc_config': None, 'compile_model': False, 'compile_mode': 'max-autotune'}, 'output_dir': 'outputs/train/sequential_t1_t2_task4818_fft', 'job_name': 'sequential_t1_t2_task4818_fft', 'resume': False, 'seed': 42, 'cudnn_deterministic': False, 'num_workers': 4, 'batch_size': 16, 'steps': 3000, 'eval_freq': 0, 'log_freq': 1, 'tolerance_s': 0.0001, 'save_checkpoint': True, 'save_freq': 500, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 2e-05, 'weight_decay': 0.01, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 2e-05, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'lerobot-smolvla', 'entity': None, 'notes': None, 'run_id': None, 'mode': None, 'add_tags': True}, 'peft': None, 'use_rabc': False, 'rabc_progress_path': None, 'rabc_kappa': 0.01, 'rabc_epsilon': 1e-06, 'rabc_head_mode': 'sparse', 'rename_map': {}, 'checkpoint_path': None, '_wandb': {}} +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:init():892] starting backend +2026-04-08 06:33:16,848 INFO MainThread:8586 [wandb_init.py:init():895] sending inform_init request +2026-04-08 06:33:16,852 INFO MainThread:8586 [wandb_init.py:init():903] backend started and connected +2026-04-08 06:33:16,855 INFO MainThread:8586 [wandb_init.py:init():973] updated telemetry +2026-04-08 06:33:16,855 INFO MainThread:8586 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-08 06:33:17,392 INFO MainThread:8586 [wandb_init.py:init():1042] starting run threads in backend +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_console_start():2529] atexit reg +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-08 06:33:17,939 INFO MainThread:8586 [wandb_init.py:init():1082] run started, returning control to user process +2026-04-08 06:45:52,384 INFO wandb-AsyncioManager-main:8586 [service_client.py:_forward_responses():94] Reached EOF. +2026-04-08 06:45:52,385 INFO wandb-AsyncioManager-main:8586 [mailbox.py:close():154] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260408_063316-dzwcngqh/files/config.yaml b/wandb/run-20260408_063316-dzwcngqh/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a98635d6c73c3136e81bc177bdd8dea05bd2962b --- /dev/null +++ b/wandb/run-20260408_063316-dzwcngqh/files/config.yaml @@ -0,0 +1,409 @@ +_wandb: + value: + cli_version: 0.24.2 + e: + q2o8n7ytl4tnn1upp8wk4ymi7ne7hti9: + args: + - --policy.path=/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model + - --policy.input_features=null + - --policy.output_features=null + - --policy.device=cuda + - --dataset.repo_id=daniecraig/libero_90_no_noops_lerobot_v30 + - --dataset.revision=main + - --dataset.episodes=[3319, 3320, 3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365] + - --env.type=libero + - --env.task=libero_90 + - --env.task_ids=[18] + - --env.control_mode=relative + - --batch_size=16 + - --eval_freq=0 + - --save_freq=500 + - --seed=42 + - --output_dir=outputs/train/sequential_t1_t2_task4818_fft + - --policy.optimizer_lr=2e-5 + - --policy.optimizer_weight_decay=0.01 + - --steps=3000 + - --job_name=sequential_t1_t2_task4818_fft + - --wandb.enable=true + - --wandb.project=lerobot-smolvla + - --wandb.disable_artifac=true + - --policy.push_to_hub=true + - --policy.repo_id=ardalon/sequential_t1_t2_task4818_fft + - --log_freq=1 + cpu_count: 6 + cpu_count_logical: 12 + cudaVersion: "13.0" + disk: + /: + total: "253055008768" + used: "59792175104" + executable: /usr/bin/python3 + gpu: NVIDIA A100-SXM4-40GB + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "42949672960" + name: NVIDIA A100-SXM4-40GB + uuid: GPU-8127346a-b290-0d4a-3e6a-5d346abf9d4b + host: 01748ae5acaf + memory: + total: "89629200384" + os: Linux-6.6.113+-x86_64-with-glibc2.35 + program: /usr/local/bin/lerobot-train + python: CPython 3.12.13 + root: outputs/train/sequential_t1_t2_task4818_fft + startedAt: "2026-04-08T06:33:16.623737Z" + writerId: q2o8n7ytl4tnn1upp8wk4ymi7ne7hti9 + m: [] + python_version: 3.12.13 + t: + "1": + - 1 + - 5 + - 11 + - 41 + - 49 + - 51 + - 53 + - 71 + - 83 + - 105 + "2": + - 1 + - 5 + - 11 + - 41 + - 49 + - 51 + - 53 + - 71 + - 83 + - 105 + "3": + - 13 + - 15 + - 16 + - 61 + "4": 3.12.13 + "5": 0.24.2 + "6": 5.3.0 + "10": + - 21 + "12": 0.24.2 + "13": linux-x86_64 +batch_size: + value: 16 +checkpoint_path: + value: null +cudnn_deterministic: + value: false +dataset: + value: + episodes: + - 3319 + - 3320 + - 3321 + - 3322 + - 3323 + - 3324 + - 3325 + - 3326 + - 3327 + - 3328 + - 3329 + - 3330 + - 3331 + - 3332 + - 3333 + - 3334 + - 3335 + - 3336 + - 3337 + - 3338 + - 3339 + - 3340 + - 3341 + - 3342 + - 3343 + - 3344 + - 3345 + - 3346 + - 3347 + - 3348 + - 3349 + - 3350 + - 3351 + - 3352 + - 3353 + - 3354 + - 3355 + - 3356 + - 3357 + - 3358 + - 3359 + - 3360 + - 3361 + - 3362 + - 3363 + - 3364 + - 3365 + image_transforms: + enable: false + max_num_transforms: 3 + random_order: false + tfs: + affine: + kwargs: + degrees: + - -5 + - 5 + translate: + - 0.05 + - 0.05 + type: RandomAffine + weight: 1 + brightness: + kwargs: + brightness: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + contrast: + kwargs: + contrast: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + hue: + kwargs: + hue: + - -0.05 + - 0.05 + type: ColorJitter + weight: 1 + saturation: + kwargs: + saturation: + - 0.5 + - 1.5 + type: ColorJitter + weight: 1 + sharpness: + kwargs: + sharpness: + - 0.5 + - 1.5 + type: SharpnessJitter + weight: 1 + repo_id: daniecraig/libero_90_no_noops_lerobot_v30 + revision: main + root: null + streaming: false + use_imagenet_stats: true + video_backend: torchcodec +env: + value: + camera_name: agentview_image,robot0_eye_in_hand_image + camera_name_mapping: null + control_mode: relative + disable_env_checker: true + episode_length: null + features: + action: + shape: + - 7 + type: ACTION + pixels/agentview_image: + shape: + - 360 + - 360 + - 3 + type: VISUAL + pixels/robot0_eye_in_hand_image: + shape: + - 360 + - 360 + - 3 + type: VISUAL + robot_state/eef/mat: + shape: + - 3 + - 3 + type: STATE + robot_state/eef/pos: + shape: + - 3 + type: STATE + robot_state/eef/quat: + shape: + - 4 + type: STATE + robot_state/gripper/qpos: + shape: + - 2 + type: STATE + robot_state/gripper/qvel: + shape: + - 2 + type: STATE + robot_state/joints/pos: + shape: + - 7 + type: STATE + robot_state/joints/vel: + shape: + - 7 + type: STATE + features_map: + action: action + pixels/agentview_image: observation.images.image + pixels/robot0_eye_in_hand_image: observation.images.image2 + robot_state/eef/mat: observation.state.eef_mat + robot_state/eef/pos: observation.state.eef_pos + robot_state/eef/quat: observation.state.eef_quat + robot_state/gripper/qpos: observation.state.gripper_qpos + robot_state/gripper/qvel: observation.state.gripper_qvel + robot_state/joints/pos: observation.state.joint_pos + robot_state/joints/vel: observation.state.joint_vel + fps: 30 + init_states: true + max_parallel_tasks: 1 + obs_type: pixels_agent_pos + observation_height: 360 + observation_width: 360 + render_mode: rgb_array + task: libero_90 + task_ids: + - 18 + type: libero +eval: + value: + batch_size: 50 + n_episodes: 50 + use_async_envs: false +eval_freq: + value: 0 +job_name: + value: sequential_t1_t2_task4818_fft +log_freq: + value: 1 +num_workers: + value: 4 +optimizer: + value: + betas: + - 0.9 + - 0.95 + eps: 1e-08 + grad_clip_norm: 10 + lr: 2e-05 + type: adamw + weight_decay: 0.01 +output_dir: + value: outputs/train/sequential_t1_t2_task4818_fft +peft: + value: null +policy: + value: + adapt_to_pi_aloha: false + add_image_special_tokens: false + attention_mode: cross_attn + chunk_size: 50 + compile_mode: max-autotune + compile_model: false + device: cuda + empty_cameras: 0 + expert_width_multiplier: 0.75 + freeze_vision_encoder: true + input_features: null + license: null + load_vlm_weights: true + max_action_dim: 32 + max_period: 4 + max_state_dim: 32 + min_period: 0.004 + n_action_steps: 50 + n_obs_steps: 1 + normalization_mapping: + ACTION: MEAN_STD + STATE: MEAN_STD + VISUAL: IDENTITY + num_expert_layers: 0 + num_steps: 10 + num_vlm_layers: 16 + optimizer_betas: + - 0.9 + - 0.95 + optimizer_eps: 1e-08 + optimizer_grad_clip_norm: 10 + optimizer_lr: 2e-05 + optimizer_weight_decay: 0.01 + output_features: null + pad_language_to: max_length + prefix_length: 0 + pretrained_path: /content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model + private: null + push_to_hub: true + repo_id: ardalon/sequential_t1_t2_task4818_fft + resize_imgs_with_padding: + - 512 + - 512 + rtc_config: null + scheduler_decay_lr: 2.5e-06 + scheduler_decay_steps: 30000 + scheduler_warmup_steps: 1000 + self_attn_every_n_layers: 2 + tags: null + tokenizer_max_length: 48 + train_expert_only: true + train_state_proj: true + type: smolvla + use_amp: false + use_cache: true + use_delta_joint_actions_aloha: false + use_peft: false + vlm_model_name: HuggingFaceTB/SmolVLM2-500M-Video-Instruct +rabc_epsilon: + value: 1e-06 +rabc_head_mode: + value: sparse +rabc_kappa: + value: 0.01 +rabc_progress_path: + value: null +resume: + value: false +save_checkpoint: + value: true +save_freq: + value: 500 +scheduler: + value: + decay_lr: 2.5e-06 + num_decay_steps: 30000 + num_warmup_steps: 1000 + peak_lr: 2e-05 + type: cosine_decay_with_warmup +seed: + value: 42 +steps: + value: 3000 +tolerance_s: + value: 0.0001 +use_policy_training_preset: + value: true +use_rabc: + value: false +wandb: + value: + add_tags: true + disable_artifact: true + enable: true + entity: null + mode: null + notes: null + project: lerobot-smolvla + run_id: null diff --git a/wandb/run-20260408_063316-dzwcngqh/files/output.log b/wandb/run-20260408_063316-dzwcngqh/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ceb929a2ab26fabed7baa5d602b71b5ffe4eb90e --- /dev/null +++ b/wandb/run-20260408_063316-dzwcngqh/files/output.log @@ -0,0 +1,3046 @@ +INFO 2026-04-08 06:33:17 db_utils.py:117 Logs will be synced with wandb. +INFO 2026-04-08 06:33:17 db_utils.py:118 Track this run --> https://wandb.ai/aryashad-usc/lerobot-smolvla/runs/dzwcngqh +INFO 2026-04-08 06:33:17 ot_train.py:221 Creating dataset +Fetching 4 files: 100% 4/4 [00:00<00:00, 4350.94it/s]] +Download complete: : 0.00B [00:00, ?B/s] +INFO 2026-04-08 06:33:18 eo_utils.py:108 Using video codec: libsvtav1 +Fetching 4 files: 100% 4/4 [00:00<00:00, 3961.56it/s]] +Download complete: : 0.00B [00:00, ?B/s] +Fetching 3 files: 100% 3/3 [00:06<00:00, 2.20s/it]00:06<00:00, 54.1MB/s] +Download complete: 100% 209M/209M [00:06<00:00, 54.1MB/s] INFO 2026-04-08 06:33:25 ot_train.py:239 Creating policy +Download complete: 100% 209M/209M [00:06<00:00, 30.6MB/s] +Loading HuggingFaceTB/SmolVLM2-500M-Video-Instruct weights ... +`torch_dtype` is deprecated! Use `dtype` instead! +Loading weights: 100% 489/489 [00:00<00:00, 2734.24it/s] +Reducing the number of VLM layers to 16 ... +Loading weights from local directory +INFO 2026-04-08 06:33:36 ot_train.py:306 Creating optimizer and scheduler +INFO 2026-04-08 06:33:36 hedulers.py:105 Auto-scaling LR scheduler: num_training_steps (3000) < num_decay_steps (30000). Scaling warmup: 1000 → 100, decay: 30000 → 3000 (scale factor: 0.100) +INFO 2026-04-08 06:33:36 ot_train.py:341 Output dir: outputs/train/sequential_t1_t2_task4818_fft +INFO 2026-04-08 06:33:36 ot_train.py:343 cfg.env.task='libero_90' +INFO 2026-04-08 06:33:36 ot_train.py:344 Creating environment processors +INFO 2026-04-08 06:33:36 ot_train.py:348 cfg.steps=3000 (3K) +INFO 2026-04-08 06:33:36 ot_train.py:349 dataset.num_frames=9287 (9K) +INFO 2026-04-08 06:33:36 ot_train.py:350 dataset.num_episodes=47 +INFO 2026-04-08 06:33:36 ot_train.py:353 Effective batch size: 16 x 1 = 16 +INFO 2026-04-08 06:33:36 ot_train.py:354 num_learnable_params=99880992 (100M) +INFO 2026-04-08 06:33:36 ot_train.py:355 num_total_params=450046176 (450M) +Training: 0% 0/3000 [00:00, 'shape': [7]}, 'pixels/agentview_image': {'type': , 'shape': [360, 360, 3]}, 'pixels/robot0_eye_in_hand_image': {'type': , 'shape': [360, 360, 3]}, 'robot_state/eef/pos': {'type': , 'shape': [3]}, 'robot_state/eef/quat': {'type': , 'shape': [4]}, 'robot_state/eef/mat': {'type': , 'shape': [3, 3]}, 'robot_state/gripper/qpos': {'type': , 'shape': [2]}, 'robot_state/gripper/qvel': {'type': , 'shape': [2]}, 'robot_state/joints/pos': {'type': , 'shape': [7]}, 'robot_state/joints/vel': {'type': , 'shape': [7]}}, 'features_map': {'action': 'action', 'robot_state/eef/pos': 'observation.state.eef_pos', 'robot_state/eef/quat': 'observation.state.eef_quat', 'robot_state/eef/mat': 'observation.state.eef_mat', 'robot_state/gripper/qpos': 'observation.state.gripper_qpos', 'robot_state/gripper/qvel': 'observation.state.gripper_qvel', 'robot_state/joints/pos': 'observation.state.joint_pos', 'robot_state/joints/vel': 'observation.state.joint_vel', 'pixels/agentview_image': 'observation.images.image', 'pixels/robot0_eye_in_hand_image': 'observation.images.image2'}, 'max_parallel_tasks': 1, 'disable_env_checker': True, 'task_ids': [18], 'episode_length': None, 'obs_type': 'pixels_agent_pos', 'render_mode': 'rgb_array', 'camera_name': 'agentview_image,robot0_eye_in_hand_image', 'init_states': True, 'camera_name_mapping': None, 'observation_height': 360, 'observation_width': 360, 'control_mode': 'relative'}, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'input_features': None, 'output_features': None, 'device': 'cuda', 'use_amp': False, 'use_peft': False, 'push_to_hub': True, 'repo_id': 'ardalon/sequential_t1_t2_task4818_fft', 'private': None, 'tags': None, 'license': None, 'pretrained_path': '/content/outputs/train/sequential_t1_task48_fft/checkpoints/003000/pretrained_model', 'chunk_size': 50, 'n_action_steps': 50, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': True, 'train_state_proj': True, 'optimizer_lr': 2e-05, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 0.01, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'rtc_config': None, 'compile_model': False, 'compile_mode': 'max-autotune'}, 'output_dir': 'outputs/train/sequential_t1_t2_task4818_fft', 'job_name': 'sequential_t1_t2_task4818_fft', 'resume': False, 'seed': 42, 'cudnn_deterministic': False, 'num_workers': 4, 'batch_size': 16, 'steps': 3000, 'eval_freq': 0, 'log_freq': 1, 'tolerance_s': 0.0001, 'save_checkpoint': True, 'save_freq': 500, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 2e-05, 'weight_decay': 0.01, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 2e-05, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'lerobot-smolvla', 'entity': None, 'notes': None, 'run_id': None, 'mode': None, 'add_tags': True}, 'peft': None, 'use_rabc': False, 'rabc_progress_path': None, 'rabc_kappa': 0.01, 'rabc_epsilon': 1e-06, 'rabc_head_mode': 'sparse', 'rename_map': {}, 'checkpoint_path': None, '_wandb': {}} +2026-04-08 06:33:16,625 INFO MainThread:8586 [wandb_init.py:init():892] starting backend +2026-04-08 06:33:16,848 INFO MainThread:8586 [wandb_init.py:init():895] sending inform_init request +2026-04-08 06:33:16,852 INFO MainThread:8586 [wandb_init.py:init():903] backend started and connected +2026-04-08 06:33:16,855 INFO MainThread:8586 [wandb_init.py:init():973] updated telemetry +2026-04-08 06:33:16,855 INFO MainThread:8586 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-08 06:33:17,392 INFO MainThread:8586 [wandb_init.py:init():1042] starting run threads in backend +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_console_start():2529] atexit reg +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-08 06:33:17,935 INFO MainThread:8586 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-08 06:33:17,939 INFO MainThread:8586 [wandb_init.py:init():1082] run started, returning control to user process +2026-04-08 06:45:52,384 INFO wandb-AsyncioManager-main:8586 [service_client.py:_forward_responses():94] Reached EOF. +2026-04-08 06:45:52,385 INFO wandb-AsyncioManager-main:8586 [mailbox.py:close():154] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260408_063316-dzwcngqh/run-dzwcngqh.wandb b/wandb/run-20260408_063316-dzwcngqh/run-dzwcngqh.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6c226f3c2c3fac9f9897f8b98efed94e3f3505ae --- /dev/null +++ b/wandb/run-20260408_063316-dzwcngqh/run-dzwcngqh.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f08de163bd5357196add322d36532ccc1d4810ee8745772feddff3b189417c0 +size 4568236