diff --git a/CAN_GRAB/checkpoints/002000/pretrained_model/config.json b/CAN_GRAB/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1251c9fcb338c74ae98ef9d8b5de36fcc3bd2937 --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/002000/pretrained_model/model.safetensors b/CAN_GRAB/checkpoints/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f7eea5d640d0cc483349a1358faad2735ff9346 --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ffdbe2e3a7f78f4410cc748a997cdedcc231f9955d12578530e72a41d19224 +size 906713296 diff --git a/CAN_GRAB/checkpoints/002000/pretrained_model/train_config.json b/CAN_GRAB/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53d1acf2f50c37062405afaf4814e847aa31f42a --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,195 @@ +{ + "dataset": { + "repo_id": "ps5387/Grab_Can3", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/CAN_GRAB", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 320, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/002000/training_state/optimizer_param_groups.json b/CAN_GRAB/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..da5f0780eca11f85ea3cdbe4e12e539c10572f9c --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,526 @@ +[ + { + "lr": 9.893469553577303e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505 + ] + } +] \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/002000/training_state/optimizer_state.safetensors b/CAN_GRAB/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66bb493d3d3e8fe8f99d529bbfb340c07df157a1 --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac74304222ce0ac947ee3bc4a97a43c9408ff83fd0b1ab4d6a981e6ebe95f63 +size 412659164 diff --git a/CAN_GRAB/checkpoints/002000/training_state/rng_state.safetensors b/CAN_GRAB/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a9ef4cf8b7fdae6e69c55403979172f7b796c1f --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b8845ce5327a03e7c66e303cfa1f43c74428df34e31e5ce9f953ecf72bb4a2 +size 15708 diff --git a/CAN_GRAB/checkpoints/002000/training_state/scheduler_state.json b/CAN_GRAB/checkpoints/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a8ea1fe94d7672f44a951869d73aad049af8814e --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 2000, + "verbose": false, + "_step_count": 2001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.893469553577303e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/002000/training_state/training_step.json b/CAN_GRAB/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/CAN_GRAB/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/004000/pretrained_model/config.json b/CAN_GRAB/checkpoints/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1251c9fcb338c74ae98ef9d8b5de36fcc3bd2937 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/004000/pretrained_model/model.safetensors b/CAN_GRAB/checkpoints/004000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6096e1806b175e0650003c5350e84b77827a3cf6 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787403e067c82bc7af92753ee6ee13420ffc81d974f8e8d8415829f7971b8124 +size 906713296 diff --git a/CAN_GRAB/checkpoints/004000/pretrained_model/train_config.json b/CAN_GRAB/checkpoints/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53d1acf2f50c37062405afaf4814e847aa31f42a --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/pretrained_model/train_config.json @@ -0,0 +1,195 @@ +{ + "dataset": { + "repo_id": "ps5387/Grab_Can3", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/CAN_GRAB", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 320, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/004000/training_state/optimizer_param_groups.json b/CAN_GRAB/checkpoints/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..d4330260d219cd977383eed0e454fe332e2fb116 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,526 @@ +[ + { + "lr": 9.578534106007679e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505 + ] + } +] \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/004000/training_state/optimizer_state.safetensors b/CAN_GRAB/checkpoints/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d6f5a36359625acb8ab34142de4bc57c3139375 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1e8cb0e634ed7e71e71902361bdf5bbf0a8735644fdb1a941a161592880892f +size 412659164 diff --git a/CAN_GRAB/checkpoints/004000/training_state/rng_state.safetensors b/CAN_GRAB/checkpoints/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82319f847460a4f129a402a18d44bf22262dc2c3 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007113d5bb46e307bc404ee26a8687808a31128d615058bd123a46ebe6a319fc +size 15708 diff --git a/CAN_GRAB/checkpoints/004000/training_state/scheduler_state.json b/CAN_GRAB/checkpoints/004000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..50e2d2adb22983d2846fdc719e34193983fcb510 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 4000, + "verbose": false, + "_step_count": 4001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.578534106007679e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/CAN_GRAB/checkpoints/004000/training_state/training_step.json b/CAN_GRAB/checkpoints/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/CAN_GRAB/checkpoints/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/002000/pretrained_model/config.json b/FORKNiFe/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1251c9fcb338c74ae98ef9d8b5de36fcc3bd2937 --- /dev/null +++ b/FORKNiFe/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/002000/pretrained_model/model.safetensors b/FORKNiFe/checkpoints/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98e8305735d0eb4144877a8234df2e7097f39137 --- /dev/null +++ b/FORKNiFe/checkpoints/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ee3ba71072a3c90ea2d05c22c0afef9a00f1bc30d87c7686bee5701695631f +size 906713296 diff --git a/FORKNiFe/checkpoints/002000/pretrained_model/train_config.json b/FORKNiFe/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cb56613eb677026482b8f1eb7136c2d4e099c557 --- /dev/null +++ b/FORKNiFe/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,195 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORKNiFe", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/002000/training_state/optimizer_param_groups.json b/FORKNiFe/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..da5f0780eca11f85ea3cdbe4e12e539c10572f9c --- /dev/null +++ b/FORKNiFe/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,526 @@ +[ + { + "lr": 9.893469553577303e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505 + ] + } +] \ No newline at end of file diff --git a/FORKNiFe/checkpoints/002000/training_state/optimizer_state.safetensors b/FORKNiFe/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a53a0eefec0527cdbc52b71b07c1bb5188f2fb7f --- /dev/null +++ b/FORKNiFe/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ce9fd52302e1b0e8c9d96be9513ccd8f4727e5cfb8bd571ebf0d3f99d37148 +size 412659164 diff --git a/FORKNiFe/checkpoints/002000/training_state/rng_state.safetensors b/FORKNiFe/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9805a62b79c7684483796255437c9af0bc4d5a13 --- /dev/null +++ b/FORKNiFe/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf388cb9cf8c798945448969b72dfc7da8e7401cbeff0ff65761edf573b6226 +size 15708 diff --git a/FORKNiFe/checkpoints/002000/training_state/scheduler_state.json b/FORKNiFe/checkpoints/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a8ea1fe94d7672f44a951869d73aad049af8814e --- /dev/null +++ b/FORKNiFe/checkpoints/002000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 2000, + "verbose": false, + "_step_count": 2001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.893469553577303e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/002000/training_state/training_step.json b/FORKNiFe/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/FORKNiFe/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/004000/pretrained_model/config.json b/FORKNiFe/checkpoints/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1251c9fcb338c74ae98ef9d8b5de36fcc3bd2937 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/004000/pretrained_model/model.safetensors b/FORKNiFe/checkpoints/004000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4141254f0b6880860d05d07b1a58a6619b727e2 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa253bd74dc5781d7236acd55d3de0db1337b1daf2319f30df608ddf0bb04870 +size 906713296 diff --git a/FORKNiFe/checkpoints/004000/pretrained_model/train_config.json b/FORKNiFe/checkpoints/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cb56613eb677026482b8f1eb7136c2d4e099c557 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/pretrained_model/train_config.json @@ -0,0 +1,195 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORKNiFe", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/004000/training_state/optimizer_param_groups.json b/FORKNiFe/checkpoints/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..d4330260d219cd977383eed0e454fe332e2fb116 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,526 @@ +[ + { + "lr": 9.578534106007679e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505 + ] + } +] \ No newline at end of file diff --git a/FORKNiFe/checkpoints/004000/training_state/optimizer_state.safetensors b/FORKNiFe/checkpoints/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fca315df7c86e17ef100c7c4a9e1c032964ddbc0 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4c95d7b01c3f3ef8ac81e1b1e825bfd1390f92909c1ba367f88c2e010c19d2 +size 412659164 diff --git a/FORKNiFe/checkpoints/004000/training_state/rng_state.safetensors b/FORKNiFe/checkpoints/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386217221d35291e02f1de71d9b70158e1f4b9e6 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8047111117e985fda378ee716f3bfa8561e846f4feff07c61978a8f274a9733 +size 15708 diff --git a/FORKNiFe/checkpoints/004000/training_state/scheduler_state.json b/FORKNiFe/checkpoints/004000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..50e2d2adb22983d2846fdc719e34193983fcb510 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 4000, + "verbose": false, + "_step_count": 4001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.578534106007679e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/004000/training_state/training_step.json b/FORKNiFe/checkpoints/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/FORKNiFe/checkpoints/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/006000/pretrained_model/config.json b/FORKNiFe/checkpoints/006000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1251c9fcb338c74ae98ef9d8b5de36fcc3bd2937 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/pretrained_model/config.json @@ -0,0 +1,83 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/006000/pretrained_model/model.safetensors b/FORKNiFe/checkpoints/006000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89365e0ecbf41eec96e372e51c17396a9435d5a6 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8ddd8386254663419b35aaa3f92f02148e03fe8760dd01318967bed82b5487 +size 906713296 diff --git a/FORKNiFe/checkpoints/006000/pretrained_model/train_config.json b/FORKNiFe/checkpoints/006000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cb56613eb677026482b8f1eb7136c2d4e099c557 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/pretrained_model/train_config.json @@ -0,0 +1,195 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 50, + "n_action_steps": 50, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORKNiFe", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/006000/training_state/optimizer_param_groups.json b/FORKNiFe/checkpoints/006000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..739c63cfc05e7bdde5130fb92902a7a2c3372aa1 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/training_state/optimizer_param_groups.json @@ -0,0 +1,526 @@ +[ + { + "lr": 9.06895784757787e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505 + ] + } +] \ No newline at end of file diff --git a/FORKNiFe/checkpoints/006000/training_state/optimizer_state.safetensors b/FORKNiFe/checkpoints/006000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d3a86e38e727e5878d312dcddc259934a9e6dd4 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fd8cc31ee13ad5b1c7c81418a2b85404535beda70a859d7f26476bc0d5f7a0 +size 412659164 diff --git a/FORKNiFe/checkpoints/006000/training_state/rng_state.safetensors b/FORKNiFe/checkpoints/006000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f5f5771708de25e605cd78f06d1c5b21357b8f9 --- /dev/null +++ b/FORKNiFe/checkpoints/006000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842a27dc17ed379a37dfa43d067cc4b98ee5272557123a3d2320fe71a395d416 +size 15708 diff --git a/FORKNiFe/checkpoints/006000/training_state/scheduler_state.json b/FORKNiFe/checkpoints/006000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77db939c3a449f106739b1e4542ca4718810450a --- /dev/null +++ b/FORKNiFe/checkpoints/006000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 6000, + "verbose": false, + "_step_count": 6001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.06895784757787e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/FORKNiFe/checkpoints/006000/training_state/training_step.json b/FORKNiFe/checkpoints/006000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..e267ac589be64705f8674638b9f5099c886778da --- /dev/null +++ b/FORKNiFe/checkpoints/006000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 6000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/002000/pretrained_model/config.json b/FORK_act/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/002000/pretrained_model/model.safetensors b/FORK_act/checkpoints/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a673eeb99b0af008c846b1d8fae72c804874ad30 --- /dev/null +++ b/FORK_act/checkpoints/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e6bb3ee05c77ef310835d874c985935354626405e7464e08618311be898359b +size 206701064 diff --git a/FORK_act/checkpoints/002000/pretrained_model/train_config.json b/FORK_act/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/002000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/002000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4503ee5b1a0b5b449adfeeb998f0f2b1299bed24 --- /dev/null +++ b/FORK_act/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e275f857d719082a7847596e819d384066bc83cc1a61c64328373f354bcc6242 +size 412817652 diff --git a/FORK_act/checkpoints/002000/training_state/rng_state.safetensors b/FORK_act/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8dea7be91877e24899070dfd17b333fa61116e6 --- /dev/null +++ b/FORK_act/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7faa54126bbd5cc046ab24500d1bbb256636ede2a24ff878dec91ef7f6ce569c +size 15708 diff --git a/FORK_act/checkpoints/002000/training_state/training_step.json b/FORK_act/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/FORK_act/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/004000/pretrained_model/config.json b/FORK_act/checkpoints/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/004000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/004000/pretrained_model/model.safetensors b/FORK_act/checkpoints/004000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d25f017853eba20f92fdcb0ed63a1d72fa6b760d --- /dev/null +++ b/FORK_act/checkpoints/004000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b1fa1f0aa85cad276003166a4b019d20e0e11019f3a46ef838e3cc2eeae938 +size 206701064 diff --git a/FORK_act/checkpoints/004000/pretrained_model/train_config.json b/FORK_act/checkpoints/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/004000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/004000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/004000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d99b3592de90bb611b2a40ad29edd383e677dca --- /dev/null +++ b/FORK_act/checkpoints/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534a7d198bcbda119e208eca8e44ca6e09748d70b88971744667eec05b2ebd0b +size 412817652 diff --git a/FORK_act/checkpoints/004000/training_state/rng_state.safetensors b/FORK_act/checkpoints/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab08ce23984005f84ca2a40957844dccfd351152 --- /dev/null +++ b/FORK_act/checkpoints/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46aeef983079a5f300223d452e66896aba8cdae8304f1c4e540caf62713b60b +size 15708 diff --git a/FORK_act/checkpoints/004000/training_state/training_step.json b/FORK_act/checkpoints/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/FORK_act/checkpoints/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/006000/pretrained_model/config.json b/FORK_act/checkpoints/006000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/006000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/006000/pretrained_model/model.safetensors b/FORK_act/checkpoints/006000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b7516ac2afdc4dde8e35df8e0ef17347040758b --- /dev/null +++ b/FORK_act/checkpoints/006000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5002ab2295eb366a4cefa64ee1f712e3595368a4d3ebdce56b70592a139a7e +size 206701064 diff --git a/FORK_act/checkpoints/006000/pretrained_model/train_config.json b/FORK_act/checkpoints/006000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/006000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/006000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/006000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/006000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/006000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/006000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ec2fbaae3a61b33ddc0a077dc8db19775311d58 --- /dev/null +++ b/FORK_act/checkpoints/006000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd272b7a6d94ec65b0b5fdf8a250c4d1c0595880f5528dc239abf3a0a912c897 +size 412817652 diff --git a/FORK_act/checkpoints/006000/training_state/rng_state.safetensors b/FORK_act/checkpoints/006000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8535c9d42be485a42b902d27c82ecc1f971ff083 --- /dev/null +++ b/FORK_act/checkpoints/006000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c90bd67f6ff8b8057e669f2948075f36251de82ffdec0153e4d511625023b0 +size 15708 diff --git a/FORK_act/checkpoints/006000/training_state/training_step.json b/FORK_act/checkpoints/006000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..e267ac589be64705f8674638b9f5099c886778da --- /dev/null +++ b/FORK_act/checkpoints/006000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 6000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/008000/pretrained_model/config.json b/FORK_act/checkpoints/008000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/008000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/008000/pretrained_model/model.safetensors b/FORK_act/checkpoints/008000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..747de650bd932f95b639880e94882e90a51ffb41 --- /dev/null +++ b/FORK_act/checkpoints/008000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4618d1b319824066fb0e58068023a58eb0393a4413a4749fbb20979ffdc635b3 +size 206701064 diff --git a/FORK_act/checkpoints/008000/pretrained_model/train_config.json b/FORK_act/checkpoints/008000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/008000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/008000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/008000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/008000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/008000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/008000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1907f27bf2568352c958384a433757510240ef31 --- /dev/null +++ b/FORK_act/checkpoints/008000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a296a4a4975f41b4caa62d45646cdecaa0164865dad63e0cdd050e17885cec +size 412817652 diff --git a/FORK_act/checkpoints/008000/training_state/rng_state.safetensors b/FORK_act/checkpoints/008000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ba77d1a7627b6c18f4c4e2ea239701abf403966 --- /dev/null +++ b/FORK_act/checkpoints/008000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9d55c975f17566d0fd9a1388d422e4172946974826e2293419049769b65265 +size 15708 diff --git a/FORK_act/checkpoints/008000/training_state/training_step.json b/FORK_act/checkpoints/008000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..74d8cba01cab8506617b2cbae6f268fe80fbfa79 --- /dev/null +++ b/FORK_act/checkpoints/008000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 8000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/010000/pretrained_model/config.json b/FORK_act/checkpoints/010000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/010000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/010000/pretrained_model/model.safetensors b/FORK_act/checkpoints/010000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b84bf5338fe06f92b1b6f53d3cf191c1a1e75dc --- /dev/null +++ b/FORK_act/checkpoints/010000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9024227eabd42226bc43c4b339814375d12bc13a1b1736eb1cdc638afa31d57 +size 206701064 diff --git a/FORK_act/checkpoints/010000/pretrained_model/train_config.json b/FORK_act/checkpoints/010000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/010000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/010000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/010000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/010000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/010000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/010000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21bcefabefbf4f54d545ab6b4bfad7feb2cbaf6 --- /dev/null +++ b/FORK_act/checkpoints/010000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fc238a9b95f15b643bc280b0740152705c5b116a2e5c627ed0973d981a6adf +size 412817652 diff --git a/FORK_act/checkpoints/010000/training_state/rng_state.safetensors b/FORK_act/checkpoints/010000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..854b6b956061996ea513620786db42d2dd2bb98a --- /dev/null +++ b/FORK_act/checkpoints/010000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7de729c0ccbc0ed6bc97212bbdfaf456294ea2516c0e95ed5f7642001161504 +size 15708 diff --git a/FORK_act/checkpoints/010000/training_state/training_step.json b/FORK_act/checkpoints/010000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7cb7c0986e9e7461ca851ce71e95d235ae3d2732 --- /dev/null +++ b/FORK_act/checkpoints/010000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 10000 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/012000/pretrained_model/config.json b/FORK_act/checkpoints/012000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a3988e9a8483f256f2f5fc6c1aeaab3a4298ebd --- /dev/null +++ b/FORK_act/checkpoints/012000/pretrained_model/config.json @@ -0,0 +1,64 @@ +{ + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 +} \ No newline at end of file diff --git a/FORK_act/checkpoints/012000/pretrained_model/model.safetensors b/FORK_act/checkpoints/012000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4b470d0b76986f9e5eaf5f73e6523397f7523a5 --- /dev/null +++ b/FORK_act/checkpoints/012000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab8c18e940f67d67146f529189712a45dbfef54c301e32b8b7e5e5a4b9b107b +size 206701064 diff --git a/FORK_act/checkpoints/012000/pretrained_model/train_config.json b/FORK_act/checkpoints/012000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37d06b693b8e47a63792ee5425dbd04eb4d09e5d --- /dev/null +++ b/FORK_act/checkpoints/012000/pretrained_model/train_config.json @@ -0,0 +1,170 @@ +{ + "dataset": { + "repo_id": "ps5387/mobile_kitchen_t1", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "act", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "chunk_size": 100, + "n_action_steps": 100, + "vision_backbone": "resnet18", + "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", + "replace_final_stride_with_dilation": false, + "pre_norm": false, + "dim_model": 512, + "n_heads": 8, + "dim_feedforward": 3200, + "feedforward_activation": "relu", + "n_encoder_layers": 4, + "n_decoder_layers": 1, + "use_vae": true, + "latent_dim": 32, + "n_vae_encoder_layers": 4, + "temporal_ensemble_coeff": null, + "dropout": 0.1, + "kl_weight": 10.0, + "optimizer_lr": 1e-05, + "optimizer_weight_decay": 0.0001, + "optimizer_lr_backbone": 1e-05 + }, + "output_dir": "/scratch/ps5387/GLOBALHACK_OUT/train/FORK_act", + "job_name": "smolVLA_Forknife", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 160, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 1e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": null, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null + } +} \ No newline at end of file diff --git a/FORK_act/checkpoints/012000/training_state/optimizer_param_groups.json b/FORK_act/checkpoints/012000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4293d5bac08dc4a451e660603130bbee86963094 --- /dev/null +++ b/FORK_act/checkpoints/012000/training_state/optimizer_param_groups.json @@ -0,0 +1,189 @@ +[ + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132 + ] + }, + { + "lr": 1e-05, + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "foreach": null, + "maximize": false, + "capturable": false, + "differentiable": false, + "fused": null, + "params": [ + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152 + ] + } +] \ No newline at end of file diff --git a/FORK_act/checkpoints/012000/training_state/optimizer_state.safetensors b/FORK_act/checkpoints/012000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0074243857334c409c9f74ec091d868edae298db --- /dev/null +++ b/FORK_act/checkpoints/012000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc292a8df7627fed6dc14899c8e2fccbe46fefedac0d8a8f20d129db2026f4a +size 412817652 diff --git a/FORK_act/checkpoints/012000/training_state/rng_state.safetensors b/FORK_act/checkpoints/012000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39c22d214d5ca5913c05def7fa13413fca59b2c4 --- /dev/null +++ b/FORK_act/checkpoints/012000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f05871b72c5af68381f7c4429f03332e1e4caf0423f2241db78c2bcb413f72 +size 15708 diff --git a/FORK_act/checkpoints/012000/training_state/training_step.json b/FORK_act/checkpoints/012000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fb27ac819b81943e6545c7c18510bdfb8eae1b --- /dev/null +++ b/FORK_act/checkpoints/012000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 12000 +} \ No newline at end of file