diff --git a/checkpoints/010000/pretrained_model/config.json b/checkpoints/010000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/010000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/model.safetensors b/checkpoints/010000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a68746aba91b479a7370afffa1e2c176ac08ada1 --- /dev/null +++ b/checkpoints/010000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1177383edcec91362235ba5b2bb991922375c07e459c554e9956766a52df11a2 +size 857967220 diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor.json b/checkpoints/010000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor.json b/checkpoints/010000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/010000/pretrained_model/train_config.json b/checkpoints/010000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/010000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_param_groups.json b/checkpoints/010000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4a1a095527457a0719ced08eec9ea667ddbd9397 --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 4.877641290737884e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 4.8776412907378845e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_state.safetensors b/checkpoints/010000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3b2cdd33cf7823c61d747c690d477fe75cd76fa --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3cc08359f9ed344ad5f25761fc7f30d00f89215ec58d439a5686e55c0a2f65c +size 1715911736 diff --git a/checkpoints/010000/training_state/rng_state.safetensors b/checkpoints/010000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..554fe935e08dc8e25b9ccd800920edc1afed6a28 --- /dev/null +++ b/checkpoints/010000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cda57fb4963bfd45fa4f0dbf6f718ea9b4c8fc3164bd5d6086a7141bc3d939 +size 15708 diff --git a/checkpoints/010000/training_state/scheduler_state.json b/checkpoints/010000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bc59e5eb82866107c7bb766d5969c6d71c6e0255 --- /dev/null +++ b/checkpoints/010000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 10000, + "_step_count": 10001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.877641290737884e-05, + 4.8776412907378845e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/training_step.json b/checkpoints/010000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7cb7c0986e9e7461ca851ce71e95d235ae3d2732 --- /dev/null +++ b/checkpoints/010000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 10000 +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/config.json b/checkpoints/020000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/020000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/model.safetensors b/checkpoints/020000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8337b3236b9de8a161012421355e6c475b8f2f26 --- /dev/null +++ b/checkpoints/020000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bbabad8cbd223b39a19d45f73c02495d3bb29ab950decf2d892ee9869673a1e +size 857967220 diff --git a/checkpoints/020000/pretrained_model/policy_postprocessor.json b/checkpoints/020000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/020000/pretrained_model/policy_preprocessor.json b/checkpoints/020000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/020000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/020000/pretrained_model/train_config.json b/checkpoints/020000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/020000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_param_groups.json b/checkpoints/020000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..bca6998b16533581a4a699683d8f2c4f1c10f2e8 --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 4.522542485937369e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 4.522542485937369e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_state.safetensors b/checkpoints/020000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca61b49ddd213ff45d4382283be00b08ed335e08 --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed26cf37ed67dc91b0c0c4c085eb9f42d052191b5935c1ac8a74adb5791aaab5 +size 1715911736 diff --git a/checkpoints/020000/training_state/rng_state.safetensors b/checkpoints/020000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbfcd4dc6bfc2aceed715284f89b0f306bcfa63b --- /dev/null +++ b/checkpoints/020000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1fd30d6d274d117d510e9a89dfabe0c19405650c88173a5b5ff37fc1555354 +size 15708 diff --git a/checkpoints/020000/training_state/scheduler_state.json b/checkpoints/020000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..731c5593fbd41db57330c250ded7cffa6d2fca2e --- /dev/null +++ b/checkpoints/020000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 20000, + "_step_count": 20001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.522542485937369e-05, + 4.522542485937369e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/training_step.json b/checkpoints/020000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..dc9bb47026c5d5237ca6fc5dbff6020dd122ea05 --- /dev/null +++ b/checkpoints/020000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 20000 +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/config.json b/checkpoints/030000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/030000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/model.safetensors b/checkpoints/030000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0227b694e861e2f0139ee33b782b094305810ce7 --- /dev/null +++ b/checkpoints/030000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13b176e48e3052ea1ffb9840d84ba9a8e5464171cae49ffeceb388e6e999088 +size 857967220 diff --git a/checkpoints/030000/pretrained_model/policy_postprocessor.json b/checkpoints/030000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/030000/pretrained_model/policy_preprocessor.json b/checkpoints/030000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/030000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/030000/pretrained_model/train_config.json b/checkpoints/030000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/030000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_param_groups.json b/checkpoints/030000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4a0ac6771a389e0574c160e5a09085202b854d73 --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 3.969463130731183e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 3.969463130731183e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_state.safetensors b/checkpoints/030000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce36d9db2bc8b3ba89605eef851144774be565f5 --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5c020708cfa6340f398a6c82644c6deef2a8031586a3173e8b14aa9cff2890 +size 1715911736 diff --git a/checkpoints/030000/training_state/rng_state.safetensors b/checkpoints/030000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..886212e4ab15a7b75b3522c18703f357d375c9e7 --- /dev/null +++ b/checkpoints/030000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1feedd4950dab3d6d768e5b3c52f064e92d3f02cac1c128ccae7173324e290b4 +size 15708 diff --git a/checkpoints/030000/training_state/scheduler_state.json b/checkpoints/030000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a6f740611d794705232cdfbb106051fff67c8b22 --- /dev/null +++ b/checkpoints/030000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 30000, + "_step_count": 30001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.969463130731183e-05, + 3.969463130731183e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/training_step.json b/checkpoints/030000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..f4945f660f45b332883dccfccf18d8b8815d916a --- /dev/null +++ b/checkpoints/030000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 30000 +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/config.json b/checkpoints/040000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/040000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/model.safetensors b/checkpoints/040000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e31de287ae8fe69a34b69395b34d4ca454582304 --- /dev/null +++ b/checkpoints/040000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f664e4fd40459da01925cf223624a103ca95a5ae336f3804899bf22aac4b17f +size 857967220 diff --git a/checkpoints/040000/pretrained_model/policy_postprocessor.json b/checkpoints/040000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/040000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/040000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/040000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/040000/pretrained_model/policy_preprocessor.json b/checkpoints/040000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/040000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/040000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/040000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/040000/pretrained_model/train_config.json b/checkpoints/040000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/040000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_param_groups.json b/checkpoints/040000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..977eccef5875d0d5415b3f7b1da1783987b894a0 --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 3.272542485937369e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 3.272542485937369e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_state.safetensors b/checkpoints/040000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..403360a1f4212fbb0697b4467e04a26d12c0c4a5 --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ed5bca0e31ac5476ac4778fac17f1bee70f4748b4323eb888309d26959b924 +size 1715911736 diff --git a/checkpoints/040000/training_state/rng_state.safetensors b/checkpoints/040000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5c8493eb2ee66ee055c25a85c31fd1a15773d3c --- /dev/null +++ b/checkpoints/040000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07ba24ed79df5ed4d2d78e7b5d3aaac90d64676ce9d82e51931397435fcdd10 +size 15708 diff --git a/checkpoints/040000/training_state/scheduler_state.json b/checkpoints/040000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6b36acbaa27560561c60462bd4b31a981564afe0 --- /dev/null +++ b/checkpoints/040000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 40000, + "_step_count": 40001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.272542485937369e-05, + 3.272542485937369e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/training_step.json b/checkpoints/040000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..fe40d8ff4312c7e5a8fba9bcc932a43a1384ba77 --- /dev/null +++ b/checkpoints/040000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 40000 +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/config.json b/checkpoints/050000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/050000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/model.safetensors b/checkpoints/050000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17d7f9da20fecd56c70d9e718085138bb8437beb --- /dev/null +++ b/checkpoints/050000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2dd574d9991fe7ef967f339d64f53e5072ebb9143625d43a9bad0c298aeae14 +size 857967220 diff --git a/checkpoints/050000/pretrained_model/policy_postprocessor.json b/checkpoints/050000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/050000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/050000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/050000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/050000/pretrained_model/policy_preprocessor.json b/checkpoints/050000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/050000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/050000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/050000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/050000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/050000/pretrained_model/train_config.json b/checkpoints/050000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/050000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/050000/training_state/optimizer_param_groups.json b/checkpoints/050000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..0ddf54ddf230962113fc3349feb08e856d4704ca --- /dev/null +++ b/checkpoints/050000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 2.5e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 2.5e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/050000/training_state/optimizer_state.safetensors b/checkpoints/050000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00f677822c63fcb3ee9bf313271727da4448fccc --- /dev/null +++ b/checkpoints/050000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974bf0dda40c8834235c2f2c9cfaf41ee94d8058792bd74a8ee97e1f77021468 +size 1715911736 diff --git a/checkpoints/050000/training_state/rng_state.safetensors b/checkpoints/050000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a35cbdda18ab435e398da446846274217de6d630 --- /dev/null +++ b/checkpoints/050000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f42b90a6fcfc344ed537dc54a3f3f592cec33f82dfe8812d4edfd208350bb3 +size 15708 diff --git a/checkpoints/050000/training_state/scheduler_state.json b/checkpoints/050000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a0071287b712596538b736396126138b863d694a --- /dev/null +++ b/checkpoints/050000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 50000, + "_step_count": 50001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.5e-05, + 2.5e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/050000/training_state/training_step.json b/checkpoints/050000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..b3c03563f17d6d98552117ccf00705b693aa3fbd --- /dev/null +++ b/checkpoints/050000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 50000 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/config.json b/checkpoints/060000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/060000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/model.safetensors b/checkpoints/060000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b3379b7a4ff447babc8c27ee5a64141110cc214 --- /dev/null +++ b/checkpoints/060000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66e8605ac5e2a8b4263382e24eb1119d055cf05b699e302bb7ad436db3418aa +size 857967220 diff --git a/checkpoints/060000/pretrained_model/policy_postprocessor.json b/checkpoints/060000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/060000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/060000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/060000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/060000/pretrained_model/policy_preprocessor.json b/checkpoints/060000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/060000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/060000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/060000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/060000/pretrained_model/train_config.json b/checkpoints/060000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/060000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/060000/training_state/optimizer_param_groups.json b/checkpoints/060000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..78ea035239a7235a84e61e568bea66a7baa3b541 --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 1.7274575140626318e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 1.7274575140626318e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/060000/training_state/optimizer_state.safetensors b/checkpoints/060000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4c17c8cc7e3ec1d93d4342dba0bf9df0f39363f --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7599a6bd0d5cf6e2a09638d5cc363aaa156ed70bc481a6d65b3be3e9aa17f4e7 +size 1715911736 diff --git a/checkpoints/060000/training_state/rng_state.safetensors b/checkpoints/060000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3457a79a0426327d97f97fba0805748e3e68937c --- /dev/null +++ b/checkpoints/060000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55fd7db1b15bfb9938ba2c13eb053f959442b86f948673074c03743ff5ccc146 +size 15708 diff --git a/checkpoints/060000/training_state/scheduler_state.json b/checkpoints/060000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a6b62905cfe70fe61351b8dc9d64b3faa573ff79 --- /dev/null +++ b/checkpoints/060000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 60000, + "_step_count": 60001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.7274575140626318e-05, + 1.7274575140626318e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/060000/training_state/training_step.json b/checkpoints/060000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d360b6037f1c8c2c85d38e951160eb02ace507dd --- /dev/null +++ b/checkpoints/060000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 60000 +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/config.json b/checkpoints/070000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/070000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/model.safetensors b/checkpoints/070000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffa16910c019580b423c11a1cf081ddd84361c07 --- /dev/null +++ b/checkpoints/070000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19e4bb94023bf993109ea441ef7f800a9b37b8ba9ddc140fd5f597fb1edb691 +size 857967220 diff --git a/checkpoints/070000/pretrained_model/policy_postprocessor.json b/checkpoints/070000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/070000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/070000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/070000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/070000/pretrained_model/policy_preprocessor.json b/checkpoints/070000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/070000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/070000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/070000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/070000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/070000/pretrained_model/train_config.json b/checkpoints/070000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/070000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/070000/training_state/optimizer_param_groups.json b/checkpoints/070000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..ad0e3aa82af1563dadf19f0c7e4fa0a9cfc779fb --- /dev/null +++ b/checkpoints/070000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 1.0305368692688174e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 1.0305368692688175e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/070000/training_state/optimizer_state.safetensors b/checkpoints/070000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f986f30f25beb7e5351d9301c6e548c3172d7c91 --- /dev/null +++ b/checkpoints/070000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64876fc9cfc25507af348b1fdae0d5bb46a3dd71f9b5c2cbd1df696ff41318c9 +size 1715911736 diff --git a/checkpoints/070000/training_state/rng_state.safetensors b/checkpoints/070000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f8c3ab6f41e1d2db6239db67d52a43f327b0ebc --- /dev/null +++ b/checkpoints/070000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b8a66f494b2d5b00e93d3abfee2524bdd9a202427ebd232cc0f1438f8a68f1 +size 15708 diff --git a/checkpoints/070000/training_state/scheduler_state.json b/checkpoints/070000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..76b5302504ccb68bcedff45eaa158719c29cf229 --- /dev/null +++ b/checkpoints/070000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 70000, + "_step_count": 70001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.0305368692688174e-05, + 1.0305368692688175e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/070000/training_state/training_step.json b/checkpoints/070000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0c1eb0f525e34fa3a57743f750b2b34b5b77b --- /dev/null +++ b/checkpoints/070000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 70000 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/config.json b/checkpoints/080000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/080000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/model.safetensors b/checkpoints/080000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9564e93af603efcfbb4febfb4cafe8b29f7a5330 --- /dev/null +++ b/checkpoints/080000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc287be7b2f70d0cbcb538c21e8a1d0baa30086746d3103e02e25ad6160eb6ca +size 857967220 diff --git a/checkpoints/080000/pretrained_model/policy_postprocessor.json b/checkpoints/080000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/080000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/080000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/080000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/080000/pretrained_model/policy_preprocessor.json b/checkpoints/080000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/080000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/080000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/080000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/080000/pretrained_model/train_config.json b/checkpoints/080000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/080000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/080000/training_state/optimizer_param_groups.json b/checkpoints/080000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..9f3124c5e064fe23df70b87a56ac4dd200c6843d --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 4.7745751406263165e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 4.774575140626317e-07, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/080000/training_state/optimizer_state.safetensors b/checkpoints/080000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76b29900f5ed3bbf1c217bdf92963e1280bdc3d7 --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462cb42b74fbcd3ba6d399588ff3a1676c8c8738b9145449159842c4d9c29269 +size 1715911736 diff --git a/checkpoints/080000/training_state/rng_state.safetensors b/checkpoints/080000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..749fcf4d46aeed369336991b38fdd0ebcd9ea6ea --- /dev/null +++ b/checkpoints/080000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ce34fbc6f7b489490b3e6abfbb3ff18da5c8cc1e27595b46eb7ffb600eb370 +size 15708 diff --git a/checkpoints/080000/training_state/scheduler_state.json b/checkpoints/080000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..afca1322332f73a31ccdf5fa0b26e5487c11ceb0 --- /dev/null +++ b/checkpoints/080000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 80000, + "_step_count": 80001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.7745751406263165e-06, + 4.774575140626317e-07 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/080000/training_state/training_step.json b/checkpoints/080000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..384eace4ecc2a6cba352aa7cf27f04405b7319c3 --- /dev/null +++ b/checkpoints/080000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 80000 +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/config.json b/checkpoints/090000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/090000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/model.safetensors b/checkpoints/090000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebb55e9d8f2d9b7f0650553ac117536837614490 --- /dev/null +++ b/checkpoints/090000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9080906b8ea506d8950bf7e73847ca0ae9718e0f5b47b5680fcbb083247d3c58 +size 857967220 diff --git a/checkpoints/090000/pretrained_model/policy_postprocessor.json b/checkpoints/090000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/090000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/090000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/090000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/090000/pretrained_model/policy_preprocessor.json b/checkpoints/090000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/090000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/090000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/090000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/090000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/090000/pretrained_model/train_config.json b/checkpoints/090000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/090000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/090000/training_state/optimizer_param_groups.json b/checkpoints/090000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..c19614b199c16462fd1b12446a7e8f525b329fb2 --- /dev/null +++ b/checkpoints/090000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 1.2235870926211619e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 1.223587092621162e-07, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/090000/training_state/optimizer_state.safetensors b/checkpoints/090000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f37137fbb196b5df2ca6312732f60659d5213b60 --- /dev/null +++ b/checkpoints/090000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f340d479fd34e0a064ebe51348265f3090568dc2eebc2e80c31d1d58c829fc +size 1715911736 diff --git a/checkpoints/090000/training_state/rng_state.safetensors b/checkpoints/090000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f3f8a6bc10eb9d9c8f6fe1e0aa7a166b64a0c84 --- /dev/null +++ b/checkpoints/090000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c42dd4d0e839f5d681aa654a1656adfece928c45b864714516bea09c9270728 +size 15708 diff --git a/checkpoints/090000/training_state/scheduler_state.json b/checkpoints/090000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fe1d6594b97ed71c4b3509d8b9e0dbcda0d26479 --- /dev/null +++ b/checkpoints/090000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 90000, + "_step_count": 90001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.2235870926211619e-06, + 1.223587092621162e-07 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/090000/training_state/training_step.json b/checkpoints/090000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..cec965b83b7f817ac99559792f84cbb69a721bfd --- /dev/null +++ b/checkpoints/090000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 90000 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/config.json b/checkpoints/100000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2747134ca684b58bd53268f4c7f7cc82cea3dc0 --- /dev/null +++ b/checkpoints/100000/pretrained_model/config.json @@ -0,0 +1,121 @@ +{ + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/model.safetensors b/checkpoints/100000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fed849b23a86e91a93aec33d8dbed7e3192fafb2 --- /dev/null +++ b/checkpoints/100000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1e127660db82695245b4addd1a0f4b78a29a989742ab96cc6c69f69aae30a3 +size 857967220 diff --git a/checkpoints/100000/pretrained_model/policy_postprocessor.json b/checkpoints/100000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0e896c35807a4a0616f52b341799a83165d6380e --- /dev/null +++ b/checkpoints/100000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/100000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/100000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/100000/pretrained_model/policy_preprocessor.json b/checkpoints/100000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..b41eceac2b794b61d582dd3d76851d5ec58234be --- /dev/null +++ b/checkpoints/100000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,78 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors b/checkpoints/100000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c221a226159fe3471b3c1c4a8d3d7b6b30a6364 --- /dev/null +++ b/checkpoints/100000/pretrained_model/policy_preprocessor_step_3_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d58fa83446e4b0bdfc09a02565d49fca3dd894c73486df2d1ce3a22e752cd6 +size 11368 diff --git a/checkpoints/100000/pretrained_model/train_config.json b/checkpoints/100000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff32d6e84813efb6822901699550a7a6ca61c7e --- /dev/null +++ b/checkpoints/100000/pretrained_model/train_config.json @@ -0,0 +1,608 @@ +{ + "dataset": { + "repo_id": "local/local_aic_cheatcode_demos_dit_snap", + "root": "/opt/dlami/nvme/lerobot/local_aic_cheatcode_demos_dit_snap", + "episodes": [ + 3, + 5, + 8, + 157, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 192, + 193, + 194, + 195, + 196, + 198, + 199, + 200, + 201, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 227, + 228, + 229, + 230, + 232, + 233, + 234, + 236, + 238, + 239, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 255, + 256, + 257, + 258, + 259, + 260, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 276, + 277, + 278, + 279, + 280, + 281, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 321, + 322, + 323, + 325, + 327, + 328, + 329, + 330, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 350, + 351, + 352, + 354, + 355, + 356, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 377, + 378, + 379, + 380, + 381, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 393, + 395, + 397, + 399, + 400, + 401, + 402, + 404, + 405, + 407, + 408, + 409, + 410, + 411, + 413, + 414, + 415, + 416, + 417, + 419, + 420, + 421, + 422, + 423, + 425, + 430, + 431, + 433, + 434, + 436, + 437, + 439, + 440, + 442, + 444, + 447, + 449, + 450, + 452, + 453, + 454, + 455, + 457, + 458, + 460, + 462, + 463, + 464, + 465, + 466, + 467, + 470, + 471, + 473, + 474, + 475, + 476, + 478, + 479, + 480, + 482, + 485, + 490, + 491, + 492, + 497, + 498, + 500, + 501, + 502, + 504, + 507, + 508, + 509, + 512, + 514, + 515, + 516, + 517, + 518, + 520, + 522, + 524, + 527, + 529, + 530, + 532, + 533, + 534, + 535, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 546, + 548, + 549, + 550, + 551, + 553, + 555, + 556, + 557, + 558, + 559, + 560, + 562, + 563, + 565, + 566, + 569, + 570, + 571, + 572, + 575, + 576, + 577, + 578, + 580, + 581, + 582, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 593, + 594, + 595, + 596, + 598, + 600, + 601, + 602, + 603, + 606, + 607, + 608, + 609, + 612, + 613, + 615, + 616, + 617, + 619, + 620, + 621, + 623, + 624, + 627, + 628, + 631, + 632, + 633, + 634, + 635, + 636 + ], + "image_transforms": { + "enable": true, + "max_num_transforms": 3, + "random_order": true, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.9, + 1.1 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.9, + 1.1 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.95, + 1.05 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.02, + 0.02 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.9, + 1.1 + ] + } + }, + "affine": { + "weight": 0.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "pyav", + "streaming": false + }, + "env": null, + "policy": { + "type": "aic_multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.images.left_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.center_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.images.right_camera": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 288 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 26 + ] + }, + "observation.wrench": { + "type": "STATE", + "shape": [ + 6 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 7 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": false, + "repo_id": null, + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 32, + "n_action_steps": 24, + "task_conditioning": "embedding", + "num_tasks": 10, + "objective": "diffusion", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 232, + 232 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 5e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0001, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 7 + }, + "output_dir": "/opt/dlami/nvme/outputs/dit_diffusion_20260511_105440", + "job_name": "aic_multi_task_dit", + "resume": false, + "seed": 42, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 12, + "steps": 100000, + "eval_freq": 0, + "log_freq": 100, + "tolerance_s": 0.1, + "save_checkpoint": true, + "save_freq": 10000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 5e-05, + "weight_decay": 0.0001, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "aic_sfp_dit_diffusion_embedding", + "entity": "gustave-charles-s-eth-z-rich", + "notes": null, + "run_id": "2ipoouae", + "mode": null, + "add_tags": true + }, + "peft": null, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/100000/training_state/optimizer_param_groups.json b/checkpoints/100000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..1a5147302117fb8d221740ceedffb875652db86e --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_param_groups.json @@ -0,0 +1,308 @@ +[ + { + "lr": 0.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68 + ] + }, + { + "lr": 0.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0001, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 5e-06, + "params": [ + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267 + ] + } +] \ No newline at end of file diff --git a/checkpoints/100000/training_state/optimizer_state.safetensors b/checkpoints/100000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6b4f78a182bf336c375af10c3dc688468e81915 --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb655d46b9fc10188395611ebc5d931333515982336be7189963192fe9f3f83f +size 1715911736 diff --git a/checkpoints/100000/training_state/rng_state.safetensors b/checkpoints/100000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b60e7fecd45516fefb6edfbd68ea0f8ab8ad7cb2 --- /dev/null +++ b/checkpoints/100000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bffaea42ba83219e433f076e4a7ab22e5e396076151c830afb648de0a8ef8ab +size 15708 diff --git a/checkpoints/100000/training_state/scheduler_state.json b/checkpoints/100000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fdf082488d47e55a7fc8e2f5d5e1ac69dd27654d --- /dev/null +++ b/checkpoints/100000/training_state/scheduler_state.json @@ -0,0 +1,17 @@ +{ + "base_lrs": [ + 5e-05, + 5e-06 + ], + "last_epoch": 100000, + "_step_count": 100001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0, + 0.0 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/checkpoints/100000/training_state/training_step.json b/checkpoints/100000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb73c13d28bca88058c08796abbf931c3f9b012 --- /dev/null +++ b/checkpoints/100000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 100000 +} \ No newline at end of file