diff --git a/002000/pretrained_model/config.json b/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71fdaff2981b4317a2d978c5323025c4483a03 --- /dev/null +++ b/002000/pretrained_model/config.json @@ -0,0 +1,97 @@ +{ + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 +} \ No newline at end of file diff --git a/002000/pretrained_model/model.safetensors b/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0bcd153345805f8dcbf432586a0089c48340e65 --- /dev/null +++ b/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6245d3bfec225e4d9575fa7cee6fd0dd317a87fda60453c69be4ac4b2c70b272 +size 628503144 diff --git a/002000/pretrained_model/policy_postprocessor.json b/002000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c5e6cea7850bb999ab193f96021fa0129ce4db --- /dev/null +++ b/002000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/002000/pretrained_model/policy_preprocessor.json b/002000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..54f723d4e5930a76aeb5a57ad84544682ba64cb7 --- /dev/null +++ b/002000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,67 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 77, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "openai/clip-vit-base-patch16" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_4_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/002000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors b/002000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/002000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/002000/pretrained_model/train_config.json b/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b70d34ded670a23e3adf343050b00848188e3 --- /dev/null +++ b/002000/pretrained_model/train_config.json @@ -0,0 +1,233 @@ +{ + "dataset": { + "repo_id": "jjr1007/5may_lorenzo_merged_1-4_6-12", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 + }, + "reward_model": null, + "output_dir": "outputs/run1_baseline", + "job_name": "multi_task_dit", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "prefetch_factor": 4, + "persistent_workers": true, + "steps": 10000, + "eval_freq": 1000, + "log_freq": 100, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 2e-05, + "weight_decay": 0.0, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 19, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "multitask-dit-experiments", + "entity": null, + "notes": null, + "run_id": "run1_baseline_h49_a8", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/002000/training_state/optimizer_param_groups.json b/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..683ffe504e4a0203ecca3586f860d1e074a29a3a --- /dev/null +++ b/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,307 @@ +[ + { + "lr": 1.8090169943749477e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ] + }, + { + "lr": 1.8090169943749478e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2.0000000000000003e-06, + "params": [ + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266 + ] + } +] \ No newline at end of file diff --git a/002000/training_state/optimizer_state.safetensors b/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37181127b999ad1b460ebc1c0e553110fa9467c4 --- /dev/null +++ b/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0707739209eb33178eed5becfb680246453653d68c2ea5706fa8b1b6c1b894 +size 1256979084 diff --git a/002000/training_state/rng_state.safetensors b/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..041519bdcec8b213a206e0adbceae166d4a2e176 --- /dev/null +++ b/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa14cd3e907f7b2ef47b1a253db4ae42caebacdbae122084c8a18ecabcf7fe0 +size 15708 diff --git a/002000/training_state/scheduler_state.json b/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eb4e41e0665382fe6508f21c39fd888c2598aaa3 --- /dev/null +++ b/002000/training_state/scheduler_state.json @@ -0,0 +1,18 @@ +{ + "base_lrs": [ + 2e-05, + 2.0000000000000003e-06 + ], + "last_epoch": 2000, + "_step_count": 2001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.8090169943749477e-05, + 1.8090169943749478e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/002000/training_state/training_step.json b/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/004000/pretrained_model/config.json b/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71fdaff2981b4317a2d978c5323025c4483a03 --- /dev/null +++ b/004000/pretrained_model/config.json @@ -0,0 +1,97 @@ +{ + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 +} \ No newline at end of file diff --git a/004000/pretrained_model/model.safetensors b/004000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb0d9cbcf85f459a8a7d953db9c775735be4b39f --- /dev/null +++ b/004000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d0aa06a5123225f887aabda0cb91af1e244c8b676ac42a9b237afbaf4a40cc +size 628503144 diff --git a/004000/pretrained_model/policy_postprocessor.json b/004000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c5e6cea7850bb999ab193f96021fa0129ce4db --- /dev/null +++ b/004000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/004000/pretrained_model/policy_preprocessor.json b/004000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..54f723d4e5930a76aeb5a57ad84544682ba64cb7 --- /dev/null +++ b/004000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,67 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 77, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "openai/clip-vit-base-patch16" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_4_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/004000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors b/004000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/004000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/004000/pretrained_model/train_config.json b/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b70d34ded670a23e3adf343050b00848188e3 --- /dev/null +++ b/004000/pretrained_model/train_config.json @@ -0,0 +1,233 @@ +{ + "dataset": { + "repo_id": "jjr1007/5may_lorenzo_merged_1-4_6-12", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 + }, + "reward_model": null, + "output_dir": "outputs/run1_baseline", + "job_name": "multi_task_dit", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "prefetch_factor": 4, + "persistent_workers": true, + "steps": 10000, + "eval_freq": 1000, + "log_freq": 100, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 2e-05, + "weight_decay": 0.0, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 19, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "multitask-dit-experiments", + "entity": null, + "notes": null, + "run_id": "run1_baseline_h49_a8", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/004000/training_state/optimizer_param_groups.json b/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..5f2e9907d308ac7512acec762610c9bcccb00988 --- /dev/null +++ b/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,307 @@ +[ + { + "lr": 1.3090169943749475e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ] + }, + { + "lr": 1.3090169943749477e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2.0000000000000003e-06, + "params": [ + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266 + ] + } +] \ No newline at end of file diff --git a/004000/training_state/optimizer_state.safetensors b/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9abee3dc6787324ab1ba5957f3596cfda3631be6 --- /dev/null +++ b/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08049ae6014bbd38e7134185e210afe5033d41c9d0978035c95653728212c3d9 +size 1256979084 diff --git a/004000/training_state/rng_state.safetensors b/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32c01edcb667da1efe6aeae8a987916936a4800f --- /dev/null +++ b/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb99af0477c0e787228e1e6b503db4cf50e1390a63e48de0f1f0c340a3c0cb0 +size 15708 diff --git a/004000/training_state/scheduler_state.json b/004000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1822afe9f3261d0b2f588830d243c06b6078ed9c --- /dev/null +++ b/004000/training_state/scheduler_state.json @@ -0,0 +1,18 @@ +{ + "base_lrs": [ + 2e-05, + 2.0000000000000003e-06 + ], + "last_epoch": 4000, + "_step_count": 4001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.3090169943749475e-05, + 1.3090169943749477e-06 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/004000/training_state/training_step.json b/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/006000/pretrained_model/config.json b/006000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71fdaff2981b4317a2d978c5323025c4483a03 --- /dev/null +++ b/006000/pretrained_model/config.json @@ -0,0 +1,97 @@ +{ + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 +} \ No newline at end of file diff --git a/006000/pretrained_model/model.safetensors b/006000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a43f3637e895ac479c22f1a9a6705d286556acf --- /dev/null +++ b/006000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e5e5004870f75e957c3fbb51b33ea26e7047963c6c03234e3e17b981e6546a +size 628503144 diff --git a/006000/pretrained_model/policy_postprocessor.json b/006000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c5e6cea7850bb999ab193f96021fa0129ce4db --- /dev/null +++ b/006000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/006000/pretrained_model/policy_preprocessor.json b/006000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..54f723d4e5930a76aeb5a57ad84544682ba64cb7 --- /dev/null +++ b/006000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,67 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 77, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "openai/clip-vit-base-patch16" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_4_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/006000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors b/006000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/006000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/006000/pretrained_model/train_config.json b/006000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b70d34ded670a23e3adf343050b00848188e3 --- /dev/null +++ b/006000/pretrained_model/train_config.json @@ -0,0 +1,233 @@ +{ + "dataset": { + "repo_id": "jjr1007/5may_lorenzo_merged_1-4_6-12", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 + }, + "reward_model": null, + "output_dir": "outputs/run1_baseline", + "job_name": "multi_task_dit", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "prefetch_factor": 4, + "persistent_workers": true, + "steps": 10000, + "eval_freq": 1000, + "log_freq": 100, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 2e-05, + "weight_decay": 0.0, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 19, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "multitask-dit-experiments", + "entity": null, + "notes": null, + "run_id": "run1_baseline_h49_a8", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/006000/training_state/optimizer_param_groups.json b/006000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..0607f3cde516f70f9d4c1588b19ba852508cf20a --- /dev/null +++ b/006000/training_state/optimizer_param_groups.json @@ -0,0 +1,307 @@ +[ + { + "lr": 6.909830056250527e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ] + }, + { + "lr": 6.909830056250527e-07, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2.0000000000000003e-06, + "params": [ + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266 + ] + } +] \ No newline at end of file diff --git a/006000/training_state/optimizer_state.safetensors b/006000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1883bff9699f251708cb6bc6b00feea9d4912f33 --- /dev/null +++ b/006000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1df59c56f20c0d9beb0002cf47ecde413a0ec15ac7dd9233c70709ced2431a +size 1256979084 diff --git a/006000/training_state/rng_state.safetensors b/006000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9432ce9ad075522e0dbd322157f768ffac68ee6 --- /dev/null +++ b/006000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2216518ac2cca1365dbb86c17bec18ec46a70522a6bc4be3f27ae3734ad3abc6 +size 15708 diff --git a/006000/training_state/scheduler_state.json b/006000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a2be663b276db2e6cc07b1e7f2194277fe0b3cf --- /dev/null +++ b/006000/training_state/scheduler_state.json @@ -0,0 +1,18 @@ +{ + "base_lrs": [ + 2e-05, + 2.0000000000000003e-06 + ], + "last_epoch": 6000, + "_step_count": 6001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.909830056250527e-06, + 6.909830056250527e-07 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/006000/training_state/training_step.json b/006000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..e267ac589be64705f8674638b9f5099c886778da --- /dev/null +++ b/006000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 6000 +} \ No newline at end of file diff --git a/008000/pretrained_model/config.json b/008000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71fdaff2981b4317a2d978c5323025c4483a03 --- /dev/null +++ b/008000/pretrained_model/config.json @@ -0,0 +1,97 @@ +{ + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 +} \ No newline at end of file diff --git a/008000/pretrained_model/model.safetensors b/008000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f1aae9e7f16ed8a9bfe070afb6ae51ae3b98bb0 --- /dev/null +++ b/008000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2cd21d5867c87ad651f570e726be1fb090c226d8ba2b7d2c51bd5e0c466113 +size 628503144 diff --git a/008000/pretrained_model/policy_postprocessor.json b/008000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c5e6cea7850bb999ab193f96021fa0129ce4db --- /dev/null +++ b/008000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/008000/pretrained_model/policy_preprocessor.json b/008000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..54f723d4e5930a76aeb5a57ad84544682ba64cb7 --- /dev/null +++ b/008000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,67 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 77, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "openai/clip-vit-base-patch16" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_4_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/008000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors b/008000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/008000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/008000/pretrained_model/train_config.json b/008000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b70d34ded670a23e3adf343050b00848188e3 --- /dev/null +++ b/008000/pretrained_model/train_config.json @@ -0,0 +1,233 @@ +{ + "dataset": { + "repo_id": "jjr1007/5may_lorenzo_merged_1-4_6-12", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 + }, + "reward_model": null, + "output_dir": "outputs/run1_baseline", + "job_name": "multi_task_dit", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "prefetch_factor": 4, + "persistent_workers": true, + "steps": 10000, + "eval_freq": 1000, + "log_freq": 100, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 2e-05, + "weight_decay": 0.0, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 19, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "multitask-dit-experiments", + "entity": null, + "notes": null, + "run_id": "run1_baseline_h49_a8", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/008000/training_state/optimizer_param_groups.json b/008000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..7b38dfcd0ad74fd27c42bdb17ea96409716ec2be --- /dev/null +++ b/008000/training_state/optimizer_param_groups.json @@ -0,0 +1,307 @@ +[ + { + "lr": 1.9098300562505266e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ] + }, + { + "lr": 1.909830056250527e-07, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2.0000000000000003e-06, + "params": [ + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266 + ] + } +] \ No newline at end of file diff --git a/008000/training_state/optimizer_state.safetensors b/008000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac1ba15c63c909c01fadd241d2b7cb88e426b31a --- /dev/null +++ b/008000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d3adbe0c7c8f60fd3ee4b4e45c1c8b664194ca4f45a385085adff43f1a00a7 +size 1256979084 diff --git a/008000/training_state/rng_state.safetensors b/008000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a00f3a30ff81750b4d90fd26cbb2671994c31753 --- /dev/null +++ b/008000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7c3b22c3d20eff04dd5801bba673b44e768a7e4426ae1f1ad5033403c283b7 +size 15708 diff --git a/008000/training_state/scheduler_state.json b/008000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bbc756e416e01e5e9d450fbc3b2fbb57f2d8a0ce --- /dev/null +++ b/008000/training_state/scheduler_state.json @@ -0,0 +1,18 @@ +{ + "base_lrs": [ + 2e-05, + 2.0000000000000003e-06 + ], + "last_epoch": 8000, + "_step_count": 8001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.9098300562505266e-06, + 1.909830056250527e-07 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/008000/training_state/training_step.json b/008000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..74d8cba01cab8506617b2cbae6f268fe80fbfa79 --- /dev/null +++ b/008000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 8000 +} \ No newline at end of file diff --git a/010000/pretrained_model/config.json b/010000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe71fdaff2981b4317a2d978c5323025c4483a03 --- /dev/null +++ b/010000/pretrained_model/config.json @@ -0,0 +1,97 @@ +{ + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 +} \ No newline at end of file diff --git a/010000/pretrained_model/model.safetensors b/010000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf11af2c49c84efbbe860ca8873520c8228aa126 --- /dev/null +++ b/010000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e209b2610b01de92051021ce8adccd8604b29248bc0d91ebd3e4740fffdc2e51 +size 628503144 diff --git a/010000/pretrained_model/policy_postprocessor.json b/010000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c5e6cea7850bb999ab193f96021fa0129ce4db --- /dev/null +++ b/010000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/010000/pretrained_model/policy_preprocessor.json b/010000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..54f723d4e5930a76aeb5a57ad84544682ba64cb7 --- /dev/null +++ b/010000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,67 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": {} + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 77, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "openai/clip-vit-base-patch16" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + } + }, + "state_file": "policy_preprocessor_step_4_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/010000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors b/010000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f4dcd9d66a89a91e199ac6a7c21bce35c9b4cb --- /dev/null +++ b/010000/pretrained_model/policy_preprocessor_step_4_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e52b5e47d0bc03c9a9e94752a2fbfee02c868cae66f93cdcb53e917c910577b +size 6560 diff --git a/010000/pretrained_model/train_config.json b/010000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b70d34ded670a23e3adf343050b00848188e3 --- /dev/null +++ b/010000/pretrained_model/train_config.json @@ -0,0 +1,233 @@ +{ + "dataset": { + "repo_id": "jjr1007/5may_lorenzo_merged_1-4_6-12", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "multi_task_dit", + "n_obs_steps": 2, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 1080, + 1920 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": true, + "use_peft": false, + "push_to_hub": true, + "repo_id": "jjr1007/multitask-dit-run1-baseline", + "private": null, + "tags": null, + "license": null, + "pretrained_path": null, + "horizon": 49, + "n_action_steps": 8, + "objective": "flow_matching", + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "sigma_min": 0.0, + "num_integration_steps": 100, + "integration_method": "euler", + "timestep_sampling_strategy": "beta", + "timestep_sampling_s": 0.999, + "timestep_sampling_alpha": 1.5, + "timestep_sampling_beta": 1.0, + "hidden_dim": 512, + "num_layers": 6, + "num_heads": 8, + "dropout": 0.1, + "use_positional_encoding": false, + "timestep_embed_dim": 256, + "use_rope": true, + "rope_base": 10000.0, + "vision_encoder_name": "openai/clip-vit-base-patch16", + "use_separate_rgb_encoder_per_camera": false, + "vision_encoder_lr_multiplier": 0.1, + "image_resize_shape": [ + 224, + 224 + ], + "image_crop_shape": [ + 224, + 224 + ], + "image_crop_is_random": true, + "text_encoder_name": "openai/clip-vit-base-patch16", + "tokenizer_max_length": 77, + "tokenizer_padding": "max_length", + "tokenizer_padding_side": "right", + "tokenizer_truncation": true, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "optimizer_lr": 2e-05, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 0.0, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 0, + "do_mask_loss_for_padding": false, + "drop_n_last_frames": 40 + }, + "reward_model": null, + "output_dir": "outputs/run1_baseline", + "job_name": "multi_task_dit", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "prefetch_factor": 4, + "persistent_workers": true, + "steps": 10000, + "eval_freq": 1000, + "log_freq": 100, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 2e-05, + "weight_decay": 0.0, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 0, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 19, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "multitask-dit-experiments", + "entity": null, + "notes": null, + "run_id": "run1_baseline_h49_a8", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": {}, + "checkpoint_path": null +} \ No newline at end of file diff --git a/010000/training_state/optimizer_param_groups.json b/010000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..1720fda70ff258097f69bcf8027fc698bfd3c037 --- /dev/null +++ b/010000/training_state/optimizer_param_groups.json @@ -0,0 +1,307 @@ +[ + { + "lr": 0.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2e-05, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ] + }, + { + "lr": 0.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 0.0, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 2.0000000000000003e-06, + "params": [ + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266 + ] + } +] \ No newline at end of file diff --git a/010000/training_state/optimizer_state.safetensors b/010000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1674bcca99961b1117bb28ed118a030c83e9b1e6 --- /dev/null +++ b/010000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbc54cb8c6b80331f51c85068b354f938c2b7e2937032220dce2d8e48c0535f +size 1256979084 diff --git a/010000/training_state/rng_state.safetensors b/010000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc4abf9f6695d531be0d5a948973e86a5ee99f9f --- /dev/null +++ b/010000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4386a36a2653b8df806d6e5c999cfd3fe200e4982f7c1b4d56d62c63ce92ddf2 +size 15708 diff --git a/010000/training_state/scheduler_state.json b/010000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..73e64d3a9ae46a313a5b6076b9d036c9d02c7fee --- /dev/null +++ b/010000/training_state/scheduler_state.json @@ -0,0 +1,18 @@ +{ + "base_lrs": [ + 2e-05, + 2.0000000000000003e-06 + ], + "last_epoch": 10000, + "_step_count": 10001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0, + 0.0 + ], + "lr_lambdas": [ + null, + null + ] +} \ No newline at end of file diff --git a/010000/training_state/training_step.json b/010000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7cb7c0986e9e7461ca851ce71e95d235ae3d2732 --- /dev/null +++ b/010000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 10000 +} \ No newline at end of file