diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..0700b528199dc5ec5458dac53dd79bdf44215109 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +wandb/run-20250804_020558-ftnu6goz/run-ftnu6goz.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20250805_055539-ftnu6goz/run-ftnu6goz.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoints/020000/pretrained_model/config.json b/checkpoints/020000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b979b280bcc067d857fa498e10fc0e2725de8b --- /dev/null +++ b/checkpoints/020000/pretrained_model/config.json @@ -0,0 +1,125 @@ +{ + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/model.safetensors b/checkpoints/020000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b79f8cd2ccd4279d1a9f69bd647d23777c67ec33 --- /dev/null +++ b/checkpoints/020000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94236fa4e21c4c4ed7fe06f4dbfd88003b300608d2da5100c53ec071a8e4a8d7 +size 1112509416 diff --git a/checkpoints/020000/pretrained_model/train_config.json b/checkpoints/020000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6a18719f2100fd7f2faf397649d925aa11879fb --- /dev/null +++ b/checkpoints/020000/pretrained_model/train_config.json @@ -0,0 +1,235 @@ +{ + "dataset": { + "repo_id": "a1o/tac_insert", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 + }, + "output_dir": "outputs/train/diff_tac_insert", + "job_name": "diff_piper_tac_insert", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 8, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 20000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 0.0001, + "weight_decay": 1e-06, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 500, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "ftnu6goz", + "mode": null + } +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_param_groups.json b/checkpoints/020000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..bf39c5c897a050ff8265bb1308f13cf05d6399da --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_param_groups.json @@ -0,0 +1,233 @@ +[ + { + "lr": 9.081878607071996e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-06, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211 + ] + } +] \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_state.safetensors b/checkpoints/020000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f3a5e3bd6d136dc17ab7214f671c22c7e63ea96 --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6725bd7b3be32a8668bc74db2a7a7fbbc274b01a1a89ea97358635caefa839d3 +size 2225018952 diff --git a/checkpoints/020000/training_state/rng_state.safetensors b/checkpoints/020000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58981e0f9925274746b11b058bafbef9bd54f124 --- /dev/null +++ b/checkpoints/020000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213b60c5279eaa3759fe1d400d63d15823514c4fa435daeecd90ff6ed011a331 +size 15708 diff --git a/checkpoints/020000/training_state/scheduler_state.json b/checkpoints/020000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7079c5eb5d1f64d9d6c22b336677c1354414261d --- /dev/null +++ b/checkpoints/020000/training_state/scheduler_state.json @@ -0,0 +1,14 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 20000, + "_step_count": 20001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.081878607071996e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/training_step.json b/checkpoints/020000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..dc9bb47026c5d5237ca6fc5dbff6020dd122ea05 --- /dev/null +++ b/checkpoints/020000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 20000 +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/config.json b/checkpoints/040000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b979b280bcc067d857fa498e10fc0e2725de8b --- /dev/null +++ b/checkpoints/040000/pretrained_model/config.json @@ -0,0 +1,125 @@ +{ + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 +} \ No newline at end of file diff --git a/checkpoints/040000/pretrained_model/model.safetensors b/checkpoints/040000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5d0c2ecf7ec2ed01195de4df42b4047e8e8e0ca --- /dev/null +++ b/checkpoints/040000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9215debe107563de77ef2c69a742d126a28a6192d7ef7cd319f43255e6dbe82 +size 1112509416 diff --git a/checkpoints/040000/pretrained_model/train_config.json b/checkpoints/040000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6a18719f2100fd7f2faf397649d925aa11879fb --- /dev/null +++ b/checkpoints/040000/pretrained_model/train_config.json @@ -0,0 +1,235 @@ +{ + "dataset": { + "repo_id": "a1o/tac_insert", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 + }, + "output_dir": "outputs/train/diff_tac_insert", + "job_name": "diff_piper_tac_insert", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 8, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 20000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 0.0001, + "weight_decay": 1e-06, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 500, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "ftnu6goz", + "mode": null + } +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_param_groups.json b/checkpoints/040000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..da769670afd79c7409bd31e4f84f2a93bad74c99 --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_param_groups.json @@ -0,0 +1,233 @@ +[ + { + "lr": 6.590057681196191e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-06, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211 + ] + } +] \ No newline at end of file diff --git a/checkpoints/040000/training_state/optimizer_state.safetensors b/checkpoints/040000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2188607f4795a1b546cf3f66ff655454aec9b75e --- /dev/null +++ b/checkpoints/040000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a581c208549c22b74fca4e5b3b378b5d4b5ffce6780318e6034c7cf7620da04b +size 2225018952 diff --git a/checkpoints/040000/training_state/rng_state.safetensors b/checkpoints/040000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d97e9671f911b503f1948f5ad9fce546d0586cef --- /dev/null +++ b/checkpoints/040000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d2fdd5357daf333540e0e6869cf355477bec3ad7a1a96d603943969493556d +size 15708 diff --git a/checkpoints/040000/training_state/scheduler_state.json b/checkpoints/040000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d5ef807651649e49fd130ba01189f1e3e79f035 --- /dev/null +++ b/checkpoints/040000/training_state/scheduler_state.json @@ -0,0 +1,14 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 40000, + "_step_count": 40001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.590057681196191e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/040000/training_state/training_step.json b/checkpoints/040000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..fe40d8ff4312c7e5a8fba9bcc932a43a1384ba77 --- /dev/null +++ b/checkpoints/040000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 40000 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/config.json b/checkpoints/060000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b979b280bcc067d857fa498e10fc0e2725de8b --- /dev/null +++ b/checkpoints/060000/pretrained_model/config.json @@ -0,0 +1,125 @@ +{ + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 +} \ No newline at end of file diff --git a/checkpoints/060000/pretrained_model/model.safetensors b/checkpoints/060000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..711d44ae573b586ac8e311064b51ce00b73d70c2 --- /dev/null +++ b/checkpoints/060000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d92be0d28f45b8e05b06cf466644dd11d68c951d76e62ee8f8d30c649594b90 +size 1112509416 diff --git a/checkpoints/060000/pretrained_model/train_config.json b/checkpoints/060000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6a18719f2100fd7f2faf397649d925aa11879fb --- /dev/null +++ b/checkpoints/060000/pretrained_model/train_config.json @@ -0,0 +1,235 @@ +{ + "dataset": { + "repo_id": "a1o/tac_insert", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 + }, + "output_dir": "outputs/train/diff_tac_insert", + "job_name": "diff_piper_tac_insert", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 8, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 20000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 0.0001, + "weight_decay": 1e-06, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 500, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "ftnu6goz", + "mode": null + } +} \ No newline at end of file diff --git a/checkpoints/060000/training_state/optimizer_param_groups.json b/checkpoints/060000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..044c2e63a486a01bdda61b8fdf8b369988a133ac --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_param_groups.json @@ -0,0 +1,233 @@ +[ + { + "lr": 3.484974098465636e-05, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-06, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211 + ] + } +] \ No newline at end of file diff --git a/checkpoints/060000/training_state/optimizer_state.safetensors b/checkpoints/060000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a5b8bfe0a1e4a70478c932591152f7ee54ef665 --- /dev/null +++ b/checkpoints/060000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f32a026e893360ccd011eecbe815228c839ff2aecea9307561a528520dd79 +size 2225018952 diff --git a/checkpoints/060000/training_state/rng_state.safetensors b/checkpoints/060000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81910a60203e6cc5990201865746aab254f7c853 --- /dev/null +++ b/checkpoints/060000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07969b442a414ee170a9d26913d6230a6cfb38baabfb38779713a5b8b3721cb8 +size 15708 diff --git a/checkpoints/060000/training_state/scheduler_state.json b/checkpoints/060000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..668b51a9f5d74f350a4aa0a1b35f5d4f4498a280 --- /dev/null +++ b/checkpoints/060000/training_state/scheduler_state.json @@ -0,0 +1,14 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 60000, + "_step_count": 60001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.484974098465636e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/060000/training_state/training_step.json b/checkpoints/060000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d360b6037f1c8c2c85d38e951160eb02ace507dd --- /dev/null +++ b/checkpoints/060000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 60000 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/config.json b/checkpoints/080000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b979b280bcc067d857fa498e10fc0e2725de8b --- /dev/null +++ b/checkpoints/080000/pretrained_model/config.json @@ -0,0 +1,125 @@ +{ + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 +} \ No newline at end of file diff --git a/checkpoints/080000/pretrained_model/model.safetensors b/checkpoints/080000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52118d0517231bed322f1522c4de4e9b449edc23 --- /dev/null +++ b/checkpoints/080000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41da22ee41323f65ce435604afbf0d529370e495f11e363e8747d51d80b7e4c +size 1112509416 diff --git a/checkpoints/080000/pretrained_model/train_config.json b/checkpoints/080000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..497e0a472e58d3fad4e76334732ce7b9640f85a5 --- /dev/null +++ b/checkpoints/080000/pretrained_model/train_config.json @@ -0,0 +1,235 @@ +{ + "dataset": { + "repo_id": "a1o/tac_insert", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 + }, + "output_dir": "outputs/train/diff_tac_insert", + "job_name": "diff_piper_tac_insert", + "resume": true, + "seed": 1000, + "num_workers": 4, + "batch_size": 8, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 20000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 0.0001, + "weight_decay": 1e-06, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 500, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "ftnu6goz", + "mode": null + } +} \ No newline at end of file diff --git a/checkpoints/080000/training_state/optimizer_param_groups.json b/checkpoints/080000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..a3e37a0becbe350cfc73f7ac1886b0322872e19b --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_param_groups.json @@ -0,0 +1,233 @@ +[ + { + "lr": 9.642144811900739e-06, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-06, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211 + ] + } +] \ No newline at end of file diff --git a/checkpoints/080000/training_state/optimizer_state.safetensors b/checkpoints/080000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc1f215362986025a93706a326737cc08f63df66 --- /dev/null +++ b/checkpoints/080000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997e354402c26a0ca05248b90ec8a5b992a9f7d38a338f06f1c3391ddf9139fb +size 2225018952 diff --git a/checkpoints/080000/training_state/rng_state.safetensors b/checkpoints/080000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69772f59a08d79777c8b3d06aa65048c236861fb --- /dev/null +++ b/checkpoints/080000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded3052e78befcd39df798542e38fa18c2204096493cd9112a1e77abf835c40d +size 15708 diff --git a/checkpoints/080000/training_state/scheduler_state.json b/checkpoints/080000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0d7972eada3dacc85eaf6bf64c05d9e14b9cb216 --- /dev/null +++ b/checkpoints/080000/training_state/scheduler_state.json @@ -0,0 +1,14 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 80000, + "_step_count": 80001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.642144811900739e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/080000/training_state/training_step.json b/checkpoints/080000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..384eace4ecc2a6cba352aa7cf27f04405b7319c3 --- /dev/null +++ b/checkpoints/080000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 80000 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/config.json b/checkpoints/100000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b979b280bcc067d857fa498e10fc0e2725de8b --- /dev/null +++ b/checkpoints/100000/pretrained_model/config.json @@ -0,0 +1,125 @@ +{ + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 +} \ No newline at end of file diff --git a/checkpoints/100000/pretrained_model/model.safetensors b/checkpoints/100000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe292f3cc56841ba2e840fa70f605168b70611a0 --- /dev/null +++ b/checkpoints/100000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea4fc2777535b0e84495423f4c42c46a20e9806b03fc323ceabdf355c0269a69 +size 1112509416 diff --git a/checkpoints/100000/pretrained_model/train_config.json b/checkpoints/100000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..497e0a472e58d3fad4e76334732ce7b9640f85a5 --- /dev/null +++ b/checkpoints/100000/pretrained_model/train_config.json @@ -0,0 +1,235 @@ +{ + "dataset": { + "repo_id": "a1o/tac_insert", + "root": null, + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec" + }, + "env": null, + "policy": { + "type": "diffusion", + "n_obs_steps": 2, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.effort": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.qvel": { + "type": "STATE", + "shape": [ + 14 + ] + }, + "observation.images.cam_high": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_left_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.cam_right_wrist": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.tactile2": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 14 + ] + } + }, + "device": "cuda", + "use_amp": false, + "push_to_hub": true, + "repo_id": "a1o/diff_pick_tac_insert_policy", + "private": null, + "tags": null, + "license": null, + "horizon": 16, + "n_action_steps": 8, + "drop_n_last_frames": 7, + "vision_backbone": "resnet18", + "crop_shape": [ + 84, + 84 + ], + "crop_is_random": true, + "pretrained_backbone_weights": null, + "use_group_norm": true, + "spatial_softmax_num_keypoints": 32, + "use_separate_rgb_encoder_per_camera": false, + "down_dims": [ + 512, + 1024, + 2048 + ], + "kernel_size": 5, + "n_groups": 8, + "diffusion_step_embed_dim": 128, + "use_film_scale_modulation": true, + "noise_scheduler_type": "DDPM", + "num_train_timesteps": 100, + "beta_schedule": "squaredcos_cap_v2", + "beta_start": 0.0001, + "beta_end": 0.02, + "prediction_type": "epsilon", + "clip_sample": true, + "clip_sample_range": 1.0, + "num_inference_steps": null, + "do_mask_loss_for_padding": false, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.95, + 0.999 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-06, + "scheduler_name": "cosine", + "scheduler_warmup_steps": 500 + }, + "output_dir": "outputs/train/diff_tac_insert", + "job_name": "diff_piper_tac_insert", + "resume": true, + "seed": 1000, + "num_workers": 4, + "batch_size": 8, + "steps": 100000, + "eval_freq": 20000, + "log_freq": 200, + "save_checkpoint": true, + "save_freq": 20000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adam", + "lr": 0.0001, + "weight_decay": 1e-06, + "grad_clip_norm": 10.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "diffuser", + "num_warmup_steps": 500, + "name": "cosine" + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "ftnu6goz", + "mode": null + } +} \ No newline at end of file diff --git a/checkpoints/100000/training_state/optimizer_param_groups.json b/checkpoints/100000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..5ad4a0652f3e90db3740c824423694349bd048f0 --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_param_groups.json @@ -0,0 +1,233 @@ +[ + { + "lr": 0.0, + "betas": [ + 0.95, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-06, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": false, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211 + ] + } +] \ No newline at end of file diff --git a/checkpoints/100000/training_state/optimizer_state.safetensors b/checkpoints/100000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9043eceeda0737ba7c28ac1f6fe8842e9760a07f --- /dev/null +++ b/checkpoints/100000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9a7657493759e0b69a8b67b0b378bdb93122b54a8876bf27c989205487da82 +size 2225018952 diff --git a/checkpoints/100000/training_state/rng_state.safetensors b/checkpoints/100000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62405a4c15264e799869b83c788dd67cbd444d37 --- /dev/null +++ b/checkpoints/100000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac89339ee9b5d463598521d71317217a6ce2d404490cc5712659f674ce56f0e9 +size 15708 diff --git a/checkpoints/100000/training_state/scheduler_state.json b/checkpoints/100000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17e61252d488b35f8d1370b0c55198508910e68c --- /dev/null +++ b/checkpoints/100000/training_state/scheduler_state.json @@ -0,0 +1,14 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 100000, + "_step_count": 100001, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/100000/training_state/training_step.json b/checkpoints/100000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb73c13d28bca88058c08796abbf931c3f9b012 --- /dev/null +++ b/checkpoints/100000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 100000 +} \ No newline at end of file diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4217a2e1da62952a58ed1f83650f6dc45f1e1ed5 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2025-08-05T22:53:34.874153673-05:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-08-05T22:53:35.113343469-05:00","level":"INFO","msg":"stream: created new stream","id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.113431564-05:00","level":"INFO","msg":"stream: started","id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115576518-05:00","level":"INFO","msg":"handler: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115694248-05:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115890877-05:00","level":"INFO","msg":"sender: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.424943433-05:00","level":"INFO","msg":"stream: closing","id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.836519735-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-08-05T22:54:37.990590611-05:00","level":"INFO","msg":"handler: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.990724324-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.99089975-05:00","level":"INFO","msg":"sender: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.991141503-05:00","level":"INFO","msg":"stream: closed","id":"ftnu6goz"} diff --git a/wandb/debug.log b/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7cc21ff7009672ba120e4f04bff014dde771df00 --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1,23 @@ +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Configure stats pid to 1532602 +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from /home/aye8078/.config/wandb/settings +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from /gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/wandb/settings +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:setup_run_log_directory():703] Logging user logs to outputs/train/diff_tac_insert/wandb/run-20250805_225334-ftnu6goz/logs/debug.log +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to outputs/train/diff_tac_insert/wandb/run-20250805_225334-ftnu6goz/logs/debug-internal.log +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:init():830] calling init triggers +2025-08-05 22:53:34,239 INFO MainThread:1532602 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'a1o/tac_insert', 'root': None, 'episodes': None, 'image_transforms': {'enable': False, 'max_num_transforms': 3, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec'}, 'env': None, 'policy': {'type': 'diffusion', 'n_obs_steps': 2, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {'observation.state': {'type': , 'shape': [14]}, 'observation.effort': {'type': , 'shape': [14]}, 'observation.qvel': {'type': , 'shape': [14]}, 'observation.images.cam_high': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_left_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_right_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile1': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile2': {'type': , 'shape': [3, 480, 640]}}, 'output_features': {'action': {'type': , 'shape': [14]}}, 'device': 'cuda', 'use_amp': False, 'push_to_hub': True, 'repo_id': 'a1o/diff_pick_tac_insert_policy', 'private': None, 'tags': None, 'license': None, 'horizon': 16, 'n_action_steps': 8, 'drop_n_last_frames': 7, 'vision_backbone': 'resnet18', 'crop_shape': [84, 84], 'crop_is_random': True, 'pretrained_backbone_weights': None, 'use_group_norm': True, 'spatial_softmax_num_keypoints': 32, 'use_separate_rgb_encoder_per_camera': False, 'down_dims': [512, 1024, 2048], 'kernel_size': 5, 'n_groups': 8, 'diffusion_step_embed_dim': 128, 'use_film_scale_modulation': True, 'noise_scheduler_type': 'DDPM', 'num_train_timesteps': 100, 'beta_schedule': 'squaredcos_cap_v2', 'beta_start': 0.0001, 'beta_end': 0.02, 'prediction_type': 'epsilon', 'clip_sample': True, 'clip_sample_range': 1.0, 'num_inference_steps': None, 'do_mask_loss_for_padding': False, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.95, 0.999], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-06, 'scheduler_name': 'cosine', 'scheduler_warmup_steps': 500}, 'output_dir': 'outputs/train/diff_tac_insert', 'job_name': 'diff_piper_tac_insert', 'resume': True, 'seed': 1000, 'num_workers': 4, 'batch_size': 8, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 20000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adam', 'lr': 0.0001, 'weight_decay': 1e-06, 'grad_clip_norm': 10.0, 'betas': [0.95, 0.999], 'eps': 1e-08}, 'scheduler': {'type': 'diffuser', 'num_warmup_steps': 500, 'name': 'cosine'}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': 'ftnu6goz', 'mode': None}, '_wandb': {}} +2025-08-05 22:53:34,240 INFO MainThread:1532602 [wandb_init.py:init():871] starting backend +2025-08-05 22:53:34,777 INFO MainThread:1532602 [wandb_init.py:init():874] sending inform_init request +2025-08-05 22:53:34,864 INFO MainThread:1532602 [wandb_init.py:init():882] backend started and connected +2025-08-05 22:53:34,867 INFO MainThread:1532602 [wandb_init.py:init():953] updated telemetry +2025-08-05 22:53:34,867 INFO MainThread:1532602 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-08-05 22:53:35,434 INFO MainThread:1532602 [wandb_init.py:init():1024] run resumed +2025-08-05 22:53:35,440 INFO MainThread:1532602 [wandb_init.py:init():1029] starting run threads in backend +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_console_start():2458] atexit reg +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2398] Redirects installed. +2025-08-05 22:53:37,314 INFO MainThread:1532602 [wandb_init.py:init():1075] run started, returning control to user process +2025-08-05 22:54:37,423 INFO MsgRouterThr:1532602 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20250804_020558-ftnu6goz/files/output.log b/wandb/run-20250804_020558-ftnu6goz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ddcfd5417edbea509f885dc6487fe57bea78c236 --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/files/output.log @@ -0,0 +1,333 @@ +Logs will be synced with wandb. +INFO 2025-08-04 02:05:59 db_utils.py:103 Track this run --> https://wandb.ai/a10v-1/lerobot/runs/ftnu6goz +INFO 2025-08-04 02:05:59 ts/train.py:127 Creating dataset +INFO 2025-08-04 02:06:01 ts/train.py:138 Creating policy +INFO 2025-08-04 02:06:03 ts/train.py:144 Creating optimizer and scheduler +INFO 2025-08-04 02:06:03 ts/train.py:156 Output dir: outputs/train/diff_tac_insert +INFO 2025-08-04 02:06:03 ts/train.py:159 cfg.steps=100000 (100K) +INFO 2025-08-04 02:06:03 ts/train.py:160 dataset.num_frames=50000 (50K) +INFO 2025-08-04 02:06:03 ts/train.py:161 dataset.num_episodes=100 +INFO 2025-08-04 02:06:03 ts/train.py:162 num_learnable_params=278120238 (278M) +INFO 2025-08-04 02:06:03 ts/train.py:163 num_total_params=278120408 (278M) +/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/torch/utils/data/dataloader.py:626: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 1, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. + warnings.warn( +INFO 2025-08-04 02:06:03 ts/train.py:202 Start offline training on a fixed dataset +INFO 2025-08-04 02:10:45 ts/train.py:232 step:200 smpl:2K ep:3 epch:0.03 loss:0.900 grdn:3.827 lr:2.0e-05 updt_s:0.194 data_s:1.215 +INFO 2025-08-04 02:15:17 ts/train.py:232 step:400 smpl:3K ep:6 epch:0.06 loss:0.232 grdn:3.169 lr:6.0e-05 updt_s:0.169 data_s:1.187 +INFO 2025-08-04 02:19:50 ts/train.py:232 step:600 smpl:5K ep:10 epch:0.10 loss:0.134 grdn:1.979 lr:9.5e-05 updt_s:0.161 data_s:1.204 +INFO 2025-08-04 02:24:21 ts/train.py:232 step:800 smpl:6K ep:13 epch:0.13 loss:0.090 grdn:1.321 lr:1.0e-04 updt_s:0.161 data_s:1.194 +INFO 2025-08-04 02:28:52 ts/train.py:232 step:1K smpl:8K ep:16 epch:0.16 loss:0.069 grdn:1.059 lr:1.0e-04 updt_s:0.162 data_s:1.192 +INFO 2025-08-04 02:33:20 ts/train.py:232 step:1K smpl:10K ep:19 epch:0.19 loss:0.066 grdn:1.011 lr:1.0e-04 updt_s:0.166 data_s:1.175 +INFO 2025-08-04 02:37:53 ts/train.py:232 step:1K smpl:11K ep:22 epch:0.22 loss:0.058 grdn:0.873 lr:1.0e-04 updt_s:0.166 data_s:1.197 +INFO 2025-08-04 02:42:16 ts/train.py:232 step:2K smpl:13K ep:26 epch:0.26 loss:0.044 grdn:0.730 lr:1.0e-04 updt_s:0.166 data_s:1.146 +INFO 2025-08-04 02:46:36 ts/train.py:232 step:2K smpl:14K ep:29 epch:0.29 loss:0.049 grdn:0.763 lr:1.0e-04 updt_s:0.164 data_s:1.135 +INFO 2025-08-04 02:50:56 ts/train.py:232 step:2K smpl:16K ep:32 epch:0.32 loss:0.042 grdn:0.665 lr:1.0e-04 updt_s:0.162 data_s:1.138 +INFO 2025-08-04 02:55:17 ts/train.py:232 step:2K smpl:18K ep:35 epch:0.35 loss:0.046 grdn:0.663 lr:1.0e-04 updt_s:0.167 data_s:1.136 +INFO 2025-08-04 02:59:37 ts/train.py:232 step:2K smpl:19K ep:38 epch:0.38 loss:0.044 grdn:0.663 lr:1.0e-04 updt_s:0.161 data_s:1.140 +INFO 2025-08-04 03:03:59 ts/train.py:232 step:3K smpl:21K ep:42 epch:0.42 loss:0.042 grdn:0.618 lr:1.0e-04 updt_s:0.161 data_s:1.148 +INFO 2025-08-04 03:08:18 ts/train.py:232 step:3K smpl:22K ep:45 epch:0.45 loss:0.040 grdn:0.584 lr:1.0e-04 updt_s:0.168 data_s:1.124 +INFO 2025-08-04 03:12:38 ts/train.py:232 step:3K smpl:24K ep:48 epch:0.48 loss:0.040 grdn:0.562 lr:1.0e-04 updt_s:0.165 data_s:1.135 +INFO 2025-08-04 03:17:00 ts/train.py:232 step:3K smpl:26K ep:51 epch:0.51 loss:0.039 grdn:0.524 lr:1.0e-04 updt_s:0.167 data_s:1.142 +INFO 2025-08-04 03:21:19 ts/train.py:232 step:3K smpl:27K ep:54 epch:0.54 loss:0.039 grdn:0.537 lr:1.0e-04 updt_s:0.161 data_s:1.135 +INFO 2025-08-04 03:25:42 ts/train.py:232 step:4K smpl:29K ep:58 epch:0.58 loss:0.035 grdn:0.507 lr:1.0e-04 updt_s:0.163 data_s:1.147 +INFO 2025-08-04 03:30:00 ts/train.py:232 step:4K smpl:30K ep:61 epch:0.61 loss:0.036 grdn:0.488 lr:1.0e-04 updt_s:0.163 data_s:1.130 +INFO 2025-08-04 03:34:23 ts/train.py:232 step:4K smpl:32K ep:64 epch:0.64 loss:0.035 grdn:0.472 lr:1.0e-04 updt_s:0.165 data_s:1.146 +INFO 2025-08-04 03:38:47 ts/train.py:232 step:4K smpl:34K ep:67 epch:0.67 loss:0.035 grdn:0.485 lr:1.0e-04 updt_s:0.169 data_s:1.149 +INFO 2025-08-04 03:43:07 ts/train.py:232 step:4K smpl:35K ep:70 epch:0.70 loss:0.031 grdn:0.443 lr:1.0e-04 updt_s:0.163 data_s:1.138 +INFO 2025-08-04 03:47:28 ts/train.py:232 step:5K smpl:37K ep:74 epch:0.74 loss:0.033 grdn:0.422 lr:1.0e-04 updt_s:0.168 data_s:1.133 +INFO 2025-08-04 03:51:50 ts/train.py:232 step:5K smpl:38K ep:77 epch:0.77 loss:0.036 grdn:0.469 lr:1.0e-04 updt_s:0.165 data_s:1.145 +INFO 2025-08-04 03:56:09 ts/train.py:232 step:5K smpl:40K ep:80 epch:0.80 loss:0.031 grdn:0.434 lr:1.0e-04 updt_s:0.163 data_s:1.134 +INFO 2025-08-04 04:00:32 ts/train.py:232 step:5K smpl:42K ep:83 epch:0.83 loss:0.033 grdn:0.426 lr:9.9e-05 updt_s:0.165 data_s:1.145 +INFO 2025-08-04 04:04:58 ts/train.py:232 step:5K smpl:43K ep:86 epch:0.86 loss:0.032 grdn:0.418 lr:9.9e-05 updt_s:0.158 data_s:1.172 +INFO 2025-08-04 04:09:19 ts/train.py:232 step:6K smpl:45K ep:90 epch:0.90 loss:0.031 grdn:0.408 lr:9.9e-05 updt_s:0.159 data_s:1.143 +INFO 2025-08-04 04:13:40 ts/train.py:232 step:6K smpl:46K ep:93 epch:0.93 loss:0.031 grdn:0.412 lr:9.9e-05 updt_s:0.165 data_s:1.140 +INFO 2025-08-04 04:18:03 ts/train.py:232 step:6K smpl:48K ep:96 epch:0.96 loss:0.030 grdn:0.383 lr:9.9e-05 updt_s:0.167 data_s:1.148 +INFO 2025-08-04 04:22:24 ts/train.py:232 step:6K smpl:50K ep:99 epch:0.99 loss:0.026 grdn:0.370 lr:9.9e-05 updt_s:0.166 data_s:1.140 +INFO 2025-08-04 04:27:01 ts/train.py:232 step:6K smpl:51K ep:102 epch:1.02 loss:0.033 grdn:0.407 lr:9.9e-05 updt_s:0.179 data_s:1.204 +INFO 2025-08-04 04:31:35 ts/train.py:232 step:7K smpl:53K ep:106 epch:1.06 loss:0.030 grdn:0.375 lr:9.9e-05 updt_s:0.170 data_s:1.202 +INFO 2025-08-04 04:36:10 ts/train.py:232 step:7K smpl:54K ep:109 epch:1.09 loss:0.028 grdn:0.384 lr:9.9e-05 updt_s:0.172 data_s:1.200 +INFO 2025-08-04 04:40:45 ts/train.py:232 step:7K smpl:56K ep:112 epch:1.12 loss:0.027 grdn:0.352 lr:9.9e-05 updt_s:0.173 data_s:1.200 +INFO 2025-08-04 04:45:19 ts/train.py:232 step:7K smpl:58K ep:115 epch:1.15 loss:0.029 grdn:0.375 lr:9.9e-05 updt_s:0.169 data_s:1.204 +INFO 2025-08-04 04:49:52 ts/train.py:232 step:7K smpl:59K ep:118 epch:1.18 loss:0.028 grdn:0.370 lr:9.9e-05 updt_s:0.169 data_s:1.197 +INFO 2025-08-04 04:54:23 ts/train.py:232 step:8K smpl:61K ep:122 epch:1.22 loss:0.027 grdn:0.350 lr:9.9e-05 updt_s:0.168 data_s:1.186 +INFO 2025-08-04 04:58:51 ts/train.py:232 step:8K smpl:62K ep:125 epch:1.25 loss:0.025 grdn:0.331 lr:9.9e-05 updt_s:0.172 data_s:1.164 +INFO 2025-08-04 05:03:18 ts/train.py:232 step:8K smpl:64K ep:128 epch:1.28 loss:0.029 grdn:0.370 lr:9.9e-05 updt_s:0.168 data_s:1.167 +INFO 2025-08-04 05:07:42 ts/train.py:232 step:8K smpl:66K ep:131 epch:1.31 loss:0.027 grdn:0.336 lr:9.9e-05 updt_s:0.166 data_s:1.153 +INFO 2025-08-04 05:12:06 ts/train.py:232 step:8K smpl:67K ep:134 epch:1.34 loss:0.027 grdn:0.353 lr:9.8e-05 updt_s:0.167 data_s:1.152 +INFO 2025-08-04 05:16:34 ts/train.py:232 step:9K smpl:69K ep:138 epch:1.38 loss:0.027 grdn:0.354 lr:9.8e-05 updt_s:0.170 data_s:1.170 +INFO 2025-08-04 05:20:58 ts/train.py:232 step:9K smpl:70K ep:141 epch:1.41 loss:0.029 grdn:0.337 lr:9.8e-05 updt_s:0.170 data_s:1.150 +INFO 2025-08-04 05:25:25 ts/train.py:232 step:9K smpl:72K ep:144 epch:1.44 loss:0.024 grdn:0.314 lr:9.8e-05 updt_s:0.167 data_s:1.168 +INFO 2025-08-04 05:29:51 ts/train.py:232 step:9K smpl:74K ep:147 epch:1.47 loss:0.024 grdn:0.325 lr:9.8e-05 updt_s:0.166 data_s:1.160 +INFO 2025-08-04 05:34:18 ts/train.py:232 step:9K smpl:75K ep:150 epch:1.50 loss:0.025 grdn:0.320 lr:9.8e-05 updt_s:0.164 data_s:1.170 +INFO 2025-08-04 05:38:41 ts/train.py:232 step:10K smpl:77K ep:154 epch:1.54 loss:0.025 grdn:0.322 lr:9.8e-05 updt_s:0.168 data_s:1.149 +INFO 2025-08-04 05:43:08 ts/train.py:232 step:10K smpl:78K ep:157 epch:1.57 loss:0.024 grdn:0.325 lr:9.8e-05 updt_s:0.171 data_s:1.162 +INFO 2025-08-04 05:47:36 ts/train.py:232 step:10K smpl:80K ep:160 epch:1.60 loss:0.025 grdn:0.329 lr:9.8e-05 updt_s:0.165 data_s:1.172 +INFO 2025-08-04 05:52:00 ts/train.py:232 step:10K smpl:82K ep:163 epch:1.63 loss:0.025 grdn:0.322 lr:9.8e-05 updt_s:0.168 data_s:1.151 +INFO 2025-08-04 05:56:26 ts/train.py:232 step:10K smpl:83K ep:166 epch:1.66 loss:0.024 grdn:0.306 lr:9.8e-05 updt_s:0.168 data_s:1.162 +INFO 2025-08-04 06:00:52 ts/train.py:232 step:11K smpl:85K ep:170 epch:1.70 loss:0.023 grdn:0.292 lr:9.8e-05 updt_s:0.166 data_s:1.164 +INFO 2025-08-04 06:05:19 ts/train.py:232 step:11K smpl:86K ep:173 epch:1.73 loss:0.022 grdn:0.298 lr:9.7e-05 updt_s:0.167 data_s:1.168 +INFO 2025-08-04 06:09:43 ts/train.py:232 step:11K smpl:88K ep:176 epch:1.76 loss:0.027 grdn:0.344 lr:9.7e-05 updt_s:0.171 data_s:1.150 +INFO 2025-08-04 06:14:10 ts/train.py:232 step:11K smpl:90K ep:179 epch:1.79 loss:0.025 grdn:0.311 lr:9.7e-05 updt_s:0.168 data_s:1.163 +INFO 2025-08-04 06:18:36 ts/train.py:232 step:11K smpl:91K ep:182 epch:1.82 loss:0.024 grdn:0.317 lr:9.7e-05 updt_s:0.169 data_s:1.160 +INFO 2025-08-04 06:23:01 ts/train.py:232 step:12K smpl:93K ep:186 epch:1.86 loss:0.024 grdn:0.316 lr:9.7e-05 updt_s:0.167 data_s:1.157 +INFO 2025-08-04 06:27:24 ts/train.py:232 step:12K smpl:94K ep:189 epch:1.89 loss:0.022 grdn:0.300 lr:9.7e-05 updt_s:0.166 data_s:1.152 +INFO 2025-08-04 06:31:50 ts/train.py:232 step:12K smpl:96K ep:192 epch:1.92 loss:0.021 grdn:0.290 lr:9.7e-05 updt_s:0.167 data_s:1.163 +INFO 2025-08-04 06:36:15 ts/train.py:232 step:12K smpl:98K ep:195 epch:1.95 loss:0.024 grdn:0.301 lr:9.7e-05 updt_s:0.168 data_s:1.152 +INFO 2025-08-04 06:40:49 ts/train.py:232 step:12K smpl:99K ep:198 epch:1.98 loss:0.024 grdn:0.303 lr:9.7e-05 updt_s:0.165 data_s:1.204 +INFO 2025-08-04 06:45:35 ts/train.py:232 step:13K smpl:101K ep:202 epch:2.02 loss:0.023 grdn:0.308 lr:9.6e-05 updt_s:0.175 data_s:1.253 +INFO 2025-08-04 06:50:17 ts/train.py:232 step:13K smpl:102K ep:205 epch:2.05 loss:0.020 grdn:0.279 lr:9.6e-05 updt_s:0.169 data_s:1.244 +INFO 2025-08-04 06:54:58 ts/train.py:232 step:13K smpl:104K ep:208 epch:2.08 loss:0.023 grdn:0.314 lr:9.6e-05 updt_s:0.174 data_s:1.227 +INFO 2025-08-04 06:59:43 ts/train.py:232 step:13K smpl:106K ep:211 epch:2.11 loss:0.023 grdn:0.304 lr:9.6e-05 updt_s:0.174 data_s:1.253 +INFO 2025-08-04 07:04:25 ts/train.py:232 step:13K smpl:107K ep:214 epch:2.14 loss:0.021 grdn:0.285 lr:9.6e-05 updt_s:0.171 data_s:1.237 +INFO 2025-08-04 07:09:07 ts/train.py:232 step:14K smpl:109K ep:218 epch:2.18 loss:0.021 grdn:0.278 lr:9.6e-05 updt_s:0.167 data_s:1.243 +INFO 2025-08-04 07:13:36 ts/train.py:232 step:14K smpl:110K ep:221 epch:2.21 loss:0.020 grdn:0.289 lr:9.6e-05 updt_s:0.169 data_s:1.175 +INFO 2025-08-04 07:18:00 ts/train.py:232 step:14K smpl:112K ep:224 epch:2.24 loss:0.022 grdn:0.303 lr:9.6e-05 updt_s:0.169 data_s:1.147 +INFO 2025-08-04 07:22:27 ts/train.py:232 step:14K smpl:114K ep:227 epch:2.27 loss:0.022 grdn:0.308 lr:9.5e-05 updt_s:0.173 data_s:1.164 +INFO 2025-08-04 07:26:49 ts/train.py:232 step:14K smpl:115K ep:230 epch:2.30 loss:0.023 grdn:0.302 lr:9.5e-05 updt_s:0.170 data_s:1.139 +INFO 2025-08-04 07:31:14 ts/train.py:232 step:15K smpl:117K ep:234 epch:2.34 loss:0.019 grdn:0.269 lr:9.5e-05 updt_s:0.167 data_s:1.155 +INFO 2025-08-04 07:35:30 ts/train.py:232 step:15K smpl:118K ep:237 epch:2.37 loss:0.019 grdn:0.276 lr:9.5e-05 updt_s:0.172 data_s:1.106 +INFO 2025-08-04 07:39:56 ts/train.py:232 step:15K smpl:120K ep:240 epch:2.40 loss:0.019 grdn:0.275 lr:9.5e-05 updt_s:0.172 data_s:1.158 +INFO 2025-08-04 07:44:23 ts/train.py:232 step:15K smpl:122K ep:243 epch:2.43 loss:0.021 grdn:0.288 lr:9.5e-05 updt_s:0.174 data_s:1.161 +INFO 2025-08-04 07:48:47 ts/train.py:232 step:15K smpl:123K ep:246 epch:2.46 loss:0.022 grdn:0.300 lr:9.5e-05 updt_s:0.173 data_s:1.147 +INFO 2025-08-04 07:53:12 ts/train.py:232 step:16K smpl:125K ep:250 epch:2.50 loss:0.022 grdn:0.294 lr:9.4e-05 updt_s:0.173 data_s:1.149 +INFO 2025-08-04 07:57:35 ts/train.py:232 step:16K smpl:126K ep:253 epch:2.53 loss:0.019 grdn:0.267 lr:9.4e-05 updt_s:0.169 data_s:1.148 +INFO 2025-08-04 08:02:00 ts/train.py:232 step:16K smpl:128K ep:256 epch:2.56 loss:0.020 grdn:0.290 lr:9.4e-05 updt_s:0.174 data_s:1.146 +INFO 2025-08-04 08:06:22 ts/train.py:232 step:16K smpl:130K ep:259 epch:2.59 loss:0.020 grdn:0.298 lr:9.4e-05 updt_s:0.168 data_s:1.144 +INFO 2025-08-04 08:10:43 ts/train.py:232 step:16K smpl:131K ep:262 epch:2.62 loss:0.020 grdn:0.274 lr:9.4e-05 updt_s:0.166 data_s:1.138 +INFO 2025-08-04 08:15:24 ts/train.py:232 step:17K smpl:133K ep:266 epch:2.66 loss:0.021 grdn:0.287 lr:9.4e-05 updt_s:0.173 data_s:1.230 +INFO 2025-08-04 08:20:10 ts/train.py:232 step:17K smpl:134K ep:269 epch:2.69 loss:0.022 grdn:0.297 lr:9.4e-05 updt_s:0.182 data_s:1.247 +INFO 2025-08-04 08:24:38 ts/train.py:232 step:17K smpl:136K ep:272 epch:2.72 loss:0.017 grdn:0.255 lr:9.3e-05 updt_s:0.173 data_s:1.165 +INFO 2025-08-04 08:29:02 ts/train.py:232 step:17K smpl:138K ep:275 epch:2.75 loss:0.020 grdn:0.287 lr:9.3e-05 updt_s:0.172 data_s:1.148 +INFO 2025-08-04 08:33:25 ts/train.py:232 step:17K smpl:139K ep:278 epch:2.78 loss:0.019 grdn:0.283 lr:9.3e-05 updt_s:0.165 data_s:1.149 +INFO 2025-08-04 08:37:49 ts/train.py:232 step:18K smpl:141K ep:282 epch:2.82 loss:0.018 grdn:0.276 lr:9.3e-05 updt_s:0.168 data_s:1.153 +INFO 2025-08-04 08:42:11 ts/train.py:232 step:18K smpl:142K ep:285 epch:2.85 loss:0.018 grdn:0.264 lr:9.3e-05 updt_s:0.165 data_s:1.141 +INFO 2025-08-04 08:46:32 ts/train.py:232 step:18K smpl:144K ep:288 epch:2.88 loss:0.020 grdn:0.299 lr:9.3e-05 updt_s:0.169 data_s:1.135 +INFO 2025-08-04 08:50:52 ts/train.py:232 step:18K smpl:146K ep:291 epch:2.91 loss:0.021 grdn:0.297 lr:9.2e-05 updt_s:0.168 data_s:1.132 +INFO 2025-08-04 08:55:13 ts/train.py:232 step:18K smpl:147K ep:294 epch:2.94 loss:0.021 grdn:0.300 lr:9.2e-05 updt_s:0.169 data_s:1.136 +INFO 2025-08-04 08:59:46 ts/train.py:232 step:19K smpl:149K ep:298 epch:2.98 loss:0.019 grdn:0.285 lr:9.2e-05 updt_s:0.172 data_s:1.191 +INFO 2025-08-04 09:04:26 ts/train.py:232 step:19K smpl:150K ep:301 epch:3.01 loss:0.018 grdn:0.283 lr:9.2e-05 updt_s:0.170 data_s:1.227 +INFO 2025-08-04 09:09:04 ts/train.py:232 step:19K smpl:152K ep:304 epch:3.04 loss:0.019 grdn:0.293 lr:9.2e-05 updt_s:0.167 data_s:1.225 +INFO 2025-08-04 09:13:42 ts/train.py:232 step:19K smpl:154K ep:307 epch:3.07 loss:0.018 grdn:0.278 lr:9.2e-05 updt_s:0.165 data_s:1.225 +INFO 2025-08-04 09:18:17 ts/train.py:232 step:19K smpl:155K ep:310 epch:3.10 loss:0.017 grdn:0.261 lr:9.1e-05 updt_s:0.169 data_s:1.204 +INFO 2025-08-04 09:23:00 ts/train.py:232 step:20K smpl:157K ep:314 epch:3.14 loss:0.019 grdn:0.287 lr:9.1e-05 updt_s:0.173 data_s:1.244 +INFO 2025-08-04 09:27:44 ts/train.py:232 step:20K smpl:158K ep:317 epch:3.17 loss:0.018 grdn:0.273 lr:9.1e-05 updt_s:0.169 data_s:1.248 +INFO 2025-08-04 09:32:09 ts/train.py:232 step:20K smpl:160K ep:320 epch:3.20 loss:0.020 grdn:0.295 lr:9.1e-05 updt_s:0.173 data_s:1.152 +INFO 2025-08-04 09:32:09 ts/train.py:241 Checkpoint policy after step 20000 +INFO 2025-08-04 09:36:41 ts/train.py:232 step:20K smpl:162K ep:323 epch:3.23 loss:0.019 grdn:0.287 lr:9.1e-05 updt_s:0.175 data_s:1.110 +INFO 2025-08-04 09:40:58 ts/train.py:232 step:20K smpl:163K ep:326 epch:3.26 loss:0.018 grdn:0.285 lr:9.1e-05 updt_s:0.169 data_s:1.113 +INFO 2025-08-04 09:45:14 ts/train.py:232 step:21K smpl:165K ep:330 epch:3.30 loss:0.019 grdn:0.286 lr:9.0e-05 updt_s:0.166 data_s:1.117 +INFO 2025-08-04 09:49:32 ts/train.py:232 step:21K smpl:166K ep:333 epch:3.33 loss:0.019 grdn:0.287 lr:9.0e-05 updt_s:0.170 data_s:1.118 +INFO 2025-08-04 09:53:51 ts/train.py:232 step:21K smpl:168K ep:336 epch:3.36 loss:0.017 grdn:0.279 lr:9.0e-05 updt_s:0.169 data_s:1.125 +INFO 2025-08-04 09:58:10 ts/train.py:232 step:21K smpl:170K ep:339 epch:3.39 loss:0.017 grdn:0.269 lr:9.0e-05 updt_s:0.171 data_s:1.125 +INFO 2025-08-04 10:02:26 ts/train.py:232 step:21K smpl:171K ep:342 epch:3.42 loss:0.017 grdn:0.271 lr:9.0e-05 updt_s:0.166 data_s:1.110 +INFO 2025-08-04 10:06:45 ts/train.py:232 step:22K smpl:173K ep:346 epch:3.46 loss:0.018 grdn:0.273 lr:8.9e-05 updt_s:0.167 data_s:1.129 +INFO 2025-08-04 10:10:58 ts/train.py:232 step:22K smpl:174K ep:349 epch:3.49 loss:0.019 grdn:0.279 lr:8.9e-05 updt_s:0.172 data_s:1.090 +INFO 2025-08-04 10:15:18 ts/train.py:232 step:22K smpl:176K ep:352 epch:3.52 loss:0.019 grdn:0.279 lr:8.9e-05 updt_s:0.170 data_s:1.126 +INFO 2025-08-04 10:19:36 ts/train.py:232 step:22K smpl:178K ep:355 epch:3.55 loss:0.020 grdn:0.282 lr:8.9e-05 updt_s:0.168 data_s:1.121 +INFO 2025-08-04 10:23:55 ts/train.py:232 step:22K smpl:179K ep:358 epch:3.58 loss:0.018 grdn:0.289 lr:8.9e-05 updt_s:0.171 data_s:1.127 +INFO 2025-08-04 10:28:19 ts/train.py:232 step:23K smpl:181K ep:362 epch:3.62 loss:0.016 grdn:0.265 lr:8.8e-05 updt_s:0.168 data_s:1.150 +INFO 2025-08-04 10:32:46 ts/train.py:232 step:23K smpl:182K ep:365 epch:3.65 loss:0.018 grdn:0.285 lr:8.8e-05 updt_s:0.177 data_s:1.155 +INFO 2025-08-04 10:37:12 ts/train.py:232 step:23K smpl:184K ep:368 epch:3.68 loss:0.018 grdn:0.266 lr:8.8e-05 updt_s:0.175 data_s:1.156 +INFO 2025-08-04 10:41:38 ts/train.py:232 step:23K smpl:186K ep:371 epch:3.71 loss:0.015 grdn:0.254 lr:8.8e-05 updt_s:0.172 data_s:1.159 +INFO 2025-08-04 10:45:57 ts/train.py:232 step:23K smpl:187K ep:374 epch:3.74 loss:0.017 grdn:0.277 lr:8.8e-05 updt_s:0.167 data_s:1.124 +INFO 2025-08-04 10:50:16 ts/train.py:232 step:24K smpl:189K ep:378 epch:3.78 loss:0.016 grdn:0.272 lr:8.7e-05 updt_s:0.170 data_s:1.123 +INFO 2025-08-04 10:54:37 ts/train.py:232 step:24K smpl:190K ep:381 epch:3.81 loss:0.016 grdn:0.264 lr:8.7e-05 updt_s:0.173 data_s:1.134 +INFO 2025-08-04 10:58:58 ts/train.py:232 step:24K smpl:192K ep:384 epch:3.84 loss:0.017 grdn:0.280 lr:8.7e-05 updt_s:0.171 data_s:1.130 +INFO 2025-08-04 11:03:16 ts/train.py:232 step:24K smpl:194K ep:387 epch:3.87 loss:0.016 grdn:0.275 lr:8.7e-05 updt_s:0.172 data_s:1.117 +INFO 2025-08-04 11:07:38 ts/train.py:232 step:24K smpl:195K ep:390 epch:3.90 loss:0.016 grdn:0.280 lr:8.7e-05 updt_s:0.169 data_s:1.143 +INFO 2025-08-04 11:11:56 ts/train.py:232 step:25K smpl:197K ep:394 epch:3.94 loss:0.015 grdn:0.259 lr:8.6e-05 updt_s:0.166 data_s:1.124 +INFO 2025-08-04 11:16:31 ts/train.py:232 step:25K smpl:198K ep:397 epch:3.97 loss:0.017 grdn:0.270 lr:8.6e-05 updt_s:0.171 data_s:1.202 +INFO 2025-08-04 11:21:13 ts/train.py:232 step:25K smpl:200K ep:400 epch:4.00 loss:0.016 grdn:0.267 lr:8.6e-05 updt_s:0.175 data_s:1.234 +INFO 2025-08-04 11:25:52 ts/train.py:232 step:25K smpl:202K ep:403 epch:4.03 loss:0.015 grdn:0.270 lr:8.6e-05 updt_s:0.173 data_s:1.220 +INFO 2025-08-04 11:30:32 ts/train.py:232 step:25K smpl:203K ep:406 epch:4.06 loss:0.018 grdn:0.286 lr:8.5e-05 updt_s:0.166 data_s:1.234 +INFO 2025-08-04 11:35:13 ts/train.py:232 step:26K smpl:205K ep:410 epch:4.10 loss:0.019 grdn:0.290 lr:8.5e-05 updt_s:0.166 data_s:1.235 +INFO 2025-08-04 11:39:53 ts/train.py:232 step:26K smpl:206K ep:413 epch:4.13 loss:0.016 grdn:0.261 lr:8.5e-05 updt_s:0.165 data_s:1.234 +INFO 2025-08-04 11:44:31 ts/train.py:232 step:26K smpl:208K ep:416 epch:4.16 loss:0.016 grdn:0.266 lr:8.5e-05 updt_s:0.167 data_s:1.227 +INFO 2025-08-04 11:48:54 ts/train.py:232 step:26K smpl:210K ep:419 epch:4.19 loss:0.014 grdn:0.258 lr:8.5e-05 updt_s:0.168 data_s:1.145 +INFO 2025-08-04 11:53:16 ts/train.py:232 step:26K smpl:211K ep:422 epch:4.22 loss:0.015 grdn:0.267 lr:8.4e-05 updt_s:0.173 data_s:1.133 +INFO 2025-08-04 11:57:44 ts/train.py:232 step:27K smpl:213K ep:426 epch:4.26 loss:0.016 grdn:0.267 lr:8.4e-05 updt_s:0.173 data_s:1.167 +INFO 2025-08-04 12:02:08 ts/train.py:232 step:27K smpl:214K ep:429 epch:4.29 loss:0.016 grdn:0.278 lr:8.4e-05 updt_s:0.167 data_s:1.152 +INFO 2025-08-04 12:06:34 ts/train.py:232 step:27K smpl:216K ep:432 epch:4.32 loss:0.014 grdn:0.252 lr:8.4e-05 updt_s:0.164 data_s:1.165 +INFO 2025-08-04 12:10:58 ts/train.py:232 step:27K smpl:218K ep:435 epch:4.35 loss:0.017 grdn:0.278 lr:8.3e-05 updt_s:0.167 data_s:1.154 +INFO 2025-08-04 12:15:21 ts/train.py:232 step:27K smpl:219K ep:438 epch:4.38 loss:0.015 grdn:0.262 lr:8.3e-05 updt_s:0.172 data_s:1.140 +INFO 2025-08-04 12:19:42 ts/train.py:232 step:28K smpl:221K ep:442 epch:4.42 loss:0.017 grdn:0.281 lr:8.3e-05 updt_s:0.172 data_s:1.135 +INFO 2025-08-04 12:24:07 ts/train.py:232 step:28K smpl:222K ep:445 epch:4.45 loss:0.017 grdn:0.282 lr:8.3e-05 updt_s:0.173 data_s:1.148 +INFO 2025-08-04 12:28:29 ts/train.py:232 step:28K smpl:224K ep:448 epch:4.48 loss:0.016 grdn:0.261 lr:8.2e-05 updt_s:0.171 data_s:1.142 +INFO 2025-08-04 12:32:54 ts/train.py:232 step:28K smpl:226K ep:451 epch:4.51 loss:0.017 grdn:0.279 lr:8.2e-05 updt_s:0.172 data_s:1.153 +INFO 2025-08-04 12:37:29 ts/train.py:232 step:28K smpl:227K ep:454 epch:4.54 loss:0.016 grdn:0.267 lr:8.2e-05 updt_s:0.172 data_s:1.200 +INFO 2025-08-04 12:42:06 ts/train.py:232 step:29K smpl:229K ep:458 epch:4.58 loss:0.016 grdn:0.282 lr:8.2e-05 updt_s:0.173 data_s:1.212 +INFO 2025-08-04 12:46:42 ts/train.py:232 step:29K smpl:230K ep:461 epch:4.61 loss:0.016 grdn:0.281 lr:8.1e-05 updt_s:0.178 data_s:1.200 +INFO 2025-08-04 12:51:20 ts/train.py:232 step:29K smpl:232K ep:464 epch:4.64 loss:0.017 grdn:0.290 lr:8.1e-05 updt_s:0.177 data_s:1.211 +INFO 2025-08-04 12:55:53 ts/train.py:232 step:29K smpl:234K ep:467 epch:4.67 loss:0.016 grdn:0.267 lr:8.1e-05 updt_s:0.176 data_s:1.190 +INFO 2025-08-04 13:00:29 ts/train.py:232 step:29K smpl:235K ep:470 epch:4.70 loss:0.016 grdn:0.264 lr:8.1e-05 updt_s:0.174 data_s:1.202 +INFO 2025-08-04 13:05:04 ts/train.py:232 step:30K smpl:237K ep:474 epch:4.74 loss:0.017 grdn:0.272 lr:8.0e-05 updt_s:0.173 data_s:1.205 +INFO 2025-08-04 13:09:40 ts/train.py:232 step:30K smpl:238K ep:477 epch:4.77 loss:0.014 grdn:0.257 lr:8.0e-05 updt_s:0.175 data_s:1.204 +INFO 2025-08-04 13:14:16 ts/train.py:232 step:30K smpl:240K ep:480 epch:4.80 loss:0.015 grdn:0.253 lr:8.0e-05 updt_s:0.177 data_s:1.200 +INFO 2025-08-04 13:18:54 ts/train.py:232 step:30K smpl:242K ep:483 epch:4.83 loss:0.015 grdn:0.269 lr:8.0e-05 updt_s:0.170 data_s:1.218 +INFO 2025-08-04 13:23:29 ts/train.py:232 step:30K smpl:243K ep:486 epch:4.86 loss:0.014 grdn:0.253 lr:7.9e-05 updt_s:0.173 data_s:1.204 +INFO 2025-08-04 13:28:07 ts/train.py:232 step:31K smpl:245K ep:490 epch:4.90 loss:0.013 grdn:0.245 lr:7.9e-05 updt_s:0.176 data_s:1.212 +INFO 2025-08-04 13:32:39 ts/train.py:232 step:31K smpl:246K ep:493 epch:4.93 loss:0.014 grdn:0.256 lr:7.9e-05 updt_s:0.173 data_s:1.185 +INFO 2025-08-04 13:37:35 ts/train.py:232 step:31K smpl:248K ep:496 epch:4.96 loss:0.015 grdn:0.261 lr:7.9e-05 updt_s:0.175 data_s:1.305 +INFO 2025-08-04 13:42:29 ts/train.py:232 step:31K smpl:250K ep:499 epch:4.99 loss:0.015 grdn:0.274 lr:7.8e-05 updt_s:0.174 data_s:1.295 +INFO 2025-08-04 13:47:22 ts/train.py:232 step:31K smpl:251K ep:502 epch:5.02 loss:0.015 grdn:0.277 lr:7.8e-05 updt_s:0.172 data_s:1.294 +INFO 2025-08-04 13:52:12 ts/train.py:232 step:32K smpl:253K ep:506 epch:5.06 loss:0.015 grdn:0.249 lr:7.8e-05 updt_s:0.170 data_s:1.276 +INFO 2025-08-04 13:57:04 ts/train.py:232 step:32K smpl:254K ep:509 epch:5.09 loss:0.015 grdn:0.264 lr:7.8e-05 updt_s:0.172 data_s:1.288 +INFO 2025-08-04 14:01:55 ts/train.py:232 step:32K smpl:256K ep:512 epch:5.12 loss:0.015 grdn:0.258 lr:7.7e-05 updt_s:0.174 data_s:1.282 +INFO 2025-08-04 14:06:47 ts/train.py:232 step:32K smpl:258K ep:515 epch:5.15 loss:0.016 grdn:0.272 lr:7.7e-05 updt_s:0.180 data_s:1.278 +INFO 2025-08-04 14:11:25 ts/train.py:232 step:32K smpl:259K ep:518 epch:5.18 loss:0.016 grdn:0.267 lr:7.7e-05 updt_s:0.179 data_s:1.211 +INFO 2025-08-04 14:15:59 ts/train.py:232 step:33K smpl:261K ep:522 epch:5.22 loss:0.016 grdn:0.269 lr:7.7e-05 updt_s:0.176 data_s:1.192 +INFO 2025-08-04 14:20:35 ts/train.py:232 step:33K smpl:262K ep:525 epch:5.25 loss:0.014 grdn:0.266 lr:7.6e-05 updt_s:0.178 data_s:1.201 +INFO 2025-08-04 14:25:11 ts/train.py:232 step:33K smpl:264K ep:528 epch:5.28 loss:0.015 grdn:0.261 lr:7.6e-05 updt_s:0.178 data_s:1.201 +INFO 2025-08-04 14:29:45 ts/train.py:232 step:33K smpl:266K ep:531 epch:5.31 loss:0.016 grdn:0.288 lr:7.6e-05 updt_s:0.173 data_s:1.196 +INFO 2025-08-04 14:34:18 ts/train.py:232 step:33K smpl:267K ep:534 epch:5.34 loss:0.013 grdn:0.245 lr:7.5e-05 updt_s:0.179 data_s:1.186 +INFO 2025-08-04 14:38:51 ts/train.py:232 step:34K smpl:269K ep:538 epch:5.38 loss:0.014 grdn:0.267 lr:7.5e-05 updt_s:0.181 data_s:1.186 +INFO 2025-08-04 14:43:25 ts/train.py:232 step:34K smpl:270K ep:541 epch:5.41 loss:0.014 grdn:0.262 lr:7.5e-05 updt_s:0.179 data_s:1.186 +INFO 2025-08-04 14:48:03 ts/train.py:232 step:34K smpl:272K ep:544 epch:5.44 loss:0.015 grdn:0.286 lr:7.5e-05 updt_s:0.174 data_s:1.218 +INFO 2025-08-04 14:52:39 ts/train.py:232 step:34K smpl:274K ep:547 epch:5.47 loss:0.015 grdn:0.271 lr:7.4e-05 updt_s:0.179 data_s:1.199 +INFO 2025-08-04 14:57:15 ts/train.py:232 step:34K smpl:275K ep:550 epch:5.50 loss:0.014 grdn:0.256 lr:7.4e-05 updt_s:0.179 data_s:1.198 +INFO 2025-08-04 15:01:51 ts/train.py:232 step:35K smpl:277K ep:554 epch:5.54 loss:0.014 grdn:0.262 lr:7.4e-05 updt_s:0.175 data_s:1.205 +INFO 2025-08-04 15:06:30 ts/train.py:232 step:35K smpl:278K ep:557 epch:5.57 loss:0.016 grdn:0.280 lr:7.4e-05 updt_s:0.179 data_s:1.214 +INFO 2025-08-04 15:11:06 ts/train.py:232 step:35K smpl:280K ep:560 epch:5.60 loss:0.014 grdn:0.241 lr:7.3e-05 updt_s:0.174 data_s:1.205 +INFO 2025-08-04 15:15:40 ts/train.py:232 step:35K smpl:282K ep:563 epch:5.63 loss:0.013 grdn:0.251 lr:7.3e-05 updt_s:0.179 data_s:1.191 +INFO 2025-08-04 15:20:16 ts/train.py:232 step:35K smpl:283K ep:566 epch:5.66 loss:0.014 grdn:0.251 lr:7.3e-05 updt_s:0.175 data_s:1.207 +INFO 2025-08-04 15:24:54 ts/train.py:232 step:36K smpl:285K ep:570 epch:5.70 loss:0.013 grdn:0.263 lr:7.2e-05 updt_s:0.180 data_s:1.205 +INFO 2025-08-04 15:29:29 ts/train.py:232 step:36K smpl:286K ep:573 epch:5.73 loss:0.015 grdn:0.266 lr:7.2e-05 updt_s:0.173 data_s:1.203 +INFO 2025-08-04 15:34:06 ts/train.py:232 step:36K smpl:288K ep:576 epch:5.76 loss:0.013 grdn:0.241 lr:7.2e-05 updt_s:0.173 data_s:1.210 +INFO 2025-08-04 15:38:38 ts/train.py:232 step:36K smpl:290K ep:579 epch:5.79 loss:0.015 grdn:0.255 lr:7.2e-05 updt_s:0.176 data_s:1.183 +INFO 2025-08-04 15:43:14 ts/train.py:232 step:36K smpl:291K ep:582 epch:5.82 loss:0.014 grdn:0.263 lr:7.1e-05 updt_s:0.181 data_s:1.201 +INFO 2025-08-04 15:47:53 ts/train.py:232 step:37K smpl:293K ep:586 epch:5.86 loss:0.014 grdn:0.266 lr:7.1e-05 updt_s:0.177 data_s:1.215 +INFO 2025-08-04 15:52:33 ts/train.py:232 step:37K smpl:294K ep:589 epch:5.89 loss:0.013 grdn:0.264 lr:7.1e-05 updt_s:0.174 data_s:1.226 +INFO 2025-08-04 15:57:10 ts/train.py:232 step:37K smpl:296K ep:592 epch:5.92 loss:0.014 grdn:0.261 lr:7.0e-05 updt_s:0.171 data_s:1.213 +INFO 2025-08-04 16:02:06 ts/train.py:232 step:37K smpl:298K ep:595 epch:5.95 loss:0.014 grdn:0.253 lr:7.0e-05 updt_s:0.178 data_s:1.300 +INFO 2025-08-04 16:06:57 ts/train.py:232 step:37K smpl:299K ep:598 epch:5.98 loss:0.012 grdn:0.242 lr:7.0e-05 updt_s:0.177 data_s:1.275 +INFO 2025-08-04 16:11:50 ts/train.py:232 step:38K smpl:301K ep:602 epch:6.02 loss:0.012 grdn:0.231 lr:7.0e-05 updt_s:0.179 data_s:1.290 +INFO 2025-08-04 16:16:44 ts/train.py:232 step:38K smpl:302K ep:605 epch:6.05 loss:0.013 grdn:0.257 lr:6.9e-05 updt_s:0.180 data_s:1.288 +INFO 2025-08-04 16:21:41 ts/train.py:232 step:38K smpl:304K ep:608 epch:6.08 loss:0.014 grdn:0.243 lr:6.9e-05 updt_s:0.176 data_s:1.309 +INFO 2025-08-04 16:26:36 ts/train.py:232 step:38K smpl:306K ep:611 epch:6.11 loss:0.013 grdn:0.273 lr:6.9e-05 updt_s:0.174 data_s:1.296 +INFO 2025-08-04 16:31:25 ts/train.py:232 step:38K smpl:307K ep:614 epch:6.14 loss:0.014 grdn:0.247 lr:6.8e-05 updt_s:0.176 data_s:1.269 +INFO 2025-08-04 16:36:03 ts/train.py:232 step:39K smpl:309K ep:618 epch:6.18 loss:0.015 grdn:0.255 lr:6.8e-05 updt_s:0.177 data_s:1.215 +INFO 2025-08-04 16:40:42 ts/train.py:232 step:39K smpl:310K ep:621 epch:6.21 loss:0.014 grdn:0.242 lr:6.8e-05 updt_s:0.178 data_s:1.216 +INFO 2025-08-04 16:45:18 ts/train.py:232 step:39K smpl:312K ep:624 epch:6.24 loss:0.014 grdn:0.260 lr:6.8e-05 updt_s:0.176 data_s:1.202 +INFO 2025-08-04 16:49:54 ts/train.py:232 step:39K smpl:314K ep:627 epch:6.27 loss:0.013 grdn:0.253 lr:6.7e-05 updt_s:0.173 data_s:1.204 +INFO 2025-08-04 16:54:31 ts/train.py:232 step:39K smpl:315K ep:630 epch:6.30 loss:0.014 grdn:0.279 lr:6.7e-05 updt_s:0.174 data_s:1.212 +INFO 2025-08-04 16:59:05 ts/train.py:232 step:40K smpl:317K ep:634 epch:6.34 loss:0.012 grdn:0.248 lr:6.7e-05 updt_s:0.175 data_s:1.192 +INFO 2025-08-04 17:03:37 ts/train.py:232 step:40K smpl:318K ep:637 epch:6.37 loss:0.013 grdn:0.249 lr:6.6e-05 updt_s:0.178 data_s:1.184 +INFO 2025-08-04 17:08:16 ts/train.py:232 step:40K smpl:320K ep:640 epch:6.40 loss:0.014 grdn:0.272 lr:6.6e-05 updt_s:0.178 data_s:1.214 +INFO 2025-08-04 17:08:16 ts/train.py:241 Checkpoint policy after step 40000 +INFO 2025-08-04 17:12:58 ts/train.py:232 step:40K smpl:322K ep:643 epch:6.43 loss:0.013 grdn:0.262 lr:6.6e-05 updt_s:0.185 data_s:1.179 +INFO 2025-08-04 17:17:35 ts/train.py:232 step:40K smpl:323K ep:646 epch:6.46 loss:0.013 grdn:0.266 lr:6.5e-05 updt_s:0.176 data_s:1.209 +INFO 2025-08-04 17:22:10 ts/train.py:232 step:41K smpl:325K ep:650 epch:6.50 loss:0.014 grdn:0.251 lr:6.5e-05 updt_s:0.176 data_s:1.197 +INFO 2025-08-04 17:26:46 ts/train.py:232 step:41K smpl:326K ep:653 epch:6.53 loss:0.013 grdn:0.248 lr:6.5e-05 updt_s:0.172 data_s:1.207 +INFO 2025-08-04 17:31:21 ts/train.py:232 step:41K smpl:328K ep:656 epch:6.56 loss:0.014 grdn:0.266 lr:6.5e-05 updt_s:0.172 data_s:1.199 +INFO 2025-08-04 17:35:53 ts/train.py:232 step:41K smpl:330K ep:659 epch:6.59 loss:0.013 grdn:0.269 lr:6.4e-05 updt_s:0.175 data_s:1.184 +INFO 2025-08-04 17:40:32 ts/train.py:232 step:41K smpl:331K ep:662 epch:6.62 loss:0.012 grdn:0.257 lr:6.4e-05 updt_s:0.177 data_s:1.217 +INFO 2025-08-04 17:45:09 ts/train.py:232 step:42K smpl:333K ep:666 epch:6.66 loss:0.013 grdn:0.270 lr:6.4e-05 updt_s:0.177 data_s:1.212 +INFO 2025-08-04 17:49:47 ts/train.py:232 step:42K smpl:334K ep:669 epch:6.69 loss:0.013 grdn:0.251 lr:6.3e-05 updt_s:0.174 data_s:1.211 +INFO 2025-08-04 17:54:22 ts/train.py:232 step:42K smpl:336K ep:672 epch:6.72 loss:0.013 grdn:0.251 lr:6.3e-05 updt_s:0.174 data_s:1.203 +INFO 2025-08-04 17:59:00 ts/train.py:232 step:42K smpl:338K ep:675 epch:6.75 loss:0.011 grdn:0.233 lr:6.3e-05 updt_s:0.176 data_s:1.209 +INFO 2025-08-04 18:03:34 ts/train.py:232 step:42K smpl:339K ep:678 epch:6.78 loss:0.013 grdn:0.251 lr:6.2e-05 updt_s:0.176 data_s:1.194 +INFO 2025-08-04 18:08:09 ts/train.py:232 step:43K smpl:341K ep:682 epch:6.82 loss:0.015 grdn:0.297 lr:6.2e-05 updt_s:0.175 data_s:1.198 +INFO 2025-08-04 18:12:43 ts/train.py:232 step:43K smpl:342K ep:685 epch:6.85 loss:0.014 grdn:0.264 lr:6.2e-05 updt_s:0.171 data_s:1.202 +INFO 2025-08-04 18:17:17 ts/train.py:232 step:43K smpl:344K ep:688 epch:6.88 loss:0.012 grdn:0.257 lr:6.1e-05 updt_s:0.179 data_s:1.190 +INFO 2025-08-04 18:21:58 ts/train.py:232 step:43K smpl:346K ep:691 epch:6.91 loss:0.014 grdn:0.267 lr:6.1e-05 updt_s:0.174 data_s:1.229 +INFO 2025-08-04 18:26:55 ts/train.py:232 step:43K smpl:347K ep:694 epch:6.94 loss:0.012 grdn:0.235 lr:6.1e-05 updt_s:0.176 data_s:1.306 +INFO 2025-08-04 18:31:45 ts/train.py:232 step:44K smpl:349K ep:698 epch:6.98 loss:0.012 grdn:0.241 lr:6.1e-05 updt_s:0.176 data_s:1.275 +INFO 2025-08-04 18:36:36 ts/train.py:232 step:44K smpl:350K ep:701 epch:7.01 loss:0.012 grdn:0.241 lr:6.0e-05 updt_s:0.176 data_s:1.279 +INFO 2025-08-04 18:41:25 ts/train.py:232 step:44K smpl:352K ep:704 epch:7.04 loss:0.013 grdn:0.256 lr:6.0e-05 updt_s:0.182 data_s:1.262 +INFO 2025-08-04 18:46:18 ts/train.py:232 step:44K smpl:354K ep:707 epch:7.07 loss:0.014 grdn:0.265 lr:6.0e-05 updt_s:0.182 data_s:1.281 +INFO 2025-08-04 18:51:11 ts/train.py:232 step:44K smpl:355K ep:710 epch:7.10 loss:0.013 grdn:0.277 lr:5.9e-05 updt_s:0.177 data_s:1.286 +INFO 2025-08-04 18:55:55 ts/train.py:232 step:45K smpl:357K ep:714 epch:7.14 loss:0.011 grdn:0.252 lr:5.9e-05 updt_s:0.178 data_s:1.239 +INFO 2025-08-04 19:00:33 ts/train.py:232 step:45K smpl:358K ep:717 epch:7.17 loss:0.014 grdn:0.258 lr:5.9e-05 updt_s:0.179 data_s:1.212 +INFO 2025-08-04 19:05:11 ts/train.py:232 step:45K smpl:360K ep:720 epch:7.20 loss:0.014 grdn:0.277 lr:5.8e-05 updt_s:0.176 data_s:1.216 +INFO 2025-08-04 19:09:51 ts/train.py:232 step:45K smpl:362K ep:723 epch:7.23 loss:0.013 grdn:0.245 lr:5.8e-05 updt_s:0.174 data_s:1.224 +INFO 2025-08-04 19:14:27 ts/train.py:232 step:45K smpl:363K ep:726 epch:7.26 loss:0.013 grdn:0.239 lr:5.8e-05 updt_s:0.174 data_s:1.206 +INFO 2025-08-04 19:19:06 ts/train.py:232 step:46K smpl:365K ep:730 epch:7.30 loss:0.012 grdn:0.260 lr:5.7e-05 updt_s:0.177 data_s:1.212 +INFO 2025-08-04 19:23:47 ts/train.py:232 step:46K smpl:366K ep:733 epch:7.33 loss:0.012 grdn:0.251 lr:5.7e-05 updt_s:0.178 data_s:1.227 +INFO 2025-08-04 19:28:21 ts/train.py:232 step:46K smpl:368K ep:736 epch:7.36 loss:0.013 grdn:0.252 lr:5.7e-05 updt_s:0.178 data_s:1.195 +INFO 2025-08-04 19:32:54 ts/train.py:232 step:46K smpl:370K ep:739 epch:7.39 loss:0.014 grdn:0.256 lr:5.7e-05 updt_s:0.180 data_s:1.180 +INFO 2025-08-04 19:37:29 ts/train.py:232 step:46K smpl:371K ep:742 epch:7.42 loss:0.012 grdn:0.234 lr:5.6e-05 updt_s:0.178 data_s:1.197 +INFO 2025-08-04 19:42:09 ts/train.py:232 step:47K smpl:373K ep:746 epch:7.46 loss:0.012 grdn:0.253 lr:5.6e-05 updt_s:0.177 data_s:1.225 +INFO 2025-08-04 19:46:46 ts/train.py:232 step:47K smpl:374K ep:749 epch:7.49 loss:0.012 grdn:0.250 lr:5.6e-05 updt_s:0.173 data_s:1.208 +INFO 2025-08-04 19:51:22 ts/train.py:232 step:47K smpl:376K ep:752 epch:7.52 loss:0.012 grdn:0.252 lr:5.5e-05 updt_s:0.174 data_s:1.204 +INFO 2025-08-04 19:55:57 ts/train.py:232 step:47K smpl:378K ep:755 epch:7.55 loss:0.012 grdn:0.241 lr:5.5e-05 updt_s:0.175 data_s:1.202 +INFO 2025-08-04 20:00:33 ts/train.py:232 step:47K smpl:379K ep:758 epch:7.58 loss:0.014 grdn:0.264 lr:5.5e-05 updt_s:0.171 data_s:1.207 +INFO 2025-08-04 20:05:13 ts/train.py:232 step:48K smpl:381K ep:762 epch:7.62 loss:0.012 grdn:0.245 lr:5.4e-05 updt_s:0.175 data_s:1.224 +INFO 2025-08-04 20:09:53 ts/train.py:232 step:48K smpl:382K ep:765 epch:7.65 loss:0.012 grdn:0.251 lr:5.4e-05 updt_s:0.178 data_s:1.222 +INFO 2025-08-04 20:14:31 ts/train.py:232 step:48K smpl:384K ep:768 epch:7.68 loss:0.013 grdn:0.248 lr:5.4e-05 updt_s:0.180 data_s:1.208 +INFO 2025-08-04 20:19:14 ts/train.py:232 step:48K smpl:386K ep:771 epch:7.71 loss:0.011 grdn:0.248 lr:5.3e-05 updt_s:0.179 data_s:1.234 +INFO 2025-08-04 20:23:49 ts/train.py:232 step:48K smpl:387K ep:774 epch:7.74 loss:0.012 grdn:0.249 lr:5.3e-05 updt_s:0.171 data_s:1.204 +INFO 2025-08-04 20:28:26 ts/train.py:232 step:49K smpl:389K ep:778 epch:7.78 loss:0.012 grdn:0.270 lr:5.3e-05 updt_s:0.170 data_s:1.214 +INFO 2025-08-04 20:32:59 ts/train.py:232 step:49K smpl:390K ep:781 epch:7.81 loss:0.012 grdn:0.234 lr:5.2e-05 updt_s:0.172 data_s:1.188 +INFO 2025-08-04 20:37:31 ts/train.py:232 step:49K smpl:392K ep:784 epch:7.84 loss:0.013 grdn:0.259 lr:5.2e-05 updt_s:0.176 data_s:1.187 +INFO 2025-08-04 20:42:10 ts/train.py:232 step:49K smpl:394K ep:787 epch:7.87 loss:0.011 grdn:0.264 lr:5.2e-05 updt_s:0.177 data_s:1.215 +INFO 2025-08-04 20:46:53 ts/train.py:232 step:49K smpl:395K ep:790 epch:7.90 loss:0.014 grdn:0.252 lr:5.1e-05 updt_s:0.176 data_s:1.239 +INFO 2025-08-04 20:51:52 ts/train.py:232 step:50K smpl:397K ep:794 epch:7.94 loss:0.011 grdn:0.242 lr:5.1e-05 updt_s:0.179 data_s:1.313 +INFO 2025-08-04 20:56:45 ts/train.py:232 step:50K smpl:398K ep:797 epch:7.97 loss:0.012 grdn:0.244 lr:5.1e-05 updt_s:0.178 data_s:1.286 +INFO 2025-08-04 21:01:38 ts/train.py:232 step:50K smpl:400K ep:800 epch:8.00 loss:0.012 grdn:0.264 lr:5.1e-05 updt_s:0.169 data_s:1.296 +INFO 2025-08-04 21:06:32 ts/train.py:232 step:50K smpl:402K ep:803 epch:8.03 loss:0.012 grdn:0.239 lr:5.0e-05 updt_s:0.171 data_s:1.300 +INFO 2025-08-04 21:11:23 ts/train.py:232 step:50K smpl:403K ep:806 epch:8.06 loss:0.011 grdn:0.226 lr:5.0e-05 updt_s:0.173 data_s:1.282 +INFO 2025-08-04 21:16:16 ts/train.py:232 step:51K smpl:405K ep:810 epch:8.10 loss:0.011 grdn:0.241 lr:5.0e-05 updt_s:0.173 data_s:1.289 +INFO 2025-08-04 21:20:56 ts/train.py:232 step:51K smpl:406K ep:813 epch:8.13 loss:0.011 grdn:0.235 lr:4.9e-05 updt_s:0.172 data_s:1.229 +INFO 2025-08-04 21:25:34 ts/train.py:232 step:51K smpl:408K ep:816 epch:8.16 loss:0.012 grdn:0.257 lr:4.9e-05 updt_s:0.172 data_s:1.213 +INFO 2025-08-04 21:30:11 ts/train.py:232 step:51K smpl:410K ep:819 epch:8.19 loss:0.011 grdn:0.247 lr:4.9e-05 updt_s:0.175 data_s:1.212 +INFO 2025-08-04 21:34:50 ts/train.py:232 step:51K smpl:411K ep:822 epch:8.22 loss:0.011 grdn:0.241 lr:4.8e-05 updt_s:0.175 data_s:1.220 +INFO 2025-08-04 21:39:28 ts/train.py:232 step:52K smpl:413K ep:826 epch:8.26 loss:0.011 grdn:0.240 lr:4.8e-05 updt_s:0.174 data_s:1.213 +INFO 2025-08-04 21:44:03 ts/train.py:232 step:52K smpl:414K ep:829 epch:8.29 loss:0.012 grdn:0.244 lr:4.8e-05 updt_s:0.179 data_s:1.198 +INFO 2025-08-04 21:48:40 ts/train.py:232 step:52K smpl:416K ep:832 epch:8.32 loss:0.012 grdn:0.254 lr:4.7e-05 updt_s:0.179 data_s:1.203 +INFO 2025-08-04 21:53:17 ts/train.py:232 step:52K smpl:418K ep:835 epch:8.35 loss:0.013 grdn:0.258 lr:4.7e-05 updt_s:0.177 data_s:1.207 +INFO 2025-08-04 21:57:53 ts/train.py:232 step:52K smpl:419K ep:838 epch:8.38 loss:0.013 grdn:0.245 lr:4.7e-05 updt_s:0.180 data_s:1.199 +INFO 2025-08-04 22:02:29 ts/train.py:232 step:53K smpl:421K ep:842 epch:8.42 loss:0.011 grdn:0.245 lr:4.6e-05 updt_s:0.174 data_s:1.208 +INFO 2025-08-04 22:07:07 ts/train.py:232 step:53K smpl:422K ep:845 epch:8.45 loss:0.012 grdn:0.250 lr:4.6e-05 updt_s:0.177 data_s:1.208 +INFO 2025-08-04 22:11:40 ts/train.py:232 step:53K smpl:424K ep:848 epch:8.48 loss:0.012 grdn:0.255 lr:4.6e-05 updt_s:0.181 data_s:1.183 +INFO 2025-08-04 22:16:14 ts/train.py:232 step:53K smpl:426K ep:851 epch:8.51 loss:0.010 grdn:0.225 lr:4.6e-05 updt_s:0.178 data_s:1.192 +INFO 2025-08-04 22:20:57 ts/train.py:232 step:53K smpl:427K ep:854 epch:8.54 loss:0.012 grdn:0.267 lr:4.5e-05 updt_s:0.175 data_s:1.238 +INFO 2025-08-04 22:25:32 ts/train.py:232 step:54K smpl:429K ep:858 epch:8.58 loss:0.012 grdn:0.253 lr:4.5e-05 updt_s:0.177 data_s:1.200 +INFO 2025-08-04 22:30:07 ts/train.py:232 step:54K smpl:430K ep:861 epch:8.61 loss:0.012 grdn:0.243 lr:4.5e-05 updt_s:0.175 data_s:1.201 +INFO 2025-08-04 22:34:43 ts/train.py:232 step:54K smpl:432K ep:864 epch:8.64 loss:0.012 grdn:0.262 lr:4.4e-05 updt_s:0.175 data_s:1.202 +INFO 2025-08-04 22:39:20 ts/train.py:232 step:54K smpl:434K ep:867 epch:8.67 loss:0.010 grdn:0.222 lr:4.4e-05 updt_s:0.172 data_s:1.212 +INFO 2025-08-04 22:43:54 ts/train.py:232 step:54K smpl:435K ep:870 epch:8.70 loss:0.009 grdn:0.227 lr:4.4e-05 updt_s:0.178 data_s:1.191 +INFO 2025-08-04 22:48:31 ts/train.py:232 step:55K smpl:437K ep:874 epch:8.74 loss:0.011 grdn:0.244 lr:4.3e-05 updt_s:0.179 data_s:1.205 +INFO 2025-08-04 22:53:05 ts/train.py:232 step:55K smpl:438K ep:877 epch:8.77 loss:0.012 grdn:0.253 lr:4.3e-05 updt_s:0.176 data_s:1.191 +INFO 2025-08-04 22:57:45 ts/train.py:232 step:55K smpl:440K ep:880 epch:8.80 loss:0.012 grdn:0.256 lr:4.3e-05 updt_s:0.177 data_s:1.226 +INFO 2025-08-04 23:02:20 ts/train.py:232 step:55K smpl:442K ep:883 epch:8.83 loss:0.011 grdn:0.261 lr:4.2e-05 updt_s:0.172 data_s:1.200 +INFO 2025-08-04 23:06:54 ts/train.py:232 step:55K smpl:443K ep:886 epch:8.86 loss:0.012 grdn:0.241 lr:4.2e-05 updt_s:0.172 data_s:1.199 +INFO 2025-08-04 23:11:44 ts/train.py:232 step:56K smpl:445K ep:890 epch:8.90 loss:0.011 grdn:0.249 lr:4.2e-05 updt_s:0.174 data_s:1.276 +INFO 2025-08-04 23:16:43 ts/train.py:232 step:56K smpl:446K ep:893 epch:8.93 loss:0.012 grdn:0.266 lr:4.1e-05 updt_s:0.176 data_s:1.313 +INFO 2025-08-04 23:21:32 ts/train.py:232 step:56K smpl:448K ep:896 epch:8.96 loss:0.012 grdn:0.260 lr:4.1e-05 updt_s:0.174 data_s:1.270 +INFO 2025-08-04 23:26:22 ts/train.py:232 step:56K smpl:450K ep:899 epch:8.99 loss:0.011 grdn:0.245 lr:4.1e-05 updt_s:0.175 data_s:1.275 +INFO 2025-08-04 23:31:18 ts/train.py:232 step:56K smpl:451K ep:902 epch:9.02 loss:0.011 grdn:0.244 lr:4.1e-05 updt_s:0.176 data_s:1.302 +INFO 2025-08-04 23:36:13 ts/train.py:232 step:57K smpl:453K ep:906 epch:9.06 loss:0.011 grdn:0.251 lr:4.0e-05 updt_s:0.175 data_s:1.300 +INFO 2025-08-04 23:41:07 ts/train.py:232 step:57K smpl:454K ep:909 epch:9.09 loss:0.011 grdn:0.242 lr:4.0e-05 updt_s:0.174 data_s:1.294 +INFO 2025-08-04 23:45:46 ts/train.py:232 step:57K smpl:456K ep:912 epch:9.12 loss:0.011 grdn:0.236 lr:4.0e-05 updt_s:0.173 data_s:1.224 +INFO 2025-08-04 23:50:20 ts/train.py:232 step:57K smpl:458K ep:915 epch:9.15 loss:0.011 grdn:0.249 lr:3.9e-05 updt_s:0.185 data_s:1.183 +INFO 2025-08-04 23:54:58 ts/train.py:232 step:57K smpl:459K ep:918 epch:9.18 loss:0.010 grdn:0.231 lr:3.9e-05 updt_s:0.178 data_s:1.212 +INFO 2025-08-04 23:59:37 ts/train.py:232 step:58K smpl:461K ep:922 epch:9.22 loss:0.011 grdn:0.252 lr:3.9e-05 updt_s:0.170 data_s:1.223 +INFO 2025-08-05 00:04:15 ts/train.py:232 step:58K smpl:462K ep:925 epch:9.25 loss:0.011 grdn:0.247 lr:3.8e-05 updt_s:0.175 data_s:1.212 +INFO 2025-08-05 00:08:51 ts/train.py:232 step:58K smpl:464K ep:928 epch:9.28 loss:0.011 grdn:0.251 lr:3.8e-05 updt_s:0.177 data_s:1.206 +INFO 2025-08-05 00:13:30 ts/train.py:232 step:58K smpl:466K ep:931 epch:9.31 loss:0.010 grdn:0.224 lr:3.8e-05 updt_s:0.178 data_s:1.215 +INFO 2025-08-05 00:18:05 ts/train.py:232 step:58K smpl:467K ep:934 epch:9.34 loss:0.011 grdn:0.227 lr:3.7e-05 updt_s:0.181 data_s:1.194 +INFO 2025-08-05 00:22:47 ts/train.py:232 step:59K smpl:469K ep:938 epch:9.38 loss:0.011 grdn:0.265 lr:3.7e-05 updt_s:0.177 data_s:1.230 +INFO 2025-08-05 00:27:23 ts/train.py:232 step:59K smpl:470K ep:941 epch:9.41 loss:0.010 grdn:0.236 lr:3.7e-05 updt_s:0.175 data_s:1.206 +INFO 2025-08-05 00:31:56 ts/train.py:232 step:59K smpl:472K ep:944 epch:9.44 loss:0.010 grdn:0.232 lr:3.7e-05 updt_s:0.173 data_s:1.191 +INFO 2025-08-05 00:36:34 ts/train.py:232 step:59K smpl:474K ep:947 epch:9.47 loss:0.010 grdn:0.237 lr:3.6e-05 updt_s:0.176 data_s:1.211 +INFO 2025-08-05 00:41:00 ts/train.py:232 step:59K smpl:475K ep:950 epch:9.50 loss:0.011 grdn:0.244 lr:3.6e-05 updt_s:0.169 data_s:1.163 +INFO 2025-08-05 00:45:30 ts/train.py:232 step:60K smpl:477K ep:954 epch:9.54 loss:0.011 grdn:0.256 lr:3.6e-05 updt_s:0.172 data_s:1.176 +INFO 2025-08-05 00:50:05 ts/train.py:232 step:60K smpl:478K ep:957 epch:9.57 loss:0.011 grdn:0.260 lr:3.5e-05 updt_s:0.177 data_s:1.194 +INFO 2025-08-05 00:54:40 ts/train.py:232 step:60K smpl:480K ep:960 epch:9.60 loss:0.010 grdn:0.241 lr:3.5e-05 updt_s:0.175 data_s:1.203 +INFO 2025-08-05 00:54:40 ts/train.py:241 Checkpoint policy after step 60000 +INFO 2025-08-05 00:59:24 ts/train.py:232 step:60K smpl:482K ep:963 epch:9.63 loss:0.011 grdn:0.287 lr:3.5e-05 updt_s:0.178 data_s:1.178 +INFO 2025-08-05 01:03:59 ts/train.py:232 step:60K smpl:483K ep:966 epch:9.66 loss:0.011 grdn:0.239 lr:3.4e-05 updt_s:0.173 data_s:1.201 +INFO 2025-08-05 01:08:32 ts/train.py:232 step:61K smpl:485K ep:970 epch:9.70 loss:0.010 grdn:0.239 lr:3.4e-05 updt_s:0.173 data_s:1.189 +INFO 2025-08-05 01:13:05 ts/train.py:232 step:61K smpl:486K ep:973 epch:9.73 loss:0.011 grdn:0.254 lr:3.4e-05 updt_s:0.175 data_s:1.191 +INFO 2025-08-05 01:17:35 ts/train.py:232 step:61K smpl:488K ep:976 epch:9.76 loss:0.010 grdn:0.228 lr:3.4e-05 updt_s:0.174 data_s:1.176 +INFO 2025-08-05 01:22:08 ts/train.py:232 step:61K smpl:490K ep:979 epch:9.79 loss:0.011 grdn:0.236 lr:3.3e-05 updt_s:0.173 data_s:1.193 +INFO 2025-08-05 01:26:44 ts/train.py:232 step:61K smpl:491K ep:982 epch:9.82 loss:0.010 grdn:0.239 lr:3.3e-05 updt_s:0.173 data_s:1.204 +INFO 2025-08-05 01:31:19 ts/train.py:232 step:62K smpl:493K ep:986 epch:9.86 loss:0.010 grdn:0.226 lr:3.3e-05 updt_s:0.173 data_s:1.199 +INFO 2025-08-05 01:36:07 ts/train.py:232 step:62K smpl:494K ep:989 epch:9.89 loss:0.011 grdn:0.230 lr:3.2e-05 updt_s:0.173 data_s:1.269 +INFO 2025-08-05 01:40:56 ts/train.py:232 step:62K smpl:496K ep:992 epch:9.92 loss:0.010 grdn:0.231 lr:3.2e-05 updt_s:0.176 data_s:1.266 +INFO 2025-08-05 01:45:47 ts/train.py:232 step:62K smpl:498K ep:995 epch:9.95 loss:0.010 grdn:0.251 lr:3.2e-05 updt_s:0.173 data_s:1.280 +INFO 2025-08-05 01:50:37 ts/train.py:232 step:62K smpl:499K ep:998 epch:9.98 loss:0.010 grdn:0.239 lr:3.1e-05 updt_s:0.177 data_s:1.272 +INFO 2025-08-05 01:55:26 ts/train.py:232 step:63K smpl:501K ep:1K epch:10.02 loss:0.009 grdn:0.231 lr:3.1e-05 updt_s:0.179 data_s:1.266 +INFO 2025-08-05 02:00:19 ts/train.py:232 step:63K smpl:502K ep:1K epch:10.05 loss:0.010 grdn:0.221 lr:3.1e-05 updt_s:0.173 data_s:1.295 +INFO 2025-08-05 02:05:10 ts/train.py:232 step:63K smpl:504K ep:1K epch:10.08 loss:0.010 grdn:0.263 lr:3.1e-05 updt_s:0.178 data_s:1.273 +INFO 2025-08-05 02:09:44 ts/train.py:232 step:63K smpl:506K ep:1K epch:10.11 loss:0.010 grdn:0.234 lr:3.0e-05 updt_s:0.174 data_s:1.199 diff --git a/wandb/run-20250804_020558-ftnu6goz/files/requirements.txt b/wandb/run-20250804_020558-ftnu6goz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..47864fcff4e2976df45f0e2a69161f599efb87ef --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/files/requirements.txt @@ -0,0 +1,146 @@ +lightning-utilities==0.14.3 +uv==0.7.19 +umap-learn==0.5.9.post2 +torchmetrics==1.7.4 +anndata==0.11.4 +pynndescent==0.5.13 +array-api-compat==1.12.0 +contourpy==1.3.2 +tzdata==2025.2 +filelock==3.18.0 +natsort==8.4.0 +scipy==1.15.3 +kiwisolver==1.4.8 +networkx==3.4.2 +numba==0.61.2 +patsy==1.0.1 +legacy-api-wrap==1.4.1 +pillow==11.3.0 +pytorch-lightning==2.5.2 +cycler==0.12.1 +h5py==3.14.0 +pyparsing==3.2.3 +packaging==25.0 +joblib==1.5.1 +session-info2==0.2 +scikit-learn==1.7.1 +cell-load==0.7.4 +statsmodels==0.14.5 +numpy==2.2.6 +mpmath==1.3.0 +lightning==2.5.2 +fonttools==4.59.0 +typing_extensions==4.14.1 +seaborn==0.13.2 +scanpy==1.11.3 +toml==0.10.2 +matplotlib==3.10.3 +threadpoolctl==3.6.0 +llvmlite==0.44.0 +pandas==2.3.1 +nvidia-cufile-cu12==1.11.1.6 +triton==3.3.1 +charset-normalizer==3.4.2 +sentry-sdk==2.34.1 +nvidia-cuda-runtime-cu12==12.6.77 +pyarrow==21.0.0 +sympy==1.14.0 +pynput==1.8.1 +pfzy==0.3.4 +Jinja2==3.1.6 +pydantic_core==2.33.2 +cmake==4.0.3 +nvidia-cusparse-cu12==12.5.4.2 +gitdb==4.0.12 +aiosignal==1.4.0 +yarl==1.20.1 +platformdirs==4.3.8 +jsonlines==4.0.0 +multiprocess==0.70.16 +certifi==2025.8.3 +inquirerpy==0.3.4 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-cublas-cu12==12.6.4.1 +pydantic==2.11.7 +async-timeout==5.0.1 +annotated-types==0.7.0 +typing-inspect==0.9.0 +imageio==2.37.0 +nvidia-cuda-cupti-cu12==12.6.80 +wcwidth==0.2.13 +six==1.17.0 +tqdm==4.67.1 +zipp==3.23.0 +torchcodec==0.5 +cloudpickle==3.1.1 +lerobot==0.3.2 +Flask==3.1.1 +python-xlib==0.33 +wandb==0.21.0 +urllib3==2.5.0 +nvidia-nccl-cu12==2.26.2 +typing-inspection==0.4.1 +evdev==1.9.2 +prompt_toolkit==3.0.51 +gymnasium==0.29.1 +nvidia-curand-cu12==10.3.7.77 +GitPython==3.1.45 +opencv-python-headless==4.12.0.88 +datasets==3.6.0 +nvidia-cusolver-cu12==11.7.1.2 +termcolor==3.1.0 +mypy_extensions==1.1.0 +idna==3.10 +MarkupSafe==3.0.2 +frozenlist==1.7.0 +regex==2025.7.34 +Werkzeug==3.1.3 +imageio-ffmpeg==0.6.0 +importlib_metadata==8.7.0 +blinker==1.9.0 +psutil==7.0.0 +fsspec==2025.3.0 +deepdiff==8.5.0 +dill==0.3.8 +diffusers==0.34.0 +wheel==0.45.1 +nvidia-nvtx-cu12==12.6.77 +av==15.0.0 +nvidia-cusparselt-cu12==0.6.3 +propcache==0.3.2 +python-can==4.5.0 +msgpack==1.1.1 +einops==0.8.1 +safetensors==0.5.3 +setuptools==78.1.1 +requests==2.32.4 +torchvision==0.22.1 +wrapt==1.17.2 +pyyaml-include==1.4.1 +piper-sdk==0.4.1 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cuda-nvrtc-cu12==12.6.77 +Farama-Notifications==0.0.4 +click==8.2.1 +pyserial==3.5 +smmap==5.0.2 +attrs==25.3.0 +draccus==0.10.0 +mergedeep==1.3.4 +huggingface-hub==0.34.3 +pytz==2025.2 +aiohappyeyeballs==2.6.1 +python-dateutil==2.9.0.post0 +itsdangerous==2.2.0 +torch==2.7.1 +multidict==6.6.3 +aiohttp==3.12.15 +protobuf==6.31.1 +hf_transfer==0.1.9 +xxhash==3.5.0 +pip==25.1 +PyYAML==6.0.2 +orderly-set==5.5.0 +nvidia-cudnn-cu12==9.5.1.17 +hf-xet==1.1.5 +lerobot==0.3.2 diff --git a/wandb/run-20250804_020558-ftnu6goz/files/wandb-metadata.json b/wandb/run-20250804_020558-ftnu6goz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1548f86ad284e3e2c7ecd3c6a0656bdfa1776126 --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/files/wandb-metadata.json @@ -0,0 +1,77 @@ +{ + "os": "Linux-4.18.0-553.36.1.el8_10.x86_64-x86_64-with-glibc2.28", + "python": "CPython 3.10.18", + "startedAt": "2025-08-04T07:05:58.513304Z", + "args": [ + "--dataset.repo_id=a1o/tac_insert", + "--policy.type=diffusion", + "--output_dir=outputs/train/diff_tac_insert", + "--job_name=diff_piper_tac_insert", + "--policy.device=cuda", + "--wandb.enable=true", + "--policy.repo_id=a1o/diff_pick_tac_insert_policy" + ], + "program": "-m lerobot.scripts.train", + "root": "outputs/train/diff_tac_insert", + "host": "qgpu0402", + "executable": "/projects/p32775/pythonenvs/tac_vla/bin/python", + "cpu_count": 52, + "cpu_count_logical": 52, + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_count": 1, + "disk": { + "/": { + "total": "101237334016", + "used": "6647775232" + } + }, + "memory": { + "total": "202474672128" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-PCIE-40GB", + "memoryTotal": "42949672960", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-76058b9f-a0b6-ae57-fd53-722f223e69f6" + } + ], + "cudaVersion": "12.8", + "slurm": { + "cluster_name": "quest", + "conf": "/etc/slurm/slurm.conf", + "cpus_on_node": "1", + "gpus_on_node": "1", + "gtids": "0", + "job_account": "p32775", + "job_cpus_per_node": "1", + "job_end_time": "1754377529", + "job_gid": "2000033", + "job_gpus": "1", + "job_id": "750582", + "job_name": "diff_tac_insert", + "job_nodelist": "qgpu0402", + "job_num_nodes": "1", + "job_partition": "gengpu", + "job_qos": "normal", + "job_start_time": "1754291129", + "job_uid": "2000033", + "job_user": "aye8078", + "jobid": "750582", + "localid": "0", + "mem_per_node": "49152", + "nnodes": "1", + "nodeid": "0", + "nodelist": "qgpu0402", + "prio_process": "0", + "procid": "0", + "submit_dir": "/gpfs/home/aye8078/Documents/Github/tac_vla/slurm", + "submit_host": "quser43", + "task_pid": "3986886", + "tasks_per_node": "1", + "topology_addr": "qgpu0402", + "topology_addr_pattern": "node" + }, + "writerId": "if7hn1yzw8gyzjeuzkiuy4zi3uwy8qx4" +} \ No newline at end of file diff --git a/wandb/run-20250804_020558-ftnu6goz/logs/debug-core.log b/wandb/run-20250804_020558-ftnu6goz/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..50a9275639bd33768f3e6c563e8828563d6246cd --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/logs/debug-core.log @@ -0,0 +1,6 @@ +{"time":"2025-08-04T02:05:58.646716608-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqflxp2br/port-3987031.txt","pid":3987031,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-08-04T02:05:58.64925457-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3987031-3987071-3421209020/socket","Net":"unix"}} +{"time":"2025-08-04T02:05:58.649383141-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":3987031} +{"time":"2025-08-04T02:05:58.791834972-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-08-04T02:05:58.86389041-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"ftnu6goz","id":"1(@)"} +{"time":"2025-08-04T02:05:59.065406474-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ftnu6goz","id":"1(@)"} diff --git a/wandb/run-20250804_020558-ftnu6goz/logs/debug-internal.log b/wandb/run-20250804_020558-ftnu6goz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a2f7e4b16c3eb2f984ad6500ee1bfe20e92f57bf --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2025-08-04T02:05:58.868875247-05:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-08-04T02:05:59.065333139-05:00","level":"INFO","msg":"stream: created new stream","id":"ftnu6goz"} +{"time":"2025-08-04T02:05:59.065392572-05:00","level":"INFO","msg":"stream: started","id":"ftnu6goz"} +{"time":"2025-08-04T02:05:59.067195012-05:00","level":"INFO","msg":"handler: started","stream_id":"ftnu6goz"} +{"time":"2025-08-04T02:05:59.067268973-05:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftnu6goz"} +{"time":"2025-08-04T02:05:59.067416134-05:00","level":"INFO","msg":"sender: started","stream_id":"ftnu6goz"} +{"time":"2025-08-04T18:19:23.470451172-05:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/a10v-1/lerobot/ftnu6goz/file_stream","body":"\n\n\n502 Server Error\n\n\n

Error: Server Error

\n

The server encountered a temporary error and could not complete your request.

Please try again in 30 seconds.

\n

\n\n"} +{"time":"2025-08-04T18:24:14.245096-05:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/a10v-1/lerobot/ftnu6goz/file_stream","body":"\n\n\n502 Server Error\n\n\n

Error: Server Error

\n

The server encountered a temporary error and could not complete your request.

Please try again in 30 seconds.

\n

\n\n"} diff --git a/wandb/run-20250804_020558-ftnu6goz/logs/debug.log b/wandb/run-20250804_020558-ftnu6goz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1c5793de94401221dcff5d8ed8573154abb3dab9 --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/logs/debug.log @@ -0,0 +1,21 @@ +2025-08-04 02:05:58,549 INFO MainThread:3987031 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_setup.py:_flush():80] Configure stats pid to 3987031 +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_setup.py:_flush():80] Loading settings from /home/aye8078/.config/wandb/settings +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_setup.py:_flush():80] Loading settings from /gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/wandb/settings +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_init.py:setup_run_log_directory():703] Logging user logs to outputs/train/diff_tac_insert/wandb/run-20250804_020558-ftnu6goz/logs/debug.log +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to outputs/train/diff_tac_insert/wandb/run-20250804_020558-ftnu6goz/logs/debug-internal.log +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_init.py:init():830] calling init triggers +2025-08-04 02:05:58,550 INFO MainThread:3987031 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'a1o/tac_insert', 'root': None, 'episodes': None, 'image_transforms': {'enable': False, 'max_num_transforms': 3, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec'}, 'env': None, 'policy': {'type': 'diffusion', 'n_obs_steps': 2, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {}, 'output_features': {}, 'device': 'cuda', 'use_amp': False, 'push_to_hub': True, 'repo_id': 'a1o/diff_pick_tac_insert_policy', 'private': None, 'tags': None, 'license': None, 'horizon': 16, 'n_action_steps': 8, 'drop_n_last_frames': 7, 'vision_backbone': 'resnet18', 'crop_shape': [84, 84], 'crop_is_random': True, 'pretrained_backbone_weights': None, 'use_group_norm': True, 'spatial_softmax_num_keypoints': 32, 'use_separate_rgb_encoder_per_camera': False, 'down_dims': [512, 1024, 2048], 'kernel_size': 5, 'n_groups': 8, 'diffusion_step_embed_dim': 128, 'use_film_scale_modulation': True, 'noise_scheduler_type': 'DDPM', 'num_train_timesteps': 100, 'beta_schedule': 'squaredcos_cap_v2', 'beta_start': 0.0001, 'beta_end': 0.02, 'prediction_type': 'epsilon', 'clip_sample': True, 'clip_sample_range': 1.0, 'num_inference_steps': None, 'do_mask_loss_for_padding': False, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.95, 0.999], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-06, 'scheduler_name': 'cosine', 'scheduler_warmup_steps': 500}, 'output_dir': 'outputs/train/diff_tac_insert', 'job_name': 'diff_piper_tac_insert', 'resume': False, 'seed': 1000, 'num_workers': 4, 'batch_size': 8, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 20000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adam', 'lr': 0.0001, 'weight_decay': 1e-06, 'grad_clip_norm': 10.0, 'betas': [0.95, 0.999], 'eps': 1e-08}, 'scheduler': {'type': 'diffuser', 'num_warmup_steps': 500, 'name': 'cosine'}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': None, 'mode': None}, '_wandb': {}} +2025-08-04 02:05:58,552 INFO MainThread:3987031 [wandb_init.py:init():871] starting backend +2025-08-04 02:05:58,792 INFO MainThread:3987031 [wandb_init.py:init():874] sending inform_init request +2025-08-04 02:05:58,860 INFO MainThread:3987031 [wandb_init.py:init():882] backend started and connected +2025-08-04 02:05:58,866 INFO MainThread:3987031 [wandb_init.py:init():953] updated telemetry +2025-08-04 02:05:58,866 INFO MainThread:3987031 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-08-04 02:05:59,198 INFO MainThread:3987031 [wandb_init.py:init():1029] starting run threads in backend +2025-08-04 02:05:59,492 INFO MainThread:3987031 [wandb_run.py:_console_start():2458] atexit reg +2025-08-04 02:05:59,492 INFO MainThread:3987031 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-08-04 02:05:59,492 INFO MainThread:3987031 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-08-04 02:05:59,492 INFO MainThread:3987031 [wandb_run.py:_redirect():2398] Redirects installed. +2025-08-04 02:05:59,497 INFO MainThread:3987031 [wandb_init.py:init():1075] run started, returning control to user process diff --git a/wandb/run-20250804_020558-ftnu6goz/run-ftnu6goz.wandb b/wandb/run-20250804_020558-ftnu6goz/run-ftnu6goz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..9f8370587491742de42bcd52337e0b8a47c91bd7 --- /dev/null +++ b/wandb/run-20250804_020558-ftnu6goz/run-ftnu6goz.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed99a2e0b8e1f568f8535fe164740d3b9feeb16cadb68f7637f66c9b8c4d554 +size 5275648 diff --git a/wandb/run-20250805_055539-ftnu6goz/files/config.yaml b/wandb/run-20250805_055539-ftnu6goz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..208e680d086f8e52e7bbaa2e426a53890c861d27 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/files/config.yaml @@ -0,0 +1,299 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + yd2qhrsop30spzg78awqeiox3wxlduul: + args: + - --config_path=/home/aye8078/Documents/Github/tac_vla/lerobot/outputs/train/diff_tac_insert/checkpoints/last/pretrained_model/train_config.json + - --resume=true + cpu_count: 52 + cpu_count_logical: 52 + cudaVersion: "12.8" + disk: + /: + total: "101237329920" + used: "9976344576" + executable: /projects/p32775/pythonenvs/tac_vla/bin/python + gpu: NVIDIA A100-PCIE-40GB + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "42949672960" + name: NVIDIA A100-PCIE-40GB + uuid: GPU-a233ec62-e827-0562-129f-d9164b24f210 + host: qgpu0404 + memory: + total: "202474663936" + os: Linux-4.18.0-553.36.1.el8_10.x86_64-x86_64-with-glibc2.28 + program: -m lerobot.scripts.train + python: CPython 3.10.18 + root: outputs/train/diff_tac_insert + slurm: + cluster_name: quest + conf: /etc/slurm/slurm.conf + cpus_on_node: "1" + gpus_on_node: "1" + gtids: "0" + job_account: p32775 + job_cpus_per_node: "1" + job_end_time: "1754477643" + job_gid: "2000033" + job_gpus: "0" + job_id: "839311" + job_name: diff_tac_insert + job_nodelist: qgpu0404 + job_num_nodes: "1" + job_partition: gengpu + job_qos: normal + job_start_time: "1754391243" + job_uid: "2000033" + job_user: aye8078 + jobid: "839311" + localid: "0" + mem_per_node: "49152" + nnodes: "1" + nodeid: "0" + nodelist: qgpu0404 + prio_process: "0" + procid: "0" + submit_dir: /gpfs/home/aye8078/Documents/Github/tac_vla/slurm + submit_host: quser43 + task_pid: "379968" + tasks_per_node: "1" + topology_addr: qgpu0404 + topology_addr_pattern: node + startedAt: "2025-08-05T10:55:39.023857Z" + writerId: yd2qhrsop30spzg78awqeiox3wxlduul + m: [] + python_version: 3.10.18 + t: + "1": + - 1 + - 41 + - 49 + - 51 + "2": + - 1 + - 41 + - 49 + - 51 + - 83 + "3": + - 5 + - 13 + - 14 + - 15 + - 16 + - 61 + - 62 + "4": 3.10.18 + "5": 0.21.0 + "10": + - 21 + "12": 0.21.0 + "13": linux-x86_64 +batch_size: + value: 8 +dataset: + value: + episodes: null + image_transforms: + enable: false + max_num_transforms: 3 + random_order: false + tfs: + brightness: + kwargs: + brightness: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + contrast: + kwargs: + contrast: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + hue: + kwargs: + hue: + - -0.05 + - 0.05 + type: ColorJitter + weight: 1 + saturation: + kwargs: + saturation: + - 0.5 + - 1.5 + type: ColorJitter + weight: 1 + sharpness: + kwargs: + sharpness: + - 0.5 + - 1.5 + type: SharpnessJitter + weight: 1 + repo_id: a1o/tac_insert + revision: null + root: null + use_imagenet_stats: true + video_backend: torchcodec +env: + value: null +eval: + value: + batch_size: 50 + n_episodes: 50 + use_async_envs: false +eval_freq: + value: 20000 +job_name: + value: diff_piper_tac_insert +log_freq: + value: 200 +num_workers: + value: 4 +optimizer: + value: + betas: + - 0.95 + - 0.999 + eps: 1e-08 + grad_clip_norm: 10 + lr: 0.0001 + type: adam + weight_decay: 1e-06 +output_dir: + value: outputs/train/diff_tac_insert +policy: + value: + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + beta_start: 0.0001 + clip_sample: true + clip_sample_range: 1 + crop_is_random: true + crop_shape: + - 84 + - 84 + device: cuda + diffusion_step_embed_dim: 128 + do_mask_loss_for_padding: false + down_dims: + - 512 + - 1024 + - 2048 + drop_n_last_frames: 7 + horizon: 16 + input_features: + observation.effort: + shape: + - 14 + type: STATE + observation.images.cam_high: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.images.cam_left_wrist: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.images.cam_right_wrist: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.qvel: + shape: + - 14 + type: STATE + observation.state: + shape: + - 14 + type: STATE + observation.tactile1: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.tactile2: + shape: + - 3 + - 480 + - 640 + type: VISUAL + kernel_size: 5 + license: null + n_action_steps: 8 + n_groups: 8 + n_obs_steps: 2 + noise_scheduler_type: DDPM + normalization_mapping: + ACTION: MIN_MAX + STATE: MIN_MAX + VISUAL: MEAN_STD + num_inference_steps: null + num_train_timesteps: 100 + optimizer_betas: + - 0.95 + - 0.999 + optimizer_eps: 1e-08 + optimizer_lr: 0.0001 + optimizer_weight_decay: 1e-06 + output_features: + action: + shape: + - 14 + type: ACTION + prediction_type: epsilon + pretrained_backbone_weights: null + private: null + push_to_hub: true + repo_id: a1o/diff_pick_tac_insert_policy + scheduler_name: cosine + scheduler_warmup_steps: 500 + spatial_softmax_num_keypoints: 32 + tags: null + type: diffusion + use_amp: false + use_film_scale_modulation: true + use_group_norm: true + use_separate_rgb_encoder_per_camera: false + vision_backbone: resnet18 +resume: + value: true +save_checkpoint: + value: true +save_freq: + value: 20000 +scheduler: + value: + name: cosine + num_warmup_steps: 500 + type: diffuser +seed: + value: 1000 +steps: + value: 100000 +use_policy_training_preset: + value: true +wandb: + value: + disable_artifact: false + enable: true + entity: null + mode: null + notes: null + project: lerobot + run_id: ftnu6goz diff --git a/wandb/run-20250805_055539-ftnu6goz/files/output.log b/wandb/run-20250805_055539-ftnu6goz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b2696a8469b095275a94a92d3da824d0c41aaace --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/files/output.log @@ -0,0 +1,245 @@ +Logs will be synced with wandb. +INFO 2025-08-05 05:55:42 db_utils.py:103 Track this run --> https://wandb.ai/a10v-1/lerobot/runs/ftnu6goz +INFO 2025-08-05 05:55:42 ts/train.py:127 Creating dataset +INFO 2025-08-05 05:55:46 ts/train.py:138 Creating policy +Loading weights from local directory +INFO 2025-08-05 05:55:51 ts/train.py:144 Creating optimizer and scheduler +INFO 2025-08-05 05:56:07 ts/train.py:156 Output dir: outputs/train/diff_tac_insert +INFO 2025-08-05 05:56:07 ts/train.py:159 cfg.steps=100000 (100K) +INFO 2025-08-05 05:56:07 ts/train.py:160 dataset.num_frames=50000 (50K) +INFO 2025-08-05 05:56:07 ts/train.py:161 dataset.num_episodes=100 +INFO 2025-08-05 05:56:07 ts/train.py:162 num_learnable_params=278120238 (278M) +INFO 2025-08-05 05:56:07 ts/train.py:163 num_total_params=278120408 (278M) +/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/torch/utils/data/dataloader.py:626: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 1, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. + warnings.warn( +INFO 2025-08-05 05:56:07 ts/train.py:202 Start offline training on a fixed dataset +INFO 2025-08-05 06:00:55 ts/train.py:232 step:60K smpl:482K ep:963 epch:9.63 loss:0.012 grdn:0.257 lr:3.5e-05 updt_s:0.205 data_s:1.232 +INFO 2025-08-05 06:05:34 ts/train.py:232 step:60K smpl:483K ep:966 epch:9.66 loss:0.010 grdn:0.229 lr:3.4e-05 updt_s:0.167 data_s:1.232 +INFO 2025-08-05 06:10:14 ts/train.py:232 step:61K smpl:485K ep:970 epch:9.70 loss:0.011 grdn:0.231 lr:3.4e-05 updt_s:0.166 data_s:1.232 +INFO 2025-08-05 06:14:54 ts/train.py:232 step:61K smpl:486K ep:973 epch:9.73 loss:0.010 grdn:0.249 lr:3.4e-05 updt_s:0.169 data_s:1.230 +INFO 2025-08-05 06:19:33 ts/train.py:232 step:61K smpl:488K ep:976 epch:9.76 loss:0.010 grdn:0.234 lr:3.4e-05 updt_s:0.170 data_s:1.223 +INFO 2025-08-05 06:24:18 ts/train.py:232 step:61K smpl:490K ep:979 epch:9.79 loss:0.012 grdn:0.254 lr:3.3e-05 updt_s:0.171 data_s:1.251 +INFO 2025-08-05 06:28:54 ts/train.py:232 step:61K smpl:491K ep:982 epch:9.82 loss:0.011 grdn:0.231 lr:3.3e-05 updt_s:0.166 data_s:1.215 +INFO 2025-08-05 06:33:28 ts/train.py:232 step:62K smpl:493K ep:986 epch:9.86 loss:0.010 grdn:0.241 lr:3.3e-05 updt_s:0.171 data_s:1.197 +INFO 2025-08-05 06:38:15 ts/train.py:232 step:62K smpl:494K ep:989 epch:9.89 loss:0.010 grdn:0.238 lr:3.2e-05 updt_s:0.171 data_s:1.267 +INFO 2025-08-05 06:42:59 ts/train.py:232 step:62K smpl:496K ep:992 epch:9.92 loss:0.010 grdn:0.249 lr:3.2e-05 updt_s:0.167 data_s:1.248 +INFO 2025-08-05 06:47:38 ts/train.py:232 step:62K smpl:498K ep:995 epch:9.95 loss:0.010 grdn:0.231 lr:3.2e-05 updt_s:0.172 data_s:1.222 +INFO 2025-08-05 06:52:19 ts/train.py:232 step:62K smpl:499K ep:998 epch:9.98 loss:0.010 grdn:0.243 lr:3.1e-05 updt_s:0.171 data_s:1.232 +INFO 2025-08-05 06:57:02 ts/train.py:232 step:63K smpl:501K ep:1K epch:10.02 loss:0.010 grdn:0.217 lr:3.1e-05 updt_s:0.166 data_s:1.248 +INFO 2025-08-05 07:01:41 ts/train.py:232 step:63K smpl:502K ep:1K epch:10.05 loss:0.009 grdn:0.224 lr:3.1e-05 updt_s:0.164 data_s:1.230 +INFO 2025-08-05 07:06:21 ts/train.py:232 step:63K smpl:504K ep:1K epch:10.08 loss:0.010 grdn:0.262 lr:3.1e-05 updt_s:0.169 data_s:1.230 +INFO 2025-08-05 07:11:05 ts/train.py:232 step:63K smpl:506K ep:1K epch:10.11 loss:0.010 grdn:0.228 lr:3.0e-05 updt_s:0.171 data_s:1.249 +INFO 2025-08-05 07:15:43 ts/train.py:232 step:63K smpl:507K ep:1K epch:10.14 loss:0.010 grdn:0.243 lr:3.0e-05 updt_s:0.166 data_s:1.222 +INFO 2025-08-05 07:20:20 ts/train.py:232 step:64K smpl:509K ep:1K epch:10.18 loss:0.011 grdn:0.241 lr:3.0e-05 updt_s:0.165 data_s:1.223 +INFO 2025-08-05 07:25:00 ts/train.py:232 step:64K smpl:510K ep:1K epch:10.21 loss:0.011 grdn:0.253 lr:2.9e-05 updt_s:0.176 data_s:1.219 +INFO 2025-08-05 07:29:35 ts/train.py:232 step:64K smpl:512K ep:1K epch:10.24 loss:0.010 grdn:0.240 lr:2.9e-05 updt_s:0.164 data_s:1.213 +INFO 2025-08-05 07:34:14 ts/train.py:232 step:64K smpl:514K ep:1K epch:10.27 loss:0.010 grdn:0.218 lr:2.9e-05 updt_s:0.161 data_s:1.231 +INFO 2025-08-05 07:38:55 ts/train.py:232 step:64K smpl:515K ep:1K epch:10.30 loss:0.008 grdn:0.204 lr:2.9e-05 updt_s:0.167 data_s:1.237 +INFO 2025-08-05 07:43:36 ts/train.py:232 step:65K smpl:517K ep:1K epch:10.34 loss:0.010 grdn:0.239 lr:2.8e-05 updt_s:0.167 data_s:1.240 +INFO 2025-08-05 07:48:17 ts/train.py:232 step:65K smpl:518K ep:1K epch:10.37 loss:0.010 grdn:0.230 lr:2.8e-05 updt_s:0.166 data_s:1.237 +INFO 2025-08-05 07:52:57 ts/train.py:232 step:65K smpl:520K ep:1K epch:10.40 loss:0.011 grdn:0.226 lr:2.8e-05 updt_s:0.167 data_s:1.233 +INFO 2025-08-05 07:57:36 ts/train.py:232 step:65K smpl:522K ep:1K epch:10.43 loss:0.010 grdn:0.239 lr:2.7e-05 updt_s:0.170 data_s:1.223 +INFO 2025-08-05 08:02:18 ts/train.py:232 step:65K smpl:523K ep:1K epch:10.46 loss:0.010 grdn:0.225 lr:2.7e-05 updt_s:0.171 data_s:1.237 +INFO 2025-08-05 08:06:56 ts/train.py:232 step:66K smpl:525K ep:1K epch:10.50 loss:0.011 grdn:0.239 lr:2.7e-05 updt_s:0.169 data_s:1.220 +INFO 2025-08-05 08:11:35 ts/train.py:232 step:66K smpl:526K ep:1K epch:10.53 loss:0.010 grdn:0.253 lr:2.7e-05 updt_s:0.164 data_s:1.233 +INFO 2025-08-05 08:16:15 ts/train.py:232 step:66K smpl:528K ep:1K epch:10.56 loss:0.009 grdn:0.237 lr:2.6e-05 updt_s:0.160 data_s:1.235 +INFO 2025-08-05 08:21:02 ts/train.py:232 step:66K smpl:530K ep:1K epch:10.59 loss:0.009 grdn:0.226 lr:2.6e-05 updt_s:0.170 data_s:1.267 +INFO 2025-08-05 08:25:56 ts/train.py:232 step:66K smpl:531K ep:1K epch:10.62 loss:0.011 grdn:0.256 lr:2.6e-05 updt_s:0.172 data_s:1.297 +INFO 2025-08-05 08:30:54 ts/train.py:232 step:67K smpl:533K ep:1K epch:10.66 loss:0.010 grdn:0.220 lr:2.5e-05 updt_s:0.173 data_s:1.317 +INFO 2025-08-05 08:35:49 ts/train.py:232 step:67K smpl:534K ep:1K epch:10.69 loss:0.010 grdn:0.232 lr:2.5e-05 updt_s:0.175 data_s:1.297 +INFO 2025-08-05 08:40:39 ts/train.py:232 step:67K smpl:536K ep:1K epch:10.72 loss:0.010 grdn:0.234 lr:2.5e-05 updt_s:0.170 data_s:1.278 +INFO 2025-08-05 08:45:26 ts/train.py:232 step:67K smpl:538K ep:1K epch:10.75 loss:0.009 grdn:0.229 lr:2.5e-05 updt_s:0.173 data_s:1.262 +INFO 2025-08-05 08:50:20 ts/train.py:232 step:67K smpl:539K ep:1K epch:10.78 loss:0.011 grdn:0.257 lr:2.4e-05 updt_s:0.173 data_s:1.297 +INFO 2025-08-05 08:55:08 ts/train.py:232 step:68K smpl:541K ep:1K epch:10.82 loss:0.009 grdn:0.222 lr:2.4e-05 updt_s:0.168 data_s:1.271 +INFO 2025-08-05 08:59:44 ts/train.py:232 step:68K smpl:542K ep:1K epch:10.85 loss:0.010 grdn:0.236 lr:2.4e-05 updt_s:0.169 data_s:1.213 +INFO 2025-08-05 09:04:23 ts/train.py:232 step:68K smpl:544K ep:1K epch:10.88 loss:0.010 grdn:0.225 lr:2.4e-05 updt_s:0.173 data_s:1.220 +INFO 2025-08-05 09:09:02 ts/train.py:232 step:68K smpl:546K ep:1K epch:10.91 loss:0.010 grdn:0.241 lr:2.3e-05 updt_s:0.172 data_s:1.222 +INFO 2025-08-05 09:13:41 ts/train.py:232 step:68K smpl:547K ep:1K epch:10.94 loss:0.010 grdn:0.231 lr:2.3e-05 updt_s:0.172 data_s:1.219 +INFO 2025-08-05 09:18:17 ts/train.py:232 step:69K smpl:549K ep:1K epch:10.98 loss:0.010 grdn:0.247 lr:2.3e-05 updt_s:0.169 data_s:1.212 +INFO 2025-08-05 09:22:53 ts/train.py:232 step:69K smpl:550K ep:1K epch:11.01 loss:0.010 grdn:0.239 lr:2.2e-05 updt_s:0.176 data_s:1.204 +INFO 2025-08-05 09:27:33 ts/train.py:232 step:69K smpl:552K ep:1K epch:11.04 loss:0.010 grdn:0.251 lr:2.2e-05 updt_s:0.174 data_s:1.222 +INFO 2025-08-05 09:32:09 ts/train.py:232 step:69K smpl:554K ep:1K epch:11.07 loss:0.011 grdn:0.228 lr:2.2e-05 updt_s:0.178 data_s:1.200 +INFO 2025-08-05 09:36:47 ts/train.py:232 step:69K smpl:555K ep:1K epch:11.10 loss:0.009 grdn:0.216 lr:2.2e-05 updt_s:0.177 data_s:1.215 +INFO 2025-08-05 09:41:29 ts/train.py:232 step:70K smpl:557K ep:1K epch:11.14 loss:0.009 grdn:0.221 lr:2.1e-05 updt_s:0.170 data_s:1.237 +INFO 2025-08-05 09:46:05 ts/train.py:232 step:70K smpl:558K ep:1K epch:11.17 loss:0.009 grdn:0.227 lr:2.1e-05 updt_s:0.175 data_s:1.202 +INFO 2025-08-05 09:50:46 ts/train.py:232 step:70K smpl:560K ep:1K epch:11.20 loss:0.010 grdn:0.226 lr:2.1e-05 updt_s:0.176 data_s:1.228 +INFO 2025-08-05 09:55:25 ts/train.py:232 step:70K smpl:562K ep:1K epch:11.23 loss:0.010 grdn:0.235 lr:2.1e-05 updt_s:0.177 data_s:1.221 +INFO 2025-08-05 10:00:05 ts/train.py:232 step:70K smpl:563K ep:1K epch:11.26 loss:0.010 grdn:0.250 lr:2.0e-05 updt_s:0.170 data_s:1.228 +INFO 2025-08-05 10:04:48 ts/train.py:232 step:71K smpl:565K ep:1K epch:11.30 loss:0.010 grdn:0.228 lr:2.0e-05 updt_s:0.172 data_s:1.242 +INFO 2025-08-05 10:09:27 ts/train.py:232 step:71K smpl:566K ep:1K epch:11.33 loss:0.009 grdn:0.217 lr:2.0e-05 updt_s:0.173 data_s:1.219 +INFO 2025-08-05 10:14:06 ts/train.py:232 step:71K smpl:568K ep:1K epch:11.36 loss:0.008 grdn:0.210 lr:2.0e-05 updt_s:0.175 data_s:1.221 +INFO 2025-08-05 10:18:48 ts/train.py:232 step:71K smpl:570K ep:1K epch:11.39 loss:0.011 grdn:0.234 lr:1.9e-05 updt_s:0.178 data_s:1.229 +INFO 2025-08-05 10:23:26 ts/train.py:232 step:71K smpl:571K ep:1K epch:11.42 loss:0.009 grdn:0.234 lr:1.9e-05 updt_s:0.171 data_s:1.219 +INFO 2025-08-05 10:28:06 ts/train.py:232 step:72K smpl:573K ep:1K epch:11.46 loss:0.009 grdn:0.209 lr:1.9e-05 updt_s:0.173 data_s:1.229 +INFO 2025-08-05 10:32:47 ts/train.py:232 step:72K smpl:574K ep:1K epch:11.49 loss:0.010 grdn:0.253 lr:1.9e-05 updt_s:0.170 data_s:1.233 +INFO 2025-08-05 10:37:26 ts/train.py:232 step:72K smpl:576K ep:1K epch:11.52 loss:0.009 grdn:0.220 lr:1.8e-05 updt_s:0.174 data_s:1.221 +INFO 2025-08-05 10:42:07 ts/train.py:232 step:72K smpl:578K ep:1K epch:11.55 loss:0.009 grdn:0.225 lr:1.8e-05 updt_s:0.177 data_s:1.222 +INFO 2025-08-05 10:46:56 ts/train.py:232 step:72K smpl:579K ep:1K epch:11.58 loss:0.009 grdn:0.232 lr:1.8e-05 updt_s:0.173 data_s:1.271 +INFO 2025-08-05 10:51:42 ts/train.py:232 step:73K smpl:581K ep:1K epch:11.62 loss:0.009 grdn:0.224 lr:1.8e-05 updt_s:0.175 data_s:1.256 +INFO 2025-08-05 10:56:31 ts/train.py:232 step:73K smpl:582K ep:1K epch:11.65 loss:0.010 grdn:0.242 lr:1.7e-05 updt_s:0.177 data_s:1.268 +INFO 2025-08-05 11:01:17 ts/train.py:232 step:73K smpl:584K ep:1K epch:11.68 loss:0.009 grdn:0.230 lr:1.7e-05 updt_s:0.177 data_s:1.253 +INFO 2025-08-05 11:06:04 ts/train.py:232 step:73K smpl:586K ep:1K epch:11.71 loss:0.009 grdn:0.221 lr:1.7e-05 updt_s:0.177 data_s:1.253 +INFO 2025-08-05 11:10:51 ts/train.py:232 step:73K smpl:587K ep:1K epch:11.74 loss:0.009 grdn:0.215 lr:1.7e-05 updt_s:0.177 data_s:1.260 +INFO 2025-08-05 11:15:37 ts/train.py:232 step:74K smpl:589K ep:1K epch:11.78 loss:0.010 grdn:0.239 lr:1.7e-05 updt_s:0.175 data_s:1.253 +INFO 2025-08-05 11:20:27 ts/train.py:232 step:74K smpl:590K ep:1K epch:11.81 loss:0.009 grdn:0.223 lr:1.6e-05 updt_s:0.174 data_s:1.277 +INFO 2025-08-05 11:25:13 ts/train.py:232 step:74K smpl:592K ep:1K epch:11.84 loss:0.009 grdn:0.218 lr:1.6e-05 updt_s:0.175 data_s:1.250 +INFO 2025-08-05 11:30:03 ts/train.py:232 step:74K smpl:594K ep:1K epch:11.87 loss:0.009 grdn:0.224 lr:1.6e-05 updt_s:0.174 data_s:1.275 +INFO 2025-08-05 11:34:52 ts/train.py:232 step:74K smpl:595K ep:1K epch:11.90 loss:0.009 grdn:0.221 lr:1.6e-05 updt_s:0.169 data_s:1.276 +INFO 2025-08-05 11:39:35 ts/train.py:232 step:75K smpl:597K ep:1K epch:11.94 loss:0.009 grdn:0.228 lr:1.5e-05 updt_s:0.173 data_s:1.241 +INFO 2025-08-05 11:44:24 ts/train.py:232 step:75K smpl:598K ep:1K epch:11.97 loss:0.009 grdn:0.227 lr:1.5e-05 updt_s:0.174 data_s:1.271 +INFO 2025-08-05 11:49:08 ts/train.py:232 step:75K smpl:600K ep:1K epch:12.00 loss:0.009 grdn:0.224 lr:1.5e-05 updt_s:0.170 data_s:1.252 +INFO 2025-08-05 11:53:53 ts/train.py:232 step:75K smpl:602K ep:1K epch:12.03 loss:0.009 grdn:0.223 lr:1.5e-05 updt_s:0.171 data_s:1.251 +INFO 2025-08-05 11:58:42 ts/train.py:232 step:75K smpl:603K ep:1K epch:12.06 loss:0.010 grdn:0.251 lr:1.4e-05 updt_s:0.178 data_s:1.266 +INFO 2025-08-05 12:03:31 ts/train.py:232 step:76K smpl:605K ep:1K epch:12.10 loss:0.009 grdn:0.220 lr:1.4e-05 updt_s:0.178 data_s:1.266 +INFO 2025-08-05 12:08:13 ts/train.py:232 step:76K smpl:606K ep:1K epch:12.13 loss:0.009 grdn:0.221 lr:1.4e-05 updt_s:0.172 data_s:1.239 +INFO 2025-08-05 12:12:56 ts/train.py:232 step:76K smpl:608K ep:1K epch:12.16 loss:0.009 grdn:0.228 lr:1.4e-05 updt_s:0.180 data_s:1.233 +INFO 2025-08-05 12:17:44 ts/train.py:232 step:76K smpl:610K ep:1K epch:12.19 loss:0.009 grdn:0.225 lr:1.4e-05 updt_s:0.173 data_s:1.264 +INFO 2025-08-05 12:22:29 ts/train.py:232 step:76K smpl:611K ep:1K epch:12.22 loss:0.009 grdn:0.236 lr:1.3e-05 updt_s:0.174 data_s:1.253 +INFO 2025-08-05 12:27:13 ts/train.py:232 step:77K smpl:613K ep:1K epch:12.26 loss:0.009 grdn:0.233 lr:1.3e-05 updt_s:0.170 data_s:1.252 +INFO 2025-08-05 12:31:56 ts/train.py:232 step:77K smpl:614K ep:1K epch:12.29 loss:0.009 grdn:0.224 lr:1.3e-05 updt_s:0.175 data_s:1.236 +INFO 2025-08-05 12:36:44 ts/train.py:232 step:77K smpl:616K ep:1K epch:12.32 loss:0.008 grdn:0.206 lr:1.3e-05 updt_s:0.170 data_s:1.270 +INFO 2025-08-05 12:41:33 ts/train.py:232 step:77K smpl:618K ep:1K epch:12.35 loss:0.009 grdn:0.226 lr:1.3e-05 updt_s:0.168 data_s:1.277 +INFO 2025-08-05 12:46:21 ts/train.py:232 step:77K smpl:619K ep:1K epch:12.38 loss:0.009 grdn:0.228 lr:1.2e-05 updt_s:0.171 data_s:1.266 +INFO 2025-08-05 12:51:10 ts/train.py:232 step:78K smpl:621K ep:1K epch:12.42 loss:0.010 grdn:0.236 lr:1.2e-05 updt_s:0.168 data_s:1.277 +INFO 2025-08-05 12:55:55 ts/train.py:232 step:78K smpl:622K ep:1K epch:12.45 loss:0.010 grdn:0.230 lr:1.2e-05 updt_s:0.166 data_s:1.260 +INFO 2025-08-05 13:00:46 ts/train.py:232 step:78K smpl:624K ep:1K epch:12.48 loss:0.009 grdn:0.235 lr:1.2e-05 updt_s:0.169 data_s:1.284 +INFO 2025-08-05 13:05:35 ts/train.py:232 step:78K smpl:626K ep:1K epch:12.51 loss:0.009 grdn:0.226 lr:1.1e-05 updt_s:0.167 data_s:1.274 +INFO 2025-08-05 13:10:21 ts/train.py:232 step:78K smpl:627K ep:1K epch:12.54 loss:0.008 grdn:0.208 lr:1.1e-05 updt_s:0.166 data_s:1.265 +INFO 2025-08-05 13:15:09 ts/train.py:232 step:79K smpl:629K ep:1K epch:12.58 loss:0.009 grdn:0.232 lr:1.1e-05 updt_s:0.172 data_s:1.265 +INFO 2025-08-05 13:20:02 ts/train.py:232 step:79K smpl:630K ep:1K epch:12.61 loss:0.009 grdn:0.220 lr:1.1e-05 updt_s:0.181 data_s:1.286 +INFO 2025-08-05 13:24:50 ts/train.py:232 step:79K smpl:632K ep:1K epch:12.64 loss:0.009 grdn:0.229 lr:1.1e-05 updt_s:0.172 data_s:1.266 +INFO 2025-08-05 13:29:40 ts/train.py:232 step:79K smpl:634K ep:1K epch:12.67 loss:0.009 grdn:0.231 lr:1.0e-05 updt_s:0.178 data_s:1.272 +INFO 2025-08-05 13:34:31 ts/train.py:232 step:79K smpl:635K ep:1K epch:12.70 loss:0.009 grdn:0.246 lr:1.0e-05 updt_s:0.174 data_s:1.281 +INFO 2025-08-05 13:39:22 ts/train.py:232 step:80K smpl:637K ep:1K epch:12.74 loss:0.009 grdn:0.204 lr:1.0e-05 updt_s:0.175 data_s:1.277 +INFO 2025-08-05 13:44:12 ts/train.py:232 step:80K smpl:638K ep:1K epch:12.77 loss:0.010 grdn:0.222 lr:9.9e-06 updt_s:0.173 data_s:1.276 +INFO 2025-08-05 13:49:00 ts/train.py:232 step:80K smpl:640K ep:1K epch:12.80 loss:0.010 grdn:0.236 lr:9.7e-06 updt_s:0.175 data_s:1.265 +INFO 2025-08-05 13:49:00 ts/train.py:241 Checkpoint policy after step 80000 +INFO 2025-08-05 13:53:59 ts/train.py:232 step:80K smpl:642K ep:1K epch:12.83 loss:0.009 grdn:0.210 lr:9.5e-06 updt_s:0.180 data_s:1.248 +INFO 2025-08-05 13:58:47 ts/train.py:232 step:80K smpl:643K ep:1K epch:12.86 loss:0.009 grdn:0.228 lr:9.4e-06 updt_s:0.179 data_s:1.261 +INFO 2025-08-05 14:03:34 ts/train.py:232 step:81K smpl:645K ep:1K epch:12.90 loss:0.008 grdn:0.205 lr:9.2e-06 updt_s:0.180 data_s:1.255 +INFO 2025-08-05 14:08:18 ts/train.py:232 step:81K smpl:646K ep:1K epch:12.93 loss:0.009 grdn:0.218 lr:9.0e-06 updt_s:0.182 data_s:1.237 +INFO 2025-08-05 14:13:04 ts/train.py:232 step:81K smpl:648K ep:1K epch:12.96 loss:0.009 grdn:0.232 lr:8.8e-06 updt_s:0.173 data_s:1.258 +INFO 2025-08-05 14:17:46 ts/train.py:232 step:81K smpl:650K ep:1K epch:12.99 loss:0.009 grdn:0.221 lr:8.6e-06 updt_s:0.169 data_s:1.239 +INFO 2025-08-05 14:22:28 ts/train.py:232 step:81K smpl:651K ep:1K epch:13.02 loss:0.008 grdn:0.217 lr:8.5e-06 updt_s:0.169 data_s:1.239 +INFO 2025-08-05 14:27:13 ts/train.py:232 step:82K smpl:653K ep:1K epch:13.06 loss:0.009 grdn:0.239 lr:8.3e-06 updt_s:0.174 data_s:1.247 +INFO 2025-08-05 14:32:00 ts/train.py:232 step:82K smpl:654K ep:1K epch:13.09 loss:0.009 grdn:0.219 lr:8.1e-06 updt_s:0.175 data_s:1.260 +INFO 2025-08-05 14:36:40 ts/train.py:232 step:82K smpl:656K ep:1K epch:13.12 loss:0.008 grdn:0.202 lr:7.9e-06 updt_s:0.170 data_s:1.233 +INFO 2025-08-05 14:41:25 ts/train.py:232 step:82K smpl:658K ep:1K epch:13.15 loss:0.007 grdn:0.202 lr:7.8e-06 updt_s:0.168 data_s:1.254 +INFO 2025-08-05 14:46:05 ts/train.py:232 step:82K smpl:659K ep:1K epch:13.18 loss:0.009 grdn:0.227 lr:7.6e-06 updt_s:0.173 data_s:1.227 +INFO 2025-08-05 14:50:48 ts/train.py:232 step:83K smpl:661K ep:1K epch:13.22 loss:0.009 grdn:0.227 lr:7.4e-06 updt_s:0.177 data_s:1.236 +INFO 2025-08-05 14:55:35 ts/train.py:232 step:83K smpl:662K ep:1K epch:13.25 loss:0.008 grdn:0.223 lr:7.3e-06 updt_s:0.176 data_s:1.260 +INFO 2025-08-05 15:00:17 ts/train.py:232 step:83K smpl:664K ep:1K epch:13.28 loss:0.009 grdn:0.214 lr:7.1e-06 updt_s:0.174 data_s:1.233 +INFO 2025-08-05 15:05:02 ts/train.py:232 step:83K smpl:666K ep:1K epch:13.31 loss:0.009 grdn:0.238 lr:7.0e-06 updt_s:0.173 data_s:1.249 +INFO 2025-08-05 15:09:46 ts/train.py:232 step:83K smpl:667K ep:1K epch:13.34 loss:0.008 grdn:0.223 lr:6.8e-06 updt_s:0.170 data_s:1.250 +INFO 2025-08-05 15:14:29 ts/train.py:232 step:84K smpl:669K ep:1K epch:13.38 loss:0.009 grdn:0.227 lr:6.6e-06 updt_s:0.176 data_s:1.240 +INFO 2025-08-05 15:19:14 ts/train.py:232 step:84K smpl:670K ep:1K epch:13.41 loss:0.008 grdn:0.212 lr:6.5e-06 updt_s:0.180 data_s:1.245 +INFO 2025-08-05 15:23:58 ts/train.py:232 step:84K smpl:672K ep:1K epch:13.44 loss:0.009 grdn:0.238 lr:6.3e-06 updt_s:0.168 data_s:1.249 +INFO 2025-08-05 15:28:40 ts/train.py:232 step:84K smpl:674K ep:1K epch:13.47 loss:0.009 grdn:0.223 lr:6.2e-06 updt_s:0.178 data_s:1.231 +INFO 2025-08-05 15:33:20 ts/train.py:232 step:84K smpl:675K ep:1K epch:13.50 loss:0.008 grdn:0.223 lr:6.0e-06 updt_s:0.176 data_s:1.226 +INFO 2025-08-05 15:38:06 ts/train.py:232 step:85K smpl:677K ep:1K epch:13.54 loss:0.008 grdn:0.212 lr:5.9e-06 updt_s:0.174 data_s:1.251 +INFO 2025-08-05 15:42:55 ts/train.py:232 step:85K smpl:678K ep:1K epch:13.57 loss:0.008 grdn:0.214 lr:5.7e-06 updt_s:0.175 data_s:1.269 +INFO 2025-08-05 15:47:51 ts/train.py:232 step:85K smpl:680K ep:1K epch:13.60 loss:0.009 grdn:0.216 lr:5.6e-06 updt_s:0.174 data_s:1.305 +INFO 2025-08-05 15:52:46 ts/train.py:232 step:85K smpl:682K ep:1K epch:13.63 loss:0.008 grdn:0.221 lr:5.4e-06 updt_s:0.175 data_s:1.299 +INFO 2025-08-05 15:57:37 ts/train.py:232 step:85K smpl:683K ep:1K epch:13.66 loss:0.009 grdn:0.246 lr:5.3e-06 updt_s:0.176 data_s:1.281 +INFO 2025-08-05 16:02:36 ts/train.py:232 step:86K smpl:685K ep:1K epch:13.70 loss:0.008 grdn:0.210 lr:5.1e-06 updt_s:0.177 data_s:1.313 +INFO 2025-08-05 16:07:32 ts/train.py:232 step:86K smpl:686K ep:1K epch:13.73 loss:0.009 grdn:0.236 lr:5.0e-06 updt_s:0.172 data_s:1.308 +INFO 2025-08-05 16:12:25 ts/train.py:232 step:86K smpl:688K ep:1K epch:13.76 loss:0.009 grdn:0.231 lr:4.9e-06 updt_s:0.175 data_s:1.290 +INFO 2025-08-05 16:17:19 ts/train.py:232 step:86K smpl:690K ep:1K epch:13.79 loss:0.009 grdn:0.225 lr:4.7e-06 updt_s:0.175 data_s:1.293 +INFO 2025-08-05 16:22:11 ts/train.py:232 step:86K smpl:691K ep:1K epch:13.82 loss:0.008 grdn:0.218 lr:4.6e-06 updt_s:0.178 data_s:1.284 +INFO 2025-08-05 16:27:01 ts/train.py:232 step:87K smpl:693K ep:1K epch:13.86 loss:0.008 grdn:0.221 lr:4.5e-06 updt_s:0.174 data_s:1.273 +INFO 2025-08-05 16:31:55 ts/train.py:232 step:87K smpl:694K ep:1K epch:13.89 loss:0.008 grdn:0.214 lr:4.3e-06 updt_s:0.176 data_s:1.293 +INFO 2025-08-05 16:36:49 ts/train.py:232 step:87K smpl:696K ep:1K epch:13.92 loss:0.009 grdn:0.214 lr:4.2e-06 updt_s:0.175 data_s:1.298 +INFO 2025-08-05 16:41:48 ts/train.py:232 step:87K smpl:698K ep:1K epch:13.95 loss:0.008 grdn:0.209 lr:4.1e-06 updt_s:0.172 data_s:1.322 +INFO 2025-08-05 16:46:41 ts/train.py:232 step:87K smpl:699K ep:1K epch:13.98 loss:0.009 grdn:0.213 lr:4.0e-06 updt_s:0.169 data_s:1.293 +INFO 2025-08-05 16:51:36 ts/train.py:232 step:88K smpl:701K ep:1K epch:14.02 loss:0.008 grdn:0.213 lr:3.8e-06 updt_s:0.173 data_s:1.304 +INFO 2025-08-05 16:56:32 ts/train.py:232 step:88K smpl:702K ep:1K epch:14.05 loss:0.008 grdn:0.235 lr:3.7e-06 updt_s:0.167 data_s:1.311 +INFO 2025-08-05 17:01:21 ts/train.py:232 step:88K smpl:704K ep:1K epch:14.08 loss:0.008 grdn:0.207 lr:3.6e-06 updt_s:0.169 data_s:1.274 +INFO 2025-08-05 17:06:15 ts/train.py:232 step:88K smpl:706K ep:1K epch:14.11 loss:0.009 grdn:0.247 lr:3.5e-06 updt_s:0.168 data_s:1.301 +INFO 2025-08-05 17:11:08 ts/train.py:232 step:88K smpl:707K ep:1K epch:14.14 loss:0.009 grdn:0.216 lr:3.4e-06 updt_s:0.170 data_s:1.295 +INFO 2025-08-05 17:15:59 ts/train.py:232 step:89K smpl:709K ep:1K epch:14.18 loss:0.008 grdn:0.219 lr:3.3e-06 updt_s:0.169 data_s:1.285 +INFO 2025-08-05 17:20:48 ts/train.py:232 step:89K smpl:710K ep:1K epch:14.21 loss:0.008 grdn:0.213 lr:3.1e-06 updt_s:0.166 data_s:1.279 +INFO 2025-08-05 17:25:39 ts/train.py:232 step:89K smpl:712K ep:1K epch:14.24 loss:0.009 grdn:0.214 lr:3.0e-06 updt_s:0.167 data_s:1.283 +INFO 2025-08-05 17:30:27 ts/train.py:232 step:89K smpl:714K ep:1K epch:14.27 loss:0.008 grdn:0.207 lr:2.9e-06 updt_s:0.166 data_s:1.274 +INFO 2025-08-05 17:35:16 ts/train.py:232 step:89K smpl:715K ep:1K epch:14.30 loss:0.008 grdn:0.206 lr:2.8e-06 updt_s:0.166 data_s:1.281 +INFO 2025-08-05 17:40:09 ts/train.py:232 step:90K smpl:717K ep:1K epch:14.34 loss:0.008 grdn:0.235 lr:2.7e-06 updt_s:0.171 data_s:1.294 +INFO 2025-08-05 17:45:01 ts/train.py:232 step:90K smpl:718K ep:1K epch:14.37 loss:0.008 grdn:0.204 lr:2.6e-06 updt_s:0.172 data_s:1.287 +INFO 2025-08-05 17:49:54 ts/train.py:232 step:90K smpl:720K ep:1K epch:14.40 loss:0.009 grdn:0.238 lr:2.5e-06 updt_s:0.163 data_s:1.297 +INFO 2025-08-05 17:54:45 ts/train.py:232 step:90K smpl:722K ep:1K epch:14.43 loss:0.009 grdn:0.229 lr:2.4e-06 updt_s:0.170 data_s:1.289 +INFO 2025-08-05 17:59:41 ts/train.py:232 step:90K smpl:723K ep:1K epch:14.46 loss:0.009 grdn:0.220 lr:2.3e-06 updt_s:0.170 data_s:1.308 +INFO 2025-08-05 18:04:31 ts/train.py:232 step:91K smpl:725K ep:1K epch:14.50 loss:0.009 grdn:0.229 lr:2.2e-06 updt_s:0.171 data_s:1.278 +INFO 2025-08-05 18:09:23 ts/train.py:232 step:91K smpl:726K ep:1K epch:14.53 loss:0.008 grdn:0.196 lr:2.1e-06 updt_s:0.168 data_s:1.293 +INFO 2025-08-05 18:14:20 ts/train.py:232 step:91K smpl:728K ep:1K epch:14.56 loss:0.009 grdn:0.215 lr:2.0e-06 updt_s:0.177 data_s:1.303 +INFO 2025-08-05 18:19:13 ts/train.py:232 step:91K smpl:730K ep:1K epch:14.59 loss:0.009 grdn:0.225 lr:2.0e-06 updt_s:0.178 data_s:1.288 +INFO 2025-08-05 18:24:04 ts/train.py:232 step:91K smpl:731K ep:1K epch:14.62 loss:0.008 grdn:0.231 lr:1.9e-06 updt_s:0.173 data_s:1.281 +INFO 2025-08-05 18:28:45 ts/train.py:232 step:92K smpl:733K ep:1K epch:14.66 loss:0.008 grdn:0.212 lr:1.8e-06 updt_s:0.172 data_s:1.234 +INFO 2025-08-05 18:33:25 ts/train.py:232 step:92K smpl:734K ep:1K epch:14.69 loss:0.009 grdn:0.231 lr:1.7e-06 updt_s:0.171 data_s:1.228 +INFO 2025-08-05 18:38:08 ts/train.py:232 step:92K smpl:736K ep:1K epch:14.72 loss:0.008 grdn:0.206 lr:1.6e-06 updt_s:0.177 data_s:1.236 +INFO 2025-08-05 18:42:49 ts/train.py:232 step:92K smpl:738K ep:1K epch:14.75 loss:0.008 grdn:0.206 lr:1.5e-06 updt_s:0.173 data_s:1.230 +INFO 2025-08-05 18:47:29 ts/train.py:232 step:92K smpl:739K ep:1K epch:14.78 loss:0.008 grdn:0.202 lr:1.5e-06 updt_s:0.172 data_s:1.230 +INFO 2025-08-05 18:52:09 ts/train.py:232 step:93K smpl:741K ep:1K epch:14.82 loss:0.009 grdn:0.219 lr:1.4e-06 updt_s:0.171 data_s:1.223 +INFO 2025-08-05 18:56:50 ts/train.py:232 step:93K smpl:742K ep:1K epch:14.85 loss:0.008 grdn:0.209 lr:1.3e-06 updt_s:0.168 data_s:1.239 +INFO 2025-08-05 19:01:32 ts/train.py:232 step:93K smpl:744K ep:1K epch:14.88 loss:0.008 grdn:0.217 lr:1.3e-06 updt_s:0.171 data_s:1.238 +INFO 2025-08-05 19:06:15 ts/train.py:232 step:93K smpl:746K ep:1K epch:14.91 loss:0.008 grdn:0.197 lr:1.2e-06 updt_s:0.169 data_s:1.245 +INFO 2025-08-05 19:10:58 ts/train.py:232 step:93K smpl:747K ep:1K epch:14.94 loss:0.008 grdn:0.187 lr:1.1e-06 updt_s:0.169 data_s:1.243 +INFO 2025-08-05 19:15:41 ts/train.py:232 step:94K smpl:749K ep:1K epch:14.98 loss:0.008 grdn:0.208 lr:1.0e-06 updt_s:0.174 data_s:1.240 +INFO 2025-08-05 19:20:23 ts/train.py:232 step:94K smpl:750K ep:2K epch:15.01 loss:0.008 grdn:0.212 lr:9.9e-07 updt_s:0.172 data_s:1.235 +INFO 2025-08-05 19:25:06 ts/train.py:232 step:94K smpl:752K ep:2K epch:15.04 loss:0.008 grdn:0.217 lr:9.2e-07 updt_s:0.170 data_s:1.247 +INFO 2025-08-05 19:29:49 ts/train.py:232 step:94K smpl:754K ep:2K epch:15.07 loss:0.009 grdn:0.225 lr:8.6e-07 updt_s:0.169 data_s:1.246 +INFO 2025-08-05 19:34:28 ts/train.py:232 step:94K smpl:755K ep:2K epch:15.10 loss:0.008 grdn:0.220 lr:8.1e-07 updt_s:0.174 data_s:1.217 +INFO 2025-08-05 19:39:12 ts/train.py:232 step:95K smpl:757K ep:2K epch:15.14 loss:0.009 grdn:0.215 lr:7.5e-07 updt_s:0.175 data_s:1.248 +INFO 2025-08-05 19:43:54 ts/train.py:232 step:95K smpl:758K ep:2K epch:15.17 loss:0.007 grdn:0.203 lr:7.0e-07 updt_s:0.164 data_s:1.245 +INFO 2025-08-05 19:48:49 ts/train.py:232 step:95K smpl:760K ep:2K epch:15.20 loss:0.008 grdn:0.222 lr:6.5e-07 updt_s:0.169 data_s:1.304 +INFO 2025-08-05 19:55:17 ts/train.py:232 step:95K smpl:762K ep:2K epch:15.23 loss:0.008 grdn:0.207 lr:6.0e-07 updt_s:0.204 data_s:1.732 +INFO 2025-08-05 20:01:42 ts/train.py:232 step:95K smpl:763K ep:2K epch:15.26 loss:0.007 grdn:0.209 lr:5.5e-07 updt_s:0.211 data_s:1.717 +INFO 2025-08-05 20:08:13 ts/train.py:232 step:96K smpl:765K ep:2K epch:15.30 loss:0.008 grdn:0.220 lr:5.0e-07 updt_s:0.212 data_s:1.740 +INFO 2025-08-05 20:14:50 ts/train.py:232 step:96K smpl:766K ep:2K epch:15.33 loss:0.009 grdn:0.239 lr:4.6e-07 updt_s:0.209 data_s:1.773 +INFO 2025-08-05 20:21:21 ts/train.py:232 step:96K smpl:768K ep:2K epch:15.36 loss:0.008 grdn:0.206 lr:4.2e-07 updt_s:0.208 data_s:1.748 +INFO 2025-08-05 20:27:56 ts/train.py:232 step:96K smpl:770K ep:2K epch:15.39 loss:0.009 grdn:0.221 lr:3.8e-07 updt_s:0.204 data_s:1.770 +INFO 2025-08-05 20:34:24 ts/train.py:232 step:96K smpl:771K ep:2K epch:15.42 loss:0.008 grdn:0.213 lr:3.4e-07 updt_s:0.205 data_s:1.732 +INFO 2025-08-05 20:41:01 ts/train.py:232 step:97K smpl:773K ep:2K epch:15.46 loss:0.008 grdn:0.201 lr:3.0e-07 updt_s:0.210 data_s:1.775 +INFO 2025-08-05 20:47:36 ts/train.py:232 step:97K smpl:774K ep:2K epch:15.49 loss:0.009 grdn:0.226 lr:2.7e-07 updt_s:0.210 data_s:1.761 +INFO 2025-08-05 20:54:08 ts/train.py:232 step:97K smpl:776K ep:2K epch:15.52 loss:0.008 grdn:0.229 lr:2.4e-07 updt_s:0.211 data_s:1.752 +INFO 2025-08-05 21:00:42 ts/train.py:232 step:97K smpl:778K ep:2K epch:15.55 loss:0.008 grdn:0.199 lr:2.1e-07 updt_s:0.213 data_s:1.756 +INFO 2025-08-05 21:07:12 ts/train.py:232 step:97K smpl:779K ep:2K epch:15.58 loss:0.009 grdn:0.227 lr:1.8e-07 updt_s:0.203 data_s:1.744 +INFO 2025-08-05 21:13:44 ts/train.py:232 step:98K smpl:781K ep:2K epch:15.62 loss:0.008 grdn:0.204 lr:1.6e-07 updt_s:0.205 data_s:1.755 +INFO 2025-08-05 21:20:19 ts/train.py:232 step:98K smpl:782K ep:2K epch:15.65 loss:0.008 grdn:0.210 lr:1.3e-07 updt_s:0.205 data_s:1.764 +INFO 2025-08-05 21:26:52 ts/train.py:232 step:98K smpl:784K ep:2K epch:15.68 loss:0.009 grdn:0.211 lr:1.1e-07 updt_s:0.202 data_s:1.765 +INFO 2025-08-05 21:33:24 ts/train.py:232 step:98K smpl:786K ep:2K epch:15.71 loss:0.007 grdn:0.211 lr:9.0e-08 updt_s:0.201 data_s:1.761 +INFO 2025-08-05 21:40:03 ts/train.py:232 step:98K smpl:787K ep:2K epch:15.74 loss:0.008 grdn:0.218 lr:7.2e-08 updt_s:0.204 data_s:1.786 +INFO 2025-08-05 21:46:32 ts/train.py:232 step:99K smpl:789K ep:2K epch:15.78 loss:0.008 grdn:0.223 lr:5.6e-08 updt_s:0.205 data_s:1.740 +INFO 2025-08-05 21:53:00 ts/train.py:232 step:99K smpl:790K ep:2K epch:15.81 loss:0.009 grdn:0.234 lr:4.2e-08 updt_s:0.197 data_s:1.744 +INFO 2025-08-05 21:59:31 ts/train.py:232 step:99K smpl:792K ep:2K epch:15.84 loss:0.008 grdn:0.213 lr:3.0e-08 updt_s:0.197 data_s:1.756 +INFO 2025-08-05 22:06:04 ts/train.py:232 step:99K smpl:794K ep:2K epch:15.87 loss:0.008 grdn:0.208 lr:2.0e-08 updt_s:0.198 data_s:1.765 +INFO 2025-08-05 22:12:30 ts/train.py:232 step:99K smpl:795K ep:2K epch:15.90 loss:0.009 grdn:0.226 lr:1.2e-08 updt_s:0.199 data_s:1.732 +INFO 2025-08-05 22:19:04 ts/train.py:232 step:100K smpl:797K ep:2K epch:15.94 loss:0.008 grdn:0.216 lr:6.3e-09 updt_s:0.198 data_s:1.768 +INFO 2025-08-05 22:25:37 ts/train.py:232 step:100K smpl:798K ep:2K epch:15.97 loss:0.009 grdn:0.219 lr:2.3e-09 updt_s:0.194 data_s:1.770 +INFO 2025-08-05 22:32:03 ts/train.py:232 step:100K smpl:800K ep:2K epch:16.00 loss:0.008 grdn:0.215 lr:3.3e-10 updt_s:0.194 data_s:1.735 +INFO 2025-08-05 22:32:03 ts/train.py:241 Checkpoint policy after step 100000 +Traceback (most recent call last): + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/src/lerobot/scripts/train.py", line 295, in + main() + File "/gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/src/lerobot/scripts/train.py", line 291, in main + train() + File "/gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/src/lerobot/configs/parser.py", line 225, in wrapper_inner + response = fn(cfg, *args, **kwargs) + File "/gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/src/lerobot/scripts/train.py", line 246, in train + wandb_logger.log_policy(checkpoint_dir) + File "/gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/src/lerobot/utils/wandb_utils.py", line 115, in log_policy + artifact.add_file(checkpoint_dir / PRETRAINED_MODEL_DIR / SAFETENSORS_SINGLE_FILE) + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/wandb/sdk/artifacts/_validators.py", line 258, in wrapper + return method(self, *args, **kwargs) + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/wandb/sdk/artifacts/artifact.py", line 1529, in add_file + return self._add_local_file( + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/wandb/sdk/artifacts/artifact.py", line 1801, in _add_local_file + shutil.copyfile(path, staging_path) + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/shutil.py", line 267, in copyfile + _fastcopy_sendfile(fsrc, fdst) + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/shutil.py", line 162, in _fastcopy_sendfile + raise err + File "/projects/p32775/pythonenvs/tac_vla/lib/python3.10/shutil.py", line 142, in _fastcopy_sendfile + sent = os.sendfile(outfd, infd, offset, blocksize) +OSError: [Errno 122] Disk quota exceeded: 'outputs/train/diff_tac_insert/checkpoints/100000/pretrained_model/model.safetensors' -> '/home/aye8078/.local/share/wandb/artifacts/staging/tmp_1cye2v5' diff --git a/wandb/run-20250805_055539-ftnu6goz/files/requirements.txt b/wandb/run-20250805_055539-ftnu6goz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..47864fcff4e2976df45f0e2a69161f599efb87ef --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/files/requirements.txt @@ -0,0 +1,146 @@ +lightning-utilities==0.14.3 +uv==0.7.19 +umap-learn==0.5.9.post2 +torchmetrics==1.7.4 +anndata==0.11.4 +pynndescent==0.5.13 +array-api-compat==1.12.0 +contourpy==1.3.2 +tzdata==2025.2 +filelock==3.18.0 +natsort==8.4.0 +scipy==1.15.3 +kiwisolver==1.4.8 +networkx==3.4.2 +numba==0.61.2 +patsy==1.0.1 +legacy-api-wrap==1.4.1 +pillow==11.3.0 +pytorch-lightning==2.5.2 +cycler==0.12.1 +h5py==3.14.0 +pyparsing==3.2.3 +packaging==25.0 +joblib==1.5.1 +session-info2==0.2 +scikit-learn==1.7.1 +cell-load==0.7.4 +statsmodels==0.14.5 +numpy==2.2.6 +mpmath==1.3.0 +lightning==2.5.2 +fonttools==4.59.0 +typing_extensions==4.14.1 +seaborn==0.13.2 +scanpy==1.11.3 +toml==0.10.2 +matplotlib==3.10.3 +threadpoolctl==3.6.0 +llvmlite==0.44.0 +pandas==2.3.1 +nvidia-cufile-cu12==1.11.1.6 +triton==3.3.1 +charset-normalizer==3.4.2 +sentry-sdk==2.34.1 +nvidia-cuda-runtime-cu12==12.6.77 +pyarrow==21.0.0 +sympy==1.14.0 +pynput==1.8.1 +pfzy==0.3.4 +Jinja2==3.1.6 +pydantic_core==2.33.2 +cmake==4.0.3 +nvidia-cusparse-cu12==12.5.4.2 +gitdb==4.0.12 +aiosignal==1.4.0 +yarl==1.20.1 +platformdirs==4.3.8 +jsonlines==4.0.0 +multiprocess==0.70.16 +certifi==2025.8.3 +inquirerpy==0.3.4 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-cublas-cu12==12.6.4.1 +pydantic==2.11.7 +async-timeout==5.0.1 +annotated-types==0.7.0 +typing-inspect==0.9.0 +imageio==2.37.0 +nvidia-cuda-cupti-cu12==12.6.80 +wcwidth==0.2.13 +six==1.17.0 +tqdm==4.67.1 +zipp==3.23.0 +torchcodec==0.5 +cloudpickle==3.1.1 +lerobot==0.3.2 +Flask==3.1.1 +python-xlib==0.33 +wandb==0.21.0 +urllib3==2.5.0 +nvidia-nccl-cu12==2.26.2 +typing-inspection==0.4.1 +evdev==1.9.2 +prompt_toolkit==3.0.51 +gymnasium==0.29.1 +nvidia-curand-cu12==10.3.7.77 +GitPython==3.1.45 +opencv-python-headless==4.12.0.88 +datasets==3.6.0 +nvidia-cusolver-cu12==11.7.1.2 +termcolor==3.1.0 +mypy_extensions==1.1.0 +idna==3.10 +MarkupSafe==3.0.2 +frozenlist==1.7.0 +regex==2025.7.34 +Werkzeug==3.1.3 +imageio-ffmpeg==0.6.0 +importlib_metadata==8.7.0 +blinker==1.9.0 +psutil==7.0.0 +fsspec==2025.3.0 +deepdiff==8.5.0 +dill==0.3.8 +diffusers==0.34.0 +wheel==0.45.1 +nvidia-nvtx-cu12==12.6.77 +av==15.0.0 +nvidia-cusparselt-cu12==0.6.3 +propcache==0.3.2 +python-can==4.5.0 +msgpack==1.1.1 +einops==0.8.1 +safetensors==0.5.3 +setuptools==78.1.1 +requests==2.32.4 +torchvision==0.22.1 +wrapt==1.17.2 +pyyaml-include==1.4.1 +piper-sdk==0.4.1 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cuda-nvrtc-cu12==12.6.77 +Farama-Notifications==0.0.4 +click==8.2.1 +pyserial==3.5 +smmap==5.0.2 +attrs==25.3.0 +draccus==0.10.0 +mergedeep==1.3.4 +huggingface-hub==0.34.3 +pytz==2025.2 +aiohappyeyeballs==2.6.1 +python-dateutil==2.9.0.post0 +itsdangerous==2.2.0 +torch==2.7.1 +multidict==6.6.3 +aiohttp==3.12.15 +protobuf==6.31.1 +hf_transfer==0.1.9 +xxhash==3.5.0 +pip==25.1 +PyYAML==6.0.2 +orderly-set==5.5.0 +nvidia-cudnn-cu12==9.5.1.17 +hf-xet==1.1.5 +lerobot==0.3.2 diff --git a/wandb/run-20250805_055539-ftnu6goz/files/wandb-metadata.json b/wandb/run-20250805_055539-ftnu6goz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..ae00638fe41f5fdf26f111e6283a8a6f99d93e17 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/files/wandb-metadata.json @@ -0,0 +1,72 @@ +{ + "os": "Linux-4.18.0-553.36.1.el8_10.x86_64-x86_64-with-glibc2.28", + "python": "CPython 3.10.18", + "startedAt": "2025-08-05T10:55:39.023857Z", + "args": [ + "--config_path=/home/aye8078/Documents/Github/tac_vla/lerobot/outputs/train/diff_tac_insert/checkpoints/last/pretrained_model/train_config.json", + "--resume=true" + ], + "program": "-m lerobot.scripts.train", + "root": "outputs/train/diff_tac_insert", + "host": "qgpu0404", + "executable": "/projects/p32775/pythonenvs/tac_vla/bin/python", + "cpu_count": 52, + "cpu_count_logical": 52, + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_count": 1, + "disk": { + "/": { + "total": "101237329920", + "used": "9976344576" + } + }, + "memory": { + "total": "202474663936" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-PCIE-40GB", + "memoryTotal": "42949672960", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-a233ec62-e827-0562-129f-d9164b24f210" + } + ], + "cudaVersion": "12.8", + "slurm": { + "cluster_name": "quest", + "conf": "/etc/slurm/slurm.conf", + "cpus_on_node": "1", + "gpus_on_node": "1", + "gtids": "0", + "job_account": "p32775", + "job_cpus_per_node": "1", + "job_end_time": "1754477643", + "job_gid": "2000033", + "job_gpus": "0", + "job_id": "839311", + "job_name": "diff_tac_insert", + "job_nodelist": "qgpu0404", + "job_num_nodes": "1", + "job_partition": "gengpu", + "job_qos": "normal", + "job_start_time": "1754391243", + "job_uid": "2000033", + "job_user": "aye8078", + "jobid": "839311", + "localid": "0", + "mem_per_node": "49152", + "nnodes": "1", + "nodeid": "0", + "nodelist": "qgpu0404", + "prio_process": "0", + "procid": "0", + "submit_dir": "/gpfs/home/aye8078/Documents/Github/tac_vla/slurm", + "submit_host": "quser43", + "task_pid": "379968", + "tasks_per_node": "1", + "topology_addr": "qgpu0404", + "topology_addr_pattern": "node" + }, + "writerId": "yd2qhrsop30spzg78awqeiox3wxlduul" +} \ No newline at end of file diff --git a/wandb/run-20250805_055539-ftnu6goz/files/wandb-summary.json b/wandb/run-20250805_055539-ftnu6goz/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..80d097b95655bc127793746bd3c95249dc5e1c83 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/update_s":0.1944363382831216,"train/lr":3.298127591122574e-10,"_wandb":{"runtime":146497},"_timestamp":1.7544511233785694e+09,"_step":100000,"_runtime":146497.919883353,"train/grad_norm":0.21477259891107678,"train/epochs":16,"train/loss":0.008437735554471146,"train/samples":800000,"train/steps":100000,"train/dataloading_s":1.7351347128208727,"train/episodes":1600} \ No newline at end of file diff --git a/wandb/run-20250805_055539-ftnu6goz/logs/debug-core.log b/wandb/run-20250805_055539-ftnu6goz/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..5578789cab2a40be688ecb922782a60add813423 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-08-05T05:55:39.628100311-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpz5nhzkq5/port-393681.txt","pid":393681,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-08-05T05:55:39.629398899-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-393681-429628-1269157952/socket","Net":"unix"}} +{"time":"2025-08-05T05:55:39.629676301-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":393681} +{"time":"2025-08-05T05:55:39.63414028-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-08-05T05:55:39.738541255-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"ftnu6goz","id":"1(@)"} +{"time":"2025-08-05T05:55:39.997893997-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ftnu6goz","id":"1(@)"} +{"time":"2025-08-05T22:32:18.206122737-05:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-08-05T22:32:18.206956489-05:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-08-05T22:32:18.207042625-05:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-08-05T22:32:18.207065868-05:00","level":"INFO","msg":"server is shutting down"} +{"time":"2025-08-05T22:32:18.207183658-05:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-393681-429628-1269157952/socket","Net":"unix"}} +{"time":"2025-08-05T22:32:20.435505093-05:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-08-05T22:32:20.435694893-05:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-08-05T22:32:20.435729405-05:00","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250805_055539-ftnu6goz/logs/debug-internal.log b/wandb/run-20250805_055539-ftnu6goz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c5828de942852bfd55296e7f11c9796484d3a2d5 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/logs/debug-internal.log @@ -0,0 +1,149 @@ +{"time":"2025-08-05T05:55:39.753303528-05:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-08-05T05:55:39.997729541-05:00","level":"INFO","msg":"stream: created new stream","id":"ftnu6goz"} +{"time":"2025-08-05T05:55:39.997853611-05:00","level":"INFO","msg":"stream: started","id":"ftnu6goz"} +{"time":"2025-08-05T05:55:40.001734161-05:00","level":"INFO","msg":"handler: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T05:55:40.001847432-05:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T05:55:40.002352867-05:00","level":"INFO","msg":"sender: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T06:00:55.038446172-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.038700785-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.038839628-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.038960391-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.039070992-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.039177472-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.039374489-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.039590026-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:00:55.040412479-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60200,"current":63001} +{"time":"2025-08-05T06:05:34.857981389-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.858296006-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.859569926-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.859733787-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.85986243-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.859985562-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.860119686-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.860232879-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:05:34.860444043-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60400,"current":63001} +{"time":"2025-08-05T06:10:14.675058372-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.675258896-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.677039373-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.677300842-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.677401826-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.677537667-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.679736272-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.679749767-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:10:14.679755196-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60600,"current":63001} +{"time":"2025-08-05T06:14:54.528914003-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.530419409-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.537089728-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.538531131-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.538728826-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.538899289-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.539066406-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.544561212-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:14:54.544880406-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":60800,"current":63001} +{"time":"2025-08-05T06:19:33.463483074-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.464104297-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.469437423-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.46961381-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.469837519-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.470041472-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.472268123-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.472489151-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:19:33.4727071-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61000,"current":63001} +{"time":"2025-08-05T06:24:18.00398514-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.008603728-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.009915773-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010068939-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010216051-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010341366-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010478584-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010594735-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:24:18.010701897-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61200,"current":63001} +{"time":"2025-08-05T06:28:54.422806176-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.423031933-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.423270276-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.429547917-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.429818219-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.431061115-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.431201792-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.431325297-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:28:54.431462351-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61400,"current":63001} +{"time":"2025-08-05T06:33:28.270657506-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.270996235-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.27539403-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.275558116-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.278789528-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.278919675-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.279039833-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.279146278-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:33:28.279249277-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61600,"current":63001} +{"time":"2025-08-05T06:38:16.004812334-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005005525-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005134185-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005251999-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005366364-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005486876-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005598264-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005702816-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:38:16.005803713-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":61800,"current":63001} +{"time":"2025-08-05T06:42:59.27956925-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.279796641-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.27992556-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.28205338-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.282237432-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.282365807-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.282500568-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.282615992-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:42:59.282732715-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62000,"current":63001} +{"time":"2025-08-05T06:47:38.429848909-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.464612036-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.464780098-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.466455518-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.46661999-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.466741258-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.466859778-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.466966222-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:47:38.467071288-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62200,"current":63001} +{"time":"2025-08-05T06:52:19.34665052-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.351591575-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.351749453-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354261939-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354440377-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354573829-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354696583-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354807069-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:52:19.354915923-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62400,"current":63001} +{"time":"2025-08-05T06:57:02.340075271-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.346333656-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.34656631-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.347813075-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.348113322-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.348256491-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.353457812-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.353606146-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T06:57:02.353725276-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62600,"current":63001} +{"time":"2025-08-05T07:01:41.249710437-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.249900034-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.250027508-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.25040266-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.25053196-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.250716787-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.256492271-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.256648785-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:01:41.256773744-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":62800,"current":63001} +{"time":"2025-08-05T07:06:21.085840252-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.086043036-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.086174744-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.086303558-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.0864213-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.086526462-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.087657817-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.087779985-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T07:06:21.087885558-05:00","level":"WARN","msg":"handler: ignoring partial history record","step":63000,"current":63001} +{"time":"2025-08-05T12:50:12.816840215-05:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"} +{"time":"2025-08-05T22:12:17.983408038-05:00","level":"INFO","msg":"api: retrying HTTP error","status":429,"url":"https://api.wandb.ai/files/a10v-1/lerobot/ftnu6goz/file_stream","body":"{\"error\":\"rate limit exceeded\"}"} +{"time":"2025-08-05T22:32:18.20683731-05:00","level":"INFO","msg":"stream: closing","id":"ftnu6goz"} +{"time":"2025-08-05T22:32:19.369449649-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-08-05T22:32:20.317226228-05:00","level":"INFO","msg":"handler: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:32:20.317308631-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:32:20.317573207-05:00","level":"INFO","msg":"sender: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:32:20.374801589-05:00","level":"INFO","msg":"stream: closed","id":"ftnu6goz"} diff --git a/wandb/run-20250805_055539-ftnu6goz/logs/debug.log b/wandb/run-20250805_055539-ftnu6goz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..85e7ffa8095d1441d506ea893723f89544c29e31 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/logs/debug.log @@ -0,0 +1,23 @@ +2025-08-05 05:55:39,140 INFO MainThread:393681 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-08-05 05:55:39,140 INFO MainThread:393681 [wandb_setup.py:_flush():80] Configure stats pid to 393681 +2025-08-05 05:55:39,140 INFO MainThread:393681 [wandb_setup.py:_flush():80] Loading settings from /home/aye8078/.config/wandb/settings +2025-08-05 05:55:39,140 INFO MainThread:393681 [wandb_setup.py:_flush():80] Loading settings from /gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/wandb/settings +2025-08-05 05:55:39,140 INFO MainThread:393681 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-08-05 05:55:39,141 INFO MainThread:393681 [wandb_init.py:setup_run_log_directory():703] Logging user logs to outputs/train/diff_tac_insert/wandb/run-20250805_055539-ftnu6goz/logs/debug.log +2025-08-05 05:55:39,141 INFO MainThread:393681 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to outputs/train/diff_tac_insert/wandb/run-20250805_055539-ftnu6goz/logs/debug-internal.log +2025-08-05 05:55:39,141 INFO MainThread:393681 [wandb_init.py:init():830] calling init triggers +2025-08-05 05:55:39,141 INFO MainThread:393681 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'a1o/tac_insert', 'root': None, 'episodes': None, 'image_transforms': {'enable': False, 'max_num_transforms': 3, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec'}, 'env': None, 'policy': {'type': 'diffusion', 'n_obs_steps': 2, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {'observation.state': {'type': , 'shape': [14]}, 'observation.effort': {'type': , 'shape': [14]}, 'observation.qvel': {'type': , 'shape': [14]}, 'observation.images.cam_high': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_left_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_right_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile1': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile2': {'type': , 'shape': [3, 480, 640]}}, 'output_features': {'action': {'type': , 'shape': [14]}}, 'device': 'cuda', 'use_amp': False, 'push_to_hub': True, 'repo_id': 'a1o/diff_pick_tac_insert_policy', 'private': None, 'tags': None, 'license': None, 'horizon': 16, 'n_action_steps': 8, 'drop_n_last_frames': 7, 'vision_backbone': 'resnet18', 'crop_shape': [84, 84], 'crop_is_random': True, 'pretrained_backbone_weights': None, 'use_group_norm': True, 'spatial_softmax_num_keypoints': 32, 'use_separate_rgb_encoder_per_camera': False, 'down_dims': [512, 1024, 2048], 'kernel_size': 5, 'n_groups': 8, 'diffusion_step_embed_dim': 128, 'use_film_scale_modulation': True, 'noise_scheduler_type': 'DDPM', 'num_train_timesteps': 100, 'beta_schedule': 'squaredcos_cap_v2', 'beta_start': 0.0001, 'beta_end': 0.02, 'prediction_type': 'epsilon', 'clip_sample': True, 'clip_sample_range': 1.0, 'num_inference_steps': None, 'do_mask_loss_for_padding': False, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.95, 0.999], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-06, 'scheduler_name': 'cosine', 'scheduler_warmup_steps': 500}, 'output_dir': 'outputs/train/diff_tac_insert', 'job_name': 'diff_piper_tac_insert', 'resume': True, 'seed': 1000, 'num_workers': 4, 'batch_size': 8, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 20000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adam', 'lr': 0.0001, 'weight_decay': 1e-06, 'grad_clip_norm': 10.0, 'betas': [0.95, 0.999], 'eps': 1e-08}, 'scheduler': {'type': 'diffuser', 'num_warmup_steps': 500, 'name': 'cosine'}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': 'ftnu6goz', 'mode': None}, '_wandb': {}} +2025-08-05 05:55:39,144 INFO MainThread:393681 [wandb_init.py:init():871] starting backend +2025-08-05 05:55:39,634 INFO MainThread:393681 [wandb_init.py:init():874] sending inform_init request +2025-08-05 05:55:39,727 INFO MainThread:393681 [wandb_init.py:init():882] backend started and connected +2025-08-05 05:55:39,731 INFO MainThread:393681 [wandb_init.py:init():953] updated telemetry +2025-08-05 05:55:39,731 INFO MainThread:393681 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-08-05 05:55:40,276 INFO MainThread:393681 [wandb_init.py:init():1024] run resumed +2025-08-05 05:55:40,282 INFO MainThread:393681 [wandb_init.py:init():1029] starting run threads in backend +2025-08-05 05:55:42,205 INFO MainThread:393681 [wandb_run.py:_console_start():2458] atexit reg +2025-08-05 05:55:42,206 INFO MainThread:393681 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-08-05 05:55:42,206 INFO MainThread:393681 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-08-05 05:55:42,206 INFO MainThread:393681 [wandb_run.py:_redirect():2398] Redirects installed. +2025-08-05 05:55:42,214 INFO MainThread:393681 [wandb_init.py:init():1075] run started, returning control to user process +2025-08-05 22:32:18,204 INFO MsgRouterThr:393681 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20250805_055539-ftnu6goz/run-ftnu6goz.wandb b/wandb/run-20250805_055539-ftnu6goz/run-ftnu6goz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..635d40dbb39fa347dfb1c0f72bdfadbf95638e75 --- /dev/null +++ b/wandb/run-20250805_055539-ftnu6goz/run-ftnu6goz.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d287efba4a774d2d9f0f6cf5cdfc02e0d0fd33619b51f796c0e98325b2b5c788 +size 3609844 diff --git a/wandb/run-20250805_225334-ftnu6goz/files/config.yaml b/wandb/run-20250805_225334-ftnu6goz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82b775adaa9d096bb716924d37dbd0b10a95ab77 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/files/config.yaml @@ -0,0 +1,298 @@ +_wandb: + value: + cli_version: 0.21.0 + e: + ndjd4pdq9v1yya4dyktyjt1smg3no50d: + args: + - --config_path=/home/aye8078/Documents/Github/tac_vla/lerobot/outputs/train/diff_tac_insert/checkpoints/last/pretrained_model/train_config.json + - --resume=true + cpu_count: 52 + cpu_count_logical: 52 + cudaVersion: "12.8" + disk: + /: + total: "101237329920" + used: "6670987264" + executable: /projects/p32775/pythonenvs/tac_vla/bin/python + gpu: NVIDIA A100-PCIE-40GB + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "42949672960" + name: NVIDIA A100-PCIE-40GB + uuid: GPU-c91b110a-9eb1-15b6-ff0a-7aeb47b26ff0 + host: qgpu0401 + memory: + total: "202474663936" + os: Linux-4.18.0-553.36.1.el8_10.x86_64-x86_64-with-glibc2.28 + program: -m lerobot.scripts.train + python: CPython 3.10.18 + root: outputs/train/diff_tac_insert + slurm: + cluster_name: quest + conf: /etc/slurm/slurm.conf + cpus_on_node: "1" + gpus_on_node: "1" + gtids: "0" + job_account: p32775 + job_cpus_per_node: "1" + job_end_time: "1754538730" + job_gid: "2000033" + job_gpus: "0" + job_id: "942145" + job_name: diff_tac_insert + job_nodelist: qgpu0401 + job_num_nodes: "1" + job_partition: gengpu + job_qos: normal + job_start_time: "1754452330" + job_uid: "2000033" + job_user: aye8078 + jobid: "942145" + localid: "0" + mem_per_node: "49152" + nnodes: "1" + nodeid: "0" + nodelist: qgpu0401 + prio_process: "0" + procid: "0" + submit_dir: /gpfs/home/aye8078/Documents/Github/tac_vla/slurm + submit_host: quser44 + task_pid: "1510855" + tasks_per_node: "1" + topology_addr: qgpu0401 + topology_addr_pattern: node + startedAt: "2025-08-06T03:53:34.220159Z" + writerId: ndjd4pdq9v1yya4dyktyjt1smg3no50d + m: [] + python_version: 3.10.18 + t: + "1": + - 1 + - 41 + - 49 + - 51 + "2": + - 1 + - 41 + - 49 + - 51 + - 83 + "3": + - 5 + - 13 + - 14 + - 15 + - 16 + - 62 + "4": 3.10.18 + "5": 0.21.0 + "10": + - 21 + "12": 0.21.0 + "13": linux-x86_64 +batch_size: + value: 8 +dataset: + value: + episodes: null + image_transforms: + enable: false + max_num_transforms: 3 + random_order: false + tfs: + brightness: + kwargs: + brightness: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + contrast: + kwargs: + contrast: + - 0.8 + - 1.2 + type: ColorJitter + weight: 1 + hue: + kwargs: + hue: + - -0.05 + - 0.05 + type: ColorJitter + weight: 1 + saturation: + kwargs: + saturation: + - 0.5 + - 1.5 + type: ColorJitter + weight: 1 + sharpness: + kwargs: + sharpness: + - 0.5 + - 1.5 + type: SharpnessJitter + weight: 1 + repo_id: a1o/tac_insert + revision: null + root: null + use_imagenet_stats: true + video_backend: torchcodec +env: + value: null +eval: + value: + batch_size: 50 + n_episodes: 50 + use_async_envs: false +eval_freq: + value: 20000 +job_name: + value: diff_piper_tac_insert +log_freq: + value: 200 +num_workers: + value: 4 +optimizer: + value: + betas: + - 0.95 + - 0.999 + eps: 1e-08 + grad_clip_norm: 10 + lr: 0.0001 + type: adam + weight_decay: 1e-06 +output_dir: + value: outputs/train/diff_tac_insert +policy: + value: + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + beta_start: 0.0001 + clip_sample: true + clip_sample_range: 1 + crop_is_random: true + crop_shape: + - 84 + - 84 + device: cuda + diffusion_step_embed_dim: 128 + do_mask_loss_for_padding: false + down_dims: + - 512 + - 1024 + - 2048 + drop_n_last_frames: 7 + horizon: 16 + input_features: + observation.effort: + shape: + - 14 + type: STATE + observation.images.cam_high: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.images.cam_left_wrist: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.images.cam_right_wrist: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.qvel: + shape: + - 14 + type: STATE + observation.state: + shape: + - 14 + type: STATE + observation.tactile1: + shape: + - 3 + - 480 + - 640 + type: VISUAL + observation.tactile2: + shape: + - 3 + - 480 + - 640 + type: VISUAL + kernel_size: 5 + license: null + n_action_steps: 8 + n_groups: 8 + n_obs_steps: 2 + noise_scheduler_type: DDPM + normalization_mapping: + ACTION: MIN_MAX + STATE: MIN_MAX + VISUAL: MEAN_STD + num_inference_steps: null + num_train_timesteps: 100 + optimizer_betas: + - 0.95 + - 0.999 + optimizer_eps: 1e-08 + optimizer_lr: 0.0001 + optimizer_weight_decay: 1e-06 + output_features: + action: + shape: + - 14 + type: ACTION + prediction_type: epsilon + pretrained_backbone_weights: null + private: null + push_to_hub: true + repo_id: a1o/diff_pick_tac_insert_policy + scheduler_name: cosine + scheduler_warmup_steps: 500 + spatial_softmax_num_keypoints: 32 + tags: null + type: diffusion + use_amp: false + use_film_scale_modulation: true + use_group_norm: true + use_separate_rgb_encoder_per_camera: false + vision_backbone: resnet18 +resume: + value: true +save_checkpoint: + value: true +save_freq: + value: 20000 +scheduler: + value: + name: cosine + num_warmup_steps: 500 + type: diffuser +seed: + value: 1000 +steps: + value: 100000 +use_policy_training_preset: + value: true +wandb: + value: + disable_artifact: false + enable: true + entity: null + mode: null + notes: null + project: lerobot + run_id: ftnu6goz diff --git a/wandb/run-20250805_225334-ftnu6goz/files/output.log b/wandb/run-20250805_225334-ftnu6goz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..21dbfa1d28ebe54e85aa9bd8d03f3310a6e19656 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/files/output.log @@ -0,0 +1,18 @@ +Logs will be synced with wandb. +INFO 2025-08-05 22:53:37 db_utils.py:103 Track this run --> https://wandb.ai/a10v-1/lerobot/runs/ftnu6goz +INFO 2025-08-05 22:53:37 ts/train.py:127 Creating dataset +INFO 2025-08-05 22:53:42 ts/train.py:138 Creating policy +Loading weights from local directory +INFO 2025-08-05 22:53:48 ts/train.py:144 Creating optimizer and scheduler +INFO 2025-08-05 22:54:05 ts/train.py:156 Output dir: outputs/train/diff_tac_insert +INFO 2025-08-05 22:54:05 ts/train.py:159 cfg.steps=100000 (100K) +INFO 2025-08-05 22:54:05 ts/train.py:160 dataset.num_frames=50000 (50K) +INFO 2025-08-05 22:54:05 ts/train.py:161 dataset.num_episodes=100 +INFO 2025-08-05 22:54:05 ts/train.py:162 num_learnable_params=278120238 (278M) +INFO 2025-08-05 22:54:05 ts/train.py:163 num_total_params=278120408 (278M) +/projects/p32775/pythonenvs/tac_vla/lib/python3.10/site-packages/torch/utils/data/dataloader.py:626: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 1, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. + warnings.warn( +INFO 2025-08-05 22:54:05 ts/train.py:202 Start offline training on a fixed dataset +INFO 2025-08-05 22:54:05 ts/train.py:283 End of training +model.safetensors: 100%|██████████| 1.11G/1.11G [00:23<00:00, 48.1MB/s] +INFO 2025-08-05 22:54:37 etrained.py:232 Model pushed to https://huggingface.co/a1o/diff_pick_tac_insert_policy diff --git a/wandb/run-20250805_225334-ftnu6goz/files/requirements.txt b/wandb/run-20250805_225334-ftnu6goz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..47864fcff4e2976df45f0e2a69161f599efb87ef --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/files/requirements.txt @@ -0,0 +1,146 @@ +lightning-utilities==0.14.3 +uv==0.7.19 +umap-learn==0.5.9.post2 +torchmetrics==1.7.4 +anndata==0.11.4 +pynndescent==0.5.13 +array-api-compat==1.12.0 +contourpy==1.3.2 +tzdata==2025.2 +filelock==3.18.0 +natsort==8.4.0 +scipy==1.15.3 +kiwisolver==1.4.8 +networkx==3.4.2 +numba==0.61.2 +patsy==1.0.1 +legacy-api-wrap==1.4.1 +pillow==11.3.0 +pytorch-lightning==2.5.2 +cycler==0.12.1 +h5py==3.14.0 +pyparsing==3.2.3 +packaging==25.0 +joblib==1.5.1 +session-info2==0.2 +scikit-learn==1.7.1 +cell-load==0.7.4 +statsmodels==0.14.5 +numpy==2.2.6 +mpmath==1.3.0 +lightning==2.5.2 +fonttools==4.59.0 +typing_extensions==4.14.1 +seaborn==0.13.2 +scanpy==1.11.3 +toml==0.10.2 +matplotlib==3.10.3 +threadpoolctl==3.6.0 +llvmlite==0.44.0 +pandas==2.3.1 +nvidia-cufile-cu12==1.11.1.6 +triton==3.3.1 +charset-normalizer==3.4.2 +sentry-sdk==2.34.1 +nvidia-cuda-runtime-cu12==12.6.77 +pyarrow==21.0.0 +sympy==1.14.0 +pynput==1.8.1 +pfzy==0.3.4 +Jinja2==3.1.6 +pydantic_core==2.33.2 +cmake==4.0.3 +nvidia-cusparse-cu12==12.5.4.2 +gitdb==4.0.12 +aiosignal==1.4.0 +yarl==1.20.1 +platformdirs==4.3.8 +jsonlines==4.0.0 +multiprocess==0.70.16 +certifi==2025.8.3 +inquirerpy==0.3.4 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-cublas-cu12==12.6.4.1 +pydantic==2.11.7 +async-timeout==5.0.1 +annotated-types==0.7.0 +typing-inspect==0.9.0 +imageio==2.37.0 +nvidia-cuda-cupti-cu12==12.6.80 +wcwidth==0.2.13 +six==1.17.0 +tqdm==4.67.1 +zipp==3.23.0 +torchcodec==0.5 +cloudpickle==3.1.1 +lerobot==0.3.2 +Flask==3.1.1 +python-xlib==0.33 +wandb==0.21.0 +urllib3==2.5.0 +nvidia-nccl-cu12==2.26.2 +typing-inspection==0.4.1 +evdev==1.9.2 +prompt_toolkit==3.0.51 +gymnasium==0.29.1 +nvidia-curand-cu12==10.3.7.77 +GitPython==3.1.45 +opencv-python-headless==4.12.0.88 +datasets==3.6.0 +nvidia-cusolver-cu12==11.7.1.2 +termcolor==3.1.0 +mypy_extensions==1.1.0 +idna==3.10 +MarkupSafe==3.0.2 +frozenlist==1.7.0 +regex==2025.7.34 +Werkzeug==3.1.3 +imageio-ffmpeg==0.6.0 +importlib_metadata==8.7.0 +blinker==1.9.0 +psutil==7.0.0 +fsspec==2025.3.0 +deepdiff==8.5.0 +dill==0.3.8 +diffusers==0.34.0 +wheel==0.45.1 +nvidia-nvtx-cu12==12.6.77 +av==15.0.0 +nvidia-cusparselt-cu12==0.6.3 +propcache==0.3.2 +python-can==4.5.0 +msgpack==1.1.1 +einops==0.8.1 +safetensors==0.5.3 +setuptools==78.1.1 +requests==2.32.4 +torchvision==0.22.1 +wrapt==1.17.2 +pyyaml-include==1.4.1 +piper-sdk==0.4.1 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cuda-nvrtc-cu12==12.6.77 +Farama-Notifications==0.0.4 +click==8.2.1 +pyserial==3.5 +smmap==5.0.2 +attrs==25.3.0 +draccus==0.10.0 +mergedeep==1.3.4 +huggingface-hub==0.34.3 +pytz==2025.2 +aiohappyeyeballs==2.6.1 +python-dateutil==2.9.0.post0 +itsdangerous==2.2.0 +torch==2.7.1 +multidict==6.6.3 +aiohttp==3.12.15 +protobuf==6.31.1 +hf_transfer==0.1.9 +xxhash==3.5.0 +pip==25.1 +PyYAML==6.0.2 +orderly-set==5.5.0 +nvidia-cudnn-cu12==9.5.1.17 +hf-xet==1.1.5 +lerobot==0.3.2 diff --git a/wandb/run-20250805_225334-ftnu6goz/files/wandb-metadata.json b/wandb/run-20250805_225334-ftnu6goz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4c208d5d0cf7f37de97cd9271232c10bd5f9e564 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/files/wandb-metadata.json @@ -0,0 +1,72 @@ +{ + "os": "Linux-4.18.0-553.36.1.el8_10.x86_64-x86_64-with-glibc2.28", + "python": "CPython 3.10.18", + "startedAt": "2025-08-06T03:53:34.220159Z", + "args": [ + "--config_path=/home/aye8078/Documents/Github/tac_vla/lerobot/outputs/train/diff_tac_insert/checkpoints/last/pretrained_model/train_config.json", + "--resume=true" + ], + "program": "-m lerobot.scripts.train", + "root": "outputs/train/diff_tac_insert", + "host": "qgpu0401", + "executable": "/projects/p32775/pythonenvs/tac_vla/bin/python", + "cpu_count": 52, + "cpu_count_logical": 52, + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_count": 1, + "disk": { + "/": { + "total": "101237329920", + "used": "6670987264" + } + }, + "memory": { + "total": "202474663936" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-PCIE-40GB", + "memoryTotal": "42949672960", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-c91b110a-9eb1-15b6-ff0a-7aeb47b26ff0" + } + ], + "cudaVersion": "12.8", + "slurm": { + "cluster_name": "quest", + "conf": "/etc/slurm/slurm.conf", + "cpus_on_node": "1", + "gpus_on_node": "1", + "gtids": "0", + "job_account": "p32775", + "job_cpus_per_node": "1", + "job_end_time": "1754538730", + "job_gid": "2000033", + "job_gpus": "0", + "job_id": "942145", + "job_name": "diff_tac_insert", + "job_nodelist": "qgpu0401", + "job_num_nodes": "1", + "job_partition": "gengpu", + "job_qos": "normal", + "job_start_time": "1754452330", + "job_uid": "2000033", + "job_user": "aye8078", + "jobid": "942145", + "localid": "0", + "mem_per_node": "49152", + "nnodes": "1", + "nodeid": "0", + "nodelist": "qgpu0401", + "prio_process": "0", + "procid": "0", + "submit_dir": "/gpfs/home/aye8078/Documents/Github/tac_vla/slurm", + "submit_host": "quser44", + "task_pid": "1510855", + "tasks_per_node": "1", + "topology_addr": "qgpu0401", + "topology_addr_pattern": "node" + }, + "writerId": "ndjd4pdq9v1yya4dyktyjt1smg3no50d" +} \ No newline at end of file diff --git a/wandb/run-20250805_225334-ftnu6goz/files/wandb-summary.json b/wandb/run-20250805_225334-ftnu6goz/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..7571f9cb67dcbf8f55145981f1d60ceb83f8f9fb --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/samples":800000,"_runtime":146558,"train/epochs":16,"train/update_s":0.1944363382831216,"train/loss":0.008437735554471146,"train/lr":3.2981275911225737e-10,"train/episodes":1600,"_step":100000,"_wandb":{"runtime":146558},"train/dataloading_s":1.7351347128208727,"train/grad_norm":0.2147725989110768,"_timestamp":1.7544511233785694e+09,"train/steps":100000} \ No newline at end of file diff --git a/wandb/run-20250805_225334-ftnu6goz/logs/debug-core.log b/wandb/run-20250805_225334-ftnu6goz/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..1667f87f0cb1f432661ccff55becd9b7e5f84bc0 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-08-05T22:53:34.710754176-05:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdbv7f_le/port-1532602.txt","pid":1532602,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-08-05T22:53:34.714815259-05:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1532602-1674152-3079671434/socket","Net":"unix"}} +{"time":"2025-08-05T22:53:34.715020541-05:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1532602} +{"time":"2025-08-05T22:53:34.776969347-05:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-08-05T22:53:34.872953552-05:00","level":"INFO","msg":"handleInformInit: received","streamId":"ftnu6goz","id":"1(@)"} +{"time":"2025-08-05T22:53:35.113446256-05:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"ftnu6goz","id":"1(@)"} +{"time":"2025-08-05T22:54:37.424605675-05:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-08-05T22:54:37.425064081-05:00","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-08-05T22:54:37.425147163-05:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-08-05T22:54:37.425169533-05:00","level":"INFO","msg":"server is shutting down"} +{"time":"2025-08-05T22:54:37.425275058-05:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1532602-1674152-3079671434/socket","Net":"unix"}} +{"time":"2025-08-05T22:54:37.991587446-05:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-08-05T22:54:37.991660311-05:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-08-05T22:54:37.991684426-05:00","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20250805_225334-ftnu6goz/logs/debug-internal.log b/wandb/run-20250805_225334-ftnu6goz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4217a2e1da62952a58ed1f83650f6dc45f1e1ed5 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/logs/debug-internal.log @@ -0,0 +1,12 @@ +{"time":"2025-08-05T22:53:34.874153673-05:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"} +{"time":"2025-08-05T22:53:35.113343469-05:00","level":"INFO","msg":"stream: created new stream","id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.113431564-05:00","level":"INFO","msg":"stream: started","id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115576518-05:00","level":"INFO","msg":"handler: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115694248-05:00","level":"INFO","msg":"writer: Do: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:53:35.115890877-05:00","level":"INFO","msg":"sender: started","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.424943433-05:00","level":"INFO","msg":"stream: closing","id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.836519735-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-08-05T22:54:37.990590611-05:00","level":"INFO","msg":"handler: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.990724324-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.99089975-05:00","level":"INFO","msg":"sender: closed","stream_id":"ftnu6goz"} +{"time":"2025-08-05T22:54:37.991141503-05:00","level":"INFO","msg":"stream: closed","id":"ftnu6goz"} diff --git a/wandb/run-20250805_225334-ftnu6goz/logs/debug.log b/wandb/run-20250805_225334-ftnu6goz/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7cc21ff7009672ba120e4f04bff014dde771df00 --- /dev/null +++ b/wandb/run-20250805_225334-ftnu6goz/logs/debug.log @@ -0,0 +1,23 @@ +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0 +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Configure stats pid to 1532602 +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from /home/aye8078/.config/wandb/settings +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from /gpfs/home/aye8078/Documents/Github/tac_vla/lerobot/wandb/settings +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:setup_run_log_directory():703] Logging user logs to outputs/train/diff_tac_insert/wandb/run-20250805_225334-ftnu6goz/logs/debug.log +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to outputs/train/diff_tac_insert/wandb/run-20250805_225334-ftnu6goz/logs/debug-internal.log +2025-08-05 22:53:34,238 INFO MainThread:1532602 [wandb_init.py:init():830] calling init triggers +2025-08-05 22:53:34,239 INFO MainThread:1532602 [wandb_init.py:init():835] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'a1o/tac_insert', 'root': None, 'episodes': None, 'image_transforms': {'enable': False, 'max_num_transforms': 3, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec'}, 'env': None, 'policy': {'type': 'diffusion', 'n_obs_steps': 2, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'input_features': {'observation.state': {'type': , 'shape': [14]}, 'observation.effort': {'type': , 'shape': [14]}, 'observation.qvel': {'type': , 'shape': [14]}, 'observation.images.cam_high': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_left_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.images.cam_right_wrist': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile1': {'type': , 'shape': [3, 480, 640]}, 'observation.tactile2': {'type': , 'shape': [3, 480, 640]}}, 'output_features': {'action': {'type': , 'shape': [14]}}, 'device': 'cuda', 'use_amp': False, 'push_to_hub': True, 'repo_id': 'a1o/diff_pick_tac_insert_policy', 'private': None, 'tags': None, 'license': None, 'horizon': 16, 'n_action_steps': 8, 'drop_n_last_frames': 7, 'vision_backbone': 'resnet18', 'crop_shape': [84, 84], 'crop_is_random': True, 'pretrained_backbone_weights': None, 'use_group_norm': True, 'spatial_softmax_num_keypoints': 32, 'use_separate_rgb_encoder_per_camera': False, 'down_dims': [512, 1024, 2048], 'kernel_size': 5, 'n_groups': 8, 'diffusion_step_embed_dim': 128, 'use_film_scale_modulation': True, 'noise_scheduler_type': 'DDPM', 'num_train_timesteps': 100, 'beta_schedule': 'squaredcos_cap_v2', 'beta_start': 0.0001, 'beta_end': 0.02, 'prediction_type': 'epsilon', 'clip_sample': True, 'clip_sample_range': 1.0, 'num_inference_steps': None, 'do_mask_loss_for_padding': False, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.95, 0.999], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-06, 'scheduler_name': 'cosine', 'scheduler_warmup_steps': 500}, 'output_dir': 'outputs/train/diff_tac_insert', 'job_name': 'diff_piper_tac_insert', 'resume': True, 'seed': 1000, 'num_workers': 4, 'batch_size': 8, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 200, 'save_checkpoint': True, 'save_freq': 20000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adam', 'lr': 0.0001, 'weight_decay': 1e-06, 'grad_clip_norm': 10.0, 'betas': [0.95, 0.999], 'eps': 1e-08}, 'scheduler': {'type': 'diffuser', 'num_warmup_steps': 500, 'name': 'cosine'}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': 'ftnu6goz', 'mode': None}, '_wandb': {}} +2025-08-05 22:53:34,240 INFO MainThread:1532602 [wandb_init.py:init():871] starting backend +2025-08-05 22:53:34,777 INFO MainThread:1532602 [wandb_init.py:init():874] sending inform_init request +2025-08-05 22:53:34,864 INFO MainThread:1532602 [wandb_init.py:init():882] backend started and connected +2025-08-05 22:53:34,867 INFO MainThread:1532602 [wandb_init.py:init():953] updated telemetry +2025-08-05 22:53:34,867 INFO MainThread:1532602 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout +2025-08-05 22:53:35,434 INFO MainThread:1532602 [wandb_init.py:init():1024] run resumed +2025-08-05 22:53:35,440 INFO MainThread:1532602 [wandb_init.py:init():1029] starting run threads in backend +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_console_start():2458] atexit reg +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2306] redirect: wrap_raw +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2375] Wrapping output streams. +2025-08-05 22:53:37,306 INFO MainThread:1532602 [wandb_run.py:_redirect():2398] Redirects installed. +2025-08-05 22:53:37,314 INFO MainThread:1532602 [wandb_init.py:init():1075] run started, returning control to user process +2025-08-05 22:54:37,423 INFO MsgRouterThr:1532602 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20250805_225334-ftnu6goz/run-ftnu6goz.wandb b/wandb/run-20250805_225334-ftnu6goz/run-ftnu6goz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6755ce073c30a23c8562b9feb9c52f07ef617367 Binary files /dev/null and b/wandb/run-20250805_225334-ftnu6goz/run-ftnu6goz.wandb differ