Upload folder using huggingface_hub
Browse files- psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/argv.txt +53 -0
- psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/checkpoints/ckpt_40000/model.safetensors +3 -0
- psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/envs.txt +17 -0
- psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/run_config.json +347 -0
- psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/argv.txt +53 -0
- psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/checkpoints/ckpt_40000/model.safetensors +3 -0
- psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/envs.txt +17 -0
- psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/run_config.json +348 -0
- psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/argv.txt +53 -0
- psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/checkpoints/ckpt_40000/model.safetensors +3 -0
- psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/envs.txt +17 -0
- psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/run_config.json +348 -0
- psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/argv.txt +53 -0
- psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/checkpoints/ckpt_40000/model.safetensors +3 -0
- psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/envs.txt +17 -0
- psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/run_config.json +348 -0
- psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/argv.txt +53 -0
- psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/checkpoints/ckpt_40000/model.safetensors +3 -0
- psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/envs.txt +17 -0
- psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/run_config.json +348 -0
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/argv.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scripts/train.py
|
| 2 |
+
finetune_simple_psi0_config
|
| 3 |
+
--seed=292285
|
| 4 |
+
--exp=g1wholebodybendpick-v0
|
| 5 |
+
--train.name=finetune
|
| 6 |
+
--train.data_parallel=ddp
|
| 7 |
+
--train.mixed_precision=bf16
|
| 8 |
+
--train.train_batch_size=32
|
| 9 |
+
--train.max_checkpoints_to_keep=5
|
| 10 |
+
--train.gradient_accumulation_steps=1
|
| 11 |
+
--train.learning_rate=1e-4
|
| 12 |
+
--train.max_training_steps=40000
|
| 13 |
+
--train.warmup_ratio=None
|
| 14 |
+
--train.warmup_steps=1000
|
| 15 |
+
--train.checkpointing_steps=5000
|
| 16 |
+
--train.validation_steps=500
|
| 17 |
+
--train.val_num_batches=20
|
| 18 |
+
--train.max_grad_norm=1.0
|
| 19 |
+
--train.lr_scheduler_type=cosine
|
| 20 |
+
--train.lr_scheduler_kwargs.weight_decay=1e-6
|
| 21 |
+
--train.lr_scheduler_kwargs.betas 0.95 0.999
|
| 22 |
+
--log.report_to=wandb
|
| 23 |
+
--data.root_dir=/data/jliu/data
|
| 24 |
+
--data.train-repo-ids=G1WholebodyBendPick-v0
|
| 25 |
+
--data.transform.repack.pad-action-dim=36
|
| 26 |
+
--data.transform.repack.pad-state-dim=36
|
| 27 |
+
--data.transform.field.stat-path=meta/stats_psi0.json
|
| 28 |
+
--data.transform.field.stat-action-key=action
|
| 29 |
+
--data.transform.field.stat-state-key=states
|
| 30 |
+
--data.transform.field.action_norm_type=bounds
|
| 31 |
+
--data.transform.field.no-use-norm-mask
|
| 32 |
+
--data.transform.field.normalize-state
|
| 33 |
+
--data.transform.field.pad-action-dim=36
|
| 34 |
+
--data.transform.field.pad-state-dim=36
|
| 35 |
+
--data.transform.model.img-aug
|
| 36 |
+
--data.transform.model.resize.size 180 320
|
| 37 |
+
--data.transform.model.center_crop.size 180 320
|
| 38 |
+
--model.model_name_or_path=/hfm/cache/checkpoints/hfm.pre.fast.mixed.1by1.2601091803.ckpt30k
|
| 39 |
+
--model.pretrained-action-header-path=/hfm/cache/checkpoints/postpre.1by130k.pad36.mixed.2601131206.ckpt34k
|
| 40 |
+
--model.noise-scheduler=flow
|
| 41 |
+
--model.train-diffusion-steps=1000
|
| 42 |
+
--model.n_conditions=0
|
| 43 |
+
--model.action-chunk-size=30
|
| 44 |
+
--model.action-dim=36
|
| 45 |
+
--model.action-exec-horizon=30
|
| 46 |
+
--model.observation-horizon=1
|
| 47 |
+
--model.odim=36
|
| 48 |
+
--model.view_feature_dim=2048
|
| 49 |
+
--model.no-tune-vlm
|
| 50 |
+
--model.no-use_film
|
| 51 |
+
--model.no-combined_temb
|
| 52 |
+
--model.rtc
|
| 53 |
+
--model.max-delay=8
|
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/checkpoints/ckpt_40000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9237021ab453b839b14a1e3bcd680102579aaa829d60e6bf91b0f702d653e879
|
| 3 |
+
size 6253648840
|
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/envs.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OMP_NUM_THREADS=32
|
| 2 |
+
HF_HOME=/data/cache
|
| 3 |
+
TORCH_HOME=/data/cache
|
| 4 |
+
HF_TOKEN=hf_...TiKa
|
| 5 |
+
HF_LEROBOT_HOME=/data/data/lerobot
|
| 6 |
+
WE_HOME=Not Set
|
| 7 |
+
DATA_HOME=/data/data
|
| 8 |
+
UV_CACHE_DIR=/data/cache
|
| 9 |
+
WANDB_API_KEY=90e...5c06
|
| 10 |
+
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
| 11 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
| 12 |
+
WORLD_SIZE=8
|
| 13 |
+
LOCAL_WORLD_SIZE=8
|
| 14 |
+
RANK=0
|
| 15 |
+
LOCAL_RANK=0
|
| 16 |
+
MASTER_ADDR=127.0.0.1
|
| 17 |
+
MASTER_PORT=29508
|
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/run_config.json
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exp": "g1wholebodybendpick-v0",
|
| 3 |
+
"seed": 292285,
|
| 4 |
+
"auto_tag_run": false,
|
| 5 |
+
"eval": false,
|
| 6 |
+
"debug": false,
|
| 7 |
+
"timestamp": "2603151312",
|
| 8 |
+
"log": {
|
| 9 |
+
"logging_dir": "logs",
|
| 10 |
+
"report_to": "wandb",
|
| 11 |
+
"log_freq": 100
|
| 12 |
+
},
|
| 13 |
+
"wandb": {
|
| 14 |
+
"project": "psi",
|
| 15 |
+
"entity": "jliu530-soochow-university",
|
| 16 |
+
"group": "finetune",
|
| 17 |
+
"id": "rs0xceim",
|
| 18 |
+
"name": "g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312",
|
| 19 |
+
"resume": "allow"
|
| 20 |
+
},
|
| 21 |
+
"train": {
|
| 22 |
+
"num_workers": 8,
|
| 23 |
+
"overfit_single_batch": false,
|
| 24 |
+
"name": "finetune",
|
| 25 |
+
"resume_from_checkpoint": null,
|
| 26 |
+
"skip_resumed_steps": false,
|
| 27 |
+
"hf_token": ".hf_token",
|
| 28 |
+
"lora": false,
|
| 29 |
+
"output_dir": ".runs",
|
| 30 |
+
"gradient_accumulation_steps": 1,
|
| 31 |
+
"mixed_precision": "bf16",
|
| 32 |
+
"max_grad_norm": 1.0,
|
| 33 |
+
"train_batch_size": 32,
|
| 34 |
+
"val_batch_size": 16,
|
| 35 |
+
"val_num_batches": 20,
|
| 36 |
+
"checkpointing_steps": 5000,
|
| 37 |
+
"max_checkpoints_to_keep": 5,
|
| 38 |
+
"validation_steps": 500,
|
| 39 |
+
"learning_rate": 0.0001,
|
| 40 |
+
"lr_scheduler_type": "cosine",
|
| 41 |
+
"lr_scheduler_kwargs": {
|
| 42 |
+
"betas": [
|
| 43 |
+
0.95,
|
| 44 |
+
0.999
|
| 45 |
+
],
|
| 46 |
+
"weight_decay": 1e-6,
|
| 47 |
+
"eps": 1e-8
|
| 48 |
+
},
|
| 49 |
+
"scheduler_specific_kwargs": {},
|
| 50 |
+
"data_parallel": "ddp",
|
| 51 |
+
"sharding_strategy": "full-shard",
|
| 52 |
+
"deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
|
| 53 |
+
"enable_gradient_checkpointing": true,
|
| 54 |
+
"enable_mixed_precision_training": true,
|
| 55 |
+
"reduce_in_full_precision": true,
|
| 56 |
+
"max_training_steps": 40000,
|
| 57 |
+
"num_train_epochs": null,
|
| 58 |
+
"warmup_steps": 1000,
|
| 59 |
+
"warmup_ratio": null
|
| 60 |
+
},
|
| 61 |
+
"data": {
|
| 62 |
+
"transform": {
|
| 63 |
+
"repack": {
|
| 64 |
+
"dataset_name": "simple",
|
| 65 |
+
"num_past_frames": 0,
|
| 66 |
+
"action_chunk_size": 30,
|
| 67 |
+
"pad_action_dim": 36,
|
| 68 |
+
"pad_state_dim": 36
|
| 69 |
+
},
|
| 70 |
+
"model": {
|
| 71 |
+
"resize": {
|
| 72 |
+
"size": [
|
| 73 |
+
180,
|
| 74 |
+
320
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
"center_crop": {
|
| 78 |
+
"size": [
|
| 79 |
+
180,
|
| 80 |
+
320
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
"color_jitter": {
|
| 84 |
+
"brightness": 0.2,
|
| 85 |
+
"contrast": [
|
| 86 |
+
0.8,
|
| 87 |
+
1.2
|
| 88 |
+
],
|
| 89 |
+
"saturation": [
|
| 90 |
+
0.8,
|
| 91 |
+
1.2
|
| 92 |
+
],
|
| 93 |
+
"hue": 0.05
|
| 94 |
+
},
|
| 95 |
+
"gaussian_noise": {
|
| 96 |
+
"mean": 0.0,
|
| 97 |
+
"std": 3.0,
|
| 98 |
+
"prob_skip": 0.1
|
| 99 |
+
},
|
| 100 |
+
"img_aug": true,
|
| 101 |
+
"adaptive_resize": false,
|
| 102 |
+
"img_sizes": {
|
| 103 |
+
"egodex": [
|
| 104 |
+
270,
|
| 105 |
+
480
|
| 106 |
+
],
|
| 107 |
+
"he": [
|
| 108 |
+
240,
|
| 109 |
+
320
|
| 110 |
+
]
|
| 111 |
+
}
|
| 112 |
+
},
|
| 113 |
+
"field": {
|
| 114 |
+
"stat_path": "meta/stats_psi0.json",
|
| 115 |
+
"action_norm_type": "bounds",
|
| 116 |
+
"stat_action_key": "action",
|
| 117 |
+
"stat_state_key": "states",
|
| 118 |
+
"use_norm_mask": false,
|
| 119 |
+
"action_norm_masks": [
|
| 120 |
+
true,
|
| 121 |
+
true,
|
| 122 |
+
true,
|
| 123 |
+
true,
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
false
|
| 127 |
+
],
|
| 128 |
+
"action_min": [
|
| 129 |
+
-0.13059291243553162,
|
| 130 |
+
-0.09108058363199234,
|
| 131 |
+
-0.0024844733998179436,
|
| 132 |
+
-0.20733775198459625,
|
| 133 |
+
-0.15850023925304413,
|
| 134 |
+
-0.17450474202632904,
|
| 135 |
+
-0.2997315526008606,
|
| 136 |
+
-0.015391111373901367,
|
| 137 |
+
-0.34571564197540283,
|
| 138 |
+
-0.4991437792778015,
|
| 139 |
+
0.0,
|
| 140 |
+
0.0,
|
| 141 |
+
0.0,
|
| 142 |
+
0.0,
|
| 143 |
+
-0.1015840545296669,
|
| 144 |
+
-0.06647031009197235,
|
| 145 |
+
-0.16578954458236694,
|
| 146 |
+
-0.14477218687534332,
|
| 147 |
+
-0.3665394186973572,
|
| 148 |
+
-0.28364259004592896,
|
| 149 |
+
-0.1775387078523636,
|
| 150 |
+
-0.48419490456581116,
|
| 151 |
+
-0.7551082968711853,
|
| 152 |
+
-0.2692946195602417,
|
| 153 |
+
-0.03164339065551758,
|
| 154 |
+
-0.00003876500704791397,
|
| 155 |
+
-0.3909206688404083,
|
| 156 |
+
0.0,
|
| 157 |
+
-0.04351663216948509,
|
| 158 |
+
-0.014203650876879692,
|
| 159 |
+
-0.049649015069007874,
|
| 160 |
+
0.44999998807907104,
|
| 161 |
+
0.0,
|
| 162 |
+
0.0,
|
| 163 |
+
0.0,
|
| 164 |
+
0.0
|
| 165 |
+
],
|
| 166 |
+
"action_max": [
|
| 167 |
+
0.08620641380548477,
|
| 168 |
+
0.13058121502399445,
|
| 169 |
+
0.22948147356510162,
|
| 170 |
+
0.020551620051264763,
|
| 171 |
+
0.005824880674481392,
|
| 172 |
+
0.010019193403422832,
|
| 173 |
+
8.43817247186962e-7,
|
| 174 |
+
0.39566752314567566,
|
| 175 |
+
0.0,
|
| 176 |
+
0.0,
|
| 177 |
+
0.4860266447067261,
|
| 178 |
+
1.0467392206192017,
|
| 179 |
+
0.6470075845718384,
|
| 180 |
+
0.8298009037971497,
|
| 181 |
+
0.03516175225377083,
|
| 182 |
+
0.11019192636013031,
|
| 183 |
+
0.04779902100563049,
|
| 184 |
+
0.12850724160671234,
|
| 185 |
+
0.000038688118365826085,
|
| 186 |
+
0.0012142359046265483,
|
| 187 |
+
0.000033343669201713055,
|
| 188 |
+
0.002679983852431178,
|
| 189 |
+
0.00041063950629904866,
|
| 190 |
+
0.1973484456539154,
|
| 191 |
+
0.2633756697177887,
|
| 192 |
+
0.34943076968193054,
|
| 193 |
+
0.0012102096807211637,
|
| 194 |
+
0.8342975974082947,
|
| 195 |
+
0.31870752573013306,
|
| 196 |
+
0.45533719658851624,
|
| 197 |
+
0.15729404985904694,
|
| 198 |
+
0.75,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0
|
| 203 |
+
],
|
| 204 |
+
"state_min": [
|
| 205 |
+
-0.13899999856948853,
|
| 206 |
+
-0.09099991619586945,
|
| 207 |
+
-5.989517215532203e-11,
|
| 208 |
+
-0.20900000631809235,
|
| 209 |
+
-0.1589999943971634,
|
| 210 |
+
-0.210999995470047,
|
| 211 |
+
-0.3009999990463257,
|
| 212 |
+
-0.01600000075995922,
|
| 213 |
+
-0.1860000044107437,
|
| 214 |
+
-0.6940000057220459,
|
| 215 |
+
0.0,
|
| 216 |
+
0.0,
|
| 217 |
+
0.0,
|
| 218 |
+
0.0,
|
| 219 |
+
-0.1019991859793663,
|
| 220 |
+
-0.06899992376565933,
|
| 221 |
+
-0.16899999976158142,
|
| 222 |
+
-0.14499999582767487,
|
| 223 |
+
-0.3709999918937683,
|
| 224 |
+
-0.28700000047683716,
|
| 225 |
+
-0.17800045013427734,
|
| 226 |
+
-0.4869999885559082,
|
| 227 |
+
-0.7599999904632568,
|
| 228 |
+
-0.27300000190734863,
|
| 229 |
+
-0.029999999329447746,
|
| 230 |
+
0.0,
|
| 231 |
+
-0.39100033044815063,
|
| 232 |
+
-0.0010000000474974513,
|
| 233 |
+
0.0,
|
| 234 |
+
-0.15000000596046448,
|
| 235 |
+
0.0,
|
| 236 |
+
0.44999998807907104,
|
| 237 |
+
0.0,
|
| 238 |
+
0.0,
|
| 239 |
+
0.0,
|
| 240 |
+
0.0
|
| 241 |
+
],
|
| 242 |
+
"state_max": [
|
| 243 |
+
0.0860000029206276,
|
| 244 |
+
0.2720000147819519,
|
| 245 |
+
0.23100000619888306,
|
| 246 |
+
0.0,
|
| 247 |
+
6.510182259944486e-8,
|
| 248 |
+
0.0,
|
| 249 |
+
0.0,
|
| 250 |
+
0.5550000071525574,
|
| 251 |
+
0.02100004442036152,
|
| 252 |
+
0.0,
|
| 253 |
+
0.5429999828338623,
|
| 254 |
+
1.13100004196167,
|
| 255 |
+
0.5770000219345093,
|
| 256 |
+
0.9580000042915344,
|
| 257 |
+
0.07141251862049103,
|
| 258 |
+
0.10899999737739563,
|
| 259 |
+
0.04699999839067459,
|
| 260 |
+
0.13600000739097595,
|
| 261 |
+
0.0,
|
| 262 |
+
0.003000000026077032,
|
| 263 |
+
0.0,
|
| 264 |
+
0.009999999776482582,
|
| 265 |
+
0.0020000000949949026,
|
| 266 |
+
0.2029999941587448,
|
| 267 |
+
0.2759999930858612,
|
| 268 |
+
0.3499999940395355,
|
| 269 |
+
0.003000000026077032,
|
| 270 |
+
0.8370000123977661,
|
| 271 |
+
0.0,
|
| 272 |
+
0.0,
|
| 273 |
+
0.0,
|
| 274 |
+
0.75,
|
| 275 |
+
0.0,
|
| 276 |
+
0.0,
|
| 277 |
+
0.0,
|
| 278 |
+
0.0
|
| 279 |
+
],
|
| 280 |
+
"normalize_state": true,
|
| 281 |
+
"pad_action_dim": 36,
|
| 282 |
+
"pad_state_dim": 36
|
| 283 |
+
}
|
| 284 |
+
},
|
| 285 |
+
"root_dir": "/data/jliu/data",
|
| 286 |
+
"train_repo_ids": [
|
| 287 |
+
"G1WholebodyBendPick-v0"
|
| 288 |
+
],
|
| 289 |
+
"val_repo_ids": [
|
| 290 |
+
"G1WholebodyBendPick-v0"
|
| 291 |
+
]
|
| 292 |
+
},
|
| 293 |
+
"model": {
|
| 294 |
+
"resnet_store_path": null,
|
| 295 |
+
"pretrained_action_header_path": "/hfm/cache/checkpoints/postpre.1by130k.pad36.mixed.2601131206.ckpt34k",
|
| 296 |
+
"rtc": true,
|
| 297 |
+
"max_delay": 8,
|
| 298 |
+
"action_dim": 36,
|
| 299 |
+
"action_chunk_size": 30,
|
| 300 |
+
"action_exec_horizon": 30,
|
| 301 |
+
"observation_horizon": 1,
|
| 302 |
+
"img_chunk": 1,
|
| 303 |
+
"n_cams": 1,
|
| 304 |
+
"use_obs": "add_token",
|
| 305 |
+
"dropout": 0.1,
|
| 306 |
+
"noise_scheduler": "flow",
|
| 307 |
+
"train_diffusion_steps": 1000,
|
| 308 |
+
"eval_diffusion_steps": 10,
|
| 309 |
+
"share_cam_features": false,
|
| 310 |
+
"early_fusion": false,
|
| 311 |
+
"odim": 36,
|
| 312 |
+
"n_conditions": 0,
|
| 313 |
+
"token_fusion": "concat",
|
| 314 |
+
"loss_w": [
|
| 315 |
+
0.1,
|
| 316 |
+
0.2,
|
| 317 |
+
0.1
|
| 318 |
+
],
|
| 319 |
+
"time_dim": 256,
|
| 320 |
+
"hidden_dim": 1536,
|
| 321 |
+
"num_blocks": 6,
|
| 322 |
+
"dim_feedforward": 2048,
|
| 323 |
+
"nhead": 24,
|
| 324 |
+
"activation": "gelu",
|
| 325 |
+
"view_feature_dim": 2048,
|
| 326 |
+
"use_film": false,
|
| 327 |
+
"combined_temb": false,
|
| 328 |
+
"use_dit": false,
|
| 329 |
+
"weight_decay": 0.01,
|
| 330 |
+
"model_name_or_path": "/hfm/cache/checkpoints/hfm.pre.fast.mixed.1by1.2601091803.ckpt30k",
|
| 331 |
+
"vlm_ckpt_step": null,
|
| 332 |
+
"tune_vlm": false,
|
| 333 |
+
"tune_mm_llm": false,
|
| 334 |
+
"tune_mm_vision": false,
|
| 335 |
+
"tune_mm_mlp": false,
|
| 336 |
+
"gradient_checkpointing": true,
|
| 337 |
+
"lang_backbone_lr": 0.00001,
|
| 338 |
+
"mm_projector_lr": 0.00001,
|
| 339 |
+
"vision_tower_lr": 1e-6,
|
| 340 |
+
"optim": "adamw_torch",
|
| 341 |
+
"model_max_length": 4096,
|
| 342 |
+
"data_flatten": true,
|
| 343 |
+
"data_packing": true,
|
| 344 |
+
"max_pixels": 451584,
|
| 345 |
+
"min_pixels": 12544
|
| 346 |
+
}
|
| 347 |
+
}
|
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/argv.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scripts/train.py
|
| 2 |
+
finetune_simple_psi0_config
|
| 3 |
+
--seed=292285
|
| 4 |
+
--exp=g1wholebodyhandover-v0
|
| 5 |
+
--train.name=finetune
|
| 6 |
+
--train.data_parallel=ddp
|
| 7 |
+
--train.mixed_precision=bf16
|
| 8 |
+
--train.train_batch_size=16
|
| 9 |
+
--train.max_checkpoints_to_keep=5
|
| 10 |
+
--train.gradient_accumulation_steps=1
|
| 11 |
+
--train.learning_rate=1e-4
|
| 12 |
+
--train.max_training_steps=40000
|
| 13 |
+
--train.warmup_ratio=None
|
| 14 |
+
--train.warmup_steps=1000
|
| 15 |
+
--train.checkpointing_steps=10000
|
| 16 |
+
--train.validation_steps=500
|
| 17 |
+
--train.val_num_batches=20
|
| 18 |
+
--train.max_grad_norm=1.0
|
| 19 |
+
--train.lr_scheduler_type=cosine
|
| 20 |
+
--train.lr_scheduler_kwargs.weight_decay=1e-6
|
| 21 |
+
--train.lr_scheduler_kwargs.betas 0.95 0.999
|
| 22 |
+
--log.report_to=wandb
|
| 23 |
+
--data.root_dir=/data/jliu/data
|
| 24 |
+
--data.train-repo-ids=G1WholebodyHandover-v0
|
| 25 |
+
--data.transform.repack.pad-action-dim=36
|
| 26 |
+
--data.transform.repack.pad-state-dim=36
|
| 27 |
+
--data.transform.field.stat-path=meta/stats_psi0.json
|
| 28 |
+
--data.transform.field.stat-action-key=action
|
| 29 |
+
--data.transform.field.stat-state-key=states
|
| 30 |
+
--data.transform.field.action_norm_type=bounds
|
| 31 |
+
--data.transform.field.no-use-norm-mask
|
| 32 |
+
--data.transform.field.normalize-state
|
| 33 |
+
--data.transform.field.pad-action-dim=36
|
| 34 |
+
--data.transform.field.pad-state-dim=36
|
| 35 |
+
--data.transform.model.img-aug
|
| 36 |
+
--data.transform.model.resize.size 180 320
|
| 37 |
+
--data.transform.model.center_crop.size 180 320
|
| 38 |
+
--model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
|
| 39 |
+
--model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
|
| 40 |
+
--model.noise-scheduler=flow
|
| 41 |
+
--model.train-diffusion-steps=1000
|
| 42 |
+
--model.n_conditions=0
|
| 43 |
+
--model.action-chunk-size=30
|
| 44 |
+
--model.action-dim=36
|
| 45 |
+
--model.action-exec-horizon=30
|
| 46 |
+
--model.observation-horizon=1
|
| 47 |
+
--model.odim=36
|
| 48 |
+
--model.view_feature_dim=2048
|
| 49 |
+
--model.no-tune-vlm
|
| 50 |
+
--model.no-use_film
|
| 51 |
+
--model.no-combined_temb
|
| 52 |
+
--model.rtc
|
| 53 |
+
--model.max-delay=8
|
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/checkpoints/ckpt_40000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:085c4b290a2f39ff0f75b0d72dd23bc4d59d48198122723e7a5eb6f27a706a0f
|
| 3 |
+
size 6253648840
|
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/envs.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OMP_NUM_THREADS=32
|
| 2 |
+
HF_HOME=/data/cache
|
| 3 |
+
TORCH_HOME=/data/cache
|
| 4 |
+
HF_TOKEN=hf_...TiKa
|
| 5 |
+
HF_LEROBOT_HOME=/data/data/lerobot
|
| 6 |
+
WE_HOME=Not Set
|
| 7 |
+
DATA_HOME=/data/data
|
| 8 |
+
UV_CACHE_DIR=/data/cache
|
| 9 |
+
WANDB_API_KEY=90e...5c06
|
| 10 |
+
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
| 11 |
+
CUDA_VISIBLE_DEVICES=4,5,6,7
|
| 12 |
+
WORLD_SIZE=4
|
| 13 |
+
LOCAL_WORLD_SIZE=4
|
| 14 |
+
RANK=0
|
| 15 |
+
LOCAL_RANK=0
|
| 16 |
+
MASTER_ADDR=127.0.0.1
|
| 17 |
+
MASTER_PORT=29509
|
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/run_config.json
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exp": "g1wholebodyhandover-v0",
|
| 3 |
+
"seed": 292285,
|
| 4 |
+
"auto_tag_run": false,
|
| 5 |
+
"eval": false,
|
| 6 |
+
"debug": false,
|
| 7 |
+
"timestamp": "2604071507",
|
| 8 |
+
"log": {
|
| 9 |
+
"logging_dir": "logs",
|
| 10 |
+
"report_to": "wandb",
|
| 11 |
+
"log_freq": 100
|
| 12 |
+
},
|
| 13 |
+
"wandb": {
|
| 14 |
+
"project": "psi",
|
| 15 |
+
"entity": "jliu530-soochow-university",
|
| 16 |
+
"group": "finetune",
|
| 17 |
+
"id": "0etggzyx",
|
| 18 |
+
"name": "g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507",
|
| 19 |
+
"resume": "allow"
|
| 20 |
+
},
|
| 21 |
+
"train": {
|
| 22 |
+
"num_workers": 8,
|
| 23 |
+
"overfit_single_batch": false,
|
| 24 |
+
"name": "finetune",
|
| 25 |
+
"resume_from_checkpoint": null,
|
| 26 |
+
"skip_resumed_steps": false,
|
| 27 |
+
"hf_token": ".hf_token",
|
| 28 |
+
"lora": false,
|
| 29 |
+
"output_dir": ".runs",
|
| 30 |
+
"gradient_accumulation_steps": 1,
|
| 31 |
+
"mixed_precision": "bf16",
|
| 32 |
+
"max_grad_norm": 1.0,
|
| 33 |
+
"optimizer_foreach": null,
|
| 34 |
+
"train_batch_size": 16,
|
| 35 |
+
"val_batch_size": 16,
|
| 36 |
+
"val_num_batches": 20,
|
| 37 |
+
"checkpointing_steps": 10000,
|
| 38 |
+
"max_checkpoints_to_keep": 5,
|
| 39 |
+
"validation_steps": 500,
|
| 40 |
+
"learning_rate": 0.0001,
|
| 41 |
+
"lr_scheduler_type": "cosine",
|
| 42 |
+
"lr_scheduler_kwargs": {
|
| 43 |
+
"betas": [
|
| 44 |
+
0.95,
|
| 45 |
+
0.999
|
| 46 |
+
],
|
| 47 |
+
"weight_decay": 1e-6,
|
| 48 |
+
"eps": 1e-8
|
| 49 |
+
},
|
| 50 |
+
"scheduler_specific_kwargs": {},
|
| 51 |
+
"data_parallel": "ddp",
|
| 52 |
+
"sharding_strategy": "full-shard",
|
| 53 |
+
"deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
|
| 54 |
+
"enable_gradient_checkpointing": true,
|
| 55 |
+
"enable_mixed_precision_training": true,
|
| 56 |
+
"reduce_in_full_precision": true,
|
| 57 |
+
"max_training_steps": 40000,
|
| 58 |
+
"num_train_epochs": null,
|
| 59 |
+
"warmup_steps": 1000,
|
| 60 |
+
"warmup_ratio": null
|
| 61 |
+
},
|
| 62 |
+
"data": {
|
| 63 |
+
"transform": {
|
| 64 |
+
"repack": {
|
| 65 |
+
"dataset_name": "simple",
|
| 66 |
+
"num_past_frames": 0,
|
| 67 |
+
"action_chunk_size": 30,
|
| 68 |
+
"pad_action_dim": 36,
|
| 69 |
+
"pad_state_dim": 36
|
| 70 |
+
},
|
| 71 |
+
"model": {
|
| 72 |
+
"resize": {
|
| 73 |
+
"size": [
|
| 74 |
+
180,
|
| 75 |
+
320
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"center_crop": {
|
| 79 |
+
"size": [
|
| 80 |
+
180,
|
| 81 |
+
320
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"color_jitter": {
|
| 85 |
+
"brightness": 0.2,
|
| 86 |
+
"contrast": [
|
| 87 |
+
0.8,
|
| 88 |
+
1.2
|
| 89 |
+
],
|
| 90 |
+
"saturation": [
|
| 91 |
+
0.8,
|
| 92 |
+
1.2
|
| 93 |
+
],
|
| 94 |
+
"hue": 0.05
|
| 95 |
+
},
|
| 96 |
+
"gaussian_noise": {
|
| 97 |
+
"mean": 0.0,
|
| 98 |
+
"std": 3.0,
|
| 99 |
+
"prob_skip": 0.1
|
| 100 |
+
},
|
| 101 |
+
"img_aug": true,
|
| 102 |
+
"adaptive_resize": false,
|
| 103 |
+
"img_sizes": {
|
| 104 |
+
"egodex": [
|
| 105 |
+
270,
|
| 106 |
+
480
|
| 107 |
+
],
|
| 108 |
+
"he": [
|
| 109 |
+
240,
|
| 110 |
+
320
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
"field": {
|
| 115 |
+
"stat_path": "meta/stats_psi0.json",
|
| 116 |
+
"action_norm_type": "bounds",
|
| 117 |
+
"stat_action_key": "action",
|
| 118 |
+
"stat_state_key": "states",
|
| 119 |
+
"use_norm_mask": false,
|
| 120 |
+
"action_norm_masks": [
|
| 121 |
+
true,
|
| 122 |
+
true,
|
| 123 |
+
true,
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
true,
|
| 127 |
+
false
|
| 128 |
+
],
|
| 129 |
+
"action_min": [
|
| 130 |
+
-0.5,
|
| 131 |
+
-1.1095792134107943e-16,
|
| 132 |
+
-1.1095792134107943e-16,
|
| 133 |
+
-1.5,
|
| 134 |
+
-1.5,
|
| 135 |
+
-1.5,
|
| 136 |
+
-1.5,
|
| 137 |
+
-0.5,
|
| 138 |
+
-0.699999988079071,
|
| 139 |
+
-0.699999988079071,
|
| 140 |
+
-2.2166350627321588e-16,
|
| 141 |
+
-2.2166350627321588e-16,
|
| 142 |
+
0.0,
|
| 143 |
+
-2.2166350627321588e-16,
|
| 144 |
+
-0.47567468881607056,
|
| 145 |
+
0.1900009959936142,
|
| 146 |
+
-0.512170135974884,
|
| 147 |
+
-0.6265152096748352,
|
| 148 |
+
-0.5008617043495178,
|
| 149 |
+
-0.8220608830451965,
|
| 150 |
+
-0.9223371148109436,
|
| 151 |
+
-0.49507391452789307,
|
| 152 |
+
-0.3437551259994507,
|
| 153 |
+
-0.6871383190155029,
|
| 154 |
+
-0.7637607455253601,
|
| 155 |
+
-0.7568023204803467,
|
| 156 |
+
-0.576077401638031,
|
| 157 |
+
-0.4588268995285034,
|
| 158 |
+
-0.13876836001873016,
|
| 159 |
+
-0.10360867530107498,
|
| 160 |
+
-0.47856518626213074,
|
| 161 |
+
0.7400000095367432,
|
| 162 |
+
-0.5,
|
| 163 |
+
-0.5,
|
| 164 |
+
-0.26161932945251465,
|
| 165 |
+
-0.06718750298023224
|
| 166 |
+
],
|
| 167 |
+
"action_max": [
|
| 168 |
+
0.5,
|
| 169 |
+
0.699999988079071,
|
| 170 |
+
0.699999988079071,
|
| 171 |
+
0.0,
|
| 172 |
+
2.2146225653890418e-16,
|
| 173 |
+
2.2146225653890418e-16,
|
| 174 |
+
2.2146225653890418e-16,
|
| 175 |
+
1.2266071310501902e-19,
|
| 176 |
+
1.1078670818917075e-16,
|
| 177 |
+
1.1078670818917075e-16,
|
| 178 |
+
1.5,
|
| 179 |
+
1.5,
|
| 180 |
+
0.6000000238418579,
|
| 181 |
+
1.5,
|
| 182 |
+
0.2472410947084427,
|
| 183 |
+
0.7092280983924866,
|
| 184 |
+
1.2571598291397095,
|
| 185 |
+
0.42311304807662964,
|
| 186 |
+
0.8564174771308899,
|
| 187 |
+
0.5002086162567139,
|
| 188 |
+
0.5172277092933655,
|
| 189 |
+
0.16140148043632507,
|
| 190 |
+
-0.1900009959936142,
|
| 191 |
+
0.5362864136695862,
|
| 192 |
+
0.5715147256851196,
|
| 193 |
+
0.5002322196960449,
|
| 194 |
+
0.566592276096344,
|
| 195 |
+
0.6392397880554199,
|
| 196 |
+
0.1580466777086258,
|
| 197 |
+
0.2233395129442215,
|
| 198 |
+
0.2582152187824249,
|
| 199 |
+
0.7400000095367432,
|
| 200 |
+
0.5,
|
| 201 |
+
0.5,
|
| 202 |
+
0.3454970121383667,
|
| 203 |
+
0.2899305522441864
|
| 204 |
+
],
|
| 205 |
+
"state_min": [
|
| 206 |
+
-0.5564982891082764,
|
| 207 |
+
-0.48307520151138306,
|
| 208 |
+
-0.0005447770818136632,
|
| 209 |
+
-0.8388738632202148,
|
| 210 |
+
-1.3970016241073608,
|
| 211 |
+
-0.8296014666557312,
|
| 212 |
+
-1.4599460363388062,
|
| 213 |
+
-0.5806806683540344,
|
| 214 |
+
-0.5149835348129272,
|
| 215 |
+
-0.6775947213172913,
|
| 216 |
+
-0.001480442238971591,
|
| 217 |
+
-0.0002713006397243589,
|
| 218 |
+
-0.000914653530344367,
|
| 219 |
+
-0.00019419840828049928,
|
| 220 |
+
-0.4206617772579193,
|
| 221 |
+
0.13972464203834534,
|
| 222 |
+
-0.546251654624939,
|
| 223 |
+
-0.5596316456794739,
|
| 224 |
+
-0.4764360189437866,
|
| 225 |
+
-0.7253566384315491,
|
| 226 |
+
-0.9443663954734802,
|
| 227 |
+
-0.4381798803806305,
|
| 228 |
+
-0.3338131606578827,
|
| 229 |
+
-0.667724072933197,
|
| 230 |
+
-0.6881827116012573,
|
| 231 |
+
-0.7544379830360413,
|
| 232 |
+
-0.5189417600631714,
|
| 233 |
+
-0.4484957158565521,
|
| 234 |
+
-0.13709338009357452,
|
| 235 |
+
-0.07360810041427612,
|
| 236 |
+
-0.4748336970806122,
|
| 237 |
+
0.7400000095367432,
|
| 238 |
+
0.0,
|
| 239 |
+
0.0,
|
| 240 |
+
0.0,
|
| 241 |
+
0.0
|
| 242 |
+
],
|
| 243 |
+
"state_max": [
|
| 244 |
+
0.43566983938217163,
|
| 245 |
+
0.3739710748195648,
|
| 246 |
+
0.6575677990913391,
|
| 247 |
+
0.004060761071741581,
|
| 248 |
+
0.0005700877518393099,
|
| 249 |
+
0.0004725759499706328,
|
| 250 |
+
0.00010080631182063371,
|
| 251 |
+
0.00001310737025050912,
|
| 252 |
+
0.21882089972496033,
|
| 253 |
+
0.0005271440604701638,
|
| 254 |
+
0.530737042427063,
|
| 255 |
+
1.4406861066818237,
|
| 256 |
+
1.4605127573013306,
|
| 257 |
+
1.4595911502838135,
|
| 258 |
+
0.2663630545139313,
|
| 259 |
+
0.657910943031311,
|
| 260 |
+
1.2515853643417358,
|
| 261 |
+
0.502498209476471,
|
| 262 |
+
0.8292973637580872,
|
| 263 |
+
0.5248894095420837,
|
| 264 |
+
0.4653257131576538,
|
| 265 |
+
0.18638382852077484,
|
| 266 |
+
-0.16696421802043915,
|
| 267 |
+
0.49318820238113403,
|
| 268 |
+
0.6363148093223572,
|
| 269 |
+
0.45773962140083313,
|
| 270 |
+
0.6238265037536621,
|
| 271 |
+
0.653800904750824,
|
| 272 |
+
0.1436084657907486,
|
| 273 |
+
0.25937986373901367,
|
| 274 |
+
0.26422709226608276,
|
| 275 |
+
0.7400000095367432,
|
| 276 |
+
0.0,
|
| 277 |
+
0.0,
|
| 278 |
+
0.0,
|
| 279 |
+
0.0
|
| 280 |
+
],
|
| 281 |
+
"normalize_state": true,
|
| 282 |
+
"pad_action_dim": 36,
|
| 283 |
+
"pad_state_dim": 36
|
| 284 |
+
}
|
| 285 |
+
},
|
| 286 |
+
"root_dir": "/data/jliu/data",
|
| 287 |
+
"train_repo_ids": [
|
| 288 |
+
"G1WholebodyHandover-v0"
|
| 289 |
+
],
|
| 290 |
+
"val_repo_ids": [
|
| 291 |
+
"G1WholebodyHandover-v0"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
"model": {
|
| 295 |
+
"resnet_store_path": null,
|
| 296 |
+
"pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
|
| 297 |
+
"rtc": true,
|
| 298 |
+
"max_delay": 8,
|
| 299 |
+
"action_dim": 36,
|
| 300 |
+
"action_chunk_size": 30,
|
| 301 |
+
"action_exec_horizon": 30,
|
| 302 |
+
"observation_horizon": 1,
|
| 303 |
+
"img_chunk": 1,
|
| 304 |
+
"n_cams": 1,
|
| 305 |
+
"use_obs": "add_token",
|
| 306 |
+
"dropout": 0.1,
|
| 307 |
+
"noise_scheduler": "flow",
|
| 308 |
+
"train_diffusion_steps": 1000,
|
| 309 |
+
"eval_diffusion_steps": 10,
|
| 310 |
+
"share_cam_features": false,
|
| 311 |
+
"early_fusion": false,
|
| 312 |
+
"odim": 36,
|
| 313 |
+
"n_conditions": 0,
|
| 314 |
+
"token_fusion": "concat",
|
| 315 |
+
"loss_w": [
|
| 316 |
+
0.1,
|
| 317 |
+
0.2,
|
| 318 |
+
0.1
|
| 319 |
+
],
|
| 320 |
+
"time_dim": 256,
|
| 321 |
+
"hidden_dim": 1536,
|
| 322 |
+
"num_blocks": 6,
|
| 323 |
+
"dim_feedforward": 2048,
|
| 324 |
+
"nhead": 24,
|
| 325 |
+
"activation": "gelu",
|
| 326 |
+
"view_feature_dim": 2048,
|
| 327 |
+
"use_film": false,
|
| 328 |
+
"combined_temb": false,
|
| 329 |
+
"use_dit": false,
|
| 330 |
+
"weight_decay": 0.01,
|
| 331 |
+
"model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
|
| 332 |
+
"vlm_ckpt_step": null,
|
| 333 |
+
"tune_vlm": false,
|
| 334 |
+
"tune_mm_llm": false,
|
| 335 |
+
"tune_mm_vision": false,
|
| 336 |
+
"tune_mm_mlp": false,
|
| 337 |
+
"gradient_checkpointing": true,
|
| 338 |
+
"lang_backbone_lr": 0.00001,
|
| 339 |
+
"mm_projector_lr": 0.00001,
|
| 340 |
+
"vision_tower_lr": 1e-6,
|
| 341 |
+
"optim": "adamw_torch",
|
| 342 |
+
"model_max_length": 4096,
|
| 343 |
+
"data_flatten": true,
|
| 344 |
+
"data_packing": true,
|
| 345 |
+
"max_pixels": 451584,
|
| 346 |
+
"min_pixels": 12544
|
| 347 |
+
}
|
| 348 |
+
}
|
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/argv.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scripts/train.py
|
| 2 |
+
finetune_simple_psi0_config
|
| 3 |
+
--seed=292285
|
| 4 |
+
--exp=g1wholebodylocomotionpickbetweentablesteleop-v0
|
| 5 |
+
--train.name=finetune
|
| 6 |
+
--train.data_parallel=ddp
|
| 7 |
+
--train.mixed_precision=bf16
|
| 8 |
+
--train.train_batch_size=16
|
| 9 |
+
--train.max_checkpoints_to_keep=5
|
| 10 |
+
--train.gradient_accumulation_steps=1
|
| 11 |
+
--train.learning_rate=1e-4
|
| 12 |
+
--train.max_training_steps=40000
|
| 13 |
+
--train.warmup_ratio=None
|
| 14 |
+
--train.warmup_steps=1000
|
| 15 |
+
--train.checkpointing_steps=10000
|
| 16 |
+
--train.validation_steps=500
|
| 17 |
+
--train.val_num_batches=20
|
| 18 |
+
--train.max_grad_norm=1.0
|
| 19 |
+
--train.lr_scheduler_type=cosine
|
| 20 |
+
--train.lr_scheduler_kwargs.weight_decay=1e-6
|
| 21 |
+
--train.lr_scheduler_kwargs.betas 0.95 0.999
|
| 22 |
+
--log.report_to=wandb
|
| 23 |
+
--data.root_dir=/data/jliu/data
|
| 24 |
+
--data.train-repo-ids=G1WholebodyLocomotionPickBetweenTablesTeleop-v0
|
| 25 |
+
--data.transform.repack.pad-action-dim=36
|
| 26 |
+
--data.transform.repack.pad-state-dim=36
|
| 27 |
+
--data.transform.field.stat-path=meta/stats_psi0.json
|
| 28 |
+
--data.transform.field.stat-action-key=action
|
| 29 |
+
--data.transform.field.stat-state-key=states
|
| 30 |
+
--data.transform.field.action_norm_type=bounds
|
| 31 |
+
--data.transform.field.no-use-norm-mask
|
| 32 |
+
--data.transform.field.normalize-state
|
| 33 |
+
--data.transform.field.pad-action-dim=36
|
| 34 |
+
--data.transform.field.pad-state-dim=36
|
| 35 |
+
--data.transform.model.img-aug
|
| 36 |
+
--data.transform.model.resize.size 180 320
|
| 37 |
+
--data.transform.model.center_crop.size 180 320
|
| 38 |
+
--model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
|
| 39 |
+
--model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
|
| 40 |
+
--model.noise-scheduler=flow
|
| 41 |
+
--model.train-diffusion-steps=1000
|
| 42 |
+
--model.n_conditions=0
|
| 43 |
+
--model.action-chunk-size=30
|
| 44 |
+
--model.action-dim=36
|
| 45 |
+
--model.action-exec-horizon=30
|
| 46 |
+
--model.observation-horizon=1
|
| 47 |
+
--model.odim=36
|
| 48 |
+
--model.view_feature_dim=2048
|
| 49 |
+
--model.no-tune-vlm
|
| 50 |
+
--model.no-use_film
|
| 51 |
+
--model.no-combined_temb
|
| 52 |
+
--model.rtc
|
| 53 |
+
--model.max-delay=8
|
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/checkpoints/ckpt_40000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c69c2ae866cdff4c51aee3aa2948b6400e8f001e73d38cbff2b364e41b4cf07b
|
| 3 |
+
size 6253648840
|
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/envs.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OMP_NUM_THREADS=32
|
| 2 |
+
HF_HOME=/data/cache
|
| 3 |
+
TORCH_HOME=/data/cache
|
| 4 |
+
HF_TOKEN=hf_...TiKa
|
| 5 |
+
HF_LEROBOT_HOME=/data/data/lerobot
|
| 6 |
+
WE_HOME=Not Set
|
| 7 |
+
DATA_HOME=/data/data
|
| 8 |
+
UV_CACHE_DIR=/data/cache
|
| 9 |
+
WANDB_API_KEY=90e...5c06
|
| 10 |
+
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
| 11 |
+
CUDA_VISIBLE_DEVICES=3,4,5,7
|
| 12 |
+
WORLD_SIZE=4
|
| 13 |
+
LOCAL_WORLD_SIZE=4
|
| 14 |
+
RANK=0
|
| 15 |
+
LOCAL_RANK=0
|
| 16 |
+
MASTER_ADDR=127.0.0.1
|
| 17 |
+
MASTER_PORT=29509
|
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/run_config.json
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exp": "g1wholebodylocomotionpickbetweentablesteleop-v0",
|
| 3 |
+
"seed": 292285,
|
| 4 |
+
"auto_tag_run": false,
|
| 5 |
+
"eval": false,
|
| 6 |
+
"debug": false,
|
| 7 |
+
"timestamp": "2604081126",
|
| 8 |
+
"log": {
|
| 9 |
+
"logging_dir": "logs",
|
| 10 |
+
"report_to": "wandb",
|
| 11 |
+
"log_freq": 100
|
| 12 |
+
},
|
| 13 |
+
"wandb": {
|
| 14 |
+
"project": "psi",
|
| 15 |
+
"entity": "jliu530-soochow-university",
|
| 16 |
+
"group": "finetune",
|
| 17 |
+
"id": "wmpgbes7",
|
| 18 |
+
"name": "g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126",
|
| 19 |
+
"resume": "allow"
|
| 20 |
+
},
|
| 21 |
+
"train": {
|
| 22 |
+
"num_workers": 8,
|
| 23 |
+
"overfit_single_batch": false,
|
| 24 |
+
"name": "finetune",
|
| 25 |
+
"resume_from_checkpoint": null,
|
| 26 |
+
"skip_resumed_steps": false,
|
| 27 |
+
"hf_token": ".hf_token",
|
| 28 |
+
"lora": false,
|
| 29 |
+
"output_dir": ".runs",
|
| 30 |
+
"gradient_accumulation_steps": 1,
|
| 31 |
+
"mixed_precision": "bf16",
|
| 32 |
+
"max_grad_norm": 1.0,
|
| 33 |
+
"optimizer_foreach": null,
|
| 34 |
+
"train_batch_size": 16,
|
| 35 |
+
"val_batch_size": 16,
|
| 36 |
+
"val_num_batches": 20,
|
| 37 |
+
"checkpointing_steps": 10000,
|
| 38 |
+
"max_checkpoints_to_keep": 5,
|
| 39 |
+
"validation_steps": 500,
|
| 40 |
+
"learning_rate": 0.0001,
|
| 41 |
+
"lr_scheduler_type": "cosine",
|
| 42 |
+
"lr_scheduler_kwargs": {
|
| 43 |
+
"betas": [
|
| 44 |
+
0.95,
|
| 45 |
+
0.999
|
| 46 |
+
],
|
| 47 |
+
"weight_decay": 1e-6,
|
| 48 |
+
"eps": 1e-8
|
| 49 |
+
},
|
| 50 |
+
"scheduler_specific_kwargs": {},
|
| 51 |
+
"data_parallel": "ddp",
|
| 52 |
+
"sharding_strategy": "full-shard",
|
| 53 |
+
"deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
|
| 54 |
+
"enable_gradient_checkpointing": true,
|
| 55 |
+
"enable_mixed_precision_training": true,
|
| 56 |
+
"reduce_in_full_precision": true,
|
| 57 |
+
"max_training_steps": 40000,
|
| 58 |
+
"num_train_epochs": null,
|
| 59 |
+
"warmup_steps": 1000,
|
| 60 |
+
"warmup_ratio": null
|
| 61 |
+
},
|
| 62 |
+
"data": {
|
| 63 |
+
"transform": {
|
| 64 |
+
"repack": {
|
| 65 |
+
"dataset_name": "simple",
|
| 66 |
+
"num_past_frames": 0,
|
| 67 |
+
"action_chunk_size": 30,
|
| 68 |
+
"pad_action_dim": 36,
|
| 69 |
+
"pad_state_dim": 36
|
| 70 |
+
},
|
| 71 |
+
"model": {
|
| 72 |
+
"resize": {
|
| 73 |
+
"size": [
|
| 74 |
+
180,
|
| 75 |
+
320
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"center_crop": {
|
| 79 |
+
"size": [
|
| 80 |
+
180,
|
| 81 |
+
320
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"color_jitter": {
|
| 85 |
+
"brightness": 0.2,
|
| 86 |
+
"contrast": [
|
| 87 |
+
0.8,
|
| 88 |
+
1.2
|
| 89 |
+
],
|
| 90 |
+
"saturation": [
|
| 91 |
+
0.8,
|
| 92 |
+
1.2
|
| 93 |
+
],
|
| 94 |
+
"hue": 0.05
|
| 95 |
+
},
|
| 96 |
+
"gaussian_noise": {
|
| 97 |
+
"mean": 0.0,
|
| 98 |
+
"std": 3.0,
|
| 99 |
+
"prob_skip": 0.1
|
| 100 |
+
},
|
| 101 |
+
"img_aug": true,
|
| 102 |
+
"adaptive_resize": false,
|
| 103 |
+
"img_sizes": {
|
| 104 |
+
"egodex": [
|
| 105 |
+
270,
|
| 106 |
+
480
|
| 107 |
+
],
|
| 108 |
+
"he": [
|
| 109 |
+
240,
|
| 110 |
+
320
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
"field": {
|
| 115 |
+
"stat_path": "meta/stats_psi0.json",
|
| 116 |
+
"action_norm_type": "bounds",
|
| 117 |
+
"stat_action_key": "action",
|
| 118 |
+
"stat_state_key": "states",
|
| 119 |
+
"use_norm_mask": false,
|
| 120 |
+
"action_norm_masks": [
|
| 121 |
+
true,
|
| 122 |
+
true,
|
| 123 |
+
true,
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
true,
|
| 127 |
+
false
|
| 128 |
+
],
|
| 129 |
+
"action_min": [
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0,
|
| 133 |
+
-1.5,
|
| 134 |
+
-1.5,
|
| 135 |
+
-0.6000000238418579,
|
| 136 |
+
-1.5,
|
| 137 |
+
-0.5,
|
| 138 |
+
-0.699999988079071,
|
| 139 |
+
-0.699999988079071,
|
| 140 |
+
-2.216935230032842e-16,
|
| 141 |
+
-2.216935230032842e-16,
|
| 142 |
+
-4.0845591349633594e-18,
|
| 143 |
+
-2.216935230032842e-16,
|
| 144 |
+
-0.4883034825325012,
|
| 145 |
+
0.1900009959936142,
|
| 146 |
+
-0.5470856428146362,
|
| 147 |
+
-0.34318920969963074,
|
| 148 |
+
-0.35952919721603394,
|
| 149 |
+
-0.35302427411079407,
|
| 150 |
+
-0.4469815790653229,
|
| 151 |
+
-0.6371198296546936,
|
| 152 |
+
-0.7683824300765991,
|
| 153 |
+
-1.0653810501098633,
|
| 154 |
+
-0.8479154706001282,
|
| 155 |
+
-1.0297260284423828,
|
| 156 |
+
-0.42936205863952637,
|
| 157 |
+
-0.5147944092750549,
|
| 158 |
+
-0.16820405423641205,
|
| 159 |
+
-0.045328833162784576,
|
| 160 |
+
-0.13282617926597595,
|
| 161 |
+
0.7400000095367432,
|
| 162 |
+
-0.5,
|
| 163 |
+
-0.5,
|
| 164 |
+
-1.0,
|
| 165 |
+
-3.138223648071289
|
| 166 |
+
],
|
| 167 |
+
"action_max": [
|
| 168 |
+
0.5,
|
| 169 |
+
0.699999988079071,
|
| 170 |
+
0.699999988079071,
|
| 171 |
+
0.0,
|
| 172 |
+
0.0,
|
| 173 |
+
0.0,
|
| 174 |
+
0.0,
|
| 175 |
+
6.930528109384597e-19,
|
| 176 |
+
1.108467615016421e-16,
|
| 177 |
+
1.108467615016421e-16,
|
| 178 |
+
1.5,
|
| 179 |
+
1.5,
|
| 180 |
+
1.0,
|
| 181 |
+
1.5,
|
| 182 |
+
0.2557959258556366,
|
| 183 |
+
0.35884979367256165,
|
| 184 |
+
0.5090755820274353,
|
| 185 |
+
0.19132143259048462,
|
| 186 |
+
0.21249642968177795,
|
| 187 |
+
0.3992660641670227,
|
| 188 |
+
0.4283020794391632,
|
| 189 |
+
0.1457289606332779,
|
| 190 |
+
-0.1900009959936142,
|
| 191 |
+
0.6150448322296143,
|
| 192 |
+
0.35468167066574097,
|
| 193 |
+
0.8703295588493347,
|
| 194 |
+
0.7531875371932983,
|
| 195 |
+
0.971237301826477,
|
| 196 |
+
0.13985762000083923,
|
| 197 |
+
0.15686897933483124,
|
| 198 |
+
0.4661160111427307,
|
| 199 |
+
0.7400000095367432,
|
| 200 |
+
0.5,
|
| 201 |
+
0.5,
|
| 202 |
+
1.0,
|
| 203 |
+
3.1414895057678223
|
| 204 |
+
],
|
| 205 |
+
"state_min": [
|
| 206 |
+
-0.02442001923918724,
|
| 207 |
+
-0.0517612099647522,
|
| 208 |
+
-0.0006534014828503132,
|
| 209 |
+
-0.5095356106758118,
|
| 210 |
+
-1.323034405708313,
|
| 211 |
+
-1.3221508264541626,
|
| 212 |
+
-1.3230019807815552,
|
| 213 |
+
-0.5770347714424133,
|
| 214 |
+
-0.4338151812553406,
|
| 215 |
+
-0.6721642017364502,
|
| 216 |
+
-0.0017213862156495452,
|
| 217 |
+
-7.534810038123396e-7,
|
| 218 |
+
-0.001927333534695208,
|
| 219 |
+
-1.075333216249419e-6,
|
| 220 |
+
-0.43650975823402405,
|
| 221 |
+
0.15721464157104492,
|
| 222 |
+
-0.5489339232444763,
|
| 223 |
+
-0.2632291913032532,
|
| 224 |
+
-0.3508843183517456,
|
| 225 |
+
-0.23784859478473663,
|
| 226 |
+
-0.4281824827194214,
|
| 227 |
+
-0.5803383588790894,
|
| 228 |
+
-0.7118590474128723,
|
| 229 |
+
-1.0344431400299072,
|
| 230 |
+
-0.7932196259498596,
|
| 231 |
+
-1.0205217599868774,
|
| 232 |
+
-0.3445618450641632,
|
| 233 |
+
-0.5986371040344238,
|
| 234 |
+
-0.13537253439426422,
|
| 235 |
+
-0.0017330688424408436,
|
| 236 |
+
-0.1421850621700287,
|
| 237 |
+
0.7400000095367432,
|
| 238 |
+
0.0,
|
| 239 |
+
0.0,
|
| 240 |
+
0.0,
|
| 241 |
+
0.0
|
| 242 |
+
],
|
| 243 |
+
"state_max": [
|
| 244 |
+
0.47981399297714233,
|
| 245 |
+
0.6772664189338684,
|
| 246 |
+
0.6746510863304138,
|
| 247 |
+
0.0010172375477850437,
|
| 248 |
+
0.0007091082516126335,
|
| 249 |
+
0.001881288131698966,
|
| 250 |
+
0.0011398319620639086,
|
| 251 |
+
6.141255539660051e-7,
|
| 252 |
+
0.3043450713157654,
|
| 253 |
+
6.343479981296696e-7,
|
| 254 |
+
0.6933000087738037,
|
| 255 |
+
1.4612544775009155,
|
| 256 |
+
1.4651201963424683,
|
| 257 |
+
1.4609057903289795,
|
| 258 |
+
0.2809508740901947,
|
| 259 |
+
0.34028318524360657,
|
| 260 |
+
0.47627460956573486,
|
| 261 |
+
0.26476219296455383,
|
| 262 |
+
0.20825636386871338,
|
| 263 |
+
0.4566418528556824,
|
| 264 |
+
0.42864030599594116,
|
| 265 |
+
0.1656116098165512,
|
| 266 |
+
-0.1549365073442459,
|
| 267 |
+
0.5154499411582947,
|
| 268 |
+
0.4242899715900421,
|
| 269 |
+
0.8548054695129395,
|
| 270 |
+
0.8040095567703247,
|
| 271 |
+
0.9811649322509766,
|
| 272 |
+
0.136736661195755,
|
| 273 |
+
0.195722296833992,
|
| 274 |
+
0.45781663060188293,
|
| 275 |
+
0.7400000095367432,
|
| 276 |
+
0.0,
|
| 277 |
+
0.0,
|
| 278 |
+
0.0,
|
| 279 |
+
0.0
|
| 280 |
+
],
|
| 281 |
+
"normalize_state": true,
|
| 282 |
+
"pad_action_dim": 36,
|
| 283 |
+
"pad_state_dim": 36
|
| 284 |
+
}
|
| 285 |
+
},
|
| 286 |
+
"root_dir": "/data/jliu/data",
|
| 287 |
+
"train_repo_ids": [
|
| 288 |
+
"G1WholebodyLocomotionPickBetweenTablesTeleop-v0"
|
| 289 |
+
],
|
| 290 |
+
"val_repo_ids": [
|
| 291 |
+
"G1WholebodyLocomotionPickBetweenTablesTeleop-v0"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
"model": {
|
| 295 |
+
"resnet_store_path": null,
|
| 296 |
+
"pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
|
| 297 |
+
"rtc": true,
|
| 298 |
+
"max_delay": 8,
|
| 299 |
+
"action_dim": 36,
|
| 300 |
+
"action_chunk_size": 30,
|
| 301 |
+
"action_exec_horizon": 30,
|
| 302 |
+
"observation_horizon": 1,
|
| 303 |
+
"img_chunk": 1,
|
| 304 |
+
"n_cams": 1,
|
| 305 |
+
"use_obs": "add_token",
|
| 306 |
+
"dropout": 0.1,
|
| 307 |
+
"noise_scheduler": "flow",
|
| 308 |
+
"train_diffusion_steps": 1000,
|
| 309 |
+
"eval_diffusion_steps": 10,
|
| 310 |
+
"share_cam_features": false,
|
| 311 |
+
"early_fusion": false,
|
| 312 |
+
"odim": 36,
|
| 313 |
+
"n_conditions": 0,
|
| 314 |
+
"token_fusion": "concat",
|
| 315 |
+
"loss_w": [
|
| 316 |
+
0.1,
|
| 317 |
+
0.2,
|
| 318 |
+
0.1
|
| 319 |
+
],
|
| 320 |
+
"time_dim": 256,
|
| 321 |
+
"hidden_dim": 1536,
|
| 322 |
+
"num_blocks": 6,
|
| 323 |
+
"dim_feedforward": 2048,
|
| 324 |
+
"nhead": 24,
|
| 325 |
+
"activation": "gelu",
|
| 326 |
+
"view_feature_dim": 2048,
|
| 327 |
+
"use_film": false,
|
| 328 |
+
"combined_temb": false,
|
| 329 |
+
"use_dit": false,
|
| 330 |
+
"weight_decay": 0.01,
|
| 331 |
+
"model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
|
| 332 |
+
"vlm_ckpt_step": null,
|
| 333 |
+
"tune_vlm": false,
|
| 334 |
+
"tune_mm_llm": false,
|
| 335 |
+
"tune_mm_vision": false,
|
| 336 |
+
"tune_mm_mlp": false,
|
| 337 |
+
"gradient_checkpointing": true,
|
| 338 |
+
"lang_backbone_lr": 0.00001,
|
| 339 |
+
"mm_projector_lr": 0.00001,
|
| 340 |
+
"vision_tower_lr": 1e-6,
|
| 341 |
+
"optim": "adamw_torch",
|
| 342 |
+
"model_max_length": 4096,
|
| 343 |
+
"data_flatten": true,
|
| 344 |
+
"data_packing": true,
|
| 345 |
+
"max_pixels": 451584,
|
| 346 |
+
"min_pixels": 12544
|
| 347 |
+
}
|
| 348 |
+
}
|
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/argv.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scripts/train.py
|
| 2 |
+
finetune_simple_psi0_config
|
| 3 |
+
--seed=292285
|
| 4 |
+
--exp=g1wholebodytabletopgrasp-v0
|
| 5 |
+
--train.name=finetune
|
| 6 |
+
--train.data_parallel=ddp
|
| 7 |
+
--train.mixed_precision=bf16
|
| 8 |
+
--train.train_batch_size=16
|
| 9 |
+
--train.max_checkpoints_to_keep=5
|
| 10 |
+
--train.gradient_accumulation_steps=1
|
| 11 |
+
--train.learning_rate=1e-4
|
| 12 |
+
--train.max_training_steps=40000
|
| 13 |
+
--train.warmup_ratio=None
|
| 14 |
+
--train.warmup_steps=1000
|
| 15 |
+
--train.checkpointing_steps=10000
|
| 16 |
+
--train.validation_steps=500
|
| 17 |
+
--train.val_num_batches=20
|
| 18 |
+
--train.max_grad_norm=1.0
|
| 19 |
+
--train.lr_scheduler_type=cosine
|
| 20 |
+
--train.lr_scheduler_kwargs.weight_decay=1e-6
|
| 21 |
+
--train.lr_scheduler_kwargs.betas 0.95 0.999
|
| 22 |
+
--log.report_to=wandb
|
| 23 |
+
--data.root_dir=/data/jliu/data
|
| 24 |
+
--data.train-repo-ids=G1WholebodyTabletopGrasp-v0
|
| 25 |
+
--data.transform.repack.pad-action-dim=36
|
| 26 |
+
--data.transform.repack.pad-state-dim=36
|
| 27 |
+
--data.transform.field.stat-path=meta/stats_psi0.json
|
| 28 |
+
--data.transform.field.stat-action-key=action
|
| 29 |
+
--data.transform.field.stat-state-key=states
|
| 30 |
+
--data.transform.field.action_norm_type=bounds
|
| 31 |
+
--data.transform.field.no-use-norm-mask
|
| 32 |
+
--data.transform.field.normalize-state
|
| 33 |
+
--data.transform.field.pad-action-dim=36
|
| 34 |
+
--data.transform.field.pad-state-dim=36
|
| 35 |
+
--data.transform.model.img-aug
|
| 36 |
+
--data.transform.model.resize.size 180 320
|
| 37 |
+
--data.transform.model.center_crop.size 180 320
|
| 38 |
+
--model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
|
| 39 |
+
--model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
|
| 40 |
+
--model.noise-scheduler=flow
|
| 41 |
+
--model.train-diffusion-steps=1000
|
| 42 |
+
--model.n_conditions=0
|
| 43 |
+
--model.action-chunk-size=30
|
| 44 |
+
--model.action-dim=36
|
| 45 |
+
--model.action-exec-horizon=30
|
| 46 |
+
--model.observation-horizon=1
|
| 47 |
+
--model.odim=36
|
| 48 |
+
--model.view_feature_dim=2048
|
| 49 |
+
--model.no-tune-vlm
|
| 50 |
+
--model.no-use_film
|
| 51 |
+
--model.no-combined_temb
|
| 52 |
+
--model.rtc
|
| 53 |
+
--model.max-delay=8
|
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/checkpoints/ckpt_40000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72f07e71e1a50bc943b9c4cba47051d7eb8f1d86ca81f3ea0ade7cc8dbad5458
|
| 3 |
+
size 6253648840
|
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/envs.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OMP_NUM_THREADS=32
|
| 2 |
+
HF_HOME=/data/cache
|
| 3 |
+
TORCH_HOME=/data/cache
|
| 4 |
+
HF_TOKEN=hf_...TiKa
|
| 5 |
+
HF_LEROBOT_HOME=/data/data/lerobot
|
| 6 |
+
WE_HOME=Not Set
|
| 7 |
+
DATA_HOME=/data/data
|
| 8 |
+
UV_CACHE_DIR=/data/cache
|
| 9 |
+
WANDB_API_KEY=90e...5c06
|
| 10 |
+
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
| 11 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
| 12 |
+
WORLD_SIZE=8
|
| 13 |
+
LOCAL_WORLD_SIZE=8
|
| 14 |
+
RANK=0
|
| 15 |
+
LOCAL_RANK=0
|
| 16 |
+
MASTER_ADDR=127.0.0.1
|
| 17 |
+
MASTER_PORT=29500
|
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/run_config.json
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exp": "g1wholebodytabletopgrasp-v0",
|
| 3 |
+
"seed": 292285,
|
| 4 |
+
"auto_tag_run": false,
|
| 5 |
+
"eval": false,
|
| 6 |
+
"debug": false,
|
| 7 |
+
"timestamp": "2603181503",
|
| 8 |
+
"log": {
|
| 9 |
+
"logging_dir": "logs",
|
| 10 |
+
"report_to": "wandb",
|
| 11 |
+
"log_freq": 100
|
| 12 |
+
},
|
| 13 |
+
"wandb": {
|
| 14 |
+
"project": "psi",
|
| 15 |
+
"entity": "jliu530-soochow-university",
|
| 16 |
+
"group": "finetune",
|
| 17 |
+
"id": "424b3khl",
|
| 18 |
+
"name": "g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503",
|
| 19 |
+
"resume": "allow"
|
| 20 |
+
},
|
| 21 |
+
"train": {
|
| 22 |
+
"num_workers": 8,
|
| 23 |
+
"overfit_single_batch": false,
|
| 24 |
+
"name": "finetune",
|
| 25 |
+
"resume_from_checkpoint": null,
|
| 26 |
+
"skip_resumed_steps": false,
|
| 27 |
+
"hf_token": ".hf_token",
|
| 28 |
+
"lora": false,
|
| 29 |
+
"output_dir": ".runs",
|
| 30 |
+
"gradient_accumulation_steps": 1,
|
| 31 |
+
"mixed_precision": "bf16",
|
| 32 |
+
"max_grad_norm": 1.0,
|
| 33 |
+
"optimizer_foreach": null,
|
| 34 |
+
"train_batch_size": 16,
|
| 35 |
+
"val_batch_size": 16,
|
| 36 |
+
"val_num_batches": 20,
|
| 37 |
+
"checkpointing_steps": 10000,
|
| 38 |
+
"max_checkpoints_to_keep": 5,
|
| 39 |
+
"validation_steps": 500,
|
| 40 |
+
"learning_rate": 0.0001,
|
| 41 |
+
"lr_scheduler_type": "cosine",
|
| 42 |
+
"lr_scheduler_kwargs": {
|
| 43 |
+
"betas": [
|
| 44 |
+
0.95,
|
| 45 |
+
0.999
|
| 46 |
+
],
|
| 47 |
+
"weight_decay": 1e-6,
|
| 48 |
+
"eps": 1e-8
|
| 49 |
+
},
|
| 50 |
+
"scheduler_specific_kwargs": {},
|
| 51 |
+
"data_parallel": "ddp",
|
| 52 |
+
"sharding_strategy": "full-shard",
|
| 53 |
+
"deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
|
| 54 |
+
"enable_gradient_checkpointing": true,
|
| 55 |
+
"enable_mixed_precision_training": true,
|
| 56 |
+
"reduce_in_full_precision": true,
|
| 57 |
+
"max_training_steps": 40000,
|
| 58 |
+
"num_train_epochs": null,
|
| 59 |
+
"warmup_steps": 1000,
|
| 60 |
+
"warmup_ratio": null
|
| 61 |
+
},
|
| 62 |
+
"data": {
|
| 63 |
+
"transform": {
|
| 64 |
+
"repack": {
|
| 65 |
+
"dataset_name": "simple",
|
| 66 |
+
"num_past_frames": 0,
|
| 67 |
+
"action_chunk_size": 30,
|
| 68 |
+
"pad_action_dim": 36,
|
| 69 |
+
"pad_state_dim": 36
|
| 70 |
+
},
|
| 71 |
+
"model": {
|
| 72 |
+
"resize": {
|
| 73 |
+
"size": [
|
| 74 |
+
180,
|
| 75 |
+
320
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"center_crop": {
|
| 79 |
+
"size": [
|
| 80 |
+
180,
|
| 81 |
+
320
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"color_jitter": {
|
| 85 |
+
"brightness": 0.2,
|
| 86 |
+
"contrast": [
|
| 87 |
+
0.8,
|
| 88 |
+
1.2
|
| 89 |
+
],
|
| 90 |
+
"saturation": [
|
| 91 |
+
0.8,
|
| 92 |
+
1.2
|
| 93 |
+
],
|
| 94 |
+
"hue": 0.05
|
| 95 |
+
},
|
| 96 |
+
"gaussian_noise": {
|
| 97 |
+
"mean": 0.0,
|
| 98 |
+
"std": 3.0,
|
| 99 |
+
"prob_skip": 0.1
|
| 100 |
+
},
|
| 101 |
+
"img_aug": true,
|
| 102 |
+
"adaptive_resize": false,
|
| 103 |
+
"img_sizes": {
|
| 104 |
+
"egodex": [
|
| 105 |
+
270,
|
| 106 |
+
480
|
| 107 |
+
],
|
| 108 |
+
"he": [
|
| 109 |
+
240,
|
| 110 |
+
320
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
"field": {
|
| 115 |
+
"stat_path": "meta/stats_psi0.json",
|
| 116 |
+
"action_norm_type": "bounds",
|
| 117 |
+
"stat_action_key": "action",
|
| 118 |
+
"stat_state_key": "states",
|
| 119 |
+
"use_norm_mask": false,
|
| 120 |
+
"action_norm_masks": [
|
| 121 |
+
true,
|
| 122 |
+
true,
|
| 123 |
+
true,
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
true,
|
| 127 |
+
false
|
| 128 |
+
],
|
| 129 |
+
"action_min": [
|
| 130 |
+
-0.21088384091854095,
|
| 131 |
+
-0.11738907545804977,
|
| 132 |
+
-0.013445371761918068,
|
| 133 |
+
-0.5775371193885803,
|
| 134 |
+
-0.30140629410743713,
|
| 135 |
+
-0.3430681824684143,
|
| 136 |
+
-0.37358492612838745,
|
| 137 |
+
-0.007524379529058933,
|
| 138 |
+
-0.01704181358218193,
|
| 139 |
+
-0.6932834386825562,
|
| 140 |
+
0.0,
|
| 141 |
+
0.0,
|
| 142 |
+
0.0,
|
| 143 |
+
0.0,
|
| 144 |
+
-0.022009270265698433,
|
| 145 |
+
-0.04607510566711426,
|
| 146 |
+
-0.24804681539535522,
|
| 147 |
+
-0.005683199502527714,
|
| 148 |
+
-0.25139108300209045,
|
| 149 |
+
-0.05519897863268852,
|
| 150 |
+
-0.04459292069077492,
|
| 151 |
+
-1.0918865203857422,
|
| 152 |
+
-0.3572312593460083,
|
| 153 |
+
-0.7850697636604309,
|
| 154 |
+
0.0,
|
| 155 |
+
-1.4517900943756104,
|
| 156 |
+
-0.5649155378341675,
|
| 157 |
+
-0.4592915177345276,
|
| 158 |
+
-0.08972926437854767,
|
| 159 |
+
-0.1079544723033905,
|
| 160 |
+
-0.04311269149184227,
|
| 161 |
+
0.75,
|
| 162 |
+
0.0,
|
| 163 |
+
0.0,
|
| 164 |
+
0.0,
|
| 165 |
+
0.0
|
| 166 |
+
],
|
| 167 |
+
"action_max": [
|
| 168 |
+
0.21513332426548004,
|
| 169 |
+
0.21692107617855072,
|
| 170 |
+
0.3652719259262085,
|
| 171 |
+
0.07139641791582108,
|
| 172 |
+
0.015001054853200912,
|
| 173 |
+
0.03918211907148361,
|
| 174 |
+
0.03575323149561882,
|
| 175 |
+
0.6107784509658813,
|
| 176 |
+
0.31583136320114136,
|
| 177 |
+
0.0,
|
| 178 |
+
0.6836385726928711,
|
| 179 |
+
1.4285058975219727,
|
| 180 |
+
0.8524638414382935,
|
| 181 |
+
1.7429704666137695,
|
| 182 |
+
0.00735096400603652,
|
| 183 |
+
0.25089067220687866,
|
| 184 |
+
0.04510946571826935,
|
| 185 |
+
0.017853474244475365,
|
| 186 |
+
0.046191196888685226,
|
| 187 |
+
0.0031940839253365993,
|
| 188 |
+
0.24179035425186157,
|
| 189 |
+
0.0037467884831130505,
|
| 190 |
+
0.0002910589682869613,
|
| 191 |
+
0.7421935796737671,
|
| 192 |
+
1.6607650518417358,
|
| 193 |
+
0.0,
|
| 194 |
+
0.3736472725868225,
|
| 195 |
+
0.30042290687561035,
|
| 196 |
+
0.009443609043955803,
|
| 197 |
+
0.11625207960605621,
|
| 198 |
+
0.013617209158837795,
|
| 199 |
+
0.75,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0
|
| 204 |
+
],
|
| 205 |
+
"state_min": [
|
| 206 |
+
-0.210999995470047,
|
| 207 |
+
-0.11699992418289185,
|
| 208 |
+
-1.1206404693098193e-8,
|
| 209 |
+
-0.5680000185966492,
|
| 210 |
+
-0.29699981212615967,
|
| 211 |
+
-0.3440000116825104,
|
| 212 |
+
-0.37400001287460327,
|
| 213 |
+
-0.00800000037997961,
|
| 214 |
+
-0.004000000189989805,
|
| 215 |
+
-0.6610000133514404,
|
| 216 |
+
0.0,
|
| 217 |
+
0.0,
|
| 218 |
+
0.0,
|
| 219 |
+
0.0,
|
| 220 |
+
-0.017999978736042976,
|
| 221 |
+
-0.04700015112757683,
|
| 222 |
+
-0.24899962544441223,
|
| 223 |
+
-0.0069999597035348415,
|
| 224 |
+
-0.2510001063346863,
|
| 225 |
+
-0.053999971598386765,
|
| 226 |
+
-0.045001156628131866,
|
| 227 |
+
-1.0800000429153442,
|
| 228 |
+
-0.3590024709701538,
|
| 229 |
+
-0.781000018119812,
|
| 230 |
+
-0.006000000052154064,
|
| 231 |
+
-1.4559999704360962,
|
| 232 |
+
-0.5649999976158142,
|
| 233 |
+
-0.46299999952316284,
|
| 234 |
+
0.0,
|
| 235 |
+
-0.15000000596046448,
|
| 236 |
+
0.0,
|
| 237 |
+
0.75,
|
| 238 |
+
0.0,
|
| 239 |
+
0.0,
|
| 240 |
+
0.0,
|
| 241 |
+
0.0
|
| 242 |
+
],
|
| 243 |
+
"state_max": [
|
| 244 |
+
0.2149999588727951,
|
| 245 |
+
0.21600016951560974,
|
| 246 |
+
0.36500000953674316,
|
| 247 |
+
0.0,
|
| 248 |
+
4.046002644031432e-9,
|
| 249 |
+
0.0,
|
| 250 |
+
1.0771045513835453e-11,
|
| 251 |
+
0.609000027179718,
|
| 252 |
+
0.3600001633167267,
|
| 253 |
+
1.1693318297152644e-13,
|
| 254 |
+
0.5249999761581421,
|
| 255 |
+
1.3730000257492065,
|
| 256 |
+
0.8119999766349792,
|
| 257 |
+
1.7453292608261108,
|
| 258 |
+
0.014000464230775833,
|
| 259 |
+
0.2500004470348358,
|
| 260 |
+
0.04610275477170944,
|
| 261 |
+
0.020999999716877937,
|
| 262 |
+
0.04600704088807106,
|
| 263 |
+
0.006000000052154064,
|
| 264 |
+
0.24200008809566498,
|
| 265 |
+
0.012999767437577248,
|
| 266 |
+
0.003000000026077032,
|
| 267 |
+
0.7419999837875366,
|
| 268 |
+
1.6640000343322754,
|
| 269 |
+
6.225707238627365e-6,
|
| 270 |
+
0.37400001287460327,
|
| 271 |
+
0.300999253988266,
|
| 272 |
+
0.0,
|
| 273 |
+
0.0,
|
| 274 |
+
0.0,
|
| 275 |
+
0.75,
|
| 276 |
+
0.0,
|
| 277 |
+
0.0,
|
| 278 |
+
0.0,
|
| 279 |
+
0.0
|
| 280 |
+
],
|
| 281 |
+
"normalize_state": true,
|
| 282 |
+
"pad_action_dim": 36,
|
| 283 |
+
"pad_state_dim": 36
|
| 284 |
+
}
|
| 285 |
+
},
|
| 286 |
+
"root_dir": "/data/jliu/data",
|
| 287 |
+
"train_repo_ids": [
|
| 288 |
+
"G1WholebodyTabletopGrasp-v0"
|
| 289 |
+
],
|
| 290 |
+
"val_repo_ids": [
|
| 291 |
+
"G1WholebodyTabletopGrasp-v0"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
"model": {
|
| 295 |
+
"resnet_store_path": null,
|
| 296 |
+
"pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
|
| 297 |
+
"rtc": true,
|
| 298 |
+
"max_delay": 8,
|
| 299 |
+
"action_dim": 36,
|
| 300 |
+
"action_chunk_size": 30,
|
| 301 |
+
"action_exec_horizon": 30,
|
| 302 |
+
"observation_horizon": 1,
|
| 303 |
+
"img_chunk": 1,
|
| 304 |
+
"n_cams": 1,
|
| 305 |
+
"use_obs": "add_token",
|
| 306 |
+
"dropout": 0.1,
|
| 307 |
+
"noise_scheduler": "flow",
|
| 308 |
+
"train_diffusion_steps": 1000,
|
| 309 |
+
"eval_diffusion_steps": 10,
|
| 310 |
+
"share_cam_features": false,
|
| 311 |
+
"early_fusion": false,
|
| 312 |
+
"odim": 36,
|
| 313 |
+
"n_conditions": 0,
|
| 314 |
+
"token_fusion": "concat",
|
| 315 |
+
"loss_w": [
|
| 316 |
+
0.1,
|
| 317 |
+
0.2,
|
| 318 |
+
0.1
|
| 319 |
+
],
|
| 320 |
+
"time_dim": 256,
|
| 321 |
+
"hidden_dim": 1536,
|
| 322 |
+
"num_blocks": 6,
|
| 323 |
+
"dim_feedforward": 2048,
|
| 324 |
+
"nhead": 24,
|
| 325 |
+
"activation": "gelu",
|
| 326 |
+
"view_feature_dim": 2048,
|
| 327 |
+
"use_film": false,
|
| 328 |
+
"combined_temb": false,
|
| 329 |
+
"use_dit": false,
|
| 330 |
+
"weight_decay": 0.01,
|
| 331 |
+
"model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
|
| 332 |
+
"vlm_ckpt_step": null,
|
| 333 |
+
"tune_vlm": false,
|
| 334 |
+
"tune_mm_llm": false,
|
| 335 |
+
"tune_mm_vision": false,
|
| 336 |
+
"tune_mm_mlp": false,
|
| 337 |
+
"gradient_checkpointing": true,
|
| 338 |
+
"lang_backbone_lr": 0.00001,
|
| 339 |
+
"mm_projector_lr": 0.00001,
|
| 340 |
+
"vision_tower_lr": 1e-6,
|
| 341 |
+
"optim": "adamw_torch",
|
| 342 |
+
"model_max_length": 4096,
|
| 343 |
+
"data_flatten": true,
|
| 344 |
+
"data_packing": true,
|
| 345 |
+
"max_pixels": 451584,
|
| 346 |
+
"min_pixels": 12544
|
| 347 |
+
}
|
| 348 |
+
}
|
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/argv.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scripts/train.py
|
| 2 |
+
finetune_simple_psi0_config
|
| 3 |
+
--seed=292285
|
| 4 |
+
--exp=g1wholebodyxmovebendpickteleop-v0
|
| 5 |
+
--train.name=finetune
|
| 6 |
+
--train.data_parallel=ddp
|
| 7 |
+
--train.mixed_precision=bf16
|
| 8 |
+
--train.train_batch_size=16
|
| 9 |
+
--train.max_checkpoints_to_keep=5
|
| 10 |
+
--train.gradient_accumulation_steps=1
|
| 11 |
+
--train.learning_rate=1e-4
|
| 12 |
+
--train.max_training_steps=40000
|
| 13 |
+
--train.warmup_ratio=None
|
| 14 |
+
--train.warmup_steps=1000
|
| 15 |
+
--train.checkpointing_steps=10000
|
| 16 |
+
--train.validation_steps=500
|
| 17 |
+
--train.val_num_batches=20
|
| 18 |
+
--train.max_grad_norm=1.0
|
| 19 |
+
--train.lr_scheduler_type=cosine
|
| 20 |
+
--train.lr_scheduler_kwargs.weight_decay=1e-6
|
| 21 |
+
--train.lr_scheduler_kwargs.betas 0.95 0.999
|
| 22 |
+
--log.report_to=wandb
|
| 23 |
+
--data.root_dir=/data/jliu/data
|
| 24 |
+
--data.train-repo-ids=G1WholebodyXMoveBendPickTeleop-v0
|
| 25 |
+
--data.transform.repack.pad-action-dim=36
|
| 26 |
+
--data.transform.repack.pad-state-dim=36
|
| 27 |
+
--data.transform.field.stat-path=meta/stats_psi0.json
|
| 28 |
+
--data.transform.field.stat-action-key=action
|
| 29 |
+
--data.transform.field.stat-state-key=states
|
| 30 |
+
--data.transform.field.action_norm_type=bounds
|
| 31 |
+
--data.transform.field.no-use-norm-mask
|
| 32 |
+
--data.transform.field.normalize-state
|
| 33 |
+
--data.transform.field.pad-action-dim=36
|
| 34 |
+
--data.transform.field.pad-state-dim=36
|
| 35 |
+
--data.transform.model.img-aug
|
| 36 |
+
--data.transform.model.resize.size 180 320
|
| 37 |
+
--data.transform.model.center_crop.size 180 320
|
| 38 |
+
--model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
|
| 39 |
+
--model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
|
| 40 |
+
--model.noise-scheduler=flow
|
| 41 |
+
--model.train-diffusion-steps=1000
|
| 42 |
+
--model.n_conditions=0
|
| 43 |
+
--model.action-chunk-size=30
|
| 44 |
+
--model.action-dim=36
|
| 45 |
+
--model.action-exec-horizon=30
|
| 46 |
+
--model.observation-horizon=1
|
| 47 |
+
--model.odim=36
|
| 48 |
+
--model.view_feature_dim=2048
|
| 49 |
+
--model.no-tune-vlm
|
| 50 |
+
--model.no-use_film
|
| 51 |
+
--model.no-combined_temb
|
| 52 |
+
--model.rtc
|
| 53 |
+
--model.max-delay=8
|
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/checkpoints/ckpt_40000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b8a6ab62c8defd7099f834b0e7827a7ac868206dfab5f1083adf2719ccb77bc
|
| 3 |
+
size 6253648840
|
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/envs.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OMP_NUM_THREADS=32
|
| 2 |
+
HF_HOME=/data/cache
|
| 3 |
+
TORCH_HOME=/data/cache
|
| 4 |
+
HF_TOKEN=hf_...TiKa
|
| 5 |
+
HF_LEROBOT_HOME=/data/data/lerobot
|
| 6 |
+
WE_HOME=Not Set
|
| 7 |
+
DATA_HOME=/data/data
|
| 8 |
+
UV_CACHE_DIR=/data/cache
|
| 9 |
+
WANDB_API_KEY=90e...5c06
|
| 10 |
+
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
| 11 |
+
CUDA_VISIBLE_DEVICES=0,2,3,4,5,6,7
|
| 12 |
+
WORLD_SIZE=7
|
| 13 |
+
LOCAL_WORLD_SIZE=7
|
| 14 |
+
RANK=0
|
| 15 |
+
LOCAL_RANK=0
|
| 16 |
+
MASTER_ADDR=127.0.0.1
|
| 17 |
+
MASTER_PORT=29509
|
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/run_config.json
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"exp": "g1wholebodyxmovebendpickteleop-v0",
|
| 3 |
+
"seed": 292285,
|
| 4 |
+
"auto_tag_run": false,
|
| 5 |
+
"eval": false,
|
| 6 |
+
"debug": false,
|
| 7 |
+
"timestamp": "2604100422",
|
| 8 |
+
"log": {
|
| 9 |
+
"logging_dir": "logs",
|
| 10 |
+
"report_to": "wandb",
|
| 11 |
+
"log_freq": 100
|
| 12 |
+
},
|
| 13 |
+
"wandb": {
|
| 14 |
+
"project": "psi",
|
| 15 |
+
"entity": "jliu530-soochow-university",
|
| 16 |
+
"group": "finetune",
|
| 17 |
+
"id": "1jvqo3pw",
|
| 18 |
+
"name": "g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422",
|
| 19 |
+
"resume": "allow"
|
| 20 |
+
},
|
| 21 |
+
"train": {
|
| 22 |
+
"num_workers": 8,
|
| 23 |
+
"overfit_single_batch": false,
|
| 24 |
+
"name": "finetune",
|
| 25 |
+
"resume_from_checkpoint": null,
|
| 26 |
+
"skip_resumed_steps": false,
|
| 27 |
+
"hf_token": ".hf_token",
|
| 28 |
+
"lora": false,
|
| 29 |
+
"output_dir": ".runs",
|
| 30 |
+
"gradient_accumulation_steps": 1,
|
| 31 |
+
"mixed_precision": "bf16",
|
| 32 |
+
"max_grad_norm": 1.0,
|
| 33 |
+
"optimizer_foreach": null,
|
| 34 |
+
"train_batch_size": 16,
|
| 35 |
+
"val_batch_size": 16,
|
| 36 |
+
"val_num_batches": 20,
|
| 37 |
+
"checkpointing_steps": 10000,
|
| 38 |
+
"max_checkpoints_to_keep": 5,
|
| 39 |
+
"validation_steps": 500,
|
| 40 |
+
"learning_rate": 0.0001,
|
| 41 |
+
"lr_scheduler_type": "cosine",
|
| 42 |
+
"lr_scheduler_kwargs": {
|
| 43 |
+
"betas": [
|
| 44 |
+
0.95,
|
| 45 |
+
0.999
|
| 46 |
+
],
|
| 47 |
+
"weight_decay": 1e-6,
|
| 48 |
+
"eps": 1e-8
|
| 49 |
+
},
|
| 50 |
+
"scheduler_specific_kwargs": {},
|
| 51 |
+
"data_parallel": "ddp",
|
| 52 |
+
"sharding_strategy": "full-shard",
|
| 53 |
+
"deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
|
| 54 |
+
"enable_gradient_checkpointing": true,
|
| 55 |
+
"enable_mixed_precision_training": true,
|
| 56 |
+
"reduce_in_full_precision": true,
|
| 57 |
+
"max_training_steps": 40000,
|
| 58 |
+
"num_train_epochs": null,
|
| 59 |
+
"warmup_steps": 1000,
|
| 60 |
+
"warmup_ratio": null
|
| 61 |
+
},
|
| 62 |
+
"data": {
|
| 63 |
+
"transform": {
|
| 64 |
+
"repack": {
|
| 65 |
+
"dataset_name": "simple",
|
| 66 |
+
"num_past_frames": 0,
|
| 67 |
+
"action_chunk_size": 30,
|
| 68 |
+
"pad_action_dim": 36,
|
| 69 |
+
"pad_state_dim": 36
|
| 70 |
+
},
|
| 71 |
+
"model": {
|
| 72 |
+
"resize": {
|
| 73 |
+
"size": [
|
| 74 |
+
180,
|
| 75 |
+
320
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
"center_crop": {
|
| 79 |
+
"size": [
|
| 80 |
+
180,
|
| 81 |
+
320
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"color_jitter": {
|
| 85 |
+
"brightness": 0.2,
|
| 86 |
+
"contrast": [
|
| 87 |
+
0.8,
|
| 88 |
+
1.2
|
| 89 |
+
],
|
| 90 |
+
"saturation": [
|
| 91 |
+
0.8,
|
| 92 |
+
1.2
|
| 93 |
+
],
|
| 94 |
+
"hue": 0.05
|
| 95 |
+
},
|
| 96 |
+
"gaussian_noise": {
|
| 97 |
+
"mean": 0.0,
|
| 98 |
+
"std": 3.0,
|
| 99 |
+
"prob_skip": 0.1
|
| 100 |
+
},
|
| 101 |
+
"img_aug": true,
|
| 102 |
+
"adaptive_resize": false,
|
| 103 |
+
"img_sizes": {
|
| 104 |
+
"egodex": [
|
| 105 |
+
270,
|
| 106 |
+
480
|
| 107 |
+
],
|
| 108 |
+
"he": [
|
| 109 |
+
240,
|
| 110 |
+
320
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
},
|
| 114 |
+
"field": {
|
| 115 |
+
"stat_path": "meta/stats_psi0.json",
|
| 116 |
+
"action_norm_type": "bounds",
|
| 117 |
+
"stat_action_key": "action",
|
| 118 |
+
"stat_state_key": "states",
|
| 119 |
+
"use_norm_mask": false,
|
| 120 |
+
"action_norm_masks": [
|
| 121 |
+
true,
|
| 122 |
+
true,
|
| 123 |
+
true,
|
| 124 |
+
true,
|
| 125 |
+
true,
|
| 126 |
+
true,
|
| 127 |
+
false
|
| 128 |
+
],
|
| 129 |
+
"action_min": [
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0,
|
| 133 |
+
0.0,
|
| 134 |
+
0.0,
|
| 135 |
+
0.0,
|
| 136 |
+
0.0,
|
| 137 |
+
-0.5,
|
| 138 |
+
-0.699999988079071,
|
| 139 |
+
-0.699999988079071,
|
| 140 |
+
-1.8059087783367424e-18,
|
| 141 |
+
-1.8059087783367424e-18,
|
| 142 |
+
-2.3222253007177214e-19,
|
| 143 |
+
-1.8059087783367424e-18,
|
| 144 |
+
-0.3314070701599121,
|
| 145 |
+
0.1900009959936142,
|
| 146 |
+
-0.8766500353813171,
|
| 147 |
+
-0.12303244322538376,
|
| 148 |
+
-0.4908517599105835,
|
| 149 |
+
-0.2786784768104553,
|
| 150 |
+
-0.022629141807556152,
|
| 151 |
+
-0.6784858703613281,
|
| 152 |
+
-0.5865002870559692,
|
| 153 |
+
-0.645729660987854,
|
| 154 |
+
-0.3608185946941376,
|
| 155 |
+
-0.15172408521175385,
|
| 156 |
+
-0.4648345112800598,
|
| 157 |
+
-0.2964947521686554,
|
| 158 |
+
-0.10700750350952148,
|
| 159 |
+
-0.21067920327186584,
|
| 160 |
+
-0.08102670311927795,
|
| 161 |
+
0.44999998807907104,
|
| 162 |
+
-0.5,
|
| 163 |
+
-0.26561295986175537,
|
| 164 |
+
-0.11697302013635635,
|
| 165 |
+
0.0
|
| 166 |
+
],
|
| 167 |
+
"action_max": [
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
0.0,
|
| 172 |
+
0.0,
|
| 173 |
+
0.0,
|
| 174 |
+
0.0,
|
| 175 |
+
2.189282125137906e-19,
|
| 176 |
+
3.6880364598559585e-19,
|
| 177 |
+
3.6880364598559585e-19,
|
| 178 |
+
1.5,
|
| 179 |
+
1.5,
|
| 180 |
+
0.6000000238418579,
|
| 181 |
+
1.5,
|
| 182 |
+
0.06430592387914658,
|
| 183 |
+
0.2996276319026947,
|
| 184 |
+
0.5128592252731323,
|
| 185 |
+
0.3017215132713318,
|
| 186 |
+
-0.007464056834578514,
|
| 187 |
+
0.21968720853328705,
|
| 188 |
+
0.827497124671936,
|
| 189 |
+
0.044904597103595734,
|
| 190 |
+
-0.1900009959936142,
|
| 191 |
+
0.413065105676651,
|
| 192 |
+
0.41873428225517273,
|
| 193 |
+
0.6618388891220093,
|
| 194 |
+
0.4026392698287964,
|
| 195 |
+
0.8194853663444519,
|
| 196 |
+
0.12383800745010376,
|
| 197 |
+
0.16346246004104614,
|
| 198 |
+
0.15494900941848755,
|
| 199 |
+
0.7400000095367432,
|
| 200 |
+
0.5,
|
| 201 |
+
0.21786384284496307,
|
| 202 |
+
0.1749052256345749,
|
| 203 |
+
0.0
|
| 204 |
+
],
|
| 205 |
+
"state_min": [
|
| 206 |
+
-0.00044060105574317276,
|
| 207 |
+
-0.029227260500192642,
|
| 208 |
+
-0.0007062808726914227,
|
| 209 |
+
-0.006396367214620113,
|
| 210 |
+
-0.034731876105070114,
|
| 211 |
+
-0.00020073111227247864,
|
| 212 |
+
-8.215621392082539e-7,
|
| 213 |
+
-0.5499086976051331,
|
| 214 |
+
-0.5100165009498596,
|
| 215 |
+
-0.613179087638855,
|
| 216 |
+
-0.0030598489101976156,
|
| 217 |
+
-0.0002515389060135931,
|
| 218 |
+
-0.00361030176281929,
|
| 219 |
+
-0.003131122561171651,
|
| 220 |
+
-0.30267173051834106,
|
| 221 |
+
0.162300705909729,
|
| 222 |
+
-0.8084174394607544,
|
| 223 |
+
-0.053157128393650055,
|
| 224 |
+
-0.48188674449920654,
|
| 225 |
+
-0.28324440121650696,
|
| 226 |
+
-0.02153456024825573,
|
| 227 |
+
-0.559512734413147,
|
| 228 |
+
-0.4063037037849426,
|
| 229 |
+
-0.625334620475769,
|
| 230 |
+
-0.17857033014297485,
|
| 231 |
+
-0.14080968499183655,
|
| 232 |
+
-0.3861367404460907,
|
| 233 |
+
-0.2920348048210144,
|
| 234 |
+
-0.0902835875749588,
|
| 235 |
+
-0.1666938215494156,
|
| 236 |
+
-0.07615894079208374,
|
| 237 |
+
0.44999998807907104,
|
| 238 |
+
0.0,
|
| 239 |
+
0.0,
|
| 240 |
+
0.0,
|
| 241 |
+
0.0
|
| 242 |
+
],
|
| 243 |
+
"state_max": [
|
| 244 |
+
0.013749510049819946,
|
| 245 |
+
0.0003444451722316444,
|
| 246 |
+
5.732499630539678e-6,
|
| 247 |
+
0.0019246992887929082,
|
| 248 |
+
0.0014607172925025225,
|
| 249 |
+
0.0007710650679655373,
|
| 250 |
+
0.0006001993897370994,
|
| 251 |
+
4.888642592959513e-7,
|
| 252 |
+
0.06670719385147095,
|
| 253 |
+
1.4086220971876173e-6,
|
| 254 |
+
0.43387407064437866,
|
| 255 |
+
1.2414171695709229,
|
| 256 |
+
0.6964682936668396,
|
| 257 |
+
1.2072811126708984,
|
| 258 |
+
0.0866343304514885,
|
| 259 |
+
0.2651435434818268,
|
| 260 |
+
0.49075624346733093,
|
| 261 |
+
0.34916067123413086,
|
| 262 |
+
-0.0007500328356400132,
|
| 263 |
+
0.2507650554180145,
|
| 264 |
+
0.9099032282829285,
|
| 265 |
+
0.07794909924268723,
|
| 266 |
+
-0.15903376042842865,
|
| 267 |
+
0.29115578532218933,
|
| 268 |
+
0.48632845282554626,
|
| 269 |
+
0.4680853486061096,
|
| 270 |
+
0.40000519156455994,
|
| 271 |
+
0.7901750206947327,
|
| 272 |
+
0.11165501922369003,
|
| 273 |
+
0.1871986985206604,
|
| 274 |
+
0.15685616433620453,
|
| 275 |
+
0.7400000095367432,
|
| 276 |
+
0.0,
|
| 277 |
+
0.0,
|
| 278 |
+
0.0,
|
| 279 |
+
0.0
|
| 280 |
+
],
|
| 281 |
+
"normalize_state": true,
|
| 282 |
+
"pad_action_dim": 36,
|
| 283 |
+
"pad_state_dim": 36
|
| 284 |
+
}
|
| 285 |
+
},
|
| 286 |
+
"root_dir": "/data/jliu/data",
|
| 287 |
+
"train_repo_ids": [
|
| 288 |
+
"G1WholebodyXMoveBendPickTeleop-v0"
|
| 289 |
+
],
|
| 290 |
+
"val_repo_ids": [
|
| 291 |
+
"G1WholebodyXMoveBendPickTeleop-v0"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
"model": {
|
| 295 |
+
"resnet_store_path": null,
|
| 296 |
+
"pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
|
| 297 |
+
"rtc": true,
|
| 298 |
+
"max_delay": 8,
|
| 299 |
+
"action_dim": 36,
|
| 300 |
+
"action_chunk_size": 30,
|
| 301 |
+
"action_exec_horizon": 30,
|
| 302 |
+
"observation_horizon": 1,
|
| 303 |
+
"img_chunk": 1,
|
| 304 |
+
"n_cams": 1,
|
| 305 |
+
"use_obs": "add_token",
|
| 306 |
+
"dropout": 0.1,
|
| 307 |
+
"noise_scheduler": "flow",
|
| 308 |
+
"train_diffusion_steps": 1000,
|
| 309 |
+
"eval_diffusion_steps": 10,
|
| 310 |
+
"share_cam_features": false,
|
| 311 |
+
"early_fusion": false,
|
| 312 |
+
"odim": 36,
|
| 313 |
+
"n_conditions": 0,
|
| 314 |
+
"token_fusion": "concat",
|
| 315 |
+
"loss_w": [
|
| 316 |
+
0.1,
|
| 317 |
+
0.2,
|
| 318 |
+
0.1
|
| 319 |
+
],
|
| 320 |
+
"time_dim": 256,
|
| 321 |
+
"hidden_dim": 1536,
|
| 322 |
+
"num_blocks": 6,
|
| 323 |
+
"dim_feedforward": 2048,
|
| 324 |
+
"nhead": 24,
|
| 325 |
+
"activation": "gelu",
|
| 326 |
+
"view_feature_dim": 2048,
|
| 327 |
+
"use_film": false,
|
| 328 |
+
"combined_temb": false,
|
| 329 |
+
"use_dit": false,
|
| 330 |
+
"weight_decay": 0.01,
|
| 331 |
+
"model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
|
| 332 |
+
"vlm_ckpt_step": null,
|
| 333 |
+
"tune_vlm": false,
|
| 334 |
+
"tune_mm_llm": false,
|
| 335 |
+
"tune_mm_vision": false,
|
| 336 |
+
"tune_mm_mlp": false,
|
| 337 |
+
"gradient_checkpointing": true,
|
| 338 |
+
"lang_backbone_lr": 0.00001,
|
| 339 |
+
"mm_projector_lr": 0.00001,
|
| 340 |
+
"vision_tower_lr": 1e-6,
|
| 341 |
+
"optim": "adamw_torch",
|
| 342 |
+
"model_max_length": 4096,
|
| 343 |
+
"data_flatten": true,
|
| 344 |
+
"data_packing": true,
|
| 345 |
+
"max_pixels": 451584,
|
| 346 |
+
"min_pixels": 12544
|
| 347 |
+
}
|
| 348 |
+
}
|