Commit ·
c9ccfbe
1
Parent(s): dd235e3
Add model checkpoint gr00t_n1_5_rbcs_bs32_60k
Browse files- .gitattributes +4 -0
- checkpoints/step-010000-epoch-00-loss=0.0527.pt +3 -0
- checkpoints/step-020000-epoch-00-loss=0.0626.pt +3 -0
- checkpoints/step-030000-epoch-00-loss=0.0623.pt +3 -0
- checkpoints/step-040000-epoch-00-loss=0.0878.pt +3 -0
- config.json +3 -0
- config.yaml +3 -0
- dataset_statistics.json +3 -0
- prism-dinosiglip-224px+oxe+diffusion+n0+b8+x42+t1758331480--image_aug.jsonl +0 -0
- run-metrics.jsonl +1 -0
- wandb/debug-internal.log +6 -0
- wandb/debug.log +0 -0
- wandb/latest-run/files/output.log +28 -0
- wandb/latest-run/files/requirements.txt +147 -0
- wandb/latest-run/files/wandb-metadata.json +3 -0
- wandb/latest-run/logs/debug-core.log +6 -0
- wandb/latest-run/logs/debug-internal.log +6 -0
- wandb/latest-run/logs/debug.log +0 -0
- wandb/latest-run/run-gkot7bhi.wandb +3 -0
- wandb/run-20250920_012912-gkot7bhi/files/output.log +28 -0
- wandb/run-20250920_012912-gkot7bhi/files/requirements.txt +147 -0
- wandb/run-20250920_012912-gkot7bhi/files/wandb-metadata.json +3 -0
- wandb/run-20250920_012912-gkot7bhi/logs/debug-core.log +6 -0
- wandb/run-20250920_012912-gkot7bhi/logs/debug-internal.log +6 -0
- wandb/run-20250920_012912-gkot7bhi/logs/debug.log +0 -0
- wandb/run-20250920_012912-gkot7bhi/run-gkot7bhi.wandb +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.yaml filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
wandb/latest-run/run-gkot7bhi.wandb filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
wandb/run-20250920_012912-gkot7bhi/run-gkot7bhi.wandb filter=lfs diff=lfs merge=lfs -text
|
checkpoints/step-010000-epoch-00-loss=0.0527.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1874c41fdf3641bb8b8e0130bb3119cfa8bac21c1fed36aa50b688bd7786504
|
| 3 |
+
size 30521296261
|
checkpoints/step-020000-epoch-00-loss=0.0626.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9f5e650e6797a290366bfa655b4aaff2abab26e4e5995651e0e937d2d97d64f
|
| 3 |
+
size 30521296261
|
checkpoints/step-030000-epoch-00-loss=0.0623.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c967f6c7e35266107290948e978be1c6946f8525a642656e8555ca74593d1704
|
| 3 |
+
size 30521296261
|
checkpoints/step-040000-epoch-00-loss=0.0878.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:307a98e323774d784c6880bbd2b222ac13a1573551975082684186f122462e2d
|
| 3 |
+
size 30521296261
|
config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1708d58ff80b38086e4b3c8812f3a697ac86188f901c73cc210bfbbe56b8bf7c
|
| 3 |
+
size 2440
|
config.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d541c5bdab4829c9fecee08ff3eb26fa455c93bec75ac48e77911a9276fe429d
|
| 3 |
+
size 2015
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebd47beb7c2a64827bf3c5aaeeb2ce16a3575537bc414015522f6e939ca70c15
|
| 3 |
+
size 2263
|
prism-dinosiglip-224px+oxe+diffusion+n0+b8+x42+t1758331480--image_aug.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
run-metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"hparams": {"action_dim": 7, "action_model_type": "DiT-B", "alignment_loss_weight": 0.1, "data_root_dir": "/fsx/OpenXRLDS/bridgev2_processed", "freeze_meta_emb": false, "future_action_window_size": 15, "gate_bias_init": -2.0, "hf_token": "HF_TOKEN", "image_aug": true, "is_resume": false, "load_all_data_for_training": true, "load_meta_emb_from": null, "memory_hidden_factor": 4, "memory_num_layers": 2, "memory_type": "transformer", "n_meta_queries": 4, "past_action_window_size": 0, "past_length": 4, "prealign_meta_queries_with": null, "pretrained_checkpoint": "CogACT/CogACT-Base", "re_initialize_memory": false, "repeated_diffusion_steps": 8, "resume_epoch": null, "resume_step": null, "run_id": "prism-dinosiglip-224px+oxe+diffusion+n0+b8+x42+t1758331480--image_aug", "run_id_note": null, "run_root_dir": "/fsx/daewon/ckpt/bridege/cogact_base_naive_multi_frame", "save_interval": 10000, "scalar_gate": false, "seed": 42, "tcl_tau": 0.07, "tmf_hidden_act": "silu", "tmf_initializer_range": 0.02, "tmf_max_position_embeddings": 3, "tmf_num_attention_heads": 16, "tmf_num_key_value_heads": 16, "tmf_rms_norm_eps": 1e-05, "token_wise_memory": false, "trackers": ["jsonl", "wandb"], "use_block_attn": false, "use_ema": false, "use_gate": false, "use_meta_queries": false, "use_mq_time_embedder": true, "use_mq_z_embedder": false, "use_naive_multi_frame": true, "use_original_max_pos_emb": false, "use_past_meta_queries_recursive": false, "use_projection_layer": false, "use_sinu_pe": false, "vla": {"base_vlm": "prism-dinosiglip-224px+7b", "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 100, "expected_world_size": 4, "freeze_llm_backbone": false, "freeze_vision_backbone": false, "global_batch_size": 32, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": 60000, "per_device_batch_size": 8, "reduce_in_full_precision": true, "shuffle_buffer_size": 250000, "train_strategy": "fsdp-full-shard", "type": "prism-dinosiglip-224px+oxe+diffusion", "unfreeze_last_llm_layer": false, "vla_id": "prism-dinosiglip-224px+oxe+diffusion", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "daeone0920", "wandb_project": "CogACT-Bridge"}, "run_id": "prism-dinosiglip-224px+oxe+diffusion+n0+b8+x42+t1758331480--image_aug"}
|
wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-20T01:29:12.850210295Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
|
| 2 |
+
{"time":"2025-09-20T01:29:13.295086072Z","level":"INFO","msg":"stream: created new stream","id":"gkot7bhi"}
|
| 3 |
+
{"time":"2025-09-20T01:29:13.295172952Z","level":"INFO","msg":"stream: started","id":"gkot7bhi"}
|
| 4 |
+
{"time":"2025-09-20T01:29:13.29524218Z","level":"INFO","msg":"writer: started","stream_id":"gkot7bhi"}
|
| 5 |
+
{"time":"2025-09-20T01:29:13.295271085Z","level":"INFO","msg":"sender: started","stream_id":"gkot7bhi"}
|
| 6 |
+
{"time":"2025-09-20T01:29:13.295274166Z","level":"INFO","msg":"handler: started","stream_id":"gkot7bhi"}
|
wandb/debug.log
ADDED
|
File without changes
|
wandb/latest-run/files/output.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
09/20 [01:29:15] INFO | >> [*] Starting VLA Training Loop train.py:364
|
| 2 |
+
=>> [Epoch 000] Global Step 009999 =>> LR :: 0.000020 - Loss :: 0.0353: 17%|█▋ | 9999/60000 [4:15:13<21:31:21, 1.55s/it]/fsx/daewon/miniforge3/envs/cogact/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 3 |
+
warnings.warn( # warn only once
|
| 4 |
+
09/20 [05:47:29] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 5 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 6 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 7 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 8 |
+
heckpoints/step-010000-epoch-00-loss=0.052
|
| 9 |
+
7.optimizer
|
| 10 |
+
=>> [Epoch 000] Global Step 046503 =>> LR :: 0.000020 - Loss :: 0.0931: 78%|███████▊ | 46503/60000 [19:54:32<5:56:43, 1.59s/it]
|
| 11 |
+
09/20 [10:05:18] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 12 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 13 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 14 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 15 |
+
heckpoints/step-020000-epoch-00-loss=0.062
|
| 16 |
+
6.optimizer
|
| 17 |
+
09/20 [14:21:39] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 18 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 19 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 20 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 21 |
+
heckpoints/step-030000-epoch-00-loss=0.062
|
| 22 |
+
3.optimizer
|
| 23 |
+
09/20 [18:37:43] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 24 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 25 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 26 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 27 |
+
heckpoints/step-040000-epoch-00-loss=0.087
|
| 28 |
+
8.optimizer
|
wandb/latest-run/files/requirements.txt
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
kiwisolver==1.4.9
|
| 2 |
+
safetensors==0.6.2
|
| 3 |
+
tqdm==4.67.1
|
| 4 |
+
six==1.17.0
|
| 5 |
+
requests-oauthlib==2.0.0
|
| 6 |
+
contourpy==1.3.2
|
| 7 |
+
typing_extensions==4.15.0
|
| 8 |
+
gitdb==4.0.12
|
| 9 |
+
torchaudio==2.8.0
|
| 10 |
+
nvidia-nvtx-cu12==12.8.90
|
| 11 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 12 |
+
cycler==0.12.1
|
| 13 |
+
ml-dtypes==0.2.0
|
| 14 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 15 |
+
google-auth==2.40.3
|
| 16 |
+
wheel==0.45.1
|
| 17 |
+
flash_attn==2.5.5
|
| 18 |
+
Jinja2==3.1.6
|
| 19 |
+
array_record==0.8.1
|
| 20 |
+
mpmath==1.3.0
|
| 21 |
+
OpenEXR==3.4.0
|
| 22 |
+
pyyaml-include==1.4.1
|
| 23 |
+
json-numpy==2.1.1
|
| 24 |
+
dm-tree==0.1.9
|
| 25 |
+
transformers==4.40.1
|
| 26 |
+
libclang==18.1.1
|
| 27 |
+
Pygments==2.19.2
|
| 28 |
+
markdown-it-py==4.0.0
|
| 29 |
+
pyasn1==0.6.1
|
| 30 |
+
tensorboard-data-server==0.7.2
|
| 31 |
+
torch==2.8.0
|
| 32 |
+
sympy==1.14.0
|
| 33 |
+
grpcio==1.74.0
|
| 34 |
+
toml==0.10.2
|
| 35 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 36 |
+
PyYAML==6.0.2
|
| 37 |
+
tokenizers==0.19.1
|
| 38 |
+
nvidia-curand-cu12==10.3.9.90
|
| 39 |
+
sentry-sdk==2.37.1
|
| 40 |
+
google-pasta==0.2.0
|
| 41 |
+
tensorflow==2.15.0
|
| 42 |
+
pydantic==2.11.7
|
| 43 |
+
triton==3.4.0
|
| 44 |
+
urllib3==2.5.0
|
| 45 |
+
regex==2025.9.1
|
| 46 |
+
torchvision==0.23.0
|
| 47 |
+
pyparsing==3.2.3
|
| 48 |
+
opt_einsum==3.4.0
|
| 49 |
+
astunparse==1.6.3
|
| 50 |
+
packaging==25.0
|
| 51 |
+
Werkzeug==3.1.3
|
| 52 |
+
charset-normalizer==3.4.3
|
| 53 |
+
h5py==3.14.0
|
| 54 |
+
einops==0.8.1
|
| 55 |
+
psutil==7.0.0
|
| 56 |
+
idna==3.10
|
| 57 |
+
pydantic_core==2.33.2
|
| 58 |
+
mypy_extensions==1.1.0
|
| 59 |
+
absl-py==2.3.1
|
| 60 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 61 |
+
peft==0.11.1
|
| 62 |
+
MarkupSafe==3.0.2
|
| 63 |
+
tensorflow-estimator==2.15.0
|
| 64 |
+
importlib_resources==6.5.2
|
| 65 |
+
nvidia-nccl-cu12==2.27.3
|
| 66 |
+
python-dateutil==2.9.0.post0
|
| 67 |
+
huggingface-hub==0.34.4
|
| 68 |
+
promise==2.3
|
| 69 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 70 |
+
typeguard==2.13.3
|
| 71 |
+
keras==2.15.0
|
| 72 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 73 |
+
matplotlib==3.10.6
|
| 74 |
+
wrapt==1.14.2
|
| 75 |
+
pyasn1_modules==0.4.2
|
| 76 |
+
gast==0.6.0
|
| 77 |
+
timm==0.9.10
|
| 78 |
+
fsspec==2025.9.0
|
| 79 |
+
accelerate==1.10.1
|
| 80 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 81 |
+
certifi==2025.8.3
|
| 82 |
+
tensorflow-graphics==2021.12.3
|
| 83 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 84 |
+
Markdown==3.9
|
| 85 |
+
openvla==0.0.3
|
| 86 |
+
etils==1.13.0
|
| 87 |
+
smmap==5.0.2
|
| 88 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 89 |
+
typing-inspect==0.9.0
|
| 90 |
+
mergedeep==1.3.4
|
| 91 |
+
rich==14.1.0
|
| 92 |
+
platformdirs==4.4.0
|
| 93 |
+
google-auth-oauthlib==1.2.2
|
| 94 |
+
hf-xet==1.1.9
|
| 95 |
+
tensorflow-metadata==1.17.2
|
| 96 |
+
filelock==3.19.1
|
| 97 |
+
click==8.2.1
|
| 98 |
+
termcolor==3.1.0
|
| 99 |
+
rsa==4.9.1
|
| 100 |
+
tensorflow-datasets==4.9.3
|
| 101 |
+
cachetools==5.5.2
|
| 102 |
+
requests==2.32.5
|
| 103 |
+
fonttools==4.59.2
|
| 104 |
+
numpy==1.26.4
|
| 105 |
+
wandb==0.21.4
|
| 106 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 107 |
+
mdurl==0.1.2
|
| 108 |
+
draccus==0.11.5
|
| 109 |
+
sentencepiece==0.1.99
|
| 110 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 111 |
+
pip==25.2
|
| 112 |
+
trimesh==4.8.1
|
| 113 |
+
pillow==11.3.0
|
| 114 |
+
annotated-types==0.7.0
|
| 115 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 116 |
+
jsonlines==4.0.0
|
| 117 |
+
attrs==25.3.0
|
| 118 |
+
protobuf==4.21.12
|
| 119 |
+
tensorboard==2.15.2
|
| 120 |
+
oauthlib==3.3.1
|
| 121 |
+
typing-inspection==0.4.1
|
| 122 |
+
scipy==1.15.3
|
| 123 |
+
setuptools==80.9.0
|
| 124 |
+
cogact==0.0.1
|
| 125 |
+
dlimp==0.0.1
|
| 126 |
+
networkx==3.4.2
|
| 127 |
+
tensorflow-addons==0.23.0
|
| 128 |
+
GitPython==3.1.45
|
| 129 |
+
ninja==1.13.0
|
| 130 |
+
zipp==3.23.0
|
| 131 |
+
flatbuffers==25.2.10
|
| 132 |
+
typeguard==4.3.0
|
| 133 |
+
wheel==0.45.1
|
| 134 |
+
importlib_metadata==8.0.0
|
| 135 |
+
tomli==2.0.1
|
| 136 |
+
jaraco.collections==5.1.0
|
| 137 |
+
jaraco.context==5.3.0
|
| 138 |
+
platformdirs==4.2.2
|
| 139 |
+
backports.tarfile==1.2.0
|
| 140 |
+
autocommand==2.2.2
|
| 141 |
+
packaging==24.2
|
| 142 |
+
jaraco.text==3.12.1
|
| 143 |
+
jaraco.functools==4.0.1
|
| 144 |
+
zipp==3.19.2
|
| 145 |
+
typing_extensions==4.12.2
|
| 146 |
+
inflect==7.3.1
|
| 147 |
+
more-itertools==10.3.0
|
wandb/latest-run/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00498c421104e4848f452a7e8228c67ef0462a41e9800630b124809260823c01
|
| 3 |
+
size 4549
|
wandb/latest-run/logs/debug-core.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-20T01:29:12.598207172Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpw8ncoq98/port-188371.txt","pid":188371,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-20T01:29:12.599858934Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":188371}
|
| 3 |
+
{"time":"2025-09-20T01:29:12.599832149Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-188371-1343361-2142719206/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-20T01:29:12.792915528Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-20T01:29:12.847731224Z","level":"INFO","msg":"handleInformInit: received","streamId":"gkot7bhi","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-20T01:29:13.295181026Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gkot7bhi","id":"1(@)"}
|
wandb/latest-run/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-20T01:29:12.850210295Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
|
| 2 |
+
{"time":"2025-09-20T01:29:13.295086072Z","level":"INFO","msg":"stream: created new stream","id":"gkot7bhi"}
|
| 3 |
+
{"time":"2025-09-20T01:29:13.295172952Z","level":"INFO","msg":"stream: started","id":"gkot7bhi"}
|
| 4 |
+
{"time":"2025-09-20T01:29:13.29524218Z","level":"INFO","msg":"writer: started","stream_id":"gkot7bhi"}
|
| 5 |
+
{"time":"2025-09-20T01:29:13.295271085Z","level":"INFO","msg":"sender: started","stream_id":"gkot7bhi"}
|
| 6 |
+
{"time":"2025-09-20T01:29:13.295274166Z","level":"INFO","msg":"handler: started","stream_id":"gkot7bhi"}
|
wandb/latest-run/logs/debug.log
ADDED
|
File without changes
|
wandb/latest-run/run-gkot7bhi.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4610c404e09e260730e3beea6621f785c6919d0a9cf98792421260712bcd986e
|
| 3 |
+
size 67207168
|
wandb/run-20250920_012912-gkot7bhi/files/output.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
09/20 [01:29:15] INFO | >> [*] Starting VLA Training Loop train.py:364
|
| 2 |
+
=>> [Epoch 000] Global Step 009999 =>> LR :: 0.000020 - Loss :: 0.0353: 17%|█▋ | 9999/60000 [4:15:13<21:31:21, 1.55s/it]/fsx/daewon/miniforge3/envs/cogact/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 3 |
+
warnings.warn( # warn only once
|
| 4 |
+
09/20 [05:47:29] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 5 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 6 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 7 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 8 |
+
heckpoints/step-010000-epoch-00-loss=0.052
|
| 9 |
+
7.optimizer
|
| 10 |
+
=>> [Epoch 000] Global Step 046503 =>> LR :: 0.000020 - Loss :: 0.0931: 78%|███████▊ | 46503/60000 [19:54:32<5:56:43, 1.59s/it]
|
| 11 |
+
09/20 [10:05:18] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 12 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 13 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 14 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 15 |
+
heckpoints/step-020000-epoch-00-loss=0.062
|
| 16 |
+
6.optimizer
|
| 17 |
+
09/20 [14:21:39] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 18 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 19 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 20 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 21 |
+
heckpoints/step-030000-epoch-00-loss=0.062
|
| 22 |
+
3.optimizer
|
| 23 |
+
09/20 [18:37:43] INFO | >> [*] Saved optimizer state dict to fsdp.py:155
|
| 24 |
+
/fsx/daewon/ckpt/bridege/cogact_base_naive
|
| 25 |
+
_multi_frame/prism-dinosiglip-224px+oxe+di
|
| 26 |
+
ffusion+n0+b8+x42+t1758331480--image_aug/c
|
| 27 |
+
heckpoints/step-040000-epoch-00-loss=0.087
|
| 28 |
+
8.optimizer
|
wandb/run-20250920_012912-gkot7bhi/files/requirements.txt
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
kiwisolver==1.4.9
|
| 2 |
+
safetensors==0.6.2
|
| 3 |
+
tqdm==4.67.1
|
| 4 |
+
six==1.17.0
|
| 5 |
+
requests-oauthlib==2.0.0
|
| 6 |
+
contourpy==1.3.2
|
| 7 |
+
typing_extensions==4.15.0
|
| 8 |
+
gitdb==4.0.12
|
| 9 |
+
torchaudio==2.8.0
|
| 10 |
+
nvidia-nvtx-cu12==12.8.90
|
| 11 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 12 |
+
cycler==0.12.1
|
| 13 |
+
ml-dtypes==0.2.0
|
| 14 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 15 |
+
google-auth==2.40.3
|
| 16 |
+
wheel==0.45.1
|
| 17 |
+
flash_attn==2.5.5
|
| 18 |
+
Jinja2==3.1.6
|
| 19 |
+
array_record==0.8.1
|
| 20 |
+
mpmath==1.3.0
|
| 21 |
+
OpenEXR==3.4.0
|
| 22 |
+
pyyaml-include==1.4.1
|
| 23 |
+
json-numpy==2.1.1
|
| 24 |
+
dm-tree==0.1.9
|
| 25 |
+
transformers==4.40.1
|
| 26 |
+
libclang==18.1.1
|
| 27 |
+
Pygments==2.19.2
|
| 28 |
+
markdown-it-py==4.0.0
|
| 29 |
+
pyasn1==0.6.1
|
| 30 |
+
tensorboard-data-server==0.7.2
|
| 31 |
+
torch==2.8.0
|
| 32 |
+
sympy==1.14.0
|
| 33 |
+
grpcio==1.74.0
|
| 34 |
+
toml==0.10.2
|
| 35 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 36 |
+
PyYAML==6.0.2
|
| 37 |
+
tokenizers==0.19.1
|
| 38 |
+
nvidia-curand-cu12==10.3.9.90
|
| 39 |
+
sentry-sdk==2.37.1
|
| 40 |
+
google-pasta==0.2.0
|
| 41 |
+
tensorflow==2.15.0
|
| 42 |
+
pydantic==2.11.7
|
| 43 |
+
triton==3.4.0
|
| 44 |
+
urllib3==2.5.0
|
| 45 |
+
regex==2025.9.1
|
| 46 |
+
torchvision==0.23.0
|
| 47 |
+
pyparsing==3.2.3
|
| 48 |
+
opt_einsum==3.4.0
|
| 49 |
+
astunparse==1.6.3
|
| 50 |
+
packaging==25.0
|
| 51 |
+
Werkzeug==3.1.3
|
| 52 |
+
charset-normalizer==3.4.3
|
| 53 |
+
h5py==3.14.0
|
| 54 |
+
einops==0.8.1
|
| 55 |
+
psutil==7.0.0
|
| 56 |
+
idna==3.10
|
| 57 |
+
pydantic_core==2.33.2
|
| 58 |
+
mypy_extensions==1.1.0
|
| 59 |
+
absl-py==2.3.1
|
| 60 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 61 |
+
peft==0.11.1
|
| 62 |
+
MarkupSafe==3.0.2
|
| 63 |
+
tensorflow-estimator==2.15.0
|
| 64 |
+
importlib_resources==6.5.2
|
| 65 |
+
nvidia-nccl-cu12==2.27.3
|
| 66 |
+
python-dateutil==2.9.0.post0
|
| 67 |
+
huggingface-hub==0.34.4
|
| 68 |
+
promise==2.3
|
| 69 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 70 |
+
typeguard==2.13.3
|
| 71 |
+
keras==2.15.0
|
| 72 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 73 |
+
matplotlib==3.10.6
|
| 74 |
+
wrapt==1.14.2
|
| 75 |
+
pyasn1_modules==0.4.2
|
| 76 |
+
gast==0.6.0
|
| 77 |
+
timm==0.9.10
|
| 78 |
+
fsspec==2025.9.0
|
| 79 |
+
accelerate==1.10.1
|
| 80 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 81 |
+
certifi==2025.8.3
|
| 82 |
+
tensorflow-graphics==2021.12.3
|
| 83 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 84 |
+
Markdown==3.9
|
| 85 |
+
openvla==0.0.3
|
| 86 |
+
etils==1.13.0
|
| 87 |
+
smmap==5.0.2
|
| 88 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 89 |
+
typing-inspect==0.9.0
|
| 90 |
+
mergedeep==1.3.4
|
| 91 |
+
rich==14.1.0
|
| 92 |
+
platformdirs==4.4.0
|
| 93 |
+
google-auth-oauthlib==1.2.2
|
| 94 |
+
hf-xet==1.1.9
|
| 95 |
+
tensorflow-metadata==1.17.2
|
| 96 |
+
filelock==3.19.1
|
| 97 |
+
click==8.2.1
|
| 98 |
+
termcolor==3.1.0
|
| 99 |
+
rsa==4.9.1
|
| 100 |
+
tensorflow-datasets==4.9.3
|
| 101 |
+
cachetools==5.5.2
|
| 102 |
+
requests==2.32.5
|
| 103 |
+
fonttools==4.59.2
|
| 104 |
+
numpy==1.26.4
|
| 105 |
+
wandb==0.21.4
|
| 106 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 107 |
+
mdurl==0.1.2
|
| 108 |
+
draccus==0.11.5
|
| 109 |
+
sentencepiece==0.1.99
|
| 110 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 111 |
+
pip==25.2
|
| 112 |
+
trimesh==4.8.1
|
| 113 |
+
pillow==11.3.0
|
| 114 |
+
annotated-types==0.7.0
|
| 115 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 116 |
+
jsonlines==4.0.0
|
| 117 |
+
attrs==25.3.0
|
| 118 |
+
protobuf==4.21.12
|
| 119 |
+
tensorboard==2.15.2
|
| 120 |
+
oauthlib==3.3.1
|
| 121 |
+
typing-inspection==0.4.1
|
| 122 |
+
scipy==1.15.3
|
| 123 |
+
setuptools==80.9.0
|
| 124 |
+
cogact==0.0.1
|
| 125 |
+
dlimp==0.0.1
|
| 126 |
+
networkx==3.4.2
|
| 127 |
+
tensorflow-addons==0.23.0
|
| 128 |
+
GitPython==3.1.45
|
| 129 |
+
ninja==1.13.0
|
| 130 |
+
zipp==3.23.0
|
| 131 |
+
flatbuffers==25.2.10
|
| 132 |
+
typeguard==4.3.0
|
| 133 |
+
wheel==0.45.1
|
| 134 |
+
importlib_metadata==8.0.0
|
| 135 |
+
tomli==2.0.1
|
| 136 |
+
jaraco.collections==5.1.0
|
| 137 |
+
jaraco.context==5.3.0
|
| 138 |
+
platformdirs==4.2.2
|
| 139 |
+
backports.tarfile==1.2.0
|
| 140 |
+
autocommand==2.2.2
|
| 141 |
+
packaging==24.2
|
| 142 |
+
jaraco.text==3.12.1
|
| 143 |
+
jaraco.functools==4.0.1
|
| 144 |
+
zipp==3.19.2
|
| 145 |
+
typing_extensions==4.12.2
|
| 146 |
+
inflect==7.3.1
|
| 147 |
+
more-itertools==10.3.0
|
wandb/run-20250920_012912-gkot7bhi/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00498c421104e4848f452a7e8228c67ef0462a41e9800630b124809260823c01
|
| 3 |
+
size 4549
|
wandb/run-20250920_012912-gkot7bhi/logs/debug-core.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-20T01:29:12.598207172Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpw8ncoq98/port-188371.txt","pid":188371,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-20T01:29:12.599858934Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":188371}
|
| 3 |
+
{"time":"2025-09-20T01:29:12.599832149Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-188371-1343361-2142719206/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-20T01:29:12.792915528Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-20T01:29:12.847731224Z","level":"INFO","msg":"handleInformInit: received","streamId":"gkot7bhi","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-20T01:29:13.295181026Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gkot7bhi","id":"1(@)"}
|
wandb/run-20250920_012912-gkot7bhi/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-20T01:29:12.850210295Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
|
| 2 |
+
{"time":"2025-09-20T01:29:13.295086072Z","level":"INFO","msg":"stream: created new stream","id":"gkot7bhi"}
|
| 3 |
+
{"time":"2025-09-20T01:29:13.295172952Z","level":"INFO","msg":"stream: started","id":"gkot7bhi"}
|
| 4 |
+
{"time":"2025-09-20T01:29:13.29524218Z","level":"INFO","msg":"writer: started","stream_id":"gkot7bhi"}
|
| 5 |
+
{"time":"2025-09-20T01:29:13.295271085Z","level":"INFO","msg":"sender: started","stream_id":"gkot7bhi"}
|
| 6 |
+
{"time":"2025-09-20T01:29:13.295274166Z","level":"INFO","msg":"handler: started","stream_id":"gkot7bhi"}
|
wandb/run-20250920_012912-gkot7bhi/logs/debug.log
ADDED
|
File without changes
|
wandb/run-20250920_012912-gkot7bhi/run-gkot7bhi.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4610c404e09e260730e3beea6621f785c6919d0a9cf98792421260712bcd986e
|
| 3 |
+
size 67207168
|