Upload 10k metrics and environment snapshot
Browse files- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/date_utc.txt +1 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/df_workspace.txt +2 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/env_selected.txt +9 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/nvidia_smi.txt +32 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/nvidia_smi_topo.txt +23 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/pip_freeze.txt +145 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/python_version.txt +1 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/torch_env.txt +10 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/uname.txt +1 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/environment/workspace_usage.txt +5 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/baseline_train_full.csv +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/comparison_2k_vs_10k.csv +5 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/parallel_train_full.csv +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/runtime_table.csv +12 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/sample_eval_table.csv +17 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/startup_summaries.txt +72 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/summary.json +1018 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/teacher_forced_eval_table.csv +9 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/train_loss_table.csv +9 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/__pycache__/upload_to_hf.cpython-311.pyc +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/__pycache__/upload_to_hf_incremental.cpython-311.pyc +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/changed_files.txt +41 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/checkpoint_locations.txt +4 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/commands_reproduce.sh +67 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf.py +60 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf_incremental.py +196 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_10k_followup.log +23 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k.log +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_1000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_10000.log +198 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_2000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_5000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k.log +0 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_1000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_10000.log +198 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_2000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_5000.log +148 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/smoke_baseline_10k_diag.log +149 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/smoke_parallel_10k_diag.log +149 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/sanity_checks/inspect_twin_packed_batch_handover_train.log +176 -0
- artifacts/twin_handover_packed_parallelization_10k_20260309/sanity_checks/warmstart_equivalence_10k.log +29 -0
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/date_utc.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2026-03-09 21:57:08 UTC
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/df_workspace.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Filesystem Size Used Avail Use% Mounted on
|
| 2 |
+
mfs#us-mo-1.runpod.net:9421 154T 127T 27T 83% /workspace
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/env_selected.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
HF_HOME=
|
| 2 |
+
HF_HUB_CACHE=
|
| 3 |
+
HF_DATASETS_CACHE=
|
| 4 |
+
HUGGINGFACE_HUB_CACHE=
|
| 5 |
+
XDG_CACHE_HOME=
|
| 6 |
+
OPENPI_LEROBOT_HOME=
|
| 7 |
+
PYTORCH_CUDA_ALLOC_CONF=
|
| 8 |
+
OPENPI_TORCH_COMPILE_SAMPLE_ACTIONS=
|
| 9 |
+
TOKENIZERS_PARALLELISM=
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/nvidia_smi.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Mon Mar 9 21:57:08 2026
|
| 2 |
+
+-----------------------------------------------------------------------------------------+
|
| 3 |
+
| NVIDIA-SMI 580.126.09 Driver Version: 580.126.09 CUDA Version: 13.0 |
|
| 4 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 5 |
+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
|
| 6 |
+
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
|
| 7 |
+
| | | MIG M. |
|
| 8 |
+
|=========================================+========================+======================|
|
| 9 |
+
| 0 NVIDIA H100 80GB HBM3 On | 00000000:3A:00.0 Off | 0 |
|
| 10 |
+
| N/A 26C P0 71W / 700W | 0MiB / 81559MiB | 0% Default |
|
| 11 |
+
| | | Disabled |
|
| 12 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 13 |
+
| 1 NVIDIA H100 80GB HBM3 On | 00000000:5D:00.0 Off | 0 |
|
| 14 |
+
| N/A 25C P0 72W / 700W | 0MiB / 81559MiB | 0% Default |
|
| 15 |
+
| | | Disabled |
|
| 16 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 17 |
+
| 2 NVIDIA H100 80GB HBM3 On | 00000000:9A:00.0 Off | 0 |
|
| 18 |
+
| N/A 25C P0 72W / 700W | 0MiB / 81559MiB | 0% Default |
|
| 19 |
+
| | | Disabled |
|
| 20 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 21 |
+
| 3 NVIDIA H100 80GB HBM3 On | 00000000:DB:00.0 Off | 0 |
|
| 22 |
+
| N/A 25C P0 70W / 700W | 0MiB / 81559MiB | 0% Default |
|
| 23 |
+
| | | Disabled |
|
| 24 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 25 |
+
|
| 26 |
+
+-----------------------------------------------------------------------------------------+
|
| 27 |
+
| Processes: |
|
| 28 |
+
| GPU GI CI PID Type Process name GPU Memory |
|
| 29 |
+
| ID ID Usage |
|
| 30 |
+
|=========================================================================================|
|
| 31 |
+
| No running processes found |
|
| 32 |
+
+-----------------------------------------------------------------------------------------+
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/nvidia_smi_topo.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[4mGPU0 GPU1 GPU2 GPU3 NIC0 NIC1 CPU Affinity NUMA Affinity GPU NUMA ID[0m
|
| 2 |
+
GPU0 X NV18 NV18 NV18 NODE NODE 0-51,104-155 0 N/A
|
| 3 |
+
GPU1 NV18 X NV18 NV18 NODE NODE 0-51,104-155 0 N/A
|
| 4 |
+
GPU2 NV18 NV18 X NV18 SYS SYS 52-103,156-207 1 N/A
|
| 5 |
+
GPU3 NV18 NV18 NV18 X SYS SYS 52-103,156-207 1 N/A
|
| 6 |
+
NIC0 NODE NODE SYS SYS X PIX
|
| 7 |
+
NIC1 NODE NODE SYS SYS PIX X
|
| 8 |
+
|
| 9 |
+
Legend:
|
| 10 |
+
|
| 11 |
+
X = Self
|
| 12 |
+
SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)
|
| 13 |
+
NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node
|
| 14 |
+
PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)
|
| 15 |
+
PXB = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)
|
| 16 |
+
PIX = Connection traversing at most a single PCIe bridge
|
| 17 |
+
NV# = Connection traversing a bonded set of # NVLinks
|
| 18 |
+
|
| 19 |
+
NIC Legend:
|
| 20 |
+
|
| 21 |
+
NIC0: mlx5_3
|
| 22 |
+
NIC1: mlx5_4
|
| 23 |
+
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/pip_freeze.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
anyio==4.6.0
|
| 2 |
+
argon2-cffi==23.1.0
|
| 3 |
+
argon2-cffi-bindings==21.2.0
|
| 4 |
+
arrow==1.3.0
|
| 5 |
+
asttokens==2.4.1
|
| 6 |
+
async-lru==2.0.4
|
| 7 |
+
attrs==24.2.0
|
| 8 |
+
babel==2.16.0
|
| 9 |
+
beautifulsoup4==4.12.3
|
| 10 |
+
bleach==6.1.0
|
| 11 |
+
blinker==1.4
|
| 12 |
+
certifi==2024.8.30
|
| 13 |
+
cffi==1.17.1
|
| 14 |
+
charset-normalizer==3.3.2
|
| 15 |
+
comm==0.2.2
|
| 16 |
+
cryptography==3.4.8
|
| 17 |
+
dbus-python==1.2.18
|
| 18 |
+
debugpy==1.8.5
|
| 19 |
+
decorator==5.1.1
|
| 20 |
+
defusedxml==0.7.1
|
| 21 |
+
distro==1.7.0
|
| 22 |
+
entrypoints==0.4
|
| 23 |
+
executing==2.1.0
|
| 24 |
+
fastjsonschema==2.20.0
|
| 25 |
+
filelock==3.13.1
|
| 26 |
+
fqdn==1.5.1
|
| 27 |
+
fsspec==2024.2.0
|
| 28 |
+
h11==0.14.0
|
| 29 |
+
httpcore==1.0.5
|
| 30 |
+
httplib2==0.20.2
|
| 31 |
+
httpx==0.27.2
|
| 32 |
+
idna==3.10
|
| 33 |
+
importlib-metadata==4.6.4
|
| 34 |
+
ipykernel==6.29.5
|
| 35 |
+
ipython==8.27.0
|
| 36 |
+
ipython-genutils==0.2.0
|
| 37 |
+
ipywidgets==8.1.5
|
| 38 |
+
isoduration==20.11.0
|
| 39 |
+
jedi==0.19.1
|
| 40 |
+
jeepney==0.7.1
|
| 41 |
+
Jinja2==3.1.3
|
| 42 |
+
json5==0.9.25
|
| 43 |
+
jsonpointer==3.0.0
|
| 44 |
+
jsonschema==4.23.0
|
| 45 |
+
jsonschema-specifications==2023.12.1
|
| 46 |
+
jupyter-archive==3.4.0
|
| 47 |
+
jupyter-events==0.10.0
|
| 48 |
+
jupyter-highlight-selected-word==0.2.0
|
| 49 |
+
jupyter-lsp==2.2.5
|
| 50 |
+
jupyter_client==7.4.9
|
| 51 |
+
jupyter_contrib_core==0.4.2
|
| 52 |
+
jupyter_contrib_nbextensions==0.7.0
|
| 53 |
+
jupyter_core==5.7.2
|
| 54 |
+
jupyter_nbextensions_configurator==0.6.4
|
| 55 |
+
jupyter_server==2.14.2
|
| 56 |
+
jupyter_server_terminals==0.5.3
|
| 57 |
+
jupyterlab==4.2.5
|
| 58 |
+
jupyterlab_pygments==0.3.0
|
| 59 |
+
jupyterlab_server==2.27.3
|
| 60 |
+
jupyterlab_widgets==3.0.13
|
| 61 |
+
keyring==23.5.0
|
| 62 |
+
launchpadlib==1.10.16
|
| 63 |
+
lazr.restfulclient==0.14.4
|
| 64 |
+
lazr.uri==1.0.6
|
| 65 |
+
lxml==5.3.0
|
| 66 |
+
MarkupSafe==2.1.5
|
| 67 |
+
matplotlib-inline==0.1.7
|
| 68 |
+
mistune==3.0.2
|
| 69 |
+
more-itertools==8.10.0
|
| 70 |
+
mpmath==1.3.0
|
| 71 |
+
nbclassic==1.1.0
|
| 72 |
+
nbclient==0.10.0
|
| 73 |
+
nbconvert==7.16.4
|
| 74 |
+
nbformat==5.10.4
|
| 75 |
+
nest-asyncio==1.6.0
|
| 76 |
+
networkx==3.2.1
|
| 77 |
+
notebook==6.5.5
|
| 78 |
+
notebook_shim==0.2.4
|
| 79 |
+
numpy==1.26.3
|
| 80 |
+
nvidia-cublas-cu12==12.4.2.65
|
| 81 |
+
nvidia-cuda-cupti-cu12==12.4.99
|
| 82 |
+
nvidia-cuda-nvrtc-cu12==12.4.99
|
| 83 |
+
nvidia-cuda-runtime-cu12==12.4.99
|
| 84 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 85 |
+
nvidia-cufft-cu12==11.2.0.44
|
| 86 |
+
nvidia-curand-cu12==10.3.5.119
|
| 87 |
+
nvidia-cusolver-cu12==11.6.0.99
|
| 88 |
+
nvidia-cusparse-cu12==12.3.0.142
|
| 89 |
+
nvidia-nccl-cu12==2.20.5
|
| 90 |
+
nvidia-nvjitlink-cu12==12.4.99
|
| 91 |
+
nvidia-nvtx-cu12==12.4.99
|
| 92 |
+
oauthlib==3.2.0
|
| 93 |
+
overrides==7.7.0
|
| 94 |
+
packaging==24.1
|
| 95 |
+
pandocfilters==1.5.1
|
| 96 |
+
parso==0.8.4
|
| 97 |
+
pexpect==4.9.0
|
| 98 |
+
pillow==10.2.0
|
| 99 |
+
platformdirs==4.3.6
|
| 100 |
+
prometheus_client==0.21.0
|
| 101 |
+
prompt_toolkit==3.0.47
|
| 102 |
+
psutil==6.0.0
|
| 103 |
+
ptyprocess==0.7.0
|
| 104 |
+
pure_eval==0.2.3
|
| 105 |
+
pycparser==2.22
|
| 106 |
+
Pygments==2.18.0
|
| 107 |
+
PyGObject==3.42.1
|
| 108 |
+
PyJWT==2.3.0
|
| 109 |
+
pyparsing==2.4.7
|
| 110 |
+
python-apt==2.4.0+ubuntu4
|
| 111 |
+
python-dateutil==2.9.0.post0
|
| 112 |
+
python-json-logger==2.0.7
|
| 113 |
+
PyYAML==6.0.2
|
| 114 |
+
pyzmq==24.0.1
|
| 115 |
+
referencing==0.35.1
|
| 116 |
+
requests==2.32.3
|
| 117 |
+
rfc3339-validator==0.1.4
|
| 118 |
+
rfc3986-validator==0.1.1
|
| 119 |
+
rpds-py==0.20.0
|
| 120 |
+
SecretStorage==3.3.1
|
| 121 |
+
Send2Trash==1.8.3
|
| 122 |
+
six==1.16.0
|
| 123 |
+
sniffio==1.3.1
|
| 124 |
+
soupsieve==2.6
|
| 125 |
+
stack-data==0.6.3
|
| 126 |
+
sympy==1.12
|
| 127 |
+
terminado==0.18.1
|
| 128 |
+
tinycss2==1.3.0
|
| 129 |
+
torch==2.4.1+cu124
|
| 130 |
+
torchaudio==2.4.1+cu124
|
| 131 |
+
torchvision==0.19.1+cu124
|
| 132 |
+
tornado==6.4.1
|
| 133 |
+
traitlets==5.14.3
|
| 134 |
+
triton==3.0.0
|
| 135 |
+
types-python-dateutil==2.9.0.20240906
|
| 136 |
+
typing_extensions==4.9.0
|
| 137 |
+
uri-template==1.3.0
|
| 138 |
+
urllib3==2.2.3
|
| 139 |
+
wadllib==1.3.6
|
| 140 |
+
wcwidth==0.2.13
|
| 141 |
+
webcolors==24.8.0
|
| 142 |
+
webencodings==0.5.1
|
| 143 |
+
websocket-client==1.8.0
|
| 144 |
+
widgetsnbextension==4.0.13
|
| 145 |
+
zipp==1.0.0
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/python_version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Python 3.11.10
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/torch_env.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python=3.11.10
|
| 2 |
+
torch=2.7.1+cu126
|
| 3 |
+
cuda=12.6
|
| 4 |
+
cudnn=90501
|
| 5 |
+
cuda_available=True
|
| 6 |
+
device_count=4
|
| 7 |
+
device_0=NVIDIA H100 80GB HBM3
|
| 8 |
+
device_1=NVIDIA H100 80GB HBM3
|
| 9 |
+
device_2=NVIDIA H100 80GB HBM3
|
| 10 |
+
device_3=NVIDIA H100 80GB HBM3
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/uname.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Linux 9a96de7d560b 6.8.0-90-generic #91-Ubuntu SMP PREEMPT_DYNAMIC Tue Nov 18 14:14:30 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
|
artifacts/twin_handover_packed_parallelization_10k_20260309/environment/workspace_usage.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
391G /workspace/pi05tests-openpi-multiarm/openpi/checkpoints
|
| 2 |
+
26G /workspace/pi05tests-openpi-multiarm/artifacts
|
| 3 |
+
9.5G /workspace/checkpoints
|
| 4 |
+
23G /workspace/.hf
|
| 5 |
+
11G /workspace/lerobot
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/baseline_train_full.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/comparison_2k_vs_10k.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,run,val_1000_mean,val_2000_mean,val_5000_mean,val_10000_mean,runtime,peak_vram
|
| 2 |
+
baseline,2k,0.052885,0.035776,,,33:27,35.23GB
|
| 3 |
+
baseline,10k,0.06113,0.041595,0.027324,0.022345,2:13:40,35.23GB
|
| 4 |
+
parallel,2k,0.051214,0.03568,,,30:38,35.27GB
|
| 5 |
+
parallel,10k,0.059715,0.039947,0.02734,0.022168,2:20:51,35.27GB
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/parallel_train_full.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/runtime_table.csv
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
stage,start_utc,end_utc,duration_seconds,duration_hms
|
| 2 |
+
baseline_train,2026-03-09 16:03:23 UTC,2026-03-09 18:17:03 UTC,8020,2:13:40
|
| 3 |
+
baseline_eval_1000,2026-03-09 18:17:03 UTC,2026-03-09 18:23:42 UTC,399,0:06:39
|
| 4 |
+
baseline_eval_2000,2026-03-09 18:23:42 UTC,2026-03-09 18:28:54 UTC,312,0:05:12
|
| 5 |
+
baseline_eval_5000,2026-03-09 18:28:54 UTC,2026-03-09 18:33:53 UTC,299,0:04:59
|
| 6 |
+
baseline_eval_10000,2026-03-09 18:33:53 UTC,2026-03-09 18:41:07 UTC,434,0:07:14
|
| 7 |
+
parallel_train,2026-03-09 18:41:07 UTC,2026-03-09 21:01:58 UTC,8451,2:20:51
|
| 8 |
+
parallel_eval_1000,2026-03-09 21:01:58 UTC,2026-03-09 21:14:35 UTC,757,0:12:37
|
| 9 |
+
parallel_eval_2000,2026-03-09 21:14:35 UTC,2026-03-09 21:22:39 UTC,484,0:08:04
|
| 10 |
+
parallel_eval_5000,2026-03-09 21:22:40 UTC,2026-03-09 21:35:26 UTC,766,0:12:46
|
| 11 |
+
parallel_eval_10000,2026-03-09 21:35:26 UTC,2026-03-09 21:45:53 UTC,627,0:10:27
|
| 12 |
+
full_pipeline,2026-03-09 15:57:20 UTC,2026-03-09 21:45:53 UTC,20913,5:48:33
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/sample_eval_table.csv
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,checkpoint_step,num_steps,num_batches,mean_masked_mae,std_masked_mae,mean_left_arm_mae,std_left_arm_mae,mean_right_arm_mae,std_right_arm_mae,mean_left_joint_mae,std_left_joint_mae,mean_left_gripper_mae,std_left_gripper_mae,mean_right_joint_mae,std_right_joint_mae,mean_right_gripper_mae,std_right_gripper_mae,mean_left_right_imbalance_mae,std_left_right_imbalance_mae,per_batch_timing_seconds
|
| 2 |
+
baseline,1000,4,16,0.090938,0.02224,0.120414,0.046606,0.061461,0.058026,0.130966,0.054578,0.046552,0.06792,0.063945,0.062779,0.044077,0.053987,0.095076,0.059464,mean=0.3131 std=0.0370 min=0.2649 max=0.3781
|
| 3 |
+
baseline,1000,10,16,0.100992,0.023502,0.132369,0.047803,0.069615,0.063335,0.143677,0.056155,0.053215,0.074232,0.072165,0.068555,0.051764,0.054067,0.101649,0.063159,mean=0.3640 std=0.0430 min=0.3333 max=0.4572
|
| 4 |
+
baseline,2000,4,16,0.060253,0.017936,0.078725,0.032786,0.041781,0.04091,0.083688,0.036089,0.043985,0.072901,0.042767,0.041669,0.034874,0.058769,0.063418,0.039412,mean=0.3006 std=0.0345 min=0.2674 max=0.3753
|
| 5 |
+
baseline,2000,10,16,0.065765,0.016923,0.086375,0.032761,0.045154,0.041131,0.092111,0.036788,0.046224,0.076043,0.046163,0.042138,0.038093,0.056179,0.066659,0.040501,mean=0.3586 std=0.0248 min=0.3396 max=0.4220
|
| 6 |
+
baseline,5000,4,16,0.03972,0.014654,0.049239,0.019869,0.030201,0.034473,0.052215,0.023235,0.028408,0.028427,0.031159,0.037572,0.02349,0.024208,0.04196,0.030152,mean=0.2920 std=0.0342 min=0.2585 max=0.3528
|
| 7 |
+
baseline,5000,10,16,0.043346,0.013818,0.053788,0.020493,0.032904,0.034889,0.057689,0.024439,0.026486,0.029864,0.0337,0.038002,0.027331,0.027093,0.044562,0.030999,mean=0.3951 std=0.0357 min=0.3463 max=0.4774
|
| 8 |
+
baseline,10000,4,16,0.029935,0.0082,0.041062,0.019621,0.018807,0.018117,0.04444,0.02295,0.017416,0.016394,0.0195,0.019305,0.013963,0.019504,0.033733,0.022691,mean=0.2793 std=0.0247 min=0.2625 max=0.3469
|
| 9 |
+
baseline,10000,10,16,0.030294,0.007277,0.041307,0.019181,0.019282,0.019077,0.045179,0.022508,0.014207,0.016425,0.020231,0.020465,0.01264,0.018571,0.034582,0.023261,mean=0.3823 std=0.0398 min=0.3432 max=0.4686
|
| 10 |
+
parallel,1000,4,16,0.09253,0.020956,0.122108,0.04378,0.062952,0.056483,0.133062,0.052111,0.045431,0.055952,0.065476,0.060695,0.04528,0.053039,0.093392,0.056874,mean=0.3110 std=0.0430 min=0.2654 max=0.3864
|
| 11 |
+
parallel,1000,10,16,0.102452,0.022208,0.13361,0.044796,0.071295,0.061523,0.145474,0.053589,0.05056,0.060317,0.073909,0.066406,0.053,0.051143,0.099213,0.060422,mean=0.4143 std=0.0560 min=0.3405 max=0.5017
|
| 12 |
+
parallel,2000,4,16,0.05986,0.012924,0.080984,0.031604,0.038736,0.031293,0.086197,0.035912,0.04449,0.062755,0.039304,0.030982,0.034761,0.051397,0.061196,0.036442,mean=0.3702 std=0.1017 min=0.2793 max=0.7256
|
| 13 |
+
parallel,2000,10,16,0.065897,0.012628,0.088735,0.03201,0.043059,0.032823,0.094654,0.036668,0.047298,0.06466,0.043769,0.032862,0.038089,0.049635,0.064491,0.038643,mean=0.4575 std=0.0902 min=0.3373 max=0.6590
|
| 14 |
+
parallel,5000,4,16,0.040712,0.013646,0.050681,0.020624,0.030742,0.03279,0.053976,0.024153,0.027611,0.02458,0.032227,0.03635,0.020349,0.017496,0.042435,0.029207,mean=0.3861 std=0.0848 min=0.2719 max=0.5485
|
| 15 |
+
parallel,5000,10,16,0.044799,0.012807,0.055016,0.021278,0.034583,0.032757,0.059296,0.025068,0.025058,0.027173,0.035777,0.036454,0.026224,0.01689,0.043614,0.030178,mean=0.4549 std=0.0835 min=0.3373 max=0.6280
|
| 16 |
+
parallel,10000,4,16,0.029277,0.007579,0.040375,0.01919,0.018178,0.015856,0.043636,0.022278,0.017546,0.013485,0.018908,0.017028,0.013066,0.016678,0.031629,0.022404,mean=0.3241 std=0.0551 min=0.2600 max=0.4241
|
| 17 |
+
parallel,10000,10,16,0.030241,0.00674,0.041072,0.018866,0.01941,0.017031,0.044817,0.022046,0.014857,0.014376,0.020279,0.018425,0.013323,0.014475,0.032456,0.022935,mean=0.4058 std=0.0569 min=0.3332 max=0.5100
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/startup_summaries.txt
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[baseline]
|
| 2 |
+
weight_missing_count: 0 (10775:train_pytorch.py:629)
|
| 3 |
+
weight_missing_keys: set() (10775:train_pytorch.py:630)
|
| 4 |
+
weight_unexpected_count: 0 (10775:train_pytorch.py:631)
|
| 5 |
+
weight_unexpected_keys: [] (10775:train_pytorch.py:632)
|
| 6 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k (10775:train_pytorch.py:280)
|
| 7 |
+
dataset_repo_id: lsnu/twin_handover_256_train (10775:train_pytorch.py:281)
|
| 8 |
+
norm_stats_file: /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (10775:train_pytorch.py:282)
|
| 9 |
+
norm_stats_summary: {'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (10775:train_pytorch.py:283)
|
| 10 |
+
checkpoint_source: /workspace/checkpoints/pi05_base_single_pytorch (10775:train_pytorch.py:284)
|
| 11 |
+
model_type: baseline (10775:train_pytorch.py:285)
|
| 12 |
+
packed_transforms: True (10775:train_pytorch.py:286)
|
| 13 |
+
world_size: 4 (10775:train_pytorch.py:287)
|
| 14 |
+
batch_size: local=4, global=16 (10775:train_pytorch.py:288)
|
| 15 |
+
num_workers: 8 (10775:train_pytorch.py:289)
|
| 16 |
+
precision: bfloat16 (10775:train_pytorch.py:290)
|
| 17 |
+
lr_schedule: warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (10775:train_pytorch.py:291)
|
| 18 |
+
save_log_intervals: save_interval=1000, log_interval=10 (10775:train_pytorch.py:298)
|
| 19 |
+
action_loss_mask: (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (10775:train_pytorch.py:299)
|
| 20 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (10775:train_pytorch.py:300)
|
| 21 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (10775:train_pytorch.py:301)
|
| 22 |
+
gradient_buckets: action_in_proj, action_out_proj, shared_expert (10775:train_pytorch.py:694)
|
| 23 |
+
|
| 24 |
+
16:06:09.367 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (10775:train_pytorch.py:799)
|
| 25 |
+
16:06:09.368 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (10775:train_pytorch.py:803)
|
| 26 |
+
16:06:09.368 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (10775:train_pytorch.py:806)
|
| 27 |
+
16:06:09.368 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (10775:train_pytorch.py:807)
|
| 28 |
+
16:06:09.369 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (10775:train_pytorch.py:810)
|
| 29 |
+
16:06:09.369 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (10775:train_pytorch.py:813)
|
| 30 |
+
16:06:09.390 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (10775:train_pytorch.py:817)
|
| 31 |
+
16:06:09.390 [I] debug_step=1 lr=4.99e-08 grad_norm=15.9656 data_time=0.9485s step_time=1.9454s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (10775:train_pytorch.py:822)
|
| 32 |
+
16:06:09.390 [I] debug_step=1 grad_shared_expert=15.5493 grad_action_in_proj=0.4919 grad_action_out_proj=2.1574 (10775:train_pytorch.py:830)
|
| 33 |
+
Training: 0%| | 1/10000 [00:02<8:12:34, 2.96s/it, loss=1.4673, lr=4.99e-08, step=1]16:06:10.034 [I] debug_step=2 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (10775:train_pytorch.py:799)
|
| 34 |
+
16:06:10.035 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (10775:train_pytorch.py:803)
|
| 35 |
+
16:06:10.035 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (10775:train_pytorch.py:806)
|
| 36 |
+
|
| 37 |
+
[parallel]
|
| 38 |
+
weight_missing_count: 0 (18633:train_pytorch.py:629)
|
| 39 |
+
weight_missing_keys: set() (18633:train_pytorch.py:630)
|
| 40 |
+
weight_unexpected_count: 0 (18633:train_pytorch.py:631)
|
| 41 |
+
weight_unexpected_keys: [] (18633:train_pytorch.py:632)
|
| 42 |
+
config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k (18633:train_pytorch.py:280)
|
| 43 |
+
dataset_repo_id: lsnu/twin_handover_256_train (18633:train_pytorch.py:281)
|
| 44 |
+
norm_stats_file: /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (18633:train_pytorch.py:282)
|
| 45 |
+
norm_stats_summary: {'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (18633:train_pytorch.py:283)
|
| 46 |
+
checkpoint_source: /workspace/checkpoints/pi05_base_parallel_packed_from_single (18633:train_pytorch.py:284)
|
| 47 |
+
model_type: parallel (18633:train_pytorch.py:285)
|
| 48 |
+
packed_transforms: True (18633:train_pytorch.py:286)
|
| 49 |
+
world_size: 4 (18633:train_pytorch.py:287)
|
| 50 |
+
batch_size: local=4, global=16 (18633:train_pytorch.py:288)
|
| 51 |
+
num_workers: 8 (18633:train_pytorch.py:289)
|
| 52 |
+
precision: bfloat16 (18633:train_pytorch.py:290)
|
| 53 |
+
lr_schedule: warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (18633:train_pytorch.py:291)
|
| 54 |
+
save_log_intervals: save_interval=1000, log_interval=10 (18633:train_pytorch.py:298)
|
| 55 |
+
action_loss_mask: (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (18633:train_pytorch.py:299)
|
| 56 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (18633:train_pytorch.py:300)
|
| 57 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (18633:train_pytorch.py:301)
|
| 58 |
+
gradient_buckets: action_in_proj_arms, arm_token_fuse, action_out_proj_arms, shared_expert (18633:train_pytorch.py:694)
|
| 59 |
+
|
| 60 |
+
18:44:34.768 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (18633:train_pytorch.py:799)
|
| 61 |
+
18:44:34.769 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (18633:train_pytorch.py:803)
|
| 62 |
+
18:44:34.769 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (18633:train_pytorch.py:806)
|
| 63 |
+
18:44:34.769 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (18633:train_pytorch.py:807)
|
| 64 |
+
18:44:34.770 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (18633:train_pytorch.py:810)
|
| 65 |
+
18:44:34.770 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (18633:train_pytorch.py:813)
|
| 66 |
+
18:44:34.791 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (18633:train_pytorch.py:817)
|
| 67 |
+
18:44:34.792 [I] debug_step=1 lr=4.99e-08 grad_norm=16.1250 data_time=0.7232s step_time=2.1776s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (18633:train_pytorch.py:822)
|
| 68 |
+
18:44:34.792 [I] debug_step=1 grad_shared_expert=15.5090 grad_action_in_proj_arms=0.5665 grad_arm_token_fuse=2.6833 grad_action_out_proj_arms=2.1581 (18633:train_pytorch.py:830)
|
| 69 |
+
Training: 0%| | 1/10000 [00:02<8:13:44, 2.96s/it, loss=1.4675, lr=4.99e-08, step=1]18:44:35.388 [I] debug_step=2 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (18633:train_pytorch.py:799)
|
| 70 |
+
18:44:35.389 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (18633:train_pytorch.py:803)
|
| 71 |
+
18:44:35.389 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (18633:train_pytorch.py:806)
|
| 72 |
+
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/summary.json
ADDED
|
@@ -0,0 +1,1018 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"reference_2k_summary_path": "/workspace/pi05tests-openpi-multiarm/artifacts/twin_handover_packed_parallelization_20260309/metrics/summary.json",
|
| 3 |
+
"train": {
|
| 4 |
+
"baseline": {
|
| 5 |
+
"startup": {
|
| 6 |
+
"weight_missing_count": "0 (10775:train_pytorch.py:629)",
|
| 7 |
+
"weight_missing_keys": "set() (10775:train_pytorch.py:630)",
|
| 8 |
+
"weight_unexpected_count": "0 (10775:train_pytorch.py:631)",
|
| 9 |
+
"weight_unexpected_keys": "[] (10775:train_pytorch.py:632)",
|
| 10 |
+
"config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k (10775:train_pytorch.py:280)",
|
| 11 |
+
"dataset_repo_id": "lsnu/twin_handover_256_train (10775:train_pytorch.py:281)",
|
| 12 |
+
"norm_stats_file": "/workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (10775:train_pytorch.py:282)",
|
| 13 |
+
"norm_stats_summary": "{'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (10775:train_pytorch.py:283)",
|
| 14 |
+
"checkpoint_source": "/workspace/checkpoints/pi05_base_single_pytorch (10775:train_pytorch.py:284)",
|
| 15 |
+
"model_type": "baseline (10775:train_pytorch.py:285)",
|
| 16 |
+
"packed_transforms": "True (10775:train_pytorch.py:286)",
|
| 17 |
+
"world_size": "4 (10775:train_pytorch.py:287)",
|
| 18 |
+
"batch_size": "local=4, global=16 (10775:train_pytorch.py:288)",
|
| 19 |
+
"num_workers": "8 (10775:train_pytorch.py:289)",
|
| 20 |
+
"precision": "bfloat16 (10775:train_pytorch.py:290)",
|
| 21 |
+
"lr_schedule": "warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (10775:train_pytorch.py:291)",
|
| 22 |
+
"save_log_intervals": "save_interval=1000, log_interval=10 (10775:train_pytorch.py:298)",
|
| 23 |
+
"action_loss_mask": "(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (10775:train_pytorch.py:299)",
|
| 24 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (10775:train_pytorch.py:300)",
|
| 25 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (10775:train_pytorch.py:301)",
|
| 26 |
+
"gradient_buckets": "action_in_proj, action_out_proj, shared_expert (10775:train_pytorch.py:694)"
|
| 27 |
+
},
|
| 28 |
+
"steps": {
|
| 29 |
+
"1000": {
|
| 30 |
+
"ts": "16:16:42.668",
|
| 31 |
+
"step": "1000",
|
| 32 |
+
"loss": "0.0228",
|
| 33 |
+
"smoothed": "0.0476",
|
| 34 |
+
"lr": "2.48e-05",
|
| 35 |
+
"grad_norm": "0.9699",
|
| 36 |
+
"step_time": "0.5638",
|
| 37 |
+
"data_time": "0.0801",
|
| 38 |
+
"its": "1.553",
|
| 39 |
+
"eta": "5793.6",
|
| 40 |
+
"mem": "35.23GB",
|
| 41 |
+
"grad_action_in_proj": "0.0109",
|
| 42 |
+
"grad_action_out_proj": "0.1595",
|
| 43 |
+
"grad_shared_expert": "0.4924"
|
| 44 |
+
},
|
| 45 |
+
"2000": {
|
| 46 |
+
"ts": "16:28:30.872",
|
| 47 |
+
"step": "2000",
|
| 48 |
+
"loss": "0.0492",
|
| 49 |
+
"smoothed": "0.0284",
|
| 50 |
+
"lr": "2.37e-05",
|
| 51 |
+
"grad_norm": "0.6437",
|
| 52 |
+
"step_time": "0.4982",
|
| 53 |
+
"data_time": "0.0622",
|
| 54 |
+
"its": "1.785",
|
| 55 |
+
"eta": "4482.7",
|
| 56 |
+
"mem": "35.23GB",
|
| 57 |
+
"grad_action_in_proj": "0.0184",
|
| 58 |
+
"grad_action_out_proj": "0.2195",
|
| 59 |
+
"grad_shared_expert": "0.8358"
|
| 60 |
+
},
|
| 61 |
+
"5000": {
|
| 62 |
+
"ts": "17:04:21.626",
|
| 63 |
+
"step": "5000",
|
| 64 |
+
"loss": "0.0038",
|
| 65 |
+
"smoothed": "0.0165",
|
| 66 |
+
"lr": "1.47e-05",
|
| 67 |
+
"grad_norm": "0.5112",
|
| 68 |
+
"step_time": "0.4974",
|
| 69 |
+
"data_time": "0.0606",
|
| 70 |
+
"its": "1.792",
|
| 71 |
+
"eta": "2789.7",
|
| 72 |
+
"mem": "35.23GB",
|
| 73 |
+
"grad_action_in_proj": "0.0101",
|
| 74 |
+
"grad_action_out_proj": "0.1353",
|
| 75 |
+
"grad_shared_expert": "1.1505"
|
| 76 |
+
},
|
| 77 |
+
"10000": {
|
| 78 |
+
"ts": "18:15:00.659",
|
| 79 |
+
"step": "10000",
|
| 80 |
+
"loss": "0.0141",
|
| 81 |
+
"smoothed": "0.0172",
|
| 82 |
+
"lr": "2.50e-06",
|
| 83 |
+
"grad_norm": "0.4377",
|
| 84 |
+
"step_time": "0.5241",
|
| 85 |
+
"data_time": "0.1210",
|
| 86 |
+
"its": "1.550",
|
| 87 |
+
"eta": "0.0",
|
| 88 |
+
"mem": "35.23GB",
|
| 89 |
+
"grad_action_in_proj": "0.0125",
|
| 90 |
+
"grad_action_out_proj": "0.1342",
|
| 91 |
+
"grad_shared_expert": "0.4184"
|
| 92 |
+
}
|
| 93 |
+
},
|
| 94 |
+
"saves": {
|
| 95 |
+
"1000": {
|
| 96 |
+
"timestamp": "16:18:02.120",
|
| 97 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000"
|
| 98 |
+
},
|
| 99 |
+
"2000": {
|
| 100 |
+
"timestamp": "16:30:11.326",
|
| 101 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000"
|
| 102 |
+
},
|
| 103 |
+
"3000": {
|
| 104 |
+
"timestamp": "16:42:29.626",
|
| 105 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/3000"
|
| 106 |
+
},
|
| 107 |
+
"4000": {
|
| 108 |
+
"timestamp": "16:54:35.424",
|
| 109 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/4000"
|
| 110 |
+
},
|
| 111 |
+
"5000": {
|
| 112 |
+
"timestamp": "17:05:36.535",
|
| 113 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000"
|
| 114 |
+
},
|
| 115 |
+
"6000": {
|
| 116 |
+
"timestamp": "17:19:56.648",
|
| 117 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/6000"
|
| 118 |
+
},
|
| 119 |
+
"7000": {
|
| 120 |
+
"timestamp": "17:34:35.906",
|
| 121 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/7000"
|
| 122 |
+
},
|
| 123 |
+
"8000": {
|
| 124 |
+
"timestamp": "17:48:19.855",
|
| 125 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/8000"
|
| 126 |
+
},
|
| 127 |
+
"9000": {
|
| 128 |
+
"timestamp": "18:02:59.063",
|
| 129 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/9000"
|
| 130 |
+
},
|
| 131 |
+
"10000": {
|
| 132 |
+
"timestamp": "18:16:58.135",
|
| 133 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000"
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"debug_lines": [
|
| 137 |
+
"16:06:09.367 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (10775:train_pytorch.py:799)",
|
| 138 |
+
"16:06:09.368 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (10775:train_pytorch.py:803)",
|
| 139 |
+
"16:06:09.368 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (10775:train_pytorch.py:806)",
|
| 140 |
+
"16:06:09.368 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (10775:train_pytorch.py:807)",
|
| 141 |
+
"16:06:09.369 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (10775:train_pytorch.py:810)",
|
| 142 |
+
"16:06:09.369 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (10775:train_pytorch.py:813)",
|
| 143 |
+
"16:06:09.390 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (10775:train_pytorch.py:817)",
|
| 144 |
+
"16:06:09.390 [I] debug_step=1 lr=4.99e-08 grad_norm=15.9656 data_time=0.9485s step_time=1.9454s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (10775:train_pytorch.py:822)",
|
| 145 |
+
"16:06:09.390 [I] debug_step=1 grad_shared_expert=15.5493 grad_action_in_proj=0.4919 grad_action_out_proj=2.1574 (10775:train_pytorch.py:830)",
|
| 146 |
+
"Training: 0%| | 1/10000 [00:02<8:12:34, 2.96s/it, loss=1.4673, lr=4.99e-08, step=1]16:06:10.034 [I] debug_step=2 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (10775:train_pytorch.py:799)",
|
| 147 |
+
"16:06:10.035 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (10775:train_pytorch.py:803)",
|
| 148 |
+
"16:06:10.035 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (10775:train_pytorch.py:806)"
|
| 149 |
+
],
|
| 150 |
+
"runtime": "2:13:40"
|
| 151 |
+
},
|
| 152 |
+
"parallel": {
|
| 153 |
+
"startup": {
|
| 154 |
+
"weight_missing_count": "0 (18633:train_pytorch.py:629)",
|
| 155 |
+
"weight_missing_keys": "set() (18633:train_pytorch.py:630)",
|
| 156 |
+
"weight_unexpected_count": "0 (18633:train_pytorch.py:631)",
|
| 157 |
+
"weight_unexpected_keys": "[] (18633:train_pytorch.py:632)",
|
| 158 |
+
"config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k (18633:train_pytorch.py:280)",
|
| 159 |
+
"dataset_repo_id": "lsnu/twin_handover_256_train (18633:train_pytorch.py:281)",
|
| 160 |
+
"norm_stats_file": "/workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (18633:train_pytorch.py:282)",
|
| 161 |
+
"norm_stats_summary": "{'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (18633:train_pytorch.py:283)",
|
| 162 |
+
"checkpoint_source": "/workspace/checkpoints/pi05_base_parallel_packed_from_single (18633:train_pytorch.py:284)",
|
| 163 |
+
"model_type": "parallel (18633:train_pytorch.py:285)",
|
| 164 |
+
"packed_transforms": "True (18633:train_pytorch.py:286)",
|
| 165 |
+
"world_size": "4 (18633:train_pytorch.py:287)",
|
| 166 |
+
"batch_size": "local=4, global=16 (18633:train_pytorch.py:288)",
|
| 167 |
+
"num_workers": "8 (18633:train_pytorch.py:289)",
|
| 168 |
+
"precision": "bfloat16 (18633:train_pytorch.py:290)",
|
| 169 |
+
"lr_schedule": "warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (18633:train_pytorch.py:291)",
|
| 170 |
+
"save_log_intervals": "save_interval=1000, log_interval=10 (18633:train_pytorch.py:298)",
|
| 171 |
+
"action_loss_mask": "(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (18633:train_pytorch.py:299)",
|
| 172 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (18633:train_pytorch.py:300)",
|
| 173 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (18633:train_pytorch.py:301)",
|
| 174 |
+
"gradient_buckets": "action_in_proj_arms, arm_token_fuse, action_out_proj_arms, shared_expert (18633:train_pytorch.py:694)"
|
| 175 |
+
},
|
| 176 |
+
"steps": {
|
| 177 |
+
"1000": {
|
| 178 |
+
"ts": "18:56:22.847",
|
| 179 |
+
"step": "1000",
|
| 180 |
+
"loss": "0.0246",
|
| 181 |
+
"smoothed": "0.0492",
|
| 182 |
+
"lr": "2.48e-05",
|
| 183 |
+
"grad_norm": "0.9470",
|
| 184 |
+
"step_time": "0.5836",
|
| 185 |
+
"data_time": "0.1086",
|
| 186 |
+
"its": "1.445",
|
| 187 |
+
"eta": "6229.0",
|
| 188 |
+
"mem": "35.27GB",
|
| 189 |
+
"grad_action_in_proj_arms": "0.0139",
|
| 190 |
+
"grad_action_out_proj_arms": "0.1631",
|
| 191 |
+
"grad_arm_token_fuse": "0.0704",
|
| 192 |
+
"grad_shared_expert": "0.5049"
|
| 193 |
+
},
|
| 194 |
+
"2000": {
|
| 195 |
+
"ts": "19:09:53.627",
|
| 196 |
+
"step": "2000",
|
| 197 |
+
"loss": "0.0280",
|
| 198 |
+
"smoothed": "0.0267",
|
| 199 |
+
"lr": "2.37e-05",
|
| 200 |
+
"grad_norm": "0.6051",
|
| 201 |
+
"step_time": "0.7138",
|
| 202 |
+
"data_time": "0.1628",
|
| 203 |
+
"its": "1.141",
|
| 204 |
+
"eta": "7012.2",
|
| 205 |
+
"mem": "35.27GB",
|
| 206 |
+
"grad_action_in_proj_arms": "0.0180",
|
| 207 |
+
"grad_action_out_proj_arms": "0.1784",
|
| 208 |
+
"grad_arm_token_fuse": "0.0955",
|
| 209 |
+
"grad_shared_expert": "0.5627"
|
| 210 |
+
},
|
| 211 |
+
"5000": {
|
| 212 |
+
"ts": "19:50:55.815",
|
| 213 |
+
"step": "5000",
|
| 214 |
+
"loss": "0.0043",
|
| 215 |
+
"smoothed": "0.0159",
|
| 216 |
+
"lr": "1.47e-05",
|
| 217 |
+
"grad_norm": "0.4850",
|
| 218 |
+
"step_time": "0.5183",
|
| 219 |
+
"data_time": "0.0658",
|
| 220 |
+
"its": "1.712",
|
| 221 |
+
"eta": "2920.0",
|
| 222 |
+
"mem": "35.27GB",
|
| 223 |
+
"grad_action_in_proj_arms": "0.0105",
|
| 224 |
+
"grad_action_out_proj_arms": "0.1454",
|
| 225 |
+
"grad_arm_token_fuse": "0.0568",
|
| 226 |
+
"grad_shared_expert": "1.0533"
|
| 227 |
+
},
|
| 228 |
+
"10000": {
|
| 229 |
+
"ts": "20:58:23.797",
|
| 230 |
+
"step": "10000",
|
| 231 |
+
"loss": "0.0140",
|
| 232 |
+
"smoothed": "0.0169",
|
| 233 |
+
"lr": "2.50e-06",
|
| 234 |
+
"grad_norm": "0.4269",
|
| 235 |
+
"step_time": "0.6919",
|
| 236 |
+
"data_time": "0.2213",
|
| 237 |
+
"its": "1.095",
|
| 238 |
+
"eta": "0.0",
|
| 239 |
+
"mem": "35.27GB",
|
| 240 |
+
"grad_action_in_proj_arms": "0.0121",
|
| 241 |
+
"grad_action_out_proj_arms": "0.1277",
|
| 242 |
+
"grad_arm_token_fuse": "0.0634",
|
| 243 |
+
"grad_shared_expert": "0.4071"
|
| 244 |
+
}
|
| 245 |
+
},
|
| 246 |
+
"saves": {
|
| 247 |
+
"1000": {
|
| 248 |
+
"timestamp": "18:58:14.131",
|
| 249 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000"
|
| 250 |
+
},
|
| 251 |
+
"2000": {
|
| 252 |
+
"timestamp": "19:12:06.795",
|
| 253 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000"
|
| 254 |
+
},
|
| 255 |
+
"3000": {
|
| 256 |
+
"timestamp": "19:25:59.695",
|
| 257 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/3000"
|
| 258 |
+
},
|
| 259 |
+
"4000": {
|
| 260 |
+
"timestamp": "19:39:53.065",
|
| 261 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/4000"
|
| 262 |
+
},
|
| 263 |
+
"5000": {
|
| 264 |
+
"timestamp": "19:52:11.616",
|
| 265 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000"
|
| 266 |
+
},
|
| 267 |
+
"6000": {
|
| 268 |
+
"timestamp": "20:04:56.835",
|
| 269 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/6000"
|
| 270 |
+
},
|
| 271 |
+
"7000": {
|
| 272 |
+
"timestamp": "20:17:25.392",
|
| 273 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/7000"
|
| 274 |
+
},
|
| 275 |
+
"8000": {
|
| 276 |
+
"timestamp": "20:33:46.138",
|
| 277 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/8000"
|
| 278 |
+
},
|
| 279 |
+
"9000": {
|
| 280 |
+
"timestamp": "20:46:05.807",
|
| 281 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/9000"
|
| 282 |
+
},
|
| 283 |
+
"10000": {
|
| 284 |
+
"timestamp": "21:01:52.032",
|
| 285 |
+
"path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000"
|
| 286 |
+
}
|
| 287 |
+
},
|
| 288 |
+
"debug_lines": [
|
| 289 |
+
"18:44:34.768 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (18633:train_pytorch.py:799)",
|
| 290 |
+
"18:44:34.769 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (18633:train_pytorch.py:803)",
|
| 291 |
+
"18:44:34.769 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (18633:train_pytorch.py:806)",
|
| 292 |
+
"18:44:34.769 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (18633:train_pytorch.py:807)",
|
| 293 |
+
"18:44:34.770 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (18633:train_pytorch.py:810)",
|
| 294 |
+
"18:44:34.770 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (18633:train_pytorch.py:813)",
|
| 295 |
+
"18:44:34.791 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (18633:train_pytorch.py:817)",
|
| 296 |
+
"18:44:34.792 [I] debug_step=1 lr=4.99e-08 grad_norm=16.1250 data_time=0.7232s step_time=2.1776s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (18633:train_pytorch.py:822)",
|
| 297 |
+
"18:44:34.792 [I] debug_step=1 grad_shared_expert=15.5090 grad_action_in_proj_arms=0.5665 grad_arm_token_fuse=2.6833 grad_action_out_proj_arms=2.1581 (18633:train_pytorch.py:830)",
|
| 298 |
+
"Training: 0%| | 1/10000 [00:02<8:13:44, 2.96s/it, loss=1.4675, lr=4.99e-08, step=1]18:44:35.388 [I] debug_step=2 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (18633:train_pytorch.py:799)",
|
| 299 |
+
"18:44:35.389 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (18633:train_pytorch.py:803)",
|
| 300 |
+
"18:44:35.389 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (18633:train_pytorch.py:806)"
|
| 301 |
+
],
|
| 302 |
+
"runtime": "2:20:51"
|
| 303 |
+
}
|
| 304 |
+
},
|
| 305 |
+
"val_teacher_forced": {
|
| 306 |
+
"baseline_1000": {
|
| 307 |
+
"teacher_forced_eval_seed": 123,
|
| 308 |
+
"config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k",
|
| 309 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000",
|
| 310 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 311 |
+
"num_batches": 50,
|
| 312 |
+
"mean_val_loss": 0.06113,
|
| 313 |
+
"std_val_loss": 0.043921,
|
| 314 |
+
"mean_left_arm_loss": 0.077421,
|
| 315 |
+
"std_left_arm_loss": 0.059309,
|
| 316 |
+
"mean_right_arm_loss": 0.04484,
|
| 317 |
+
"std_right_arm_loss": 0.080634,
|
| 318 |
+
"mean_left_joint_loss": 0.082092,
|
| 319 |
+
"std_left_joint_loss": 0.06674,
|
| 320 |
+
"mean_left_gripper_loss": 0.04472,
|
| 321 |
+
"std_left_gripper_loss": 0.088365,
|
| 322 |
+
"mean_right_joint_loss": 0.046274,
|
| 323 |
+
"std_right_joint_loss": 0.087919,
|
| 324 |
+
"mean_right_gripper_loss": 0.034807,
|
| 325 |
+
"std_right_gripper_loss": 0.076825,
|
| 326 |
+
"mean_left_right_imbalance": 0.08012,
|
| 327 |
+
"std_left_right_imbalance": 0.083456,
|
| 328 |
+
"per_batch_timing_seconds": "mean=0.3040 std=0.1266 min=0.2246 max=0.8837",
|
| 329 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 330 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 331 |
+
"weight_loading_missing_keys": "[]",
|
| 332 |
+
"weight_loading_unexpected_keys": "[]",
|
| 333 |
+
"model": "baseline",
|
| 334 |
+
"checkpoint_step": 1000
|
| 335 |
+
},
|
| 336 |
+
"baseline_2000": {
|
| 337 |
+
"teacher_forced_eval_seed": 123,
|
| 338 |
+
"config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k",
|
| 339 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000",
|
| 340 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 341 |
+
"num_batches": 50,
|
| 342 |
+
"mean_val_loss": 0.041595,
|
| 343 |
+
"std_val_loss": 0.030015,
|
| 344 |
+
"mean_left_arm_loss": 0.049919,
|
| 345 |
+
"std_left_arm_loss": 0.033208,
|
| 346 |
+
"mean_right_arm_loss": 0.033271,
|
| 347 |
+
"std_right_arm_loss": 0.059873,
|
| 348 |
+
"mean_left_joint_loss": 0.051501,
|
| 349 |
+
"std_left_joint_loss": 0.035502,
|
| 350 |
+
"mean_left_gripper_loss": 0.038846,
|
| 351 |
+
"std_left_gripper_loss": 0.082622,
|
| 352 |
+
"mean_right_joint_loss": 0.034159,
|
| 353 |
+
"std_right_joint_loss": 0.066139,
|
| 354 |
+
"mean_right_gripper_loss": 0.027055,
|
| 355 |
+
"std_right_gripper_loss": 0.06654,
|
| 356 |
+
"mean_left_right_imbalance": 0.05474,
|
| 357 |
+
"std_left_right_imbalance": 0.055247,
|
| 358 |
+
"per_batch_timing_seconds": "mean=0.2487 std=0.0844 min=0.2239 max=0.8257",
|
| 359 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 360 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 361 |
+
"weight_loading_missing_keys": "[]",
|
| 362 |
+
"weight_loading_unexpected_keys": "[]",
|
| 363 |
+
"model": "baseline",
|
| 364 |
+
"checkpoint_step": 2000
|
| 365 |
+
},
|
| 366 |
+
"baseline_5000": {
|
| 367 |
+
"teacher_forced_eval_seed": 123,
|
| 368 |
+
"config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k",
|
| 369 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000",
|
| 370 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 371 |
+
"num_batches": 50,
|
| 372 |
+
"mean_val_loss": 0.027324,
|
| 373 |
+
"std_val_loss": 0.020404,
|
| 374 |
+
"mean_left_arm_loss": 0.039118,
|
| 375 |
+
"std_left_arm_loss": 0.037404,
|
| 376 |
+
"mean_right_arm_loss": 0.015529,
|
| 377 |
+
"std_right_arm_loss": 0.023314,
|
| 378 |
+
"mean_left_joint_loss": 0.042035,
|
| 379 |
+
"std_left_joint_loss": 0.041763,
|
| 380 |
+
"mean_left_gripper_loss": 0.018705,
|
| 381 |
+
"std_left_gripper_loss": 0.031815,
|
| 382 |
+
"mean_right_joint_loss": 0.015711,
|
| 383 |
+
"std_right_joint_loss": 0.023929,
|
| 384 |
+
"mean_right_gripper_loss": 0.014261,
|
| 385 |
+
"std_right_gripper_loss": 0.030013,
|
| 386 |
+
"mean_left_right_imbalance": 0.038961,
|
| 387 |
+
"std_left_right_imbalance": 0.035474,
|
| 388 |
+
"per_batch_timing_seconds": "mean=0.2601 std=0.0801 min=0.2212 max=0.7730",
|
| 389 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 390 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 391 |
+
"weight_loading_missing_keys": "[]",
|
| 392 |
+
"weight_loading_unexpected_keys": "[]",
|
| 393 |
+
"model": "baseline",
|
| 394 |
+
"checkpoint_step": 5000
|
| 395 |
+
},
|
| 396 |
+
"baseline_10000": {
|
| 397 |
+
"teacher_forced_eval_seed": 123,
|
| 398 |
+
"config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k",
|
| 399 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000",
|
| 400 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 401 |
+
"num_batches": 100,
|
| 402 |
+
"mean_val_loss": 0.022345,
|
| 403 |
+
"std_val_loss": 0.024337,
|
| 404 |
+
"mean_left_arm_loss": 0.029659,
|
| 405 |
+
"std_left_arm_loss": 0.039896,
|
| 406 |
+
"mean_right_arm_loss": 0.015031,
|
| 407 |
+
"std_right_arm_loss": 0.032929,
|
| 408 |
+
"mean_left_joint_loss": 0.031507,
|
| 409 |
+
"std_left_joint_loss": 0.044637,
|
| 410 |
+
"mean_left_gripper_loss": 0.016725,
|
| 411 |
+
"std_left_gripper_loss": 0.040894,
|
| 412 |
+
"mean_right_joint_loss": 0.015776,
|
| 413 |
+
"std_right_joint_loss": 0.036308,
|
| 414 |
+
"mean_right_gripper_loss": 0.009818,
|
| 415 |
+
"std_right_gripper_loss": 0.028543,
|
| 416 |
+
"mean_left_right_imbalance": 0.034067,
|
| 417 |
+
"std_left_right_imbalance": 0.045126,
|
| 418 |
+
"per_batch_timing_seconds": "mean=0.2524 std=0.0719 min=0.2263 max=0.8903",
|
| 419 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 420 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 421 |
+
"weight_loading_missing_keys": "[]",
|
| 422 |
+
"weight_loading_unexpected_keys": "[]",
|
| 423 |
+
"model": "baseline",
|
| 424 |
+
"checkpoint_step": 10000
|
| 425 |
+
},
|
| 426 |
+
"parallel_1000": {
|
| 427 |
+
"teacher_forced_eval_seed": 123,
|
| 428 |
+
"config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k",
|
| 429 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000",
|
| 430 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 431 |
+
"num_batches": 50,
|
| 432 |
+
"mean_val_loss": 0.059715,
|
| 433 |
+
"std_val_loss": 0.042962,
|
| 434 |
+
"mean_left_arm_loss": 0.073681,
|
| 435 |
+
"std_left_arm_loss": 0.049928,
|
| 436 |
+
"mean_right_arm_loss": 0.045749,
|
| 437 |
+
"std_right_arm_loss": 0.082818,
|
| 438 |
+
"mean_left_joint_loss": 0.078129,
|
| 439 |
+
"std_left_joint_loss": 0.055212,
|
| 440 |
+
"mean_left_gripper_loss": 0.042541,
|
| 441 |
+
"std_left_gripper_loss": 0.08491,
|
| 442 |
+
"mean_right_joint_loss": 0.047261,
|
| 443 |
+
"std_right_joint_loss": 0.090299,
|
| 444 |
+
"mean_right_gripper_loss": 0.035161,
|
| 445 |
+
"std_right_gripper_loss": 0.079674,
|
| 446 |
+
"mean_left_right_imbalance": 0.075806,
|
| 447 |
+
"std_left_right_imbalance": 0.079713,
|
| 448 |
+
"per_batch_timing_seconds": "mean=0.3663 std=0.6150 min=0.2224 max=4.6353",
|
| 449 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 450 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 451 |
+
"weight_loading_missing_keys": "[]",
|
| 452 |
+
"weight_loading_unexpected_keys": "[]",
|
| 453 |
+
"model": "parallel",
|
| 454 |
+
"checkpoint_step": 1000
|
| 455 |
+
},
|
| 456 |
+
"parallel_2000": {
|
| 457 |
+
"teacher_forced_eval_seed": 123,
|
| 458 |
+
"config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k",
|
| 459 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000",
|
| 460 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 461 |
+
"num_batches": 50,
|
| 462 |
+
"mean_val_loss": 0.039947,
|
| 463 |
+
"std_val_loss": 0.025053,
|
| 464 |
+
"mean_left_arm_loss": 0.050148,
|
| 465 |
+
"std_left_arm_loss": 0.033233,
|
| 466 |
+
"mean_right_arm_loss": 0.029745,
|
| 467 |
+
"std_right_arm_loss": 0.04786,
|
| 468 |
+
"mean_left_joint_loss": 0.051925,
|
| 469 |
+
"std_left_joint_loss": 0.036277,
|
| 470 |
+
"mean_left_gripper_loss": 0.037711,
|
| 471 |
+
"std_left_gripper_loss": 0.077017,
|
| 472 |
+
"mean_right_joint_loss": 0.030139,
|
| 473 |
+
"std_right_joint_loss": 0.051862,
|
| 474 |
+
"mean_right_gripper_loss": 0.026984,
|
| 475 |
+
"std_right_gripper_loss": 0.065713,
|
| 476 |
+
"mean_left_right_imbalance": 0.051938,
|
| 477 |
+
"std_left_right_imbalance": 0.044701,
|
| 478 |
+
"per_batch_timing_seconds": "mean=0.3708 std=0.1690 min=0.2327 max=1.3050",
|
| 479 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 480 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 481 |
+
"weight_loading_missing_keys": "[]",
|
| 482 |
+
"weight_loading_unexpected_keys": "[]",
|
| 483 |
+
"model": "parallel",
|
| 484 |
+
"checkpoint_step": 2000
|
| 485 |
+
},
|
| 486 |
+
"parallel_5000": {
|
| 487 |
+
"teacher_forced_eval_seed": 123,
|
| 488 |
+
"config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k",
|
| 489 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000",
|
| 490 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 491 |
+
"num_batches": 50,
|
| 492 |
+
"mean_val_loss": 0.02734,
|
| 493 |
+
"std_val_loss": 0.020897,
|
| 494 |
+
"mean_left_arm_loss": 0.039155,
|
| 495 |
+
"std_left_arm_loss": 0.038641,
|
| 496 |
+
"mean_right_arm_loss": 0.015526,
|
| 497 |
+
"std_right_arm_loss": 0.023413,
|
| 498 |
+
"mean_left_joint_loss": 0.042035,
|
| 499 |
+
"std_left_joint_loss": 0.043377,
|
| 500 |
+
"mean_left_gripper_loss": 0.018994,
|
| 501 |
+
"std_left_gripper_loss": 0.032843,
|
| 502 |
+
"mean_right_joint_loss": 0.015753,
|
| 503 |
+
"std_right_joint_loss": 0.024564,
|
| 504 |
+
"mean_right_gripper_loss": 0.013938,
|
| 505 |
+
"std_right_gripper_loss": 0.029304,
|
| 506 |
+
"mean_left_right_imbalance": 0.038635,
|
| 507 |
+
"std_left_right_imbalance": 0.037436,
|
| 508 |
+
"per_batch_timing_seconds": "mean=0.3717 std=0.2172 min=0.2283 max=1.7875",
|
| 509 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 510 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 511 |
+
"weight_loading_missing_keys": "[]",
|
| 512 |
+
"weight_loading_unexpected_keys": "[]",
|
| 513 |
+
"model": "parallel",
|
| 514 |
+
"checkpoint_step": 5000
|
| 515 |
+
},
|
| 516 |
+
"parallel_10000": {
|
| 517 |
+
"teacher_forced_eval_seed": 123,
|
| 518 |
+
"config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k",
|
| 519 |
+
"checkpoint_path": "/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000",
|
| 520 |
+
"repo_id_used": "lsnu/twin_handover_256_val",
|
| 521 |
+
"num_batches": 100,
|
| 522 |
+
"mean_val_loss": 0.022168,
|
| 523 |
+
"std_val_loss": 0.024902,
|
| 524 |
+
"mean_left_arm_loss": 0.030184,
|
| 525 |
+
"std_left_arm_loss": 0.043653,
|
| 526 |
+
"mean_right_arm_loss": 0.014151,
|
| 527 |
+
"std_right_arm_loss": 0.029382,
|
| 528 |
+
"mean_left_joint_loss": 0.032356,
|
| 529 |
+
"std_left_joint_loss": 0.048977,
|
| 530 |
+
"mean_left_gripper_loss": 0.014984,
|
| 531 |
+
"std_left_gripper_loss": 0.037395,
|
| 532 |
+
"mean_right_joint_loss": 0.014888,
|
| 533 |
+
"std_right_joint_loss": 0.032582,
|
| 534 |
+
"mean_right_gripper_loss": 0.008996,
|
| 535 |
+
"std_right_gripper_loss": 0.025757,
|
| 536 |
+
"mean_left_right_imbalance": 0.033825,
|
| 537 |
+
"std_left_right_imbalance": 0.046586,
|
| 538 |
+
"per_batch_timing_seconds": "mean=0.3248 std=0.0893 min=0.2203 max=0.7969",
|
| 539 |
+
"active_mask_dims": "[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]",
|
| 540 |
+
"masked_dims": "[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",
|
| 541 |
+
"weight_loading_missing_keys": "[]",
|
| 542 |
+
"weight_loading_unexpected_keys": "[]",
|
| 543 |
+
"model": "parallel",
|
| 544 |
+
"checkpoint_step": 10000
|
| 545 |
+
}
|
| 546 |
+
},
|
| 547 |
+
"val_sample": [
|
| 548 |
+
{
|
| 549 |
+
"num_steps": 4,
|
| 550 |
+
"num_batches": 16,
|
| 551 |
+
"mean_masked_mae": 0.090938,
|
| 552 |
+
"std_masked_mae": 0.02224,
|
| 553 |
+
"mean_left_arm_mae": 0.120414,
|
| 554 |
+
"std_left_arm_mae": 0.046606,
|
| 555 |
+
"mean_right_arm_mae": 0.061461,
|
| 556 |
+
"std_right_arm_mae": 0.058026,
|
| 557 |
+
"mean_left_joint_mae": 0.130966,
|
| 558 |
+
"std_left_joint_mae": 0.054578,
|
| 559 |
+
"mean_left_gripper_mae": 0.046552,
|
| 560 |
+
"std_left_gripper_mae": 0.06792,
|
| 561 |
+
"mean_right_joint_mae": 0.063945,
|
| 562 |
+
"std_right_joint_mae": 0.062779,
|
| 563 |
+
"mean_right_gripper_mae": 0.044077,
|
| 564 |
+
"std_right_gripper_mae": 0.053987,
|
| 565 |
+
"mean_left_right_imbalance_mae": 0.095076,
|
| 566 |
+
"std_left_right_imbalance_mae": 0.059464,
|
| 567 |
+
"per_batch_timing_seconds": "mean=0.3131 std=0.0370 min=0.2649 max=0.3781",
|
| 568 |
+
"model": "baseline",
|
| 569 |
+
"checkpoint_step": 1000
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"num_steps": 10,
|
| 573 |
+
"num_batches": 16,
|
| 574 |
+
"mean_masked_mae": 0.100992,
|
| 575 |
+
"std_masked_mae": 0.023502,
|
| 576 |
+
"mean_left_arm_mae": 0.132369,
|
| 577 |
+
"std_left_arm_mae": 0.047803,
|
| 578 |
+
"mean_right_arm_mae": 0.069615,
|
| 579 |
+
"std_right_arm_mae": 0.063335,
|
| 580 |
+
"mean_left_joint_mae": 0.143677,
|
| 581 |
+
"std_left_joint_mae": 0.056155,
|
| 582 |
+
"mean_left_gripper_mae": 0.053215,
|
| 583 |
+
"std_left_gripper_mae": 0.074232,
|
| 584 |
+
"mean_right_joint_mae": 0.072165,
|
| 585 |
+
"std_right_joint_mae": 0.068555,
|
| 586 |
+
"mean_right_gripper_mae": 0.051764,
|
| 587 |
+
"std_right_gripper_mae": 0.054067,
|
| 588 |
+
"mean_left_right_imbalance_mae": 0.101649,
|
| 589 |
+
"std_left_right_imbalance_mae": 0.063159,
|
| 590 |
+
"per_batch_timing_seconds": "mean=0.3640 std=0.0430 min=0.3333 max=0.4572",
|
| 591 |
+
"model": "baseline",
|
| 592 |
+
"checkpoint_step": 1000
|
| 593 |
+
},
|
| 594 |
+
{
|
| 595 |
+
"num_steps": 4,
|
| 596 |
+
"num_batches": 16,
|
| 597 |
+
"mean_masked_mae": 0.060253,
|
| 598 |
+
"std_masked_mae": 0.017936,
|
| 599 |
+
"mean_left_arm_mae": 0.078725,
|
| 600 |
+
"std_left_arm_mae": 0.032786,
|
| 601 |
+
"mean_right_arm_mae": 0.041781,
|
| 602 |
+
"std_right_arm_mae": 0.04091,
|
| 603 |
+
"mean_left_joint_mae": 0.083688,
|
| 604 |
+
"std_left_joint_mae": 0.036089,
|
| 605 |
+
"mean_left_gripper_mae": 0.043985,
|
| 606 |
+
"std_left_gripper_mae": 0.072901,
|
| 607 |
+
"mean_right_joint_mae": 0.042767,
|
| 608 |
+
"std_right_joint_mae": 0.041669,
|
| 609 |
+
"mean_right_gripper_mae": 0.034874,
|
| 610 |
+
"std_right_gripper_mae": 0.058769,
|
| 611 |
+
"mean_left_right_imbalance_mae": 0.063418,
|
| 612 |
+
"std_left_right_imbalance_mae": 0.039412,
|
| 613 |
+
"per_batch_timing_seconds": "mean=0.3006 std=0.0345 min=0.2674 max=0.3753",
|
| 614 |
+
"model": "baseline",
|
| 615 |
+
"checkpoint_step": 2000
|
| 616 |
+
},
|
| 617 |
+
{
|
| 618 |
+
"num_steps": 10,
|
| 619 |
+
"num_batches": 16,
|
| 620 |
+
"mean_masked_mae": 0.065765,
|
| 621 |
+
"std_masked_mae": 0.016923,
|
| 622 |
+
"mean_left_arm_mae": 0.086375,
|
| 623 |
+
"std_left_arm_mae": 0.032761,
|
| 624 |
+
"mean_right_arm_mae": 0.045154,
|
| 625 |
+
"std_right_arm_mae": 0.041131,
|
| 626 |
+
"mean_left_joint_mae": 0.092111,
|
| 627 |
+
"std_left_joint_mae": 0.036788,
|
| 628 |
+
"mean_left_gripper_mae": 0.046224,
|
| 629 |
+
"std_left_gripper_mae": 0.076043,
|
| 630 |
+
"mean_right_joint_mae": 0.046163,
|
| 631 |
+
"std_right_joint_mae": 0.042138,
|
| 632 |
+
"mean_right_gripper_mae": 0.038093,
|
| 633 |
+
"std_right_gripper_mae": 0.056179,
|
| 634 |
+
"mean_left_right_imbalance_mae": 0.066659,
|
| 635 |
+
"std_left_right_imbalance_mae": 0.040501,
|
| 636 |
+
"per_batch_timing_seconds": "mean=0.3586 std=0.0248 min=0.3396 max=0.4220",
|
| 637 |
+
"model": "baseline",
|
| 638 |
+
"checkpoint_step": 2000
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"num_steps": 4,
|
| 642 |
+
"num_batches": 16,
|
| 643 |
+
"mean_masked_mae": 0.03972,
|
| 644 |
+
"std_masked_mae": 0.014654,
|
| 645 |
+
"mean_left_arm_mae": 0.049239,
|
| 646 |
+
"std_left_arm_mae": 0.019869,
|
| 647 |
+
"mean_right_arm_mae": 0.030201,
|
| 648 |
+
"std_right_arm_mae": 0.034473,
|
| 649 |
+
"mean_left_joint_mae": 0.052215,
|
| 650 |
+
"std_left_joint_mae": 0.023235,
|
| 651 |
+
"mean_left_gripper_mae": 0.028408,
|
| 652 |
+
"std_left_gripper_mae": 0.028427,
|
| 653 |
+
"mean_right_joint_mae": 0.031159,
|
| 654 |
+
"std_right_joint_mae": 0.037572,
|
| 655 |
+
"mean_right_gripper_mae": 0.02349,
|
| 656 |
+
"std_right_gripper_mae": 0.024208,
|
| 657 |
+
"mean_left_right_imbalance_mae": 0.04196,
|
| 658 |
+
"std_left_right_imbalance_mae": 0.030152,
|
| 659 |
+
"per_batch_timing_seconds": "mean=0.2920 std=0.0342 min=0.2585 max=0.3528",
|
| 660 |
+
"model": "baseline",
|
| 661 |
+
"checkpoint_step": 5000
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"num_steps": 10,
|
| 665 |
+
"num_batches": 16,
|
| 666 |
+
"mean_masked_mae": 0.043346,
|
| 667 |
+
"std_masked_mae": 0.013818,
|
| 668 |
+
"mean_left_arm_mae": 0.053788,
|
| 669 |
+
"std_left_arm_mae": 0.020493,
|
| 670 |
+
"mean_right_arm_mae": 0.032904,
|
| 671 |
+
"std_right_arm_mae": 0.034889,
|
| 672 |
+
"mean_left_joint_mae": 0.057689,
|
| 673 |
+
"std_left_joint_mae": 0.024439,
|
| 674 |
+
"mean_left_gripper_mae": 0.026486,
|
| 675 |
+
"std_left_gripper_mae": 0.029864,
|
| 676 |
+
"mean_right_joint_mae": 0.0337,
|
| 677 |
+
"std_right_joint_mae": 0.038002,
|
| 678 |
+
"mean_right_gripper_mae": 0.027331,
|
| 679 |
+
"std_right_gripper_mae": 0.027093,
|
| 680 |
+
"mean_left_right_imbalance_mae": 0.044562,
|
| 681 |
+
"std_left_right_imbalance_mae": 0.030999,
|
| 682 |
+
"per_batch_timing_seconds": "mean=0.3951 std=0.0357 min=0.3463 max=0.4774",
|
| 683 |
+
"model": "baseline",
|
| 684 |
+
"checkpoint_step": 5000
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"num_steps": 4,
|
| 688 |
+
"num_batches": 16,
|
| 689 |
+
"mean_masked_mae": 0.029935,
|
| 690 |
+
"std_masked_mae": 0.0082,
|
| 691 |
+
"mean_left_arm_mae": 0.041062,
|
| 692 |
+
"std_left_arm_mae": 0.019621,
|
| 693 |
+
"mean_right_arm_mae": 0.018807,
|
| 694 |
+
"std_right_arm_mae": 0.018117,
|
| 695 |
+
"mean_left_joint_mae": 0.04444,
|
| 696 |
+
"std_left_joint_mae": 0.02295,
|
| 697 |
+
"mean_left_gripper_mae": 0.017416,
|
| 698 |
+
"std_left_gripper_mae": 0.016394,
|
| 699 |
+
"mean_right_joint_mae": 0.0195,
|
| 700 |
+
"std_right_joint_mae": 0.019305,
|
| 701 |
+
"mean_right_gripper_mae": 0.013963,
|
| 702 |
+
"std_right_gripper_mae": 0.019504,
|
| 703 |
+
"mean_left_right_imbalance_mae": 0.033733,
|
| 704 |
+
"std_left_right_imbalance_mae": 0.022691,
|
| 705 |
+
"per_batch_timing_seconds": "mean=0.2793 std=0.0247 min=0.2625 max=0.3469",
|
| 706 |
+
"model": "baseline",
|
| 707 |
+
"checkpoint_step": 10000
|
| 708 |
+
},
|
| 709 |
+
{
|
| 710 |
+
"num_steps": 10,
|
| 711 |
+
"num_batches": 16,
|
| 712 |
+
"mean_masked_mae": 0.030294,
|
| 713 |
+
"std_masked_mae": 0.007277,
|
| 714 |
+
"mean_left_arm_mae": 0.041307,
|
| 715 |
+
"std_left_arm_mae": 0.019181,
|
| 716 |
+
"mean_right_arm_mae": 0.019282,
|
| 717 |
+
"std_right_arm_mae": 0.019077,
|
| 718 |
+
"mean_left_joint_mae": 0.045179,
|
| 719 |
+
"std_left_joint_mae": 0.022508,
|
| 720 |
+
"mean_left_gripper_mae": 0.014207,
|
| 721 |
+
"std_left_gripper_mae": 0.016425,
|
| 722 |
+
"mean_right_joint_mae": 0.020231,
|
| 723 |
+
"std_right_joint_mae": 0.020465,
|
| 724 |
+
"mean_right_gripper_mae": 0.01264,
|
| 725 |
+
"std_right_gripper_mae": 0.018571,
|
| 726 |
+
"mean_left_right_imbalance_mae": 0.034582,
|
| 727 |
+
"std_left_right_imbalance_mae": 0.023261,
|
| 728 |
+
"per_batch_timing_seconds": "mean=0.3823 std=0.0398 min=0.3432 max=0.4686",
|
| 729 |
+
"model": "baseline",
|
| 730 |
+
"checkpoint_step": 10000
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"num_steps": 4,
|
| 734 |
+
"num_batches": 16,
|
| 735 |
+
"mean_masked_mae": 0.09253,
|
| 736 |
+
"std_masked_mae": 0.020956,
|
| 737 |
+
"mean_left_arm_mae": 0.122108,
|
| 738 |
+
"std_left_arm_mae": 0.04378,
|
| 739 |
+
"mean_right_arm_mae": 0.062952,
|
| 740 |
+
"std_right_arm_mae": 0.056483,
|
| 741 |
+
"mean_left_joint_mae": 0.133062,
|
| 742 |
+
"std_left_joint_mae": 0.052111,
|
| 743 |
+
"mean_left_gripper_mae": 0.045431,
|
| 744 |
+
"std_left_gripper_mae": 0.055952,
|
| 745 |
+
"mean_right_joint_mae": 0.065476,
|
| 746 |
+
"std_right_joint_mae": 0.060695,
|
| 747 |
+
"mean_right_gripper_mae": 0.04528,
|
| 748 |
+
"std_right_gripper_mae": 0.053039,
|
| 749 |
+
"mean_left_right_imbalance_mae": 0.093392,
|
| 750 |
+
"std_left_right_imbalance_mae": 0.056874,
|
| 751 |
+
"per_batch_timing_seconds": "mean=0.3110 std=0.0430 min=0.2654 max=0.3864",
|
| 752 |
+
"model": "parallel",
|
| 753 |
+
"checkpoint_step": 1000
|
| 754 |
+
},
|
| 755 |
+
{
|
| 756 |
+
"num_steps": 10,
|
| 757 |
+
"num_batches": 16,
|
| 758 |
+
"mean_masked_mae": 0.102452,
|
| 759 |
+
"std_masked_mae": 0.022208,
|
| 760 |
+
"mean_left_arm_mae": 0.13361,
|
| 761 |
+
"std_left_arm_mae": 0.044796,
|
| 762 |
+
"mean_right_arm_mae": 0.071295,
|
| 763 |
+
"std_right_arm_mae": 0.061523,
|
| 764 |
+
"mean_left_joint_mae": 0.145474,
|
| 765 |
+
"std_left_joint_mae": 0.053589,
|
| 766 |
+
"mean_left_gripper_mae": 0.05056,
|
| 767 |
+
"std_left_gripper_mae": 0.060317,
|
| 768 |
+
"mean_right_joint_mae": 0.073909,
|
| 769 |
+
"std_right_joint_mae": 0.066406,
|
| 770 |
+
"mean_right_gripper_mae": 0.053,
|
| 771 |
+
"std_right_gripper_mae": 0.051143,
|
| 772 |
+
"mean_left_right_imbalance_mae": 0.099213,
|
| 773 |
+
"std_left_right_imbalance_mae": 0.060422,
|
| 774 |
+
"per_batch_timing_seconds": "mean=0.4143 std=0.0560 min=0.3405 max=0.5017",
|
| 775 |
+
"model": "parallel",
|
| 776 |
+
"checkpoint_step": 1000
|
| 777 |
+
},
|
| 778 |
+
{
|
| 779 |
+
"num_steps": 4,
|
| 780 |
+
"num_batches": 16,
|
| 781 |
+
"mean_masked_mae": 0.05986,
|
| 782 |
+
"std_masked_mae": 0.012924,
|
| 783 |
+
"mean_left_arm_mae": 0.080984,
|
| 784 |
+
"std_left_arm_mae": 0.031604,
|
| 785 |
+
"mean_right_arm_mae": 0.038736,
|
| 786 |
+
"std_right_arm_mae": 0.031293,
|
| 787 |
+
"mean_left_joint_mae": 0.086197,
|
| 788 |
+
"std_left_joint_mae": 0.035912,
|
| 789 |
+
"mean_left_gripper_mae": 0.04449,
|
| 790 |
+
"std_left_gripper_mae": 0.062755,
|
| 791 |
+
"mean_right_joint_mae": 0.039304,
|
| 792 |
+
"std_right_joint_mae": 0.030982,
|
| 793 |
+
"mean_right_gripper_mae": 0.034761,
|
| 794 |
+
"std_right_gripper_mae": 0.051397,
|
| 795 |
+
"mean_left_right_imbalance_mae": 0.061196,
|
| 796 |
+
"std_left_right_imbalance_mae": 0.036442,
|
| 797 |
+
"per_batch_timing_seconds": "mean=0.3702 std=0.1017 min=0.2793 max=0.7256",
|
| 798 |
+
"model": "parallel",
|
| 799 |
+
"checkpoint_step": 2000
|
| 800 |
+
},
|
| 801 |
+
{
|
| 802 |
+
"num_steps": 10,
|
| 803 |
+
"num_batches": 16,
|
| 804 |
+
"mean_masked_mae": 0.065897,
|
| 805 |
+
"std_masked_mae": 0.012628,
|
| 806 |
+
"mean_left_arm_mae": 0.088735,
|
| 807 |
+
"std_left_arm_mae": 0.03201,
|
| 808 |
+
"mean_right_arm_mae": 0.043059,
|
| 809 |
+
"std_right_arm_mae": 0.032823,
|
| 810 |
+
"mean_left_joint_mae": 0.094654,
|
| 811 |
+
"std_left_joint_mae": 0.036668,
|
| 812 |
+
"mean_left_gripper_mae": 0.047298,
|
| 813 |
+
"std_left_gripper_mae": 0.06466,
|
| 814 |
+
"mean_right_joint_mae": 0.043769,
|
| 815 |
+
"std_right_joint_mae": 0.032862,
|
| 816 |
+
"mean_right_gripper_mae": 0.038089,
|
| 817 |
+
"std_right_gripper_mae": 0.049635,
|
| 818 |
+
"mean_left_right_imbalance_mae": 0.064491,
|
| 819 |
+
"std_left_right_imbalance_mae": 0.038643,
|
| 820 |
+
"per_batch_timing_seconds": "mean=0.4575 std=0.0902 min=0.3373 max=0.6590",
|
| 821 |
+
"model": "parallel",
|
| 822 |
+
"checkpoint_step": 2000
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"num_steps": 4,
|
| 826 |
+
"num_batches": 16,
|
| 827 |
+
"mean_masked_mae": 0.040712,
|
| 828 |
+
"std_masked_mae": 0.013646,
|
| 829 |
+
"mean_left_arm_mae": 0.050681,
|
| 830 |
+
"std_left_arm_mae": 0.020624,
|
| 831 |
+
"mean_right_arm_mae": 0.030742,
|
| 832 |
+
"std_right_arm_mae": 0.03279,
|
| 833 |
+
"mean_left_joint_mae": 0.053976,
|
| 834 |
+
"std_left_joint_mae": 0.024153,
|
| 835 |
+
"mean_left_gripper_mae": 0.027611,
|
| 836 |
+
"std_left_gripper_mae": 0.02458,
|
| 837 |
+
"mean_right_joint_mae": 0.032227,
|
| 838 |
+
"std_right_joint_mae": 0.03635,
|
| 839 |
+
"mean_right_gripper_mae": 0.020349,
|
| 840 |
+
"std_right_gripper_mae": 0.017496,
|
| 841 |
+
"mean_left_right_imbalance_mae": 0.042435,
|
| 842 |
+
"std_left_right_imbalance_mae": 0.029207,
|
| 843 |
+
"per_batch_timing_seconds": "mean=0.3861 std=0.0848 min=0.2719 max=0.5485",
|
| 844 |
+
"model": "parallel",
|
| 845 |
+
"checkpoint_step": 5000
|
| 846 |
+
},
|
| 847 |
+
{
|
| 848 |
+
"num_steps": 10,
|
| 849 |
+
"num_batches": 16,
|
| 850 |
+
"mean_masked_mae": 0.044799,
|
| 851 |
+
"std_masked_mae": 0.012807,
|
| 852 |
+
"mean_left_arm_mae": 0.055016,
|
| 853 |
+
"std_left_arm_mae": 0.021278,
|
| 854 |
+
"mean_right_arm_mae": 0.034583,
|
| 855 |
+
"std_right_arm_mae": 0.032757,
|
| 856 |
+
"mean_left_joint_mae": 0.059296,
|
| 857 |
+
"std_left_joint_mae": 0.025068,
|
| 858 |
+
"mean_left_gripper_mae": 0.025058,
|
| 859 |
+
"std_left_gripper_mae": 0.027173,
|
| 860 |
+
"mean_right_joint_mae": 0.035777,
|
| 861 |
+
"std_right_joint_mae": 0.036454,
|
| 862 |
+
"mean_right_gripper_mae": 0.026224,
|
| 863 |
+
"std_right_gripper_mae": 0.01689,
|
| 864 |
+
"mean_left_right_imbalance_mae": 0.043614,
|
| 865 |
+
"std_left_right_imbalance_mae": 0.030178,
|
| 866 |
+
"per_batch_timing_seconds": "mean=0.4549 std=0.0835 min=0.3373 max=0.6280",
|
| 867 |
+
"model": "parallel",
|
| 868 |
+
"checkpoint_step": 5000
|
| 869 |
+
},
|
| 870 |
+
{
|
| 871 |
+
"num_steps": 4,
|
| 872 |
+
"num_batches": 16,
|
| 873 |
+
"mean_masked_mae": 0.029277,
|
| 874 |
+
"std_masked_mae": 0.007579,
|
| 875 |
+
"mean_left_arm_mae": 0.040375,
|
| 876 |
+
"std_left_arm_mae": 0.01919,
|
| 877 |
+
"mean_right_arm_mae": 0.018178,
|
| 878 |
+
"std_right_arm_mae": 0.015856,
|
| 879 |
+
"mean_left_joint_mae": 0.043636,
|
| 880 |
+
"std_left_joint_mae": 0.022278,
|
| 881 |
+
"mean_left_gripper_mae": 0.017546,
|
| 882 |
+
"std_left_gripper_mae": 0.013485,
|
| 883 |
+
"mean_right_joint_mae": 0.018908,
|
| 884 |
+
"std_right_joint_mae": 0.017028,
|
| 885 |
+
"mean_right_gripper_mae": 0.013066,
|
| 886 |
+
"std_right_gripper_mae": 0.016678,
|
| 887 |
+
"mean_left_right_imbalance_mae": 0.031629,
|
| 888 |
+
"std_left_right_imbalance_mae": 0.022404,
|
| 889 |
+
"per_batch_timing_seconds": "mean=0.3241 std=0.0551 min=0.2600 max=0.4241",
|
| 890 |
+
"model": "parallel",
|
| 891 |
+
"checkpoint_step": 10000
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"num_steps": 10,
|
| 895 |
+
"num_batches": 16,
|
| 896 |
+
"mean_masked_mae": 0.030241,
|
| 897 |
+
"std_masked_mae": 0.00674,
|
| 898 |
+
"mean_left_arm_mae": 0.041072,
|
| 899 |
+
"std_left_arm_mae": 0.018866,
|
| 900 |
+
"mean_right_arm_mae": 0.01941,
|
| 901 |
+
"std_right_arm_mae": 0.017031,
|
| 902 |
+
"mean_left_joint_mae": 0.044817,
|
| 903 |
+
"std_left_joint_mae": 0.022046,
|
| 904 |
+
"mean_left_gripper_mae": 0.014857,
|
| 905 |
+
"std_left_gripper_mae": 0.014376,
|
| 906 |
+
"mean_right_joint_mae": 0.020279,
|
| 907 |
+
"std_right_joint_mae": 0.018425,
|
| 908 |
+
"mean_right_gripper_mae": 0.013323,
|
| 909 |
+
"std_right_gripper_mae": 0.014475,
|
| 910 |
+
"mean_left_right_imbalance_mae": 0.032456,
|
| 911 |
+
"std_left_right_imbalance_mae": 0.022935,
|
| 912 |
+
"per_batch_timing_seconds": "mean=0.4058 std=0.0569 min=0.3332 max=0.5100",
|
| 913 |
+
"model": "parallel",
|
| 914 |
+
"checkpoint_step": 10000
|
| 915 |
+
}
|
| 916 |
+
],
|
| 917 |
+
"runtime": [
|
| 918 |
+
{
|
| 919 |
+
"stage": "baseline_train",
|
| 920 |
+
"start_utc": "2026-03-09 16:03:23 UTC",
|
| 921 |
+
"end_utc": "2026-03-09 18:17:03 UTC",
|
| 922 |
+
"duration_seconds": 8020,
|
| 923 |
+
"duration_hms": "2:13:40"
|
| 924 |
+
},
|
| 925 |
+
{
|
| 926 |
+
"stage": "baseline_eval_1000",
|
| 927 |
+
"start_utc": "2026-03-09 18:17:03 UTC",
|
| 928 |
+
"end_utc": "2026-03-09 18:23:42 UTC",
|
| 929 |
+
"duration_seconds": 399,
|
| 930 |
+
"duration_hms": "0:06:39"
|
| 931 |
+
},
|
| 932 |
+
{
|
| 933 |
+
"stage": "baseline_eval_2000",
|
| 934 |
+
"start_utc": "2026-03-09 18:23:42 UTC",
|
| 935 |
+
"end_utc": "2026-03-09 18:28:54 UTC",
|
| 936 |
+
"duration_seconds": 312,
|
| 937 |
+
"duration_hms": "0:05:12"
|
| 938 |
+
},
|
| 939 |
+
{
|
| 940 |
+
"stage": "baseline_eval_5000",
|
| 941 |
+
"start_utc": "2026-03-09 18:28:54 UTC",
|
| 942 |
+
"end_utc": "2026-03-09 18:33:53 UTC",
|
| 943 |
+
"duration_seconds": 299,
|
| 944 |
+
"duration_hms": "0:04:59"
|
| 945 |
+
},
|
| 946 |
+
{
|
| 947 |
+
"stage": "baseline_eval_10000",
|
| 948 |
+
"start_utc": "2026-03-09 18:33:53 UTC",
|
| 949 |
+
"end_utc": "2026-03-09 18:41:07 UTC",
|
| 950 |
+
"duration_seconds": 434,
|
| 951 |
+
"duration_hms": "0:07:14"
|
| 952 |
+
},
|
| 953 |
+
{
|
| 954 |
+
"stage": "parallel_train",
|
| 955 |
+
"start_utc": "2026-03-09 18:41:07 UTC",
|
| 956 |
+
"end_utc": "2026-03-09 21:01:58 UTC",
|
| 957 |
+
"duration_seconds": 8451,
|
| 958 |
+
"duration_hms": "2:20:51"
|
| 959 |
+
},
|
| 960 |
+
{
|
| 961 |
+
"stage": "parallel_eval_1000",
|
| 962 |
+
"start_utc": "2026-03-09 21:01:58 UTC",
|
| 963 |
+
"end_utc": "2026-03-09 21:14:35 UTC",
|
| 964 |
+
"duration_seconds": 757,
|
| 965 |
+
"duration_hms": "0:12:37"
|
| 966 |
+
},
|
| 967 |
+
{
|
| 968 |
+
"stage": "parallel_eval_2000",
|
| 969 |
+
"start_utc": "2026-03-09 21:14:35 UTC",
|
| 970 |
+
"end_utc": "2026-03-09 21:22:39 UTC",
|
| 971 |
+
"duration_seconds": 484,
|
| 972 |
+
"duration_hms": "0:08:04"
|
| 973 |
+
},
|
| 974 |
+
{
|
| 975 |
+
"stage": "parallel_eval_5000",
|
| 976 |
+
"start_utc": "2026-03-09 21:22:40 UTC",
|
| 977 |
+
"end_utc": "2026-03-09 21:35:26 UTC",
|
| 978 |
+
"duration_seconds": 766,
|
| 979 |
+
"duration_hms": "0:12:46"
|
| 980 |
+
},
|
| 981 |
+
{
|
| 982 |
+
"stage": "parallel_eval_10000",
|
| 983 |
+
"start_utc": "2026-03-09 21:35:26 UTC",
|
| 984 |
+
"end_utc": "2026-03-09 21:45:53 UTC",
|
| 985 |
+
"duration_seconds": 627,
|
| 986 |
+
"duration_hms": "0:10:27"
|
| 987 |
+
},
|
| 988 |
+
{
|
| 989 |
+
"stage": "full_pipeline",
|
| 990 |
+
"start_utc": "2026-03-09 15:57:20 UTC",
|
| 991 |
+
"end_utc": "2026-03-09 21:45:53 UTC",
|
| 992 |
+
"duration_seconds": 20913,
|
| 993 |
+
"duration_hms": "5:48:33"
|
| 994 |
+
}
|
| 995 |
+
],
|
| 996 |
+
"warmstart_equivalence": {
|
| 997 |
+
"baseline_config_name": "pi05_twin_handover_256_packed_baseline_pytorch_10k",
|
| 998 |
+
"parallel_config_name": "pi05_twin_handover_256_packed_parallel_pytorch_10k",
|
| 999 |
+
"repo_id_used": "lsnu/twin_handover_256_train",
|
| 1000 |
+
"baseline_ckpt": "/workspace/checkpoints/pi05_base_single_pytorch",
|
| 1001 |
+
"parallel_ckpt": "/workspace/checkpoints/pi05_base_parallel_packed_from_single",
|
| 1002 |
+
"batch_size": 4,
|
| 1003 |
+
"eval_seed": 777,
|
| 1004 |
+
"tolerance": "1e-06",
|
| 1005 |
+
"baseline_missing_keys": "[]",
|
| 1006 |
+
"baseline_unexpected_keys": "[]",
|
| 1007 |
+
"parallel_missing_keys": "[]",
|
| 1008 |
+
"parallel_unexpected_keys": "[]",
|
| 1009 |
+
"input_projection_max_abs_diff": 0.00122881,
|
| 1010 |
+
"input_projection_mean_abs_diff": 0.00015435,
|
| 1011 |
+
"loss_max_abs_diff": 0.90186501,
|
| 1012 |
+
"loss_mean_abs_diff": 0.04585753,
|
| 1013 |
+
"baseline_masked_loss": 1.00531137,
|
| 1014 |
+
"parallel_masked_loss": 1.00929189,
|
| 1015 |
+
"masked_loss_abs_diff": 0.00398052,
|
| 1016 |
+
"warmstart_equivalent": false
|
| 1017 |
+
}
|
| 1018 |
+
}
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/teacher_forced_eval_table.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,checkpoint_step,teacher_forced_eval_seed,config_name,checkpoint_path,repo_id_used,num_batches,mean_val_loss,std_val_loss,mean_left_arm_loss,std_left_arm_loss,mean_right_arm_loss,std_right_arm_loss,mean_left_joint_loss,std_left_joint_loss,mean_left_gripper_loss,std_left_gripper_loss,mean_right_joint_loss,std_right_joint_loss,mean_right_gripper_loss,std_right_gripper_loss,mean_left_right_imbalance,std_left_right_imbalance,per_batch_timing_seconds,active_mask_dims,masked_dims,weight_loading_missing_keys,weight_loading_unexpected_keys
|
| 2 |
+
baseline,1000,123,pi05_twin_handover_256_packed_baseline_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000,lsnu/twin_handover_256_val,50,0.06113,0.043921,0.077421,0.059309,0.04484,0.080634,0.082092,0.06674,0.04472,0.088365,0.046274,0.087919,0.034807,0.076825,0.08012,0.083456,mean=0.3040 std=0.1266 min=0.2246 max=0.8837,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 3 |
+
baseline,2000,123,pi05_twin_handover_256_packed_baseline_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000,lsnu/twin_handover_256_val,50,0.041595,0.030015,0.049919,0.033208,0.033271,0.059873,0.051501,0.035502,0.038846,0.082622,0.034159,0.066139,0.027055,0.06654,0.05474,0.055247,mean=0.2487 std=0.0844 min=0.2239 max=0.8257,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 4 |
+
baseline,5000,123,pi05_twin_handover_256_packed_baseline_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000,lsnu/twin_handover_256_val,50,0.027324,0.020404,0.039118,0.037404,0.015529,0.023314,0.042035,0.041763,0.018705,0.031815,0.015711,0.023929,0.014261,0.030013,0.038961,0.035474,mean=0.2601 std=0.0801 min=0.2212 max=0.7730,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 5 |
+
baseline,10000,123,pi05_twin_handover_256_packed_baseline_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000,lsnu/twin_handover_256_val,100,0.022345,0.024337,0.029659,0.039896,0.015031,0.032929,0.031507,0.044637,0.016725,0.040894,0.015776,0.036308,0.009818,0.028543,0.034067,0.045126,mean=0.2524 std=0.0719 min=0.2263 max=0.8903,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 6 |
+
parallel,1000,123,pi05_twin_handover_256_packed_parallel_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000,lsnu/twin_handover_256_val,50,0.059715,0.042962,0.073681,0.049928,0.045749,0.082818,0.078129,0.055212,0.042541,0.08491,0.047261,0.090299,0.035161,0.079674,0.075806,0.079713,mean=0.3663 std=0.6150 min=0.2224 max=4.6353,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 7 |
+
parallel,2000,123,pi05_twin_handover_256_packed_parallel_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000,lsnu/twin_handover_256_val,50,0.039947,0.025053,0.050148,0.033233,0.029745,0.04786,0.051925,0.036277,0.037711,0.077017,0.030139,0.051862,0.026984,0.065713,0.051938,0.044701,mean=0.3708 std=0.1690 min=0.2327 max=1.3050,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 8 |
+
parallel,5000,123,pi05_twin_handover_256_packed_parallel_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000,lsnu/twin_handover_256_val,50,0.02734,0.020897,0.039155,0.038641,0.015526,0.023413,0.042035,0.043377,0.018994,0.032843,0.015753,0.024564,0.013938,0.029304,0.038635,0.037436,mean=0.3717 std=0.2172 min=0.2283 max=1.7875,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
| 9 |
+
parallel,10000,123,pi05_twin_handover_256_packed_parallel_pytorch_10k,/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000,lsnu/twin_handover_256_val,100,0.022168,0.024902,0.030184,0.043653,0.014151,0.029382,0.032356,0.048977,0.014984,0.037395,0.014888,0.032582,0.008996,0.025757,0.033825,0.046586,mean=0.3248 std=0.0893 min=0.2203 max=0.7969,"[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]","[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]",[],[]
|
artifacts/twin_handover_packed_parallelization_10k_20260309/metrics/train_loss_table.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,step,ts,loss,smoothed,lr,grad_norm,step_time,data_time,its,eta,mem,grad_action_in_proj,grad_action_out_proj,grad_shared_expert,grad_action_in_proj_arms,grad_action_out_proj_arms,grad_arm_token_fuse
|
| 2 |
+
baseline,1000,16:16:42.668,0.0228,0.0476,2.48e-05,0.9699,0.5638,0.0801,1.553,5793.6,35.23GB,0.0109,0.1595,0.4924,,,
|
| 3 |
+
baseline,2000,16:28:30.872,0.0492,0.0284,2.37e-05,0.6437,0.4982,0.0622,1.785,4482.7,35.23GB,0.0184,0.2195,0.8358,,,
|
| 4 |
+
baseline,5000,17:04:21.626,0.0038,0.0165,1.47e-05,0.5112,0.4974,0.0606,1.792,2789.7,35.23GB,0.0101,0.1353,1.1505,,,
|
| 5 |
+
baseline,10000,18:15:00.659,0.0141,0.0172,2.50e-06,0.4377,0.5241,0.1210,1.550,0.0,35.23GB,0.0125,0.1342,0.4184,,,
|
| 6 |
+
parallel,1000,18:56:22.847,0.0246,0.0492,2.48e-05,0.9470,0.5836,0.1086,1.445,6229.0,35.27GB,,,0.5049,0.0139,0.1631,0.0704
|
| 7 |
+
parallel,2000,19:09:53.627,0.0280,0.0267,2.37e-05,0.6051,0.7138,0.1628,1.141,7012.2,35.27GB,,,0.5627,0.0180,0.1784,0.0955
|
| 8 |
+
parallel,5000,19:50:55.815,0.0043,0.0159,1.47e-05,0.4850,0.5183,0.0658,1.712,2920.0,35.27GB,,,1.0533,0.0105,0.1454,0.0568
|
| 9 |
+
parallel,10000,20:58:23.797,0.0140,0.0169,2.50e-06,0.4269,0.6919,0.2213,1.095,0.0,35.27GB,,,0.4071,0.0121,0.1277,0.0634
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/__pycache__/upload_to_hf.cpython-311.pyc
ADDED
|
Binary file (3.03 kB). View file
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/__pycache__/upload_to_hf_incremental.cpython-311.pyc
ADDED
|
Binary file (9.12 kB). View file
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/changed_files.txt
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Phase 1 initial study file list:
|
| 2 |
+
see artifacts/twin_handover_packed_parallelization_20260309/repro/changed_files.txt
|
| 3 |
+
|
| 4 |
+
Phase 2 10K follow-up additions and updates:
|
| 5 |
+
|
| 6 |
+
openpi/src/openpi/training/config.py
|
| 7 |
+
added pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 8 |
+
added pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 9 |
+
added 10K packed norm-stats asset paths
|
| 10 |
+
|
| 11 |
+
openpi/scripts/train_pytorch.py
|
| 12 |
+
added periodic per-module gradient bucket norms for baseline and parallel models
|
| 13 |
+
baseline buckets: action_in_proj, action_out_proj, shared_expert
|
| 14 |
+
parallel buckets: action_in_proj_arms, arm_token_fuse, action_out_proj_arms, shared_expert
|
| 15 |
+
|
| 16 |
+
openpi/scripts/eval_twin_val_loss_pytorch.py
|
| 17 |
+
added left/right arm teacher-forced losses
|
| 18 |
+
added joint vs gripper teacher-forced losses
|
| 19 |
+
added left/right imbalance
|
| 20 |
+
added deterministic sample_actions eval on a fixed subset for num_steps=4,10
|
| 21 |
+
|
| 22 |
+
openpi/scripts/check_parallel_warmstart_equivalence.py
|
| 23 |
+
added explicit step-0 numerical comparison between the packed single-head bootstrap and packed parallel warm-start
|
| 24 |
+
|
| 25 |
+
openpi/scripts/run_twin_handover_packed_10k.sh
|
| 26 |
+
added detached 10K baseline->eval sweep->parallel->eval sweep runner
|
| 27 |
+
|
| 28 |
+
openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json
|
| 29 |
+
copied existing public handover-train norm stats for the 10K baseline config
|
| 30 |
+
|
| 31 |
+
openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json
|
| 32 |
+
copied existing public handover-train norm stats for the 10K parallel config
|
| 33 |
+
|
| 34 |
+
README.md
|
| 35 |
+
updated repo landing page to cover both the 2K initial study and the 10K follow-up
|
| 36 |
+
|
| 37 |
+
REPORT.md
|
| 38 |
+
updated full report to include methodology, changed files, runtimes, warm-start check, and final 10K metrics
|
| 39 |
+
|
| 40 |
+
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf.py
|
| 41 |
+
added reproducible Hub uploader for the final 10K bundle, docs, code, assets, and checkpoints (a staged, per-folder variant lives alongside it as upload_to_hf_incremental.py)
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/checkpoint_locations.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/workspace/checkpoints/pi05_base_single_pytorch
|
| 2 |
+
/workspace/checkpoints/pi05_base_parallel_packed_from_single
|
| 3 |
+
/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k
|
| 4 |
+
/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/commands_reproduce.sh
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Reproduction commands for the 10K twin-handover packed baseline vs. parallel study.
#
# Stage toggles (environment variables, all optional):
#   RUN_SMOKE=1    also run the optional smoke-test trainings (default: 0)
#   DETACH=1       launch the full 10K chain detached and stop here; the later
#                  steps need finished checkpoints (default: 0, run in foreground)
#   RERUN_EVALS=1  re-run the step-10000 evals by hand after training (default: 0)
#   HF_TOKEN / HF_TOKEN_FILE
#                  credentials for the final upload; the upload is skipped when
#                  neither is set instead of aborting the whole script
set -euo pipefail

cd /workspace/pi05tests-openpi-multiarm/openpi
source .venv/bin/activate

export HF_HOME=/workspace/.hf
export HF_HUB_CACHE=/workspace/.hf/hub
export HF_DATASETS_CACHE=/workspace/.hf/datasets
export HUGGINGFACE_HUB_CACHE=/workspace/.hf/hub
export XDG_CACHE_HOME=/workspace/.cache
export OPENPI_LEROBOT_HOME=/workspace/lerobot
export OPENPI_TORCH_COMPILE_SAMPLE_ACTIONS=0
export TOKENIZERS_PARALLELISM=false
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Warm-start numerical check.
python scripts/check_parallel_warmstart_equivalence.py

# Optional smoke tests (opt in with RUN_SMOKE=1).
if [[ "${RUN_SMOKE:-0}" == "1" ]]; then
  torchrun --standalone --nproc_per_node=4 scripts/train_pytorch.py \
    pi05_twin_handover_256_packed_baseline_pytorch_10k \
    --exp_name smoke_baseline_10k_diag \
    --overwrite

  torchrun --standalone --nproc_per_node=4 scripts/train_pytorch.py \
    pi05_twin_handover_256_packed_parallel_pytorch_10k \
    --exp_name smoke_parallel_10k_diag \
    --overwrite
fi

# Batch inspection.
python scripts/inspect_twin_packed_batch.py \
  --config_name pi05_twin_handover_256_packed_baseline_pytorch_2k \
  --repo_id lsnu/twin_handover_256_train

# Full 10K chain: run it exactly once, either detached or in the foreground.
# Starting both would put two trainings on the same GPUs and checkpoint dirs.
if [[ "${DETACH:-0}" == "1" ]]; then
  # The redirect target must exist; a failed redirect would be invisible because
  # the outer command discards stdout/stderr.
  mkdir -p /workspace/run_logs
  setsid bash -lc 'cd /workspace/pi05tests-openpi-multiarm/openpi && exec bash ./scripts/run_twin_handover_packed_10k.sh >> /workspace/run_logs/handover_packed_10k_followup.log 2>&1' >/dev/null 2>&1 < /dev/null &
  echo "detached 10K chain started; follow /workspace/run_logs/handover_packed_10k_followup.log" >&2
  echo "re-run with DETACH=0 RERUN_EVALS=1 or upload manually once it has finished" >&2
  exit 0
fi

# Direct full 10K chain, if detach is not needed.
bash ./scripts/run_twin_handover_packed_10k.sh

# Individual evals, if re-running manually after training (opt in with RERUN_EVALS=1).
if [[ "${RERUN_EVALS:-0}" == "1" ]]; then
  python scripts/eval_twin_val_loss_pytorch.py \
    --config_name pi05_twin_handover_256_packed_baseline_pytorch_10k \
    --checkpoint_dir /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000 \
    --repo_id lsnu/twin_handover_256_val \
    --num_batches 100 \
    --num_workers 0 \
    --sample_num_batches 16 \
    --sample_num_steps 4,10

  python scripts/eval_twin_val_loss_pytorch.py \
    --config_name pi05_twin_handover_256_packed_parallel_pytorch_10k \
    --checkpoint_dir /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000 \
    --repo_id lsnu/twin_handover_256_val \
    --num_batches 100 \
    --num_workers 0 \
    --sample_num_batches 16 \
    --sample_num_steps 4,10
fi

# Push the final bundle to the Hugging Face repo after the run finishes.
# The uploader expects HF_TOKEN (or HF_TOKEN_FILE) in the environment.
# Example:
#   export HF_TOKEN=...
if [[ -n "${HF_TOKEN:-}" || -n "${HF_TOKEN_FILE:-}" ]]; then
  python /workspace/pi05tests-openpi-multiarm/artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf.py
else
  echo "HF_TOKEN / HF_TOKEN_FILE not set; skipping Hub upload" >&2
fi
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from huggingface_hub import HfApi
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
REPO_ID = "lsnu/pi05tests-openpi-multiarm"
|
| 10 |
+
REPO_TYPE = "model"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main() -> None:
    """Upload the 10K study bundle (docs, code, assets, checkpoints) to the Hub.

    The token is taken from ``HF_TOKEN``. When that is unset or blank, it is
    read from the file named by ``HF_TOKEN_FILE``; that file is deleted after
    reading when ``HF_TOKEN_FILE_DELETE_AFTER_READ`` is ``"1"``.

    Raises:
        RuntimeError: if neither source yields a non-empty token.
    """
    # Strip so a stray newline or blank value is never sent as a credential.
    token = (os.environ.get("HF_TOKEN") or "").strip()
    token_file = os.environ.get("HF_TOKEN_FILE")
    if not token and token_file:
        token_path = Path(token_file)
        if token_path.exists():
            token = token_path.read_text().strip()
            if os.environ.get("HF_TOKEN_FILE_DELETE_AFTER_READ") == "1":
                token_path.unlink(missing_ok=True)
    if not token:
        raise RuntimeError("HF_TOKEN (or a readable HF_TOKEN_FILE) is required in the environment")

    # This file lives at <repo>/artifacts/<bundle>/repro/, so three levels above
    # its directory is the repository root.
    repo_root = Path(__file__).resolve().parents[3]
    allow_patterns = [
        "README.md",
        "REPORT.md",
        "artifacts/twin_handover_packed_parallelization_10k_20260309/**",
        "openpi/README.md",
        "openpi/pyproject.toml",
        "openpi/uv.lock",
        "openpi/examples/convert_jax_model_to_pytorch.py",
        "openpi/scripts/**",
        "openpi/src/openpi/**",
        "openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_2k/**",
        "openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_2k/**",
        "openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/**",
        "openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/**",
        "openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/**",
        "openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/**",
    ]
    # The "**" patterns above would otherwise sweep interpreter bytecode caches
    # (e.g. repro/__pycache__/*.pyc) into the published bundle.
    ignore_patterns = ["*.pyc", "*.pyo", "*__pycache__/*"]

    print(f"uploading repo_root={repo_root}", flush=True)
    print(f"repo_id={REPO_ID}", flush=True)
    print(f"allow_patterns={allow_patterns}", flush=True)
    print(f"ignore_patterns={ignore_patterns}", flush=True)

    HfApi(token=token).upload_large_folder(
        repo_id=REPO_ID,
        folder_path=repo_root,
        repo_type=REPO_TYPE,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        num_workers=8,
        print_report=True,
        print_report_every=30,
    )
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
|
| 60 |
+
main()
|
artifacts/twin_handover_packed_parallelization_10k_20260309/repro/upload_to_hf_incremental.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import shutil
|
| 5 |
+
import tempfile
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
from huggingface_hub import HfApi
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
REPO_ID = "lsnu/pi05tests-openpi-multiarm"
|
| 12 |
+
REPO_TYPE = "model"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _read_token() -> str:
|
| 16 |
+
token = os.environ.get("HF_TOKEN")
|
| 17 |
+
token_file = os.environ.get("HF_TOKEN_FILE")
|
| 18 |
+
if not token and token_file:
|
| 19 |
+
token_path = Path(token_file)
|
| 20 |
+
if token_path.exists():
|
| 21 |
+
token = token_path.read_text().strip()
|
| 22 |
+
if os.environ.get("HF_TOKEN_FILE_DELETE_AFTER_READ") == "1":
|
| 23 |
+
token_path.unlink(missing_ok=True)
|
| 24 |
+
if not token:
|
| 25 |
+
raise RuntimeError("HF_TOKEN is required in the environment")
|
| 26 |
+
return token
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _verify_path(api: HfApi, path_in_repo: str) -> None:
    """Confirm that ``path_in_repo`` is present in the remote repository.

    Args:
        api: Hub client used for the lookup.
        path_in_repo: Repository-relative path to look up.

    Raises:
        RuntimeError: if the lookup returns nothing (or a ``None`` entry) for
            the requested path.
    """
    found = api.get_paths_info(repo_id=REPO_ID, paths=[path_in_repo], repo_type=REPO_TYPE)
    is_present = bool(found) and found[0] is not None
    if not is_present:
        raise RuntimeError(f"remote path missing after upload: {path_in_repo}")
    print(f"verified remote path: {path_in_repo}", flush=True)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _upload_folder(
    api: HfApi,
    folder_path: Path,
    path_in_repo: str,
    commit_message: str,
    allow_patterns: list[str] | None = None,
    verify_path: str | None = None,
) -> None:
    """Upload one local folder as a single commit, then check it landed.

    Args:
        api: Hub client used for the upload and the follow-up check.
        folder_path: Local directory to upload.
        path_in_repo: Destination directory inside the repository ("" for root).
        commit_message: Commit message for the upload.
        allow_patterns: Optional glob patterns restricting which files are sent.
        verify_path: Remote path to check afterwards. Defaults to
            ``path_in_repo``, then to the first allow pattern with a trailing
            ``/**`` removed.

    Raises:
        RuntimeError: propagated from the remote check when the path is missing.
    """
    # Interpreter bytecode caches are build debris and must not be published.
    ignore_patterns = ["*.pyc", "*.pyo", "*__pycache__/*"]
    print(
        f"upload_folder start folder_path={folder_path} path_in_repo={path_in_repo} "
        f"allow_patterns={allow_patterns}",
        flush=True,
    )
    api.upload_folder(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        folder_path=folder_path,
        path_in_repo=path_in_repo,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        commit_message=commit_message,
    )
    path_to_verify = verify_path or path_in_repo
    if not path_to_verify and allow_patterns:
        # removesuffix drops exactly "/**"; rstrip("/**") would strip any run of
        # trailing "/" and "*" characters instead.
        path_to_verify = allow_patterns[0].removesuffix("/**")
    if not path_to_verify:
        # Nothing meaningful to look up; an empty path would make the check
        # fail even though the upload itself succeeded.
        print("upload_folder done; no remote path to verify", flush=True)
        return
    _verify_path(api, path_to_verify)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _stage_small_files(base_dir: Path, files: list[str]) -> Path:
|
| 62 |
+
stage_root = Path(tempfile.mkdtemp(prefix="hf_stage_small_", dir="/workspace"))
|
| 63 |
+
for rel_path in files:
|
| 64 |
+
src_path = base_dir / rel_path
|
| 65 |
+
dst_path = stage_root / rel_path
|
| 66 |
+
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
| 67 |
+
os.link(src_path, dst_path)
|
| 68 |
+
return stage_root
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _upload_sparse_files(
    api: HfApi,
    base_dir: Path,
    files: list[str],
    path_in_repo: str,
    commit_message: str,
    verify_path: str,
) -> None:
    """Upload a hand-picked set of files from ``base_dir`` as one commit.

    The files are first mirrored into a temporary staging directory so that
    only they are uploaded; the staging directory is removed afterwards whether
    or not the upload succeeds.

    Args:
        api: Hub client used for the upload.
        base_dir: Directory the entries of ``files`` are relative to.
        files: Relative paths of the files to upload.
        path_in_repo: Destination directory inside the repository.
        commit_message: Commit message for the upload.
        verify_path: Remote path checked after the upload.
    """
    staging_dir = _stage_small_files(base_dir, files)
    try:
        _upload_folder(
            api,
            staging_dir,
            path_in_repo,
            commit_message,
            verify_path=verify_path,
        )
    finally:
        shutil.rmtree(staging_dir, ignore_errors=True)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _stage_large_tree(src_dir: Path, repo_subdir: str) -> Path:
|
| 87 |
+
stage_root = Path(tempfile.mkdtemp(prefix="hf_stage_", dir="/workspace"))
|
| 88 |
+
dst_dir = stage_root / repo_subdir
|
| 89 |
+
dst_dir.parent.mkdir(parents=True, exist_ok=True)
|
| 90 |
+
print(f"hardlink staging src={src_dir} dst={dst_dir}", flush=True)
|
| 91 |
+
shutil.copytree(src_dir, dst_dir, copy_function=os.link)
|
| 92 |
+
return stage_root
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _upload_large_tree(api: HfApi, src_dir: Path, repo_subdir: str) -> None:
    """Upload a large directory tree through a staged copy.

    The tree is staged under ``repo_subdir`` inside a temporary root, pushed
    with ``upload_large_folder``, checked remotely, and the staging root is
    removed afterwards regardless of the outcome.

    Args:
        api: Hub client used for the upload and the follow-up check.
        src_dir: Local directory tree to upload.
        repo_subdir: Destination path of the tree inside the repository.
    """
    staging_dir = _stage_large_tree(src_dir, repo_subdir)
    try:
        print(f"upload_large_folder start repo_subdir={repo_subdir} stage_root={staging_dir}", flush=True)
        upload_options = {
            "repo_id": REPO_ID,
            "repo_type": REPO_TYPE,
            "folder_path": staging_dir,
            "allow_patterns": [f"{repo_subdir}/**"],
            "num_workers": 8,
            "print_report": True,
            "print_report_every": 30,
        }
        api.upload_large_folder(**upload_options)
        _verify_path(api, repo_subdir)
    finally:
        print(f"removing stage_root={staging_dir}", flush=True)
        shutil.rmtree(staging_dir, ignore_errors=True)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def main() -> None:
    """Push the 10K bundle to the Hub as a sequence of small, checked commits.

    Order: top-level docs, openpi reproducibility metadata, scripts, source
    tree, the four norm-stats asset folders, the artifacts bundle, and finally
    the two checkpoint trees.
    """
    token = _read_token()
    api = HfApi(token=token)
    # This file lives at <repo>/artifacts/<bundle>/repro/, so three levels above
    # its directory is the repository root.
    repo_root = Path(__file__).resolve().parents[3]
    openpi_root = repo_root / "openpi"

    print(f"repo_root={repo_root}", flush=True)
    print(f"repo_id={REPO_ID}", flush=True)

    # (base directory, files, destination, commit message, path to verify)
    sparse_jobs = [
        (
            repo_root,
            ["README.md", "REPORT.md"],
            "",
            "Upload 10k report docs",
            "README.md",
        ),
        (
            openpi_root,
            ["README.md", "pyproject.toml", "uv.lock", "examples/convert_jax_model_to_pytorch.py"],
            "openpi",
            "Upload reproducibility metadata",
            "openpi/pyproject.toml",
        ),
    ]
    for base_dir, files, destination, message, probe in sparse_jobs:
        _upload_sparse_files(api, base_dir, files, destination, message, probe)

    # (local folder, destination, commit message)
    assets_root = openpi_root / "assets"
    folder_jobs = [
        (
            openpi_root / "scripts",
            "openpi/scripts",
            "Upload training and eval scripts",
        ),
        (
            openpi_root / "src" / "openpi",
            "openpi/src/openpi",
            "Upload training source tree",
        ),
        (
            assets_root / "pi05_twin_handover_256_packed_baseline_pytorch_2k",
            "openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_2k",
            "Upload 2k baseline norm stats",
        ),
        (
            assets_root / "pi05_twin_handover_256_packed_parallel_pytorch_2k",
            "openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_2k",
            "Upload 2k parallel norm stats",
        ),
        (
            assets_root / "pi05_twin_handover_256_packed_baseline_pytorch_10k",
            "openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k",
            "Upload 10k baseline norm stats",
        ),
        (
            assets_root / "pi05_twin_handover_256_packed_parallel_pytorch_10k",
            "openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k",
            "Upload 10k parallel norm stats",
        ),
        (
            repo_root / "artifacts" / "twin_handover_packed_parallelization_10k_20260309",
            "artifacts/twin_handover_packed_parallelization_10k_20260309",
            "Upload 10k metrics and environment snapshot",
        ),
    ]
    for local_dir, destination, message in folder_jobs:
        _upload_folder(api, local_dir, destination, message)

    # Checkpoints go last and through the large-folder path.
    checkpoint_names = (
        "pi05_twin_handover_256_packed_baseline_pytorch_10k",
        "pi05_twin_handover_256_packed_parallel_pytorch_10k",
    )
    for checkpoint_name in checkpoint_names:
        _upload_large_tree(
            api,
            openpi_root / "checkpoints" / checkpoint_name,
            "openpi/checkpoints/" + checkpoint_name,
        )

    print("incremental upload complete", flush=True)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
|
| 196 |
+
main()
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_10k_followup.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-03-09 15:57:20 UTC] packed 10k runner started
|
| 2 |
+
[2026-03-09 16:03:23 UTC] warm-start equivalence check logged to /workspace/run_logs/warmstart_equivalence_10k.log
|
| 3 |
+
[2026-03-09 16:03:23 UTC] train start config=pi05_twin_handover_256_packed_baseline_pytorch_10k exp=handover_packed_baseline_10k
|
| 4 |
+
[2026-03-09 18:17:03 UTC] train done config=pi05_twin_handover_256_packed_baseline_pytorch_10k exp=handover_packed_baseline_10k
|
| 5 |
+
[2026-03-09 18:17:03 UTC] eval start config=pi05_twin_handover_256_packed_baseline_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000 batches=50
|
| 6 |
+
[2026-03-09 18:23:42 UTC] eval done log=/workspace/run_logs/handover_packed_baseline_10k_val_1000.log
|
| 7 |
+
[2026-03-09 18:23:42 UTC] eval start config=pi05_twin_handover_256_packed_baseline_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000 batches=50
|
| 8 |
+
[2026-03-09 18:28:54 UTC] eval done log=/workspace/run_logs/handover_packed_baseline_10k_val_2000.log
|
| 9 |
+
[2026-03-09 18:28:54 UTC] eval start config=pi05_twin_handover_256_packed_baseline_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000 batches=50
|
| 10 |
+
[2026-03-09 18:33:53 UTC] eval done log=/workspace/run_logs/handover_packed_baseline_10k_val_5000.log
|
| 11 |
+
[2026-03-09 18:33:53 UTC] eval start config=pi05_twin_handover_256_packed_baseline_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000 batches=100
|
| 12 |
+
[2026-03-09 18:41:07 UTC] eval done log=/workspace/run_logs/handover_packed_baseline_10k_val_10000.log
|
| 13 |
+
[2026-03-09 18:41:07 UTC] train start config=pi05_twin_handover_256_packed_parallel_pytorch_10k exp=handover_packed_parallel_10k
|
| 14 |
+
[2026-03-09 21:01:58 UTC] train done config=pi05_twin_handover_256_packed_parallel_pytorch_10k exp=handover_packed_parallel_10k
|
| 15 |
+
[2026-03-09 21:01:58 UTC] eval start config=pi05_twin_handover_256_packed_parallel_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000 batches=50
|
| 16 |
+
[2026-03-09 21:14:35 UTC] eval done log=/workspace/run_logs/handover_packed_parallel_10k_val_1000.log
|
| 17 |
+
[2026-03-09 21:14:35 UTC] eval start config=pi05_twin_handover_256_packed_parallel_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000 batches=50
|
| 18 |
+
[2026-03-09 21:22:39 UTC] eval done log=/workspace/run_logs/handover_packed_parallel_10k_val_2000.log
|
| 19 |
+
[2026-03-09 21:22:40 UTC] eval start config=pi05_twin_handover_256_packed_parallel_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000 batches=50
|
| 20 |
+
[2026-03-09 21:35:26 UTC] eval done log=/workspace/run_logs/handover_packed_parallel_10k_val_5000.log
|
| 21 |
+
[2026-03-09 21:35:26 UTC] eval start config=pi05_twin_handover_256_packed_parallel_pytorch_10k ckpt=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000 batches=100
|
| 22 |
+
[2026-03-09 21:45:53 UTC] eval done log=/workspace/run_logs/handover_packed_parallel_10k_val_10000.log
|
| 23 |
+
[2026-03-09 21:45:53 UTC] packed 10k runner finished
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_1000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_baseline_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.046618 left_arm_loss=0.035624 right_arm_loss=0.057612 imbalance=0.021988 batch_time_s=0.8837
|
| 7 |
+
eval_batch=2 loss=0.011211 left_arm_loss=0.010756 right_arm_loss=0.011666 imbalance=0.000910 batch_time_s=0.2997
|
| 8 |
+
eval_batch=3 loss=0.014357 left_arm_loss=0.020238 right_arm_loss=0.008477 imbalance=0.011761 batch_time_s=0.2246
|
| 9 |
+
eval_batch=4 loss=0.065152 left_arm_loss=0.061398 right_arm_loss=0.068906 imbalance=0.007508 batch_time_s=0.5019
|
| 10 |
+
eval_batch=5 loss=0.045531 left_arm_loss=0.063439 right_arm_loss=0.027622 imbalance=0.035817 batch_time_s=0.2430
|
| 11 |
+
eval_batch=6 loss=0.048678 left_arm_loss=0.092346 right_arm_loss=0.005009 imbalance=0.087337 batch_time_s=0.2351
|
| 12 |
+
eval_batch=7 loss=0.037585 left_arm_loss=0.070659 right_arm_loss=0.004512 imbalance=0.066146 batch_time_s=0.3137
|
| 13 |
+
eval_batch=8 loss=0.016246 left_arm_loss=0.029937 right_arm_loss=0.002555 imbalance=0.027382 batch_time_s=0.5194
|
| 14 |
+
eval_batch=9 loss=0.027677 left_arm_loss=0.053019 right_arm_loss=0.002335 imbalance=0.050684 batch_time_s=0.5638
|
| 15 |
+
eval_batch=10 loss=0.028385 left_arm_loss=0.054602 right_arm_loss=0.002167 imbalance=0.052435 batch_time_s=0.4029
|
| 16 |
+
eval_batch=11 loss=0.029503 left_arm_loss=0.055273 right_arm_loss=0.003732 imbalance=0.051541 batch_time_s=0.3176
|
| 17 |
+
eval_batch=12 loss=0.043170 left_arm_loss=0.082558 right_arm_loss=0.003782 imbalance=0.078776 batch_time_s=0.2468
|
| 18 |
+
eval_batch=13 loss=0.052655 left_arm_loss=0.101415 right_arm_loss=0.003895 imbalance=0.097519 batch_time_s=0.2813
|
| 19 |
+
eval_batch=14 loss=0.067551 left_arm_loss=0.115959 right_arm_loss=0.019144 imbalance=0.096815 batch_time_s=0.3179
|
| 20 |
+
eval_batch=15 loss=0.086284 left_arm_loss=0.032746 right_arm_loss=0.139821 imbalance=0.107075 batch_time_s=0.2862
|
| 21 |
+
eval_batch=16 loss=0.076913 left_arm_loss=0.047023 right_arm_loss=0.106803 imbalance=0.059780 batch_time_s=0.2262
|
| 22 |
+
eval_batch=17 loss=0.055457 left_arm_loss=0.100819 right_arm_loss=0.010095 imbalance=0.090724 batch_time_s=0.2535
|
| 23 |
+
eval_batch=18 loss=0.070395 left_arm_loss=0.077499 right_arm_loss=0.063291 imbalance=0.014207 batch_time_s=0.2412
|
| 24 |
+
eval_batch=19 loss=0.031461 left_arm_loss=0.041223 right_arm_loss=0.021699 imbalance=0.019524 batch_time_s=0.3031
|
| 25 |
+
eval_batch=20 loss=0.026952 left_arm_loss=0.041134 right_arm_loss=0.012770 imbalance=0.028364 batch_time_s=0.2572
|
| 26 |
+
eval_batch=21 loss=0.025842 left_arm_loss=0.040805 right_arm_loss=0.010879 imbalance=0.029926 batch_time_s=0.2815
|
| 27 |
+
eval_batch=22 loss=0.056536 left_arm_loss=0.058355 right_arm_loss=0.054717 imbalance=0.003638 batch_time_s=0.7272
|
| 28 |
+
eval_batch=23 loss=0.077286 left_arm_loss=0.129516 right_arm_loss=0.025057 imbalance=0.104459 batch_time_s=0.2620
|
| 29 |
+
eval_batch=24 loss=0.108069 left_arm_loss=0.203466 right_arm_loss=0.012671 imbalance=0.190795 batch_time_s=0.2676
|
| 30 |
+
eval_batch=25 loss=0.082836 left_arm_loss=0.162669 right_arm_loss=0.003003 imbalance=0.159666 batch_time_s=0.2385
|
| 31 |
+
eval_batch=26 loss=0.036761 left_arm_loss=0.066170 right_arm_loss=0.007353 imbalance=0.058817 batch_time_s=0.2609
|
| 32 |
+
eval_batch=27 loss=0.037065 left_arm_loss=0.065602 right_arm_loss=0.008527 imbalance=0.057075 batch_time_s=0.2331
|
| 33 |
+
eval_batch=28 loss=0.035955 left_arm_loss=0.069021 right_arm_loss=0.002889 imbalance=0.066132 batch_time_s=0.3208
|
| 34 |
+
eval_batch=29 loss=0.060579 left_arm_loss=0.118573 right_arm_loss=0.002585 imbalance=0.115988 batch_time_s=0.3175
|
| 35 |
+
eval_batch=30 loss=0.100699 left_arm_loss=0.197816 right_arm_loss=0.003583 imbalance=0.194233 batch_time_s=0.2390
|
| 36 |
+
eval_batch=31 loss=0.187748 left_arm_loss=0.361111 right_arm_loss=0.014385 imbalance=0.346726 batch_time_s=0.2807
|
| 37 |
+
eval_batch=32 loss=0.108934 left_arm_loss=0.117864 right_arm_loss=0.100004 imbalance=0.017860 batch_time_s=0.3261
|
| 38 |
+
eval_batch=33 loss=0.072897 left_arm_loss=0.035474 right_arm_loss=0.110320 imbalance=0.074846 batch_time_s=0.3380
|
| 39 |
+
eval_batch=34 loss=0.079352 left_arm_loss=0.131144 right_arm_loss=0.027560 imbalance=0.103585 batch_time_s=0.2874
|
| 40 |
+
eval_batch=35 loss=0.062093 left_arm_loss=0.110691 right_arm_loss=0.013495 imbalance=0.097196 batch_time_s=0.2346
|
| 41 |
+
eval_batch=36 loss=0.050124 left_arm_loss=0.062390 right_arm_loss=0.037857 imbalance=0.024533 batch_time_s=0.2303
|
| 42 |
+
eval_batch=37 loss=0.028622 left_arm_loss=0.044315 right_arm_loss=0.012930 imbalance=0.031385 batch_time_s=0.2376
|
| 43 |
+
eval_batch=38 loss=0.064885 left_arm_loss=0.078474 right_arm_loss=0.051295 imbalance=0.027179 batch_time_s=0.2284
|
| 44 |
+
eval_batch=39 loss=0.073221 left_arm_loss=0.047691 right_arm_loss=0.098751 imbalance=0.051060 batch_time_s=0.2703
|
| 45 |
+
eval_batch=40 loss=0.039382 left_arm_loss=0.045306 right_arm_loss=0.033458 imbalance=0.011848 batch_time_s=0.2373
|
| 46 |
+
eval_batch=41 loss=0.071908 left_arm_loss=0.139208 right_arm_loss=0.004608 imbalance=0.134601 batch_time_s=0.2347
|
| 47 |
+
eval_batch=42 loss=0.041757 left_arm_loss=0.079108 right_arm_loss=0.004406 imbalance=0.074702 batch_time_s=0.3166
|
| 48 |
+
eval_batch=43 loss=0.018202 left_arm_loss=0.030615 right_arm_loss=0.005788 imbalance=0.024827 batch_time_s=0.2292
|
| 49 |
+
eval_batch=44 loss=0.020007 left_arm_loss=0.035204 right_arm_loss=0.004809 imbalance=0.030394 batch_time_s=0.2328
|
| 50 |
+
eval_batch=45 loss=0.021428 left_arm_loss=0.038985 right_arm_loss=0.003871 imbalance=0.035115 batch_time_s=0.2296
|
| 51 |
+
eval_batch=46 loss=0.039452 left_arm_loss=0.073343 right_arm_loss=0.005561 imbalance=0.067782 batch_time_s=0.2299
|
| 52 |
+
eval_batch=47 loss=0.131330 left_arm_loss=0.042242 right_arm_loss=0.220417 imbalance=0.178175 batch_time_s=0.2279
|
| 53 |
+
eval_batch=48 loss=0.248957 left_arm_loss=0.015340 right_arm_loss=0.482575 imbalance=0.467235 batch_time_s=0.2493
|
| 54 |
+
eval_batch=49 loss=0.046603 left_arm_loss=0.014231 right_arm_loss=0.078976 imbalance=0.064745 batch_time_s=0.2881
|
| 55 |
+
eval_batch=50 loss=0.146214 left_arm_loss=0.068633 right_arm_loss=0.223796 imbalance=0.155163 batch_time_s=0.2250
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/1000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.061130
|
| 61 |
+
std_val_loss: 0.043921
|
| 62 |
+
mean_left_arm_loss: 0.077421
|
| 63 |
+
std_left_arm_loss: 0.059309
|
| 64 |
+
mean_right_arm_loss: 0.044840
|
| 65 |
+
std_right_arm_loss: 0.080634
|
| 66 |
+
mean_left_joint_loss: 0.082092
|
| 67 |
+
std_left_joint_loss: 0.066740
|
| 68 |
+
mean_left_gripper_loss: 0.044720
|
| 69 |
+
std_left_gripper_loss: 0.088365
|
| 70 |
+
mean_right_joint_loss: 0.046274
|
| 71 |
+
std_right_joint_loss: 0.087919
|
| 72 |
+
mean_right_gripper_loss: 0.034807
|
| 73 |
+
std_right_gripper_loss: 0.076825
|
| 74 |
+
mean_left_right_imbalance: 0.080120
|
| 75 |
+
std_left_right_imbalance: 0.083456
|
| 76 |
+
per_batch_timing_seconds: mean=0.3040 std=0.1266 min=0.2246 max=0.8837
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.110773 left_arm_mae=0.098997 right_arm_mae=0.122549 imbalance_mae=0.023551 batch_time_s=0.2698
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.053114 left_arm_mae=0.054272 right_arm_mae=0.051956 imbalance_mae=0.002316 batch_time_s=0.2901
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.064381 left_arm_mae=0.067260 right_arm_mae=0.061502 imbalance_mae=0.005757 batch_time_s=0.3011
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.109693 left_arm_mae=0.111135 right_arm_mae=0.108252 imbalance_mae=0.002883 batch_time_s=0.2649
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.077213 left_arm_mae=0.098610 right_arm_mae=0.055816 imbalance_mae=0.042794 batch_time_s=0.3085
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.091437 left_arm_mae=0.160364 right_arm_mae=0.022511 imbalance_mae=0.137853 batch_time_s=0.3781
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.091958 left_arm_mae=0.164175 right_arm_mae=0.019740 imbalance_mae=0.144435 batch_time_s=0.3430
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.065797 left_arm_mae=0.112976 right_arm_mae=0.018618 imbalance_mae=0.094358 batch_time_s=0.3558
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.072095 left_arm_mae=0.126277 right_arm_mae=0.017913 imbalance_mae=0.108364 batch_time_s=0.2688
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.079846 left_arm_mae=0.139709 right_arm_mae=0.019984 imbalance_mae=0.119725 batch_time_s=0.2815
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.072607 left_arm_mae=0.124672 right_arm_mae=0.020542 imbalance_mae=0.104131 batch_time_s=0.3351
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.097009 left_arm_mae=0.172318 right_arm_mae=0.021700 imbalance_mae=0.150618 batch_time_s=0.3060
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.102344 left_arm_mae=0.182477 right_arm_mae=0.022212 imbalance_mae=0.160265 batch_time_s=0.3382
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.125010 left_arm_mae=0.204377 right_arm_mae=0.045644 imbalance_mae=0.158733 batch_time_s=0.2661
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.132648 left_arm_mae=0.043128 right_arm_mae=0.222168 imbalance_mae=0.179040 batch_time_s=0.3299
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.109078 left_arm_mae=0.065883 right_arm_mae=0.152274 imbalance_mae=0.086391 batch_time_s=0.3721
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.090938
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.022240
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.120414
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.046606
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.061461
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.058026
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.130966
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.054578
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.046552
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.067920
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.063945
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.062779
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.044077
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.053987
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.095076
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.059464
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3131 std=0.0370 min=0.2649 max=0.3781
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.125925 left_arm_mae=0.112806 right_arm_mae=0.139044 imbalance_mae=0.026238 batch_time_s=0.3393
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.065916 left_arm_mae=0.067937 right_arm_mae=0.063895 imbalance_mae=0.004043 batch_time_s=0.3368
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.075489 left_arm_mae=0.077150 right_arm_mae=0.073827 imbalance_mae=0.003322 batch_time_s=0.3428
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.119956 left_arm_mae=0.122138 right_arm_mae=0.117774 imbalance_mae=0.004364 batch_time_s=0.3683
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.086405 left_arm_mae=0.108638 right_arm_mae=0.064172 imbalance_mae=0.044466 batch_time_s=0.3385
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.102866 left_arm_mae=0.179362 right_arm_mae=0.026370 imbalance_mae=0.152992 batch_time_s=0.4448
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.099225 left_arm_mae=0.175145 right_arm_mae=0.023305 imbalance_mae=0.151840 batch_time_s=0.4423
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.070220 left_arm_mae=0.118236 right_arm_mae=0.022204 imbalance_mae=0.096032 batch_time_s=0.4572
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.080352 left_arm_mae=0.138299 right_arm_mae=0.022405 imbalance_mae=0.115894 batch_time_s=0.3420
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.088702 left_arm_mae=0.154109 right_arm_mae=0.023295 imbalance_mae=0.130813 batch_time_s=0.3360
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.080649 left_arm_mae=0.139099 right_arm_mae=0.022199 imbalance_mae=0.116900 batch_time_s=0.3334
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.105638 left_arm_mae=0.185946 right_arm_mae=0.025330 imbalance_mae=0.160616 batch_time_s=0.3391
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.111236 left_arm_mae=0.196994 right_arm_mae=0.025478 imbalance_mae=0.171516 batch_time_s=0.3333
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.133621 left_arm_mae=0.214837 right_arm_mae=0.052405 imbalance_mae=0.162432 batch_time_s=0.3404
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.147807 left_arm_mae=0.053255 right_arm_mae=0.242359 imbalance_mae=0.189104 batch_time_s=0.3383
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.121862 left_arm_mae=0.073953 right_arm_mae=0.169770 imbalance_mae=0.095817 batch_time_s=0.3914
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.100992
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.023502
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.132369
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.047803
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.069615
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.063335
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.143677
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.056155
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.053215
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.074232
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.072165
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.068555
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.051764
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.054067
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.101649
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.063159
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.3640 std=0.0430 min=0.3333 max=0.4572
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_10000.log
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_baseline_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=100 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.008560 left_arm_loss=0.008768 right_arm_loss=0.008353 imbalance=0.000415 batch_time_s=0.8903
|
| 7 |
+
eval_batch=2 loss=0.001335 left_arm_loss=0.001273 right_arm_loss=0.001396 imbalance=0.000123 batch_time_s=0.2357
|
| 8 |
+
eval_batch=3 loss=0.001543 left_arm_loss=0.001703 right_arm_loss=0.001384 imbalance=0.000319 batch_time_s=0.3261
|
| 9 |
+
eval_batch=4 loss=0.014131 left_arm_loss=0.013319 right_arm_loss=0.014943 imbalance=0.001624 batch_time_s=0.2341
|
| 10 |
+
eval_batch=5 loss=0.010147 left_arm_loss=0.011514 right_arm_loss=0.008780 imbalance=0.002734 batch_time_s=0.2311
|
| 11 |
+
eval_batch=6 loss=0.008744 left_arm_loss=0.016569 right_arm_loss=0.000919 imbalance=0.015649 batch_time_s=0.2410
|
| 12 |
+
eval_batch=7 loss=0.007124 left_arm_loss=0.013695 right_arm_loss=0.000553 imbalance=0.013142 batch_time_s=0.2401
|
| 13 |
+
eval_batch=8 loss=0.009670 left_arm_loss=0.018982 right_arm_loss=0.000359 imbalance=0.018624 batch_time_s=0.2342
|
| 14 |
+
eval_batch=9 loss=0.016381 left_arm_loss=0.032430 right_arm_loss=0.000333 imbalance=0.032097 batch_time_s=0.2400
|
| 15 |
+
eval_batch=10 loss=0.023648 left_arm_loss=0.046801 right_arm_loss=0.000495 imbalance=0.046306 batch_time_s=0.2375
|
| 16 |
+
eval_batch=11 loss=0.019174 left_arm_loss=0.037797 right_arm_loss=0.000552 imbalance=0.037244 batch_time_s=0.2783
|
| 17 |
+
eval_batch=12 loss=0.017045 left_arm_loss=0.033491 right_arm_loss=0.000599 imbalance=0.032891 batch_time_s=0.2462
|
| 18 |
+
eval_batch=13 loss=0.023033 left_arm_loss=0.045382 right_arm_loss=0.000684 imbalance=0.044698 batch_time_s=0.2408
|
| 19 |
+
eval_batch=14 loss=0.005748 left_arm_loss=0.009245 right_arm_loss=0.002251 imbalance=0.006994 batch_time_s=0.2910
|
| 20 |
+
eval_batch=15 loss=0.030839 left_arm_loss=0.010502 right_arm_loss=0.051175 imbalance=0.040673 batch_time_s=0.2852
|
| 21 |
+
eval_batch=16 loss=0.024088 left_arm_loss=0.018003 right_arm_loss=0.030173 imbalance=0.012170 batch_time_s=0.3083
|
| 22 |
+
eval_batch=17 loss=0.012957 left_arm_loss=0.024034 right_arm_loss=0.001880 imbalance=0.022154 batch_time_s=0.2456
|
| 23 |
+
eval_batch=18 loss=0.038324 left_arm_loss=0.056229 right_arm_loss=0.020419 imbalance=0.035809 batch_time_s=0.2414
|
| 24 |
+
eval_batch=19 loss=0.011172 left_arm_loss=0.019652 right_arm_loss=0.002692 imbalance=0.016960 batch_time_s=0.2383
|
| 25 |
+
eval_batch=20 loss=0.021376 left_arm_loss=0.039737 right_arm_loss=0.003016 imbalance=0.036721 batch_time_s=0.2315
|
| 26 |
+
eval_batch=21 loss=0.092819 left_arm_loss=0.183719 right_arm_loss=0.001919 imbalance=0.181800 batch_time_s=0.2421
|
| 27 |
+
eval_batch=22 loss=0.107753 left_arm_loss=0.187696 right_arm_loss=0.027809 imbalance=0.159887 batch_time_s=0.2272
|
| 28 |
+
eval_batch=23 loss=0.039306 left_arm_loss=0.070818 right_arm_loss=0.007795 imbalance=0.063023 batch_time_s=0.2290
|
| 29 |
+
eval_batch=24 loss=0.105801 left_arm_loss=0.210136 right_arm_loss=0.001466 imbalance=0.208669 batch_time_s=0.2295
|
| 30 |
+
eval_batch=25 loss=0.074643 left_arm_loss=0.148833 right_arm_loss=0.000454 imbalance=0.148380 batch_time_s=0.2341
|
| 31 |
+
eval_batch=26 loss=0.053766 left_arm_loss=0.106145 right_arm_loss=0.001387 imbalance=0.104758 batch_time_s=0.2310
|
| 32 |
+
eval_batch=27 loss=0.059274 left_arm_loss=0.117549 right_arm_loss=0.000999 imbalance=0.116551 batch_time_s=0.2295
|
| 33 |
+
eval_batch=28 loss=0.024668 left_arm_loss=0.048976 right_arm_loss=0.000361 imbalance=0.048615 batch_time_s=0.2339
|
| 34 |
+
eval_batch=29 loss=0.033007 left_arm_loss=0.065406 right_arm_loss=0.000608 imbalance=0.064798 batch_time_s=0.2921
|
| 35 |
+
eval_batch=30 loss=0.025443 left_arm_loss=0.049679 right_arm_loss=0.001208 imbalance=0.048470 batch_time_s=0.2567
|
| 36 |
+
eval_batch=31 loss=0.047262 left_arm_loss=0.092545 right_arm_loss=0.001978 imbalance=0.090567 batch_time_s=0.2269
|
| 37 |
+
eval_batch=32 loss=0.017943 left_arm_loss=0.028981 right_arm_loss=0.006904 imbalance=0.022077 batch_time_s=0.2486
|
| 38 |
+
eval_batch=33 loss=0.049076 left_arm_loss=0.023606 right_arm_loss=0.074545 imbalance=0.050939 batch_time_s=0.4874
|
| 39 |
+
eval_batch=34 loss=0.078728 left_arm_loss=0.128720 right_arm_loss=0.028735 imbalance=0.099985 batch_time_s=0.2288
|
| 40 |
+
eval_batch=35 loss=0.027250 left_arm_loss=0.051945 right_arm_loss=0.002554 imbalance=0.049391 batch_time_s=0.2350
|
| 41 |
+
eval_batch=36 loss=0.007188 left_arm_loss=0.003737 right_arm_loss=0.010639 imbalance=0.006902 batch_time_s=0.2371
|
| 42 |
+
eval_batch=37 loss=0.001722 left_arm_loss=0.002658 right_arm_loss=0.000786 imbalance=0.001872 batch_time_s=0.2310
|
| 43 |
+
eval_batch=38 loss=0.024799 left_arm_loss=0.031059 right_arm_loss=0.018538 imbalance=0.012521 batch_time_s=0.2716
|
| 44 |
+
eval_batch=39 loss=0.022045 left_arm_loss=0.009727 right_arm_loss=0.034363 imbalance=0.024636 batch_time_s=0.2294
|
| 45 |
+
eval_batch=40 loss=0.010673 left_arm_loss=0.012038 right_arm_loss=0.009308 imbalance=0.002730 batch_time_s=0.3581
|
| 46 |
+
eval_batch=41 loss=0.019520 left_arm_loss=0.038111 right_arm_loss=0.000930 imbalance=0.037180 batch_time_s=0.2449
|
| 47 |
+
eval_batch=42 loss=0.010768 left_arm_loss=0.020798 right_arm_loss=0.000738 imbalance=0.020060 batch_time_s=0.2284
|
| 48 |
+
eval_batch=43 loss=0.002843 left_arm_loss=0.005206 right_arm_loss=0.000481 imbalance=0.004725 batch_time_s=0.2434
|
| 49 |
+
eval_batch=44 loss=0.001429 left_arm_loss=0.002409 right_arm_loss=0.000449 imbalance=0.001960 batch_time_s=0.2829
|
| 50 |
+
eval_batch=45 loss=0.003741 left_arm_loss=0.006856 right_arm_loss=0.000625 imbalance=0.006232 batch_time_s=0.2430
|
| 51 |
+
eval_batch=46 loss=0.011082 left_arm_loss=0.021369 right_arm_loss=0.000795 imbalance=0.020575 batch_time_s=0.2387
|
| 52 |
+
eval_batch=47 loss=0.102886 left_arm_loss=0.004540 right_arm_loss=0.201233 imbalance=0.196693 batch_time_s=0.2470
|
| 53 |
+
eval_batch=48 loss=0.097341 left_arm_loss=0.003805 right_arm_loss=0.190878 imbalance=0.187074 batch_time_s=0.2359
|
| 54 |
+
eval_batch=49 loss=0.006523 left_arm_loss=0.002001 right_arm_loss=0.011044 imbalance=0.009043 batch_time_s=0.2302
|
| 55 |
+
eval_batch=50 loss=0.046598 left_arm_loss=0.022808 right_arm_loss=0.070389 imbalance=0.047580 batch_time_s=0.2539
|
| 56 |
+
eval_batch=51 loss=0.009135 left_arm_loss=0.013602 right_arm_loss=0.004667 imbalance=0.008934 batch_time_s=0.2427
|
| 57 |
+
eval_batch=52 loss=0.011514 left_arm_loss=0.016650 right_arm_loss=0.006378 imbalance=0.010272 batch_time_s=0.2395
|
| 58 |
+
eval_batch=53 loss=0.001132 left_arm_loss=0.001374 right_arm_loss=0.000890 imbalance=0.000485 batch_time_s=0.2332
|
| 59 |
+
eval_batch=54 loss=0.002559 left_arm_loss=0.002530 right_arm_loss=0.002589 imbalance=0.000059 batch_time_s=0.2354
|
| 60 |
+
eval_batch=55 loss=0.017277 left_arm_loss=0.018273 right_arm_loss=0.016282 imbalance=0.001991 batch_time_s=0.2345
|
| 61 |
+
eval_batch=56 loss=0.024251 left_arm_loss=0.005012 right_arm_loss=0.043491 imbalance=0.038479 batch_time_s=0.2376
|
| 62 |
+
eval_batch=57 loss=0.009270 left_arm_loss=0.013307 right_arm_loss=0.005233 imbalance=0.008074 batch_time_s=0.2310
|
| 63 |
+
eval_batch=58 loss=0.008486 left_arm_loss=0.015714 right_arm_loss=0.001258 imbalance=0.014456 batch_time_s=0.2297
|
| 64 |
+
eval_batch=59 loss=0.012955 left_arm_loss=0.025000 right_arm_loss=0.000911 imbalance=0.024089 batch_time_s=0.2376
|
| 65 |
+
eval_batch=60 loss=0.004419 left_arm_loss=0.007966 right_arm_loss=0.000873 imbalance=0.007093 batch_time_s=0.2301
|
| 66 |
+
eval_batch=61 loss=0.001311 left_arm_loss=0.002374 right_arm_loss=0.000248 imbalance=0.002127 batch_time_s=0.2271
|
| 67 |
+
eval_batch=62 loss=0.001412 left_arm_loss=0.002614 right_arm_loss=0.000210 imbalance=0.002404 batch_time_s=0.3047
|
| 68 |
+
eval_batch=63 loss=0.003703 left_arm_loss=0.006794 right_arm_loss=0.000613 imbalance=0.006181 batch_time_s=0.2590
|
| 69 |
+
eval_batch=64 loss=0.003989 left_arm_loss=0.007039 right_arm_loss=0.000940 imbalance=0.006100 batch_time_s=0.2326
|
| 70 |
+
eval_batch=65 loss=0.011913 left_arm_loss=0.002230 right_arm_loss=0.021597 imbalance=0.019367 batch_time_s=0.2301
|
| 71 |
+
eval_batch=66 loss=0.003909 left_arm_loss=0.004961 right_arm_loss=0.002856 imbalance=0.002105 batch_time_s=0.2372
|
| 72 |
+
eval_batch=67 loss=0.005047 left_arm_loss=0.009273 right_arm_loss=0.000820 imbalance=0.008453 batch_time_s=0.2308
|
| 73 |
+
eval_batch=68 loss=0.018646 left_arm_loss=0.024080 right_arm_loss=0.013213 imbalance=0.010868 batch_time_s=0.2338
|
| 74 |
+
eval_batch=69 loss=0.035457 left_arm_loss=0.055589 right_arm_loss=0.015326 imbalance=0.040263 batch_time_s=0.2278
|
| 75 |
+
eval_batch=70 loss=0.023561 left_arm_loss=0.014324 right_arm_loss=0.032799 imbalance=0.018475 batch_time_s=0.2263
|
| 76 |
+
eval_batch=71 loss=0.012966 left_arm_loss=0.005677 right_arm_loss=0.020255 imbalance=0.014578 batch_time_s=0.2334
|
| 77 |
+
eval_batch=72 loss=0.030692 left_arm_loss=0.052390 right_arm_loss=0.008995 imbalance=0.043395 batch_time_s=0.2307
|
| 78 |
+
eval_batch=73 loss=0.019974 left_arm_loss=0.037737 right_arm_loss=0.002212 imbalance=0.035525 batch_time_s=0.2324
|
| 79 |
+
eval_batch=74 loss=0.017243 left_arm_loss=0.033196 right_arm_loss=0.001290 imbalance=0.031907 batch_time_s=0.2301
|
| 80 |
+
eval_batch=75 loss=0.006222 left_arm_loss=0.012136 right_arm_loss=0.000308 imbalance=0.011828 batch_time_s=0.2352
|
| 81 |
+
eval_batch=76 loss=0.022304 left_arm_loss=0.036135 right_arm_loss=0.008474 imbalance=0.027661 batch_time_s=0.2309
|
| 82 |
+
eval_batch=77 loss=0.007094 left_arm_loss=0.013553 right_arm_loss=0.000635 imbalance=0.012918 batch_time_s=0.2285
|
| 83 |
+
eval_batch=78 loss=0.027930 left_arm_loss=0.004931 right_arm_loss=0.050929 imbalance=0.045998 batch_time_s=0.2580
|
| 84 |
+
eval_batch=79 loss=0.073179 left_arm_loss=0.013695 right_arm_loss=0.132664 imbalance=0.118969 batch_time_s=0.2436
|
| 85 |
+
eval_batch=80 loss=0.052136 left_arm_loss=0.014955 right_arm_loss=0.089316 imbalance=0.074362 batch_time_s=0.2320
|
| 86 |
+
eval_batch=81 loss=0.021709 left_arm_loss=0.017623 right_arm_loss=0.025794 imbalance=0.008171 batch_time_s=0.2394
|
| 87 |
+
eval_batch=82 loss=0.008380 left_arm_loss=0.006955 right_arm_loss=0.009806 imbalance=0.002851 batch_time_s=0.2371
|
| 88 |
+
eval_batch=83 loss=0.011753 left_arm_loss=0.013604 right_arm_loss=0.009902 imbalance=0.003702 batch_time_s=0.2396
|
| 89 |
+
eval_batch=84 loss=0.007405 left_arm_loss=0.010580 right_arm_loss=0.004231 imbalance=0.006349 batch_time_s=0.2440
|
| 90 |
+
eval_batch=85 loss=0.017654 left_arm_loss=0.011350 right_arm_loss=0.023958 imbalance=0.012608 batch_time_s=0.2373
|
| 91 |
+
eval_batch=86 loss=0.015883 left_arm_loss=0.002756 right_arm_loss=0.029009 imbalance=0.026253 batch_time_s=0.2370
|
| 92 |
+
eval_batch=87 loss=0.006524 left_arm_loss=0.005036 right_arm_loss=0.008012 imbalance=0.002976 batch_time_s=0.2359
|
| 93 |
+
eval_batch=88 loss=0.016466 left_arm_loss=0.031930 right_arm_loss=0.001001 imbalance=0.030929 batch_time_s=0.2377
|
| 94 |
+
eval_batch=89 loss=0.015712 left_arm_loss=0.027821 right_arm_loss=0.003602 imbalance=0.024219 batch_time_s=0.2324
|
| 95 |
+
eval_batch=90 loss=0.002218 left_arm_loss=0.004046 right_arm_loss=0.000389 imbalance=0.003657 batch_time_s=0.2310
|
| 96 |
+
eval_batch=91 loss=0.001704 left_arm_loss=0.002999 right_arm_loss=0.000409 imbalance=0.002590 batch_time_s=0.2355
|
| 97 |
+
eval_batch=92 loss=0.001195 left_arm_loss=0.002064 right_arm_loss=0.000326 imbalance=0.001738 batch_time_s=0.2340
|
| 98 |
+
eval_batch=93 loss=0.007939 left_arm_loss=0.014865 right_arm_loss=0.001012 imbalance=0.013853 batch_time_s=0.2389
|
| 99 |
+
eval_batch=94 loss=0.003085 left_arm_loss=0.003954 right_arm_loss=0.002217 imbalance=0.001737 batch_time_s=0.2660
|
| 100 |
+
eval_batch=95 loss=0.008923 left_arm_loss=0.003261 right_arm_loss=0.014584 imbalance=0.011323 batch_time_s=0.2413
|
| 101 |
+
eval_batch=96 loss=0.007102 left_arm_loss=0.008076 right_arm_loss=0.006127 imbalance=0.001949 batch_time_s=0.2398
|
| 102 |
+
eval_batch=97 loss=0.014454 left_arm_loss=0.026941 right_arm_loss=0.001967 imbalance=0.024973 batch_time_s=0.2452
|
| 103 |
+
eval_batch=98 loss=0.033711 left_arm_loss=0.063071 right_arm_loss=0.004351 imbalance=0.058720 batch_time_s=0.2431
|
| 104 |
+
eval_batch=99 loss=0.020718 left_arm_loss=0.037961 right_arm_loss=0.003476 imbalance=0.034484 batch_time_s=0.2347
|
| 105 |
+
eval_batch=100 loss=0.021343 left_arm_loss=0.036917 right_arm_loss=0.005769 imbalance=0.031148 batch_time_s=0.2456
|
| 106 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 107 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/10000
|
| 108 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 109 |
+
num_batches: 100
|
| 110 |
+
mean_val_loss: 0.022345
|
| 111 |
+
std_val_loss: 0.024337
|
| 112 |
+
mean_left_arm_loss: 0.029659
|
| 113 |
+
std_left_arm_loss: 0.039896
|
| 114 |
+
mean_right_arm_loss: 0.015031
|
| 115 |
+
std_right_arm_loss: 0.032929
|
| 116 |
+
mean_left_joint_loss: 0.031507
|
| 117 |
+
std_left_joint_loss: 0.044637
|
| 118 |
+
mean_left_gripper_loss: 0.016725
|
| 119 |
+
std_left_gripper_loss: 0.040894
|
| 120 |
+
mean_right_joint_loss: 0.015776
|
| 121 |
+
std_right_joint_loss: 0.036308
|
| 122 |
+
mean_right_gripper_loss: 0.009818
|
| 123 |
+
std_right_gripper_loss: 0.028543
|
| 124 |
+
mean_left_right_imbalance: 0.034067
|
| 125 |
+
std_left_right_imbalance: 0.045126
|
| 126 |
+
per_batch_timing_seconds: mean=0.2524 std=0.0719 min=0.2263 max=0.8903
|
| 127 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 128 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 129 |
+
weight_loading_missing_keys: []
|
| 130 |
+
weight_loading_unexpected_keys: []
|
| 131 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.029301 left_arm_mae=0.027942 right_arm_mae=0.030659 imbalance_mae=0.002717 batch_time_s=0.2769
|
| 132 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.015433 left_arm_mae=0.017666 right_arm_mae=0.013200 imbalance_mae=0.004466 batch_time_s=0.2659
|
| 133 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.016550 left_arm_mae=0.019292 right_arm_mae=0.013809 imbalance_mae=0.005483 batch_time_s=0.2699
|
| 134 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.025598 left_arm_mae=0.022401 right_arm_mae=0.028794 imbalance_mae=0.006393 batch_time_s=0.2676
|
| 135 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.026846 left_arm_mae=0.028538 right_arm_mae=0.025154 imbalance_mae=0.003384 batch_time_s=0.2658
|
| 136 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.026403 left_arm_mae=0.044187 right_arm_mae=0.008619 imbalance_mae=0.035567 batch_time_s=0.2680
|
| 137 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.026617 left_arm_mae=0.047235 right_arm_mae=0.005999 imbalance_mae=0.041236 batch_time_s=0.2669
|
| 138 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.022871 left_arm_mae=0.039870 right_arm_mae=0.005872 imbalance_mae=0.033999 batch_time_s=0.2625
|
| 139 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.034925 left_arm_mae=0.062935 right_arm_mae=0.006915 imbalance_mae=0.056020 batch_time_s=0.2726
|
| 140 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.043991 left_arm_mae=0.080034 right_arm_mae=0.007949 imbalance_mae=0.072084 batch_time_s=0.2678
|
| 141 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.034402 left_arm_mae=0.062883 right_arm_mae=0.005922 imbalance_mae=0.056961 batch_time_s=0.2676
|
| 142 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.037064 left_arm_mae=0.067084 right_arm_mae=0.007043 imbalance_mae=0.060041 batch_time_s=0.3375
|
| 143 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.033553 left_arm_mae=0.059772 right_arm_mae=0.007334 imbalance_mae=0.052438 batch_time_s=0.2909
|
| 144 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.024011 left_arm_mae=0.037121 right_arm_mae=0.010900 imbalance_mae=0.026221 batch_time_s=0.2666
|
| 145 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.043051 left_arm_mae=0.017745 right_arm_mae=0.068357 imbalance_mae=0.050613 batch_time_s=0.3469
|
| 146 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.038342 left_arm_mae=0.022290 right_arm_mae=0.054393 imbalance_mae=0.032104 batch_time_s=0.2756
|
| 147 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 148 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.029935
|
| 149 |
+
sample_eval_num_steps_4_std_masked_mae: 0.008200
|
| 150 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.041062
|
| 151 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.019621
|
| 152 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.018807
|
| 153 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.018117
|
| 154 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.044440
|
| 155 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.022950
|
| 156 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.017416
|
| 157 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.016394
|
| 158 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.019500
|
| 159 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.019305
|
| 160 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.013963
|
| 161 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.019504
|
| 162 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.033733
|
| 163 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.022691
|
| 164 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.2793 std=0.0247 min=0.2625 max=0.3469
|
| 165 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.031892 left_arm_mae=0.030478 right_arm_mae=0.033307 imbalance_mae=0.002830 batch_time_s=0.3695
|
| 166 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.019175 left_arm_mae=0.021774 right_arm_mae=0.016576 imbalance_mae=0.005198 batch_time_s=0.3460
|
| 167 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.019252 left_arm_mae=0.020680 right_arm_mae=0.017824 imbalance_mae=0.002856 batch_time_s=0.4095
|
| 168 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.027861 left_arm_mae=0.023648 right_arm_mae=0.032074 imbalance_mae=0.008427 batch_time_s=0.3446
|
| 169 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.027615 left_arm_mae=0.028965 right_arm_mae=0.026265 imbalance_mae=0.002700 batch_time_s=0.3563
|
| 170 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.025685 left_arm_mae=0.044061 right_arm_mae=0.007309 imbalance_mae=0.036752 batch_time_s=0.3497
|
| 171 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.026870 left_arm_mae=0.048038 right_arm_mae=0.005701 imbalance_mae=0.042337 batch_time_s=0.3481
|
| 172 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.021825 left_arm_mae=0.037125 right_arm_mae=0.006526 imbalance_mae=0.030599 batch_time_s=0.4133
|
| 173 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.033276 left_arm_mae=0.061431 right_arm_mae=0.005121 imbalance_mae=0.056311 batch_time_s=0.3536
|
| 174 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.041929 left_arm_mae=0.078845 right_arm_mae=0.005012 imbalance_mae=0.073833 batch_time_s=0.3504
|
| 175 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.034225 left_arm_mae=0.062573 right_arm_mae=0.005876 imbalance_mae=0.056697 batch_time_s=0.4244
|
| 176 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.036636 left_arm_mae=0.066864 right_arm_mae=0.006408 imbalance_mae=0.060457 batch_time_s=0.4686
|
| 177 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.033938 left_arm_mae=0.061894 right_arm_mae=0.005982 imbalance_mae=0.055912 batch_time_s=0.4191
|
| 178 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.023608 left_arm_mae=0.038211 right_arm_mae=0.009005 imbalance_mae=0.029206 batch_time_s=0.4448
|
| 179 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.043359 left_arm_mae=0.016845 right_arm_mae=0.069874 imbalance_mae=0.053029 batch_time_s=0.3755
|
| 180 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.037564 left_arm_mae=0.019482 right_arm_mae=0.055646 imbalance_mae=0.036164 batch_time_s=0.3432
|
| 181 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 182 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.030294
|
| 183 |
+
sample_eval_num_steps_10_std_masked_mae: 0.007277
|
| 184 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.041307
|
| 185 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.019181
|
| 186 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.019282
|
| 187 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.019077
|
| 188 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.045179
|
| 189 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.022508
|
| 190 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.014207
|
| 191 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.016425
|
| 192 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.020231
|
| 193 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.020465
|
| 194 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.012640
|
| 195 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.018571
|
| 196 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.034582
|
| 197 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.023261
|
| 198 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.3823 std=0.0398 min=0.3432 max=0.4686
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_2000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_baseline_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.026397 left_arm_loss=0.019776 right_arm_loss=0.033018 imbalance=0.013242 batch_time_s=0.8257
|
| 7 |
+
eval_batch=2 loss=0.009284 left_arm_loss=0.009409 right_arm_loss=0.009160 imbalance=0.000248 batch_time_s=0.2272
|
| 8 |
+
eval_batch=3 loss=0.011369 left_arm_loss=0.014311 right_arm_loss=0.008427 imbalance=0.005883 batch_time_s=0.2242
|
| 9 |
+
eval_batch=4 loss=0.036861 left_arm_loss=0.035507 right_arm_loss=0.038216 imbalance=0.002709 batch_time_s=0.2250
|
| 10 |
+
eval_batch=5 loss=0.025353 left_arm_loss=0.036053 right_arm_loss=0.014654 imbalance=0.021399 batch_time_s=0.2409
|
| 11 |
+
eval_batch=6 loss=0.046137 left_arm_loss=0.089046 right_arm_loss=0.003228 imbalance=0.085817 batch_time_s=0.2292
|
| 12 |
+
eval_batch=7 loss=0.032558 left_arm_loss=0.062428 right_arm_loss=0.002688 imbalance=0.059740 batch_time_s=0.2458
|
| 13 |
+
eval_batch=8 loss=0.009889 left_arm_loss=0.018301 right_arm_loss=0.001477 imbalance=0.016824 batch_time_s=0.2339
|
| 14 |
+
eval_batch=9 loss=0.012928 left_arm_loss=0.024751 right_arm_loss=0.001106 imbalance=0.023645 batch_time_s=0.2444
|
| 15 |
+
eval_batch=10 loss=0.019660 left_arm_loss=0.037839 right_arm_loss=0.001480 imbalance=0.036359 batch_time_s=0.2305
|
| 16 |
+
eval_batch=11 loss=0.018686 left_arm_loss=0.034757 right_arm_loss=0.002615 imbalance=0.032142 batch_time_s=0.2271
|
| 17 |
+
eval_batch=12 loss=0.021816 left_arm_loss=0.041631 right_arm_loss=0.002000 imbalance=0.039631 batch_time_s=0.2597
|
| 18 |
+
eval_batch=13 loss=0.043429 left_arm_loss=0.084094 right_arm_loss=0.002765 imbalance=0.081329 batch_time_s=0.2239
|
| 19 |
+
eval_batch=14 loss=0.042424 left_arm_loss=0.069546 right_arm_loss=0.015301 imbalance=0.054244 batch_time_s=0.2338
|
| 20 |
+
eval_batch=15 loss=0.060247 left_arm_loss=0.023033 right_arm_loss=0.097461 imbalance=0.074428 batch_time_s=0.2298
|
| 21 |
+
eval_batch=16 loss=0.078912 left_arm_loss=0.039402 right_arm_loss=0.118422 imbalance=0.079020 batch_time_s=0.2336
|
| 22 |
+
eval_batch=17 loss=0.034687 left_arm_loss=0.061929 right_arm_loss=0.007444 imbalance=0.054485 batch_time_s=0.2323
|
| 23 |
+
eval_batch=18 loss=0.049051 left_arm_loss=0.068182 right_arm_loss=0.029920 imbalance=0.038262 batch_time_s=0.2351
|
| 24 |
+
eval_batch=19 loss=0.021375 left_arm_loss=0.026341 right_arm_loss=0.016409 imbalance=0.009933 batch_time_s=0.3373
|
| 25 |
+
eval_batch=20 loss=0.012990 left_arm_loss=0.014628 right_arm_loss=0.011352 imbalance=0.003276 batch_time_s=0.2283
|
| 26 |
+
eval_batch=21 loss=0.024979 left_arm_loss=0.038678 right_arm_loss=0.011280 imbalance=0.027397 batch_time_s=0.2371
|
| 27 |
+
eval_batch=22 loss=0.056881 left_arm_loss=0.061898 right_arm_loss=0.051865 imbalance=0.010034 batch_time_s=0.2315
|
| 28 |
+
eval_batch=23 loss=0.054200 left_arm_loss=0.092598 right_arm_loss=0.015802 imbalance=0.076795 batch_time_s=0.2385
|
| 29 |
+
eval_batch=24 loss=0.052739 left_arm_loss=0.099767 right_arm_loss=0.005711 imbalance=0.094056 batch_time_s=0.2319
|
| 30 |
+
eval_batch=25 loss=0.056198 left_arm_loss=0.111241 right_arm_loss=0.001156 imbalance=0.110086 batch_time_s=0.2370
|
| 31 |
+
eval_batch=26 loss=0.029912 left_arm_loss=0.055838 right_arm_loss=0.003987 imbalance=0.051851 batch_time_s=0.2339
|
| 32 |
+
eval_batch=27 loss=0.023166 left_arm_loss=0.042513 right_arm_loss=0.003819 imbalance=0.038695 batch_time_s=0.2326
|
| 33 |
+
eval_batch=28 loss=0.021049 left_arm_loss=0.040540 right_arm_loss=0.001557 imbalance=0.038983 batch_time_s=0.2299
|
| 34 |
+
eval_batch=29 loss=0.038624 left_arm_loss=0.074662 right_arm_loss=0.002585 imbalance=0.072077 batch_time_s=0.2458
|
| 35 |
+
eval_batch=30 loss=0.054455 left_arm_loss=0.106275 right_arm_loss=0.002636 imbalance=0.103639 batch_time_s=0.2291
|
| 36 |
+
eval_batch=31 loss=0.082370 left_arm_loss=0.156108 right_arm_loss=0.008633 imbalance=0.147475 batch_time_s=0.2312
|
| 37 |
+
eval_batch=32 loss=0.076872 left_arm_loss=0.071586 right_arm_loss=0.082158 imbalance=0.010573 batch_time_s=0.2272
|
| 38 |
+
eval_batch=33 loss=0.048834 left_arm_loss=0.020280 right_arm_loss=0.077388 imbalance=0.057109 batch_time_s=0.2342
|
| 39 |
+
eval_batch=34 loss=0.073862 left_arm_loss=0.108661 right_arm_loss=0.039063 imbalance=0.069597 batch_time_s=0.2314
|
| 40 |
+
eval_batch=35 loss=0.045368 left_arm_loss=0.079731 right_arm_loss=0.011004 imbalance=0.068727 batch_time_s=0.2324
|
| 41 |
+
eval_batch=36 loss=0.026741 left_arm_loss=0.018015 right_arm_loss=0.035468 imbalance=0.017453 batch_time_s=0.2344
|
| 42 |
+
eval_batch=37 loss=0.011712 left_arm_loss=0.014219 right_arm_loss=0.009205 imbalance=0.005013 batch_time_s=0.2332
|
| 43 |
+
eval_batch=38 loss=0.046667 left_arm_loss=0.060838 right_arm_loss=0.032495 imbalance=0.028343 batch_time_s=0.2303
|
| 44 |
+
eval_batch=39 loss=0.056612 left_arm_loss=0.036946 right_arm_loss=0.076279 imbalance=0.039333 batch_time_s=0.2286
|
| 45 |
+
eval_batch=40 loss=0.026539 left_arm_loss=0.029187 right_arm_loss=0.023891 imbalance=0.005296 batch_time_s=0.2277
|
| 46 |
+
eval_batch=41 loss=0.057449 left_arm_loss=0.112006 right_arm_loss=0.002892 imbalance=0.109114 batch_time_s=0.2290
|
| 47 |
+
eval_batch=42 loss=0.025764 left_arm_loss=0.048528 right_arm_loss=0.003000 imbalance=0.045528 batch_time_s=0.2396
|
| 48 |
+
eval_batch=43 loss=0.011870 left_arm_loss=0.020990 right_arm_loss=0.002750 imbalance=0.018240 batch_time_s=0.2340
|
| 49 |
+
eval_batch=44 loss=0.013696 left_arm_loss=0.025204 right_arm_loss=0.002189 imbalance=0.023015 batch_time_s=0.2451
|
| 50 |
+
eval_batch=45 loss=0.018640 left_arm_loss=0.034554 right_arm_loss=0.002726 imbalance=0.031828 batch_time_s=0.2325
|
| 51 |
+
eval_batch=46 loss=0.026927 left_arm_loss=0.049770 right_arm_loss=0.004084 imbalance=0.045686 batch_time_s=0.2301
|
| 52 |
+
eval_batch=47 loss=0.133834 left_arm_loss=0.017368 right_arm_loss=0.250299 imbalance=0.232931 batch_time_s=0.2997
|
| 53 |
+
eval_batch=48 loss=0.162658 left_arm_loss=0.010945 right_arm_loss=0.314371 imbalance=0.303426 batch_time_s=0.2318
|
| 54 |
+
eval_batch=49 loss=0.020931 left_arm_loss=0.005021 right_arm_loss=0.036841 imbalance=0.031820 batch_time_s=0.2316
|
| 55 |
+
eval_batch=50 loss=0.086151 left_arm_loss=0.041024 right_arm_loss=0.131277 imbalance=0.090253 batch_time_s=0.2341
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/2000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.041595
|
| 61 |
+
std_val_loss: 0.030015
|
| 62 |
+
mean_left_arm_loss: 0.049919
|
| 63 |
+
std_left_arm_loss: 0.033208
|
| 64 |
+
mean_right_arm_loss: 0.033271
|
| 65 |
+
std_right_arm_loss: 0.059873
|
| 66 |
+
mean_left_joint_loss: 0.051501
|
| 67 |
+
std_left_joint_loss: 0.035502
|
| 68 |
+
mean_left_gripper_loss: 0.038846
|
| 69 |
+
std_left_gripper_loss: 0.082622
|
| 70 |
+
mean_right_joint_loss: 0.034159
|
| 71 |
+
std_right_joint_loss: 0.066139
|
| 72 |
+
mean_right_gripper_loss: 0.027055
|
| 73 |
+
std_right_gripper_loss: 0.066540
|
| 74 |
+
mean_left_right_imbalance: 0.054740
|
| 75 |
+
std_left_right_imbalance: 0.055247
|
| 76 |
+
per_batch_timing_seconds: mean=0.2487 std=0.0844 min=0.2239 max=0.8257
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.064759 left_arm_mae=0.058327 right_arm_mae=0.071190 imbalance_mae=0.012863 batch_time_s=0.2814
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.040229 left_arm_mae=0.048076 right_arm_mae=0.032383 imbalance_mae=0.015693 batch_time_s=0.2705
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.039452 left_arm_mae=0.043037 right_arm_mae=0.035867 imbalance_mae=0.007170 batch_time_s=0.2690
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.057066 left_arm_mae=0.061438 right_arm_mae=0.052694 imbalance_mae=0.008743 batch_time_s=0.2702
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.051264 left_arm_mae=0.059876 right_arm_mae=0.042652 imbalance_mae=0.017224 batch_time_s=0.2814
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.079315 left_arm_mae=0.141172 right_arm_mae=0.017458 imbalance_mae=0.123714 batch_time_s=0.2681
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.064131 left_arm_mae=0.113531 right_arm_mae=0.014731 imbalance_mae=0.098800 batch_time_s=0.2998
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.036546 left_arm_mae=0.060300 right_arm_mae=0.012791 imbalance_mae=0.047508 batch_time_s=0.2774
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.042204 left_arm_mae=0.072879 right_arm_mae=0.011529 imbalance_mae=0.061350 batch_time_s=0.3000
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.053692 left_arm_mae=0.094078 right_arm_mae=0.013305 imbalance_mae=0.080773 batch_time_s=0.2674
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.047388 left_arm_mae=0.079979 right_arm_mae=0.014798 imbalance_mae=0.065181 batch_time_s=0.3285
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.050189 left_arm_mae=0.085965 right_arm_mae=0.014413 imbalance_mae=0.071552 batch_time_s=0.3060
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.073749 left_arm_mae=0.132138 right_arm_mae=0.015360 imbalance_mae=0.116778 batch_time_s=0.3753
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.082068 left_arm_mae=0.126276 right_arm_mae=0.037859 imbalance_mae=0.088417 batch_time_s=0.3704
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.084759 left_arm_mae=0.030757 right_arm_mae=0.138762 imbalance_mae=0.108006 batch_time_s=0.3056
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.097239 left_arm_mae=0.051779 right_arm_mae=0.142698 imbalance_mae=0.090919 batch_time_s=0.3393
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.060253
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.017936
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.078725
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.032786
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.041781
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.040910
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.083688
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.036089
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.043985
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.072901
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.042767
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.041669
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.034874
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.058769
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.063418
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.039412
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3006 std=0.0345 min=0.2674 max=0.3753
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.071056 left_arm_mae=0.066950 right_arm_mae=0.075162 imbalance_mae=0.008212 batch_time_s=0.4220
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.047812 left_arm_mae=0.056756 right_arm_mae=0.038868 imbalance_mae=0.017888 batch_time_s=0.3396
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.045826 left_arm_mae=0.051423 right_arm_mae=0.040229 imbalance_mae=0.011195 batch_time_s=0.3502
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.065155 left_arm_mae=0.070466 right_arm_mae=0.059845 imbalance_mae=0.010622 batch_time_s=0.3414
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.057679 left_arm_mae=0.065192 right_arm_mae=0.050167 imbalance_mae=0.015025 batch_time_s=0.3405
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.084349 left_arm_mae=0.148198 right_arm_mae=0.020499 imbalance_mae=0.127699 batch_time_s=0.3414
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.067378 left_arm_mae=0.119032 right_arm_mae=0.015724 imbalance_mae=0.103307 batch_time_s=0.3734
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.041997 left_arm_mae=0.070063 right_arm_mae=0.013930 imbalance_mae=0.056133 batch_time_s=0.3433
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.048462 left_arm_mae=0.083206 right_arm_mae=0.013718 imbalance_mae=0.069487 batch_time_s=0.3682
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.059187 left_arm_mae=0.103132 right_arm_mae=0.015243 imbalance_mae=0.087889 batch_time_s=0.4041
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.052531 left_arm_mae=0.088090 right_arm_mae=0.016972 imbalance_mae=0.071118 batch_time_s=0.3420
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.057733 left_arm_mae=0.096639 right_arm_mae=0.018827 imbalance_mae=0.077812 batch_time_s=0.3407
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.078588 left_arm_mae=0.139026 right_arm_mae=0.018150 imbalance_mae=0.120876 batch_time_s=0.3427
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.085513 left_arm_mae=0.132507 right_arm_mae=0.038519 imbalance_mae=0.093988 batch_time_s=0.3408
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.088594 left_arm_mae=0.035055 right_arm_mae=0.142132 imbalance_mae=0.107077 batch_time_s=0.3833
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.100376 left_arm_mae=0.056270 right_arm_mae=0.144482 imbalance_mae=0.088212 batch_time_s=0.3644
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.065765
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.016923
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.086375
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.032761
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.045154
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.041131
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.092111
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.036788
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.046224
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.076043
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.046163
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.042138
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.038093
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.056179
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.066659
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.040501
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.3586 std=0.0248 min=0.3396 max=0.4220
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_baseline_10k_val_5000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_baseline_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.022598 left_arm_loss=0.025243 right_arm_loss=0.019952 imbalance=0.005291 batch_time_s=0.7730
|
| 7 |
+
eval_batch=2 loss=0.003257 left_arm_loss=0.002496 right_arm_loss=0.004018 imbalance=0.001522 batch_time_s=0.2212
|
| 8 |
+
eval_batch=3 loss=0.003316 left_arm_loss=0.003317 right_arm_loss=0.003314 imbalance=0.000004 batch_time_s=0.2266
|
| 9 |
+
eval_batch=4 loss=0.017987 left_arm_loss=0.019024 right_arm_loss=0.016950 imbalance=0.002074 batch_time_s=0.2267
|
| 10 |
+
eval_batch=5 loss=0.012783 left_arm_loss=0.016612 right_arm_loss=0.008953 imbalance=0.007659 batch_time_s=0.3403
|
| 11 |
+
eval_batch=6 loss=0.010879 left_arm_loss=0.020326 right_arm_loss=0.001432 imbalance=0.018894 batch_time_s=0.2233
|
| 12 |
+
eval_batch=7 loss=0.012325 left_arm_loss=0.023537 right_arm_loss=0.001113 imbalance=0.022423 batch_time_s=0.2308
|
| 13 |
+
eval_batch=8 loss=0.008239 left_arm_loss=0.015663 right_arm_loss=0.000815 imbalance=0.014848 batch_time_s=0.2327
|
| 14 |
+
eval_batch=9 loss=0.013383 left_arm_loss=0.025981 right_arm_loss=0.000785 imbalance=0.025196 batch_time_s=0.2296
|
| 15 |
+
eval_batch=10 loss=0.022633 left_arm_loss=0.044221 right_arm_loss=0.001046 imbalance=0.043176 batch_time_s=0.2398
|
| 16 |
+
eval_batch=11 loss=0.015927 left_arm_loss=0.030194 right_arm_loss=0.001659 imbalance=0.028536 batch_time_s=0.2276
|
| 17 |
+
eval_batch=12 loss=0.016066 left_arm_loss=0.031229 right_arm_loss=0.000902 imbalance=0.030327 batch_time_s=0.2228
|
| 18 |
+
eval_batch=13 loss=0.019034 left_arm_loss=0.036426 right_arm_loss=0.001641 imbalance=0.034785 batch_time_s=0.3058
|
| 19 |
+
eval_batch=14 loss=0.016662 left_arm_loss=0.016339 right_arm_loss=0.016986 imbalance=0.000647 batch_time_s=0.2226
|
| 20 |
+
eval_batch=15 loss=0.055849 left_arm_loss=0.016080 right_arm_loss=0.095619 imbalance=0.079538 batch_time_s=0.2277
|
| 21 |
+
eval_batch=16 loss=0.035661 left_arm_loss=0.017943 right_arm_loss=0.053379 imbalance=0.035436 batch_time_s=0.2301
|
| 22 |
+
eval_batch=17 loss=0.021186 left_arm_loss=0.039219 right_arm_loss=0.003153 imbalance=0.036066 batch_time_s=0.2307
|
| 23 |
+
eval_batch=18 loss=0.033071 left_arm_loss=0.046249 right_arm_loss=0.019893 imbalance=0.026356 batch_time_s=0.2317
|
| 24 |
+
eval_batch=19 loss=0.010998 left_arm_loss=0.017014 right_arm_loss=0.004983 imbalance=0.012032 batch_time_s=0.2259
|
| 25 |
+
eval_batch=20 loss=0.016367 left_arm_loss=0.027997 right_arm_loss=0.004737 imbalance=0.023260 batch_time_s=0.2276
|
| 26 |
+
eval_batch=21 loss=0.070861 left_arm_loss=0.138458 right_arm_loss=0.003263 imbalance=0.135195 batch_time_s=0.2659
|
| 27 |
+
eval_batch=22 loss=0.086826 left_arm_loss=0.136300 right_arm_loss=0.037352 imbalance=0.098947 batch_time_s=0.2656
|
| 28 |
+
eval_batch=23 loss=0.041515 left_arm_loss=0.074249 right_arm_loss=0.008781 imbalance=0.065469 batch_time_s=0.2287
|
| 29 |
+
eval_batch=24 loss=0.075753 left_arm_loss=0.148664 right_arm_loss=0.002842 imbalance=0.145822 batch_time_s=0.2883
|
| 30 |
+
eval_batch=25 loss=0.063371 left_arm_loss=0.125955 right_arm_loss=0.000787 imbalance=0.125168 batch_time_s=0.2283
|
| 31 |
+
eval_batch=26 loss=0.031963 left_arm_loss=0.061717 right_arm_loss=0.002209 imbalance=0.059508 batch_time_s=0.2304
|
| 32 |
+
eval_batch=27 loss=0.029457 left_arm_loss=0.055315 right_arm_loss=0.003600 imbalance=0.051715 batch_time_s=0.2292
|
| 33 |
+
eval_batch=28 loss=0.015485 left_arm_loss=0.030234 right_arm_loss=0.000735 imbalance=0.029499 batch_time_s=0.3076
|
| 34 |
+
eval_batch=29 loss=0.024835 left_arm_loss=0.047639 right_arm_loss=0.002031 imbalance=0.045607 batch_time_s=0.2278
|
| 35 |
+
eval_batch=30 loss=0.026867 left_arm_loss=0.050554 right_arm_loss=0.003179 imbalance=0.047374 batch_time_s=0.3279
|
| 36 |
+
eval_batch=31 loss=0.048694 left_arm_loss=0.092962 right_arm_loss=0.004426 imbalance=0.088536 batch_time_s=0.3195
|
| 37 |
+
eval_batch=32 loss=0.032212 left_arm_loss=0.041649 right_arm_loss=0.022774 imbalance=0.018875 batch_time_s=0.2350
|
| 38 |
+
eval_batch=33 loss=0.037968 left_arm_loss=0.012033 right_arm_loss=0.063903 imbalance=0.051870 batch_time_s=0.2801
|
| 39 |
+
eval_batch=34 loss=0.070101 left_arm_loss=0.121847 right_arm_loss=0.018354 imbalance=0.103493 batch_time_s=0.2352
|
| 40 |
+
eval_batch=35 loss=0.036351 left_arm_loss=0.069739 right_arm_loss=0.002963 imbalance=0.066775 batch_time_s=0.2946
|
| 41 |
+
eval_batch=36 loss=0.015255 left_arm_loss=0.009489 right_arm_loss=0.021021 imbalance=0.011532 batch_time_s=0.2311
|
| 42 |
+
eval_batch=37 loss=0.003919 left_arm_loss=0.005172 right_arm_loss=0.002666 imbalance=0.002506 batch_time_s=0.2330
|
| 43 |
+
eval_batch=38 loss=0.034404 left_arm_loss=0.039350 right_arm_loss=0.029457 imbalance=0.009893 batch_time_s=0.2376
|
| 44 |
+
eval_batch=39 loss=0.031972 left_arm_loss=0.013650 right_arm_loss=0.050293 imbalance=0.036643 batch_time_s=0.2325
|
| 45 |
+
eval_batch=40 loss=0.013568 left_arm_loss=0.016394 right_arm_loss=0.010741 imbalance=0.005654 batch_time_s=0.2671
|
| 46 |
+
eval_batch=41 loss=0.026423 left_arm_loss=0.051625 right_arm_loss=0.001222 imbalance=0.050402 batch_time_s=0.2496
|
| 47 |
+
eval_batch=42 loss=0.011443 left_arm_loss=0.021655 right_arm_loss=0.001231 imbalance=0.020424 batch_time_s=0.2390
|
| 48 |
+
eval_batch=43 loss=0.004324 left_arm_loss=0.007171 right_arm_loss=0.001478 imbalance=0.005693 batch_time_s=0.2313
|
| 49 |
+
eval_batch=44 loss=0.002703 left_arm_loss=0.004312 right_arm_loss=0.001093 imbalance=0.003219 batch_time_s=0.2279
|
| 50 |
+
eval_batch=45 loss=0.007087 left_arm_loss=0.012914 right_arm_loss=0.001261 imbalance=0.011654 batch_time_s=0.2363
|
| 51 |
+
eval_batch=46 loss=0.022314 left_arm_loss=0.043007 right_arm_loss=0.001622 imbalance=0.041385 batch_time_s=0.2282
|
| 52 |
+
eval_batch=47 loss=0.029021 left_arm_loss=0.008937 right_arm_loss=0.049105 imbalance=0.040168 batch_time_s=0.3012
|
| 53 |
+
eval_batch=48 loss=0.033211 left_arm_loss=0.005827 right_arm_loss=0.060594 imbalance=0.054767 batch_time_s=0.2974
|
| 54 |
+
eval_batch=49 loss=0.006837 left_arm_loss=0.002519 right_arm_loss=0.011154 imbalance=0.008635 batch_time_s=0.2310
|
| 55 |
+
eval_batch=50 loss=0.063237 left_arm_loss=0.031470 right_arm_loss=0.095004 imbalance=0.063534 batch_time_s=0.3002
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/handover_packed_baseline_10k/5000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.027324
|
| 61 |
+
std_val_loss: 0.020404
|
| 62 |
+
mean_left_arm_loss: 0.039118
|
| 63 |
+
std_left_arm_loss: 0.037404
|
| 64 |
+
mean_right_arm_loss: 0.015529
|
| 65 |
+
std_right_arm_loss: 0.023314
|
| 66 |
+
mean_left_joint_loss: 0.042035
|
| 67 |
+
std_left_joint_loss: 0.041763
|
| 68 |
+
mean_left_gripper_loss: 0.018705
|
| 69 |
+
std_left_gripper_loss: 0.031815
|
| 70 |
+
mean_right_joint_loss: 0.015711
|
| 71 |
+
std_right_joint_loss: 0.023929
|
| 72 |
+
mean_right_gripper_loss: 0.014261
|
| 73 |
+
std_right_gripper_loss: 0.030013
|
| 74 |
+
mean_left_right_imbalance: 0.038961
|
| 75 |
+
std_left_right_imbalance: 0.035474
|
| 76 |
+
per_batch_timing_seconds: mean=0.2601 std=0.0801 min=0.2212 max=0.7730
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.049127 left_arm_mae=0.051359 right_arm_mae=0.046895 imbalance_mae=0.004464 batch_time_s=0.2731
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.021553 left_arm_mae=0.021278 right_arm_mae=0.021828 imbalance_mae=0.000550 batch_time_s=0.3528
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.020387 left_arm_mae=0.018467 right_arm_mae=0.022306 imbalance_mae=0.003839 batch_time_s=0.2626
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.035600 left_arm_mae=0.030283 right_arm_mae=0.040917 imbalance_mae=0.010633 batch_time_s=0.2712
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.032516 left_arm_mae=0.037471 right_arm_mae=0.027560 imbalance_mae=0.009911 batch_time_s=0.2688
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.034533 left_arm_mae=0.058071 right_arm_mae=0.010994 imbalance_mae=0.047077 batch_time_s=0.3179
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.035423 left_arm_mae=0.061402 right_arm_mae=0.009444 imbalance_mae=0.051958 batch_time_s=0.3146
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.026805 left_arm_mae=0.046320 right_arm_mae=0.007290 imbalance_mae=0.039029 batch_time_s=0.3397
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.040398 left_arm_mae=0.072072 right_arm_mae=0.008723 imbalance_mae=0.063349 batch_time_s=0.3298
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.050191 left_arm_mae=0.090027 right_arm_mae=0.010354 imbalance_mae=0.079673 batch_time_s=0.2585
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.034508 left_arm_mae=0.059912 right_arm_mae=0.009105 imbalance_mae=0.050807 batch_time_s=0.2612
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.041212 left_arm_mae=0.073254 right_arm_mae=0.009170 imbalance_mae=0.064084 batch_time_s=0.2658
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.035764 left_arm_mae=0.060856 right_arm_mae=0.010673 imbalance_mae=0.050183 batch_time_s=0.3511
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.035192 left_arm_mae=0.048918 right_arm_mae=0.021465 imbalance_mae=0.027453 batch_time_s=0.2694
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.081409 left_arm_mae=0.025452 right_arm_mae=0.137367 imbalance_mae=0.111915 batch_time_s=0.2695
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.060902 left_arm_mae=0.032682 right_arm_mae=0.089121 imbalance_mae=0.056439 batch_time_s=0.2659
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.039720
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.014654
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.049239
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.019869
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.030201
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.034473
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.052215
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.023235
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.028408
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.028427
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.031159
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.037572
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.023490
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.024208
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.041960
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.030152
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.2920 std=0.0342 min=0.2585 max=0.3528
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.058142 left_arm_mae=0.062580 right_arm_mae=0.053705 imbalance_mae=0.008875 batch_time_s=0.3521
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.027516 left_arm_mae=0.027185 right_arm_mae=0.027846 imbalance_mae=0.000661 batch_time_s=0.3613
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.026459 left_arm_mae=0.024776 right_arm_mae=0.028142 imbalance_mae=0.003366 batch_time_s=0.3707
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.042321 left_arm_mae=0.037100 right_arm_mae=0.047541 imbalance_mae=0.010441 batch_time_s=0.4291
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.035501 left_arm_mae=0.039882 right_arm_mae=0.031121 imbalance_mae=0.008761 batch_time_s=0.3789
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.037181 left_arm_mae=0.063069 right_arm_mae=0.011292 imbalance_mae=0.051776 batch_time_s=0.3463
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.037960 left_arm_mae=0.065358 right_arm_mae=0.010561 imbalance_mae=0.054798 batch_time_s=0.3618
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.030014 left_arm_mae=0.052116 right_arm_mae=0.007913 imbalance_mae=0.044203 batch_time_s=0.4241
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.045459 left_arm_mae=0.080979 right_arm_mae=0.009940 imbalance_mae=0.071039 batch_time_s=0.4006
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.052380 left_arm_mae=0.092981 right_arm_mae=0.011778 imbalance_mae=0.081203 batch_time_s=0.4774
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.036979 left_arm_mae=0.064074 right_arm_mae=0.009883 imbalance_mae=0.054191 batch_time_s=0.4397
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.044283 left_arm_mae=0.078149 right_arm_mae=0.010416 imbalance_mae=0.067733 batch_time_s=0.3574
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.037810 left_arm_mae=0.063530 right_arm_mae=0.012089 imbalance_mae=0.051441 batch_time_s=0.3996
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.037400 left_arm_mae=0.052177 right_arm_mae=0.022623 imbalance_mae=0.029554 batch_time_s=0.3962
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.080721 left_arm_mae=0.024507 right_arm_mae=0.136936 imbalance_mae=0.112428 batch_time_s=0.4037
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.063413 left_arm_mae=0.032152 right_arm_mae=0.094674 imbalance_mae=0.062522 batch_time_s=0.4226
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.043346
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.013818
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.053788
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.020493
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.032904
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.034889
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.057689
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.024439
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.026486
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.029864
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.033700
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.038002
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.027331
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.027093
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.044562
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.030999
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.3951 std=0.0357 min=0.3463 max=0.4774
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_1000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_parallel_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.049506 left_arm_loss=0.040621 right_arm_loss=0.058391 imbalance=0.017770 batch_time_s=4.6353
|
| 7 |
+
eval_batch=2 loss=0.013146 left_arm_loss=0.013595 right_arm_loss=0.012698 imbalance=0.000897 batch_time_s=0.2253
|
| 8 |
+
eval_batch=3 loss=0.014637 left_arm_loss=0.019903 right_arm_loss=0.009370 imbalance=0.010533 batch_time_s=0.2946
|
| 9 |
+
eval_batch=4 loss=0.064632 left_arm_loss=0.061204 right_arm_loss=0.068061 imbalance=0.006857 batch_time_s=0.2289
|
| 10 |
+
eval_batch=5 loss=0.049220 left_arm_loss=0.068132 right_arm_loss=0.030307 imbalance=0.037825 batch_time_s=0.2334
|
| 11 |
+
eval_batch=6 loss=0.044489 left_arm_loss=0.084255 right_arm_loss=0.004724 imbalance=0.079531 batch_time_s=0.3232
|
| 12 |
+
eval_batch=7 loss=0.038667 left_arm_loss=0.073409 right_arm_loss=0.003924 imbalance=0.069485 batch_time_s=0.2285
|
| 13 |
+
eval_batch=8 loss=0.018589 left_arm_loss=0.034451 right_arm_loss=0.002728 imbalance=0.031723 batch_time_s=0.2299
|
| 14 |
+
eval_batch=9 loss=0.025908 left_arm_loss=0.049782 right_arm_loss=0.002034 imbalance=0.047748 batch_time_s=0.2356
|
| 15 |
+
eval_batch=10 loss=0.035559 left_arm_loss=0.068822 right_arm_loss=0.002296 imbalance=0.066526 batch_time_s=0.2449
|
| 16 |
+
eval_batch=11 loss=0.030806 left_arm_loss=0.058047 right_arm_loss=0.003565 imbalance=0.054483 batch_time_s=0.3058
|
| 17 |
+
eval_batch=12 loss=0.047394 left_arm_loss=0.090843 right_arm_loss=0.003945 imbalance=0.086899 batch_time_s=0.2833
|
| 18 |
+
eval_batch=13 loss=0.049660 left_arm_loss=0.095403 right_arm_loss=0.003917 imbalance=0.091486 batch_time_s=0.2489
|
| 19 |
+
eval_batch=14 loss=0.061841 left_arm_loss=0.104474 right_arm_loss=0.019209 imbalance=0.085265 batch_time_s=0.2382
|
| 20 |
+
eval_batch=15 loss=0.085757 left_arm_loss=0.037049 right_arm_loss=0.134464 imbalance=0.097415 batch_time_s=0.2364
|
| 21 |
+
eval_batch=16 loss=0.076827 left_arm_loss=0.045344 right_arm_loss=0.108310 imbalance=0.062966 batch_time_s=0.2900
|
| 22 |
+
eval_batch=17 loss=0.056418 left_arm_loss=0.100516 right_arm_loss=0.012320 imbalance=0.088197 batch_time_s=0.4810
|
| 23 |
+
eval_batch=18 loss=0.070686 left_arm_loss=0.076775 right_arm_loss=0.064597 imbalance=0.012178 batch_time_s=0.2382
|
| 24 |
+
eval_batch=19 loss=0.033053 left_arm_loss=0.041608 right_arm_loss=0.024499 imbalance=0.017110 batch_time_s=0.2385
|
| 25 |
+
eval_batch=20 loss=0.031012 left_arm_loss=0.045658 right_arm_loss=0.016366 imbalance=0.029292 batch_time_s=0.2304
|
| 26 |
+
eval_batch=21 loss=0.028765 left_arm_loss=0.044768 right_arm_loss=0.012761 imbalance=0.032007 batch_time_s=0.2992
|
| 27 |
+
eval_batch=22 loss=0.057293 left_arm_loss=0.061262 right_arm_loss=0.053323 imbalance=0.007940 batch_time_s=0.2391
|
| 28 |
+
eval_batch=23 loss=0.094658 left_arm_loss=0.165888 right_arm_loss=0.023429 imbalance=0.142458 batch_time_s=0.3353
|
| 29 |
+
eval_batch=24 loss=0.097680 left_arm_loss=0.184031 right_arm_loss=0.011328 imbalance=0.172703 batch_time_s=0.3058
|
| 30 |
+
eval_batch=25 loss=0.064214 left_arm_loss=0.125794 right_arm_loss=0.002633 imbalance=0.123161 batch_time_s=0.3103
|
| 31 |
+
eval_batch=26 loss=0.029143 left_arm_loss=0.050796 right_arm_loss=0.007489 imbalance=0.043307 batch_time_s=0.7111
|
| 32 |
+
eval_batch=27 loss=0.036844 left_arm_loss=0.063446 right_arm_loss=0.010242 imbalance=0.053204 batch_time_s=0.3351
|
| 33 |
+
eval_batch=28 loss=0.031578 left_arm_loss=0.060362 right_arm_loss=0.002794 imbalance=0.057568 batch_time_s=0.2335
|
| 34 |
+
eval_batch=29 loss=0.047676 left_arm_loss=0.092382 right_arm_loss=0.002970 imbalance=0.089412 batch_time_s=0.2433
|
| 35 |
+
eval_batch=30 loss=0.084667 left_arm_loss=0.165372 right_arm_loss=0.003963 imbalance=0.161408 batch_time_s=0.2322
|
| 36 |
+
eval_batch=31 loss=0.159263 left_arm_loss=0.298709 right_arm_loss=0.019817 imbalance=0.278892 batch_time_s=0.2344
|
| 37 |
+
eval_batch=32 loss=0.112677 left_arm_loss=0.118272 right_arm_loss=0.107082 imbalance=0.011190 batch_time_s=0.3530
|
| 38 |
+
eval_batch=33 loss=0.068681 left_arm_loss=0.031034 right_arm_loss=0.106329 imbalance=0.075295 batch_time_s=0.2477
|
| 39 |
+
eval_batch=34 loss=0.073726 left_arm_loss=0.121948 right_arm_loss=0.025504 imbalance=0.096444 batch_time_s=0.3370
|
| 40 |
+
eval_batch=35 loss=0.061882 left_arm_loss=0.109883 right_arm_loss=0.013881 imbalance=0.096002 batch_time_s=0.2556
|
| 41 |
+
eval_batch=36 loss=0.046614 left_arm_loss=0.054836 right_arm_loss=0.038392 imbalance=0.016444 batch_time_s=0.2569
|
| 42 |
+
eval_batch=37 loss=0.032190 left_arm_loss=0.051540 right_arm_loss=0.012840 imbalance=0.038700 batch_time_s=0.3450
|
| 43 |
+
eval_batch=38 loss=0.066159 left_arm_loss=0.083341 right_arm_loss=0.048978 imbalance=0.034363 batch_time_s=0.3564
|
| 44 |
+
eval_batch=39 loss=0.074041 left_arm_loss=0.047870 right_arm_loss=0.100211 imbalance=0.052341 batch_time_s=0.2406
|
| 45 |
+
eval_batch=40 loss=0.047020 left_arm_loss=0.053036 right_arm_loss=0.041005 imbalance=0.012030 batch_time_s=0.3074
|
| 46 |
+
eval_batch=41 loss=0.057365 left_arm_loss=0.109413 right_arm_loss=0.005316 imbalance=0.104097 batch_time_s=0.2427
|
| 47 |
+
eval_batch=42 loss=0.033981 left_arm_loss=0.063623 right_arm_loss=0.004340 imbalance=0.059283 batch_time_s=0.2765
|
| 48 |
+
eval_batch=43 loss=0.018033 left_arm_loss=0.029285 right_arm_loss=0.006781 imbalance=0.022504 batch_time_s=0.2264
|
| 49 |
+
eval_batch=44 loss=0.017014 left_arm_loss=0.028932 right_arm_loss=0.005096 imbalance=0.023836 batch_time_s=0.2224
|
| 50 |
+
eval_batch=45 loss=0.021894 left_arm_loss=0.040422 right_arm_loss=0.003366 imbalance=0.037055 batch_time_s=0.2718
|
| 51 |
+
eval_batch=46 loss=0.041116 left_arm_loss=0.076250 right_arm_loss=0.005983 imbalance=0.070267 batch_time_s=0.2373
|
| 52 |
+
eval_batch=47 loss=0.134721 left_arm_loss=0.042995 right_arm_loss=0.226447 imbalance=0.183451 batch_time_s=0.2264
|
| 53 |
+
eval_batch=48 loss=0.258522 left_arm_loss=0.016699 right_arm_loss=0.500345 imbalance=0.483646 batch_time_s=0.2296
|
| 54 |
+
eval_batch=49 loss=0.043552 left_arm_loss=0.017405 right_arm_loss=0.069698 imbalance=0.052293 batch_time_s=0.2327
|
| 55 |
+
eval_batch=50 loss=0.146957 left_arm_loss=0.064547 right_arm_loss=0.229367 imbalance=0.164819 batch_time_s=0.3012
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/1000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.059715
|
| 61 |
+
std_val_loss: 0.042962
|
| 62 |
+
mean_left_arm_loss: 0.073681
|
| 63 |
+
std_left_arm_loss: 0.049928
|
| 64 |
+
mean_right_arm_loss: 0.045749
|
| 65 |
+
std_right_arm_loss: 0.082818
|
| 66 |
+
mean_left_joint_loss: 0.078129
|
| 67 |
+
std_left_joint_loss: 0.055212
|
| 68 |
+
mean_left_gripper_loss: 0.042541
|
| 69 |
+
std_left_gripper_loss: 0.084910
|
| 70 |
+
mean_right_joint_loss: 0.047261
|
| 71 |
+
std_right_joint_loss: 0.090299
|
| 72 |
+
mean_right_gripper_loss: 0.035161
|
| 73 |
+
std_right_gripper_loss: 0.079674
|
| 74 |
+
mean_left_right_imbalance: 0.075806
|
| 75 |
+
std_left_right_imbalance: 0.079713
|
| 76 |
+
per_batch_timing_seconds: mean=0.3663 std=0.6150 min=0.2224 max=4.6353
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.119875 left_arm_mae=0.108588 right_arm_mae=0.131163 imbalance_mae=0.022575 batch_time_s=0.3299
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.056468 left_arm_mae=0.059824 right_arm_mae=0.053113 imbalance_mae=0.006710 batch_time_s=0.3864
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.069907 left_arm_mae=0.072771 right_arm_mae=0.067042 imbalance_mae=0.005730 batch_time_s=0.2686
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.116824 left_arm_mae=0.118923 right_arm_mae=0.114724 imbalance_mae=0.004199 batch_time_s=0.3825
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.082754 left_arm_mae=0.103956 right_arm_mae=0.061551 imbalance_mae=0.042404 batch_time_s=0.3197
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.083889 left_arm_mae=0.145390 right_arm_mae=0.022387 imbalance_mae=0.123002 batch_time_s=0.2733
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.095747 left_arm_mae=0.170531 right_arm_mae=0.020963 imbalance_mae=0.149568 batch_time_s=0.3479
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.067250 left_arm_mae=0.114657 right_arm_mae=0.019842 imbalance_mae=0.094815 batch_time_s=0.3748
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.070907 left_arm_mae=0.122207 right_arm_mae=0.019607 imbalance_mae=0.102601 batch_time_s=0.2877
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.087629 left_arm_mae=0.153592 right_arm_mae=0.021666 imbalance_mae=0.131926 batch_time_s=0.2709
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.075383 left_arm_mae=0.129150 right_arm_mae=0.021616 imbalance_mae=0.107533 batch_time_s=0.2654
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.100087 left_arm_mae=0.177705 right_arm_mae=0.022468 imbalance_mae=0.155237 batch_time_s=0.2791
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.097545 left_arm_mae=0.173683 right_arm_mae=0.021406 imbalance_mae=0.152276 batch_time_s=0.3463
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.119858 left_arm_mae=0.192049 right_arm_mae=0.047666 imbalance_mae=0.144383 batch_time_s=0.2714
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.126125 left_arm_mae=0.043271 right_arm_mae=0.208979 imbalance_mae=0.165708 batch_time_s=0.2719
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.110233 left_arm_mae=0.067434 right_arm_mae=0.153031 imbalance_mae=0.085598 batch_time_s=0.3009
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.092530
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.020956
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.122108
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.043780
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.062952
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.056483
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.133062
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.052111
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.045431
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.055952
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.065476
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.060695
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.045280
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.053039
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.093392
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.056874
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3110 std=0.0430 min=0.2654 max=0.3864
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.135566 left_arm_mae=0.122877 right_arm_mae=0.148255 imbalance_mae=0.025378 batch_time_s=0.4122
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.068124 left_arm_mae=0.071843 right_arm_mae=0.064406 imbalance_mae=0.007438 batch_time_s=0.3659
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.081230 left_arm_mae=0.083152 right_arm_mae=0.079308 imbalance_mae=0.003844 batch_time_s=0.4764
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.128195 left_arm_mae=0.129532 right_arm_mae=0.126857 imbalance_mae=0.002675 batch_time_s=0.3405
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.090927 left_arm_mae=0.113657 right_arm_mae=0.068196 imbalance_mae=0.045462 batch_time_s=0.3940
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.095554 left_arm_mae=0.164228 right_arm_mae=0.026880 imbalance_mae=0.137348 batch_time_s=0.4560
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.103011 left_arm_mae=0.180335 right_arm_mae=0.025687 imbalance_mae=0.154648 batch_time_s=0.4857
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.071890 left_arm_mae=0.119614 right_arm_mae=0.024165 imbalance_mae=0.095449 batch_time_s=0.3618
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.079933 left_arm_mae=0.135905 right_arm_mae=0.023962 imbalance_mae=0.111943 batch_time_s=0.4824
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.096654 left_arm_mae=0.168318 right_arm_mae=0.024991 imbalance_mae=0.143327 batch_time_s=0.5017
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.083773 left_arm_mae=0.144171 right_arm_mae=0.023375 imbalance_mae=0.120796 batch_time_s=0.4778
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.107955 left_arm_mae=0.189506 right_arm_mae=0.026404 imbalance_mae=0.163102 batch_time_s=0.3573
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.106832 left_arm_mae=0.187708 right_arm_mae=0.025955 imbalance_mae=0.161753 batch_time_s=0.3475
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.127854 left_arm_mae=0.200072 right_arm_mae=0.055635 imbalance_mae=0.144437 batch_time_s=0.4218
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.140580 left_arm_mae=0.052115 right_arm_mae=0.229045 imbalance_mae=0.176931 batch_time_s=0.3976
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.121160 left_arm_mae=0.074721 right_arm_mae=0.167600 imbalance_mae=0.092879 batch_time_s=0.3501
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.102452
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.022208
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.133610
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.044796
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.071295
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.061523
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.145474
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.053589
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.050560
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.060317
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.073909
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.066406
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.053000
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.051143
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.099213
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.060422
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.4143 std=0.0560 min=0.3405 max=0.5017
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_10000.log
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_parallel_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=100 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.008598 left_arm_loss=0.008966 right_arm_loss=0.008230 imbalance=0.000736 batch_time_s=0.7969
|
| 7 |
+
eval_batch=2 loss=0.001298 left_arm_loss=0.001120 right_arm_loss=0.001477 imbalance=0.000356 batch_time_s=0.4212
|
| 8 |
+
eval_batch=3 loss=0.002089 left_arm_loss=0.002916 right_arm_loss=0.001262 imbalance=0.001655 batch_time_s=0.3802
|
| 9 |
+
eval_batch=4 loss=0.014139 left_arm_loss=0.013196 right_arm_loss=0.015083 imbalance=0.001887 batch_time_s=0.4351
|
| 10 |
+
eval_batch=5 loss=0.010969 left_arm_loss=0.011818 right_arm_loss=0.010120 imbalance=0.001699 batch_time_s=0.4440
|
| 11 |
+
eval_batch=6 loss=0.006583 left_arm_loss=0.012185 right_arm_loss=0.000980 imbalance=0.011205 batch_time_s=0.3722
|
| 12 |
+
eval_batch=7 loss=0.007013 left_arm_loss=0.013410 right_arm_loss=0.000616 imbalance=0.012795 batch_time_s=0.3687
|
| 13 |
+
eval_batch=8 loss=0.009583 left_arm_loss=0.018835 right_arm_loss=0.000331 imbalance=0.018504 batch_time_s=0.3495
|
| 14 |
+
eval_batch=9 loss=0.012016 left_arm_loss=0.023703 right_arm_loss=0.000330 imbalance=0.023373 batch_time_s=0.3122
|
| 15 |
+
eval_batch=10 loss=0.022936 left_arm_loss=0.045498 right_arm_loss=0.000375 imbalance=0.045123 batch_time_s=0.3299
|
| 16 |
+
eval_batch=11 loss=0.017193 left_arm_loss=0.033949 right_arm_loss=0.000436 imbalance=0.033513 batch_time_s=0.2463
|
| 17 |
+
eval_batch=12 loss=0.019110 left_arm_loss=0.037568 right_arm_loss=0.000652 imbalance=0.036916 batch_time_s=0.2415
|
| 18 |
+
eval_batch=13 loss=0.022467 left_arm_loss=0.044283 right_arm_loss=0.000650 imbalance=0.043633 batch_time_s=0.3323
|
| 19 |
+
eval_batch=14 loss=0.005440 left_arm_loss=0.008297 right_arm_loss=0.002582 imbalance=0.005715 batch_time_s=0.3667
|
| 20 |
+
eval_batch=15 loss=0.028680 left_arm_loss=0.010137 right_arm_loss=0.047224 imbalance=0.037087 batch_time_s=0.3973
|
| 21 |
+
eval_batch=16 loss=0.020404 left_arm_loss=0.012046 right_arm_loss=0.028762 imbalance=0.016716 batch_time_s=0.3513
|
| 22 |
+
eval_batch=17 loss=0.012630 left_arm_loss=0.023091 right_arm_loss=0.002169 imbalance=0.020923 batch_time_s=0.2340
|
| 23 |
+
eval_batch=18 loss=0.037282 left_arm_loss=0.054978 right_arm_loss=0.019586 imbalance=0.035393 batch_time_s=0.3184
|
| 24 |
+
eval_batch=19 loss=0.009588 left_arm_loss=0.016425 right_arm_loss=0.002752 imbalance=0.013673 batch_time_s=0.2645
|
| 25 |
+
eval_batch=20 loss=0.017728 left_arm_loss=0.031880 right_arm_loss=0.003577 imbalance=0.028302 batch_time_s=0.2603
|
| 26 |
+
eval_batch=21 loss=0.098740 left_arm_loss=0.195217 right_arm_loss=0.002263 imbalance=0.192954 batch_time_s=0.2219
|
| 27 |
+
eval_batch=22 loss=0.114247 left_arm_loss=0.199525 right_arm_loss=0.028969 imbalance=0.170556 batch_time_s=0.2241
|
| 28 |
+
eval_batch=23 loss=0.042247 left_arm_loss=0.076375 right_arm_loss=0.008120 imbalance=0.068255 batch_time_s=0.3778
|
| 29 |
+
eval_batch=24 loss=0.124734 left_arm_loss=0.247885 right_arm_loss=0.001583 imbalance=0.246302 batch_time_s=0.2236
|
| 30 |
+
eval_batch=25 loss=0.080652 left_arm_loss=0.160831 right_arm_loss=0.000473 imbalance=0.160358 batch_time_s=0.2245
|
| 31 |
+
eval_batch=26 loss=0.058621 left_arm_loss=0.116006 right_arm_loss=0.001236 imbalance=0.114770 batch_time_s=0.3346
|
| 32 |
+
eval_batch=27 loss=0.066596 left_arm_loss=0.132247 right_arm_loss=0.000946 imbalance=0.131301 batch_time_s=0.2557
|
| 33 |
+
eval_batch=28 loss=0.029455 left_arm_loss=0.058478 right_arm_loss=0.000432 imbalance=0.058046 batch_time_s=0.2267
|
| 34 |
+
eval_batch=29 loss=0.040648 left_arm_loss=0.080583 right_arm_loss=0.000712 imbalance=0.079871 batch_time_s=0.4551
|
| 35 |
+
eval_batch=30 loss=0.027801 left_arm_loss=0.054392 right_arm_loss=0.001210 imbalance=0.053182 batch_time_s=0.4952
|
| 36 |
+
eval_batch=31 loss=0.049047 left_arm_loss=0.095610 right_arm_loss=0.002484 imbalance=0.093125 batch_time_s=0.4630
|
| 37 |
+
eval_batch=32 loss=0.017039 left_arm_loss=0.025473 right_arm_loss=0.008604 imbalance=0.016869 batch_time_s=0.3515
|
| 38 |
+
eval_batch=33 loss=0.036852 left_arm_loss=0.017885 right_arm_loss=0.055819 imbalance=0.037935 batch_time_s=0.4058
|
| 39 |
+
eval_batch=34 loss=0.075331 left_arm_loss=0.115144 right_arm_loss=0.035518 imbalance=0.079626 batch_time_s=0.4361
|
| 40 |
+
eval_batch=35 loss=0.024882 left_arm_loss=0.047816 right_arm_loss=0.001948 imbalance=0.045868 batch_time_s=0.4481
|
| 41 |
+
eval_batch=36 loss=0.007855 left_arm_loss=0.003467 right_arm_loss=0.012244 imbalance=0.008777 batch_time_s=0.3434
|
| 42 |
+
eval_batch=37 loss=0.002229 left_arm_loss=0.003409 right_arm_loss=0.001049 imbalance=0.002359 batch_time_s=0.2315
|
| 43 |
+
eval_batch=38 loss=0.023309 left_arm_loss=0.028313 right_arm_loss=0.018306 imbalance=0.010007 batch_time_s=0.2646
|
| 44 |
+
eval_batch=39 loss=0.020423 left_arm_loss=0.007732 right_arm_loss=0.033114 imbalance=0.025382 batch_time_s=0.3098
|
| 45 |
+
eval_batch=40 loss=0.011275 left_arm_loss=0.014338 right_arm_loss=0.008213 imbalance=0.006125 batch_time_s=0.2342
|
| 46 |
+
eval_batch=41 loss=0.011151 left_arm_loss=0.021560 right_arm_loss=0.000741 imbalance=0.020819 batch_time_s=0.3639
|
| 47 |
+
eval_batch=42 loss=0.008415 left_arm_loss=0.016206 right_arm_loss=0.000625 imbalance=0.015581 batch_time_s=0.3638
|
| 48 |
+
eval_batch=43 loss=0.003240 left_arm_loss=0.006059 right_arm_loss=0.000420 imbalance=0.005639 batch_time_s=0.3456
|
| 49 |
+
eval_batch=44 loss=0.001447 left_arm_loss=0.002358 right_arm_loss=0.000536 imbalance=0.001821 batch_time_s=0.3825
|
| 50 |
+
eval_batch=45 loss=0.003787 left_arm_loss=0.006964 right_arm_loss=0.000610 imbalance=0.006355 batch_time_s=0.2629
|
| 51 |
+
eval_batch=46 loss=0.010355 left_arm_loss=0.019963 right_arm_loss=0.000747 imbalance=0.019216 batch_time_s=0.3228
|
| 52 |
+
eval_batch=47 loss=0.093454 left_arm_loss=0.004442 right_arm_loss=0.182465 imbalance=0.178023 batch_time_s=0.3782
|
| 53 |
+
eval_batch=48 loss=0.085065 left_arm_loss=0.003706 right_arm_loss=0.166424 imbalance=0.162718 batch_time_s=0.3985
|
| 54 |
+
eval_batch=49 loss=0.005288 left_arm_loss=0.002214 right_arm_loss=0.008361 imbalance=0.006147 batch_time_s=0.3958
|
| 55 |
+
eval_batch=50 loss=0.045286 left_arm_loss=0.022582 right_arm_loss=0.067991 imbalance=0.045409 batch_time_s=0.2282
|
| 56 |
+
eval_batch=51 loss=0.008671 left_arm_loss=0.012865 right_arm_loss=0.004476 imbalance=0.008390 batch_time_s=0.3869
|
| 57 |
+
eval_batch=52 loss=0.011851 left_arm_loss=0.017357 right_arm_loss=0.006346 imbalance=0.011011 batch_time_s=0.2768
|
| 58 |
+
eval_batch=53 loss=0.001221 left_arm_loss=0.001389 right_arm_loss=0.001052 imbalance=0.000337 batch_time_s=0.3491
|
| 59 |
+
eval_batch=54 loss=0.002532 left_arm_loss=0.002639 right_arm_loss=0.002425 imbalance=0.000214 batch_time_s=0.2220
|
| 60 |
+
eval_batch=55 loss=0.017695 left_arm_loss=0.018922 right_arm_loss=0.016468 imbalance=0.002454 batch_time_s=0.3504
|
| 61 |
+
eval_batch=56 loss=0.023422 left_arm_loss=0.005041 right_arm_loss=0.041803 imbalance=0.036762 batch_time_s=0.3002
|
| 62 |
+
eval_batch=57 loss=0.007411 left_arm_loss=0.009387 right_arm_loss=0.005434 imbalance=0.003953 batch_time_s=0.2261
|
| 63 |
+
eval_batch=58 loss=0.009703 left_arm_loss=0.018272 right_arm_loss=0.001134 imbalance=0.017138 batch_time_s=0.5275
|
| 64 |
+
eval_batch=59 loss=0.011092 left_arm_loss=0.021360 right_arm_loss=0.000824 imbalance=0.020536 batch_time_s=0.2356
|
| 65 |
+
eval_batch=60 loss=0.004315 left_arm_loss=0.007742 right_arm_loss=0.000887 imbalance=0.006854 batch_time_s=0.3182
|
| 66 |
+
eval_batch=61 loss=0.001272 left_arm_loss=0.002311 right_arm_loss=0.000233 imbalance=0.002078 batch_time_s=0.2562
|
| 67 |
+
eval_batch=62 loss=0.001647 left_arm_loss=0.003064 right_arm_loss=0.000229 imbalance=0.002836 batch_time_s=0.2236
|
| 68 |
+
eval_batch=63 loss=0.004060 left_arm_loss=0.007585 right_arm_loss=0.000534 imbalance=0.007051 batch_time_s=0.2289
|
| 69 |
+
eval_batch=64 loss=0.004870 left_arm_loss=0.008584 right_arm_loss=0.001156 imbalance=0.007428 batch_time_s=0.3408
|
| 70 |
+
eval_batch=65 loss=0.009187 left_arm_loss=0.002123 right_arm_loss=0.016250 imbalance=0.014127 batch_time_s=0.2414
|
| 71 |
+
eval_batch=66 loss=0.003548 left_arm_loss=0.004316 right_arm_loss=0.002779 imbalance=0.001537 batch_time_s=0.2221
|
| 72 |
+
eval_batch=67 loss=0.005461 left_arm_loss=0.010117 right_arm_loss=0.000805 imbalance=0.009312 batch_time_s=0.3501
|
| 73 |
+
eval_batch=68 loss=0.019510 left_arm_loss=0.022769 right_arm_loss=0.016250 imbalance=0.006519 batch_time_s=0.2250
|
| 74 |
+
eval_batch=69 loss=0.040062 left_arm_loss=0.064019 right_arm_loss=0.016104 imbalance=0.047915 batch_time_s=0.2227
|
| 75 |
+
eval_batch=70 loss=0.022290 left_arm_loss=0.011103 right_arm_loss=0.033476 imbalance=0.022373 batch_time_s=0.3519
|
| 76 |
+
eval_batch=71 loss=0.014200 left_arm_loss=0.006097 right_arm_loss=0.022303 imbalance=0.016206 batch_time_s=0.2229
|
| 77 |
+
eval_batch=72 loss=0.028683 left_arm_loss=0.049631 right_arm_loss=0.007736 imbalance=0.041895 batch_time_s=0.4946
|
| 78 |
+
eval_batch=73 loss=0.017749 left_arm_loss=0.033027 right_arm_loss=0.002471 imbalance=0.030555 batch_time_s=0.3221
|
| 79 |
+
eval_batch=74 loss=0.018133 left_arm_loss=0.034962 right_arm_loss=0.001304 imbalance=0.033658 batch_time_s=0.3079
|
| 80 |
+
eval_batch=75 loss=0.005441 left_arm_loss=0.010617 right_arm_loss=0.000264 imbalance=0.010353 batch_time_s=0.3381
|
| 81 |
+
eval_batch=76 loss=0.022286 left_arm_loss=0.036838 right_arm_loss=0.007734 imbalance=0.029103 batch_time_s=0.2776
|
| 82 |
+
eval_batch=77 loss=0.006658 left_arm_loss=0.012715 right_arm_loss=0.000601 imbalance=0.012114 batch_time_s=0.2320
|
| 83 |
+
eval_batch=78 loss=0.028744 left_arm_loss=0.004508 right_arm_loss=0.052980 imbalance=0.048472 batch_time_s=0.4010
|
| 84 |
+
eval_batch=79 loss=0.067121 left_arm_loss=0.015094 right_arm_loss=0.119147 imbalance=0.104052 batch_time_s=0.3640
|
| 85 |
+
eval_batch=80 loss=0.040183 left_arm_loss=0.006979 right_arm_loss=0.073386 imbalance=0.066408 batch_time_s=0.3644
|
| 86 |
+
eval_batch=81 loss=0.020130 left_arm_loss=0.016093 right_arm_loss=0.024167 imbalance=0.008074 batch_time_s=0.2203
|
| 87 |
+
eval_batch=82 loss=0.008735 left_arm_loss=0.006954 right_arm_loss=0.010516 imbalance=0.003563 batch_time_s=0.2563
|
| 88 |
+
eval_batch=83 loss=0.012587 left_arm_loss=0.014092 right_arm_loss=0.011083 imbalance=0.003008 batch_time_s=0.3576
|
| 89 |
+
eval_batch=84 loss=0.007837 left_arm_loss=0.010760 right_arm_loss=0.004913 imbalance=0.005847 batch_time_s=0.2301
|
| 90 |
+
eval_batch=85 loss=0.019506 left_arm_loss=0.013295 right_arm_loss=0.025716 imbalance=0.012420 batch_time_s=0.3843
|
| 91 |
+
eval_batch=86 loss=0.016306 left_arm_loss=0.004147 right_arm_loss=0.028465 imbalance=0.024318 batch_time_s=0.3352
|
| 92 |
+
eval_batch=87 loss=0.007440 left_arm_loss=0.006391 right_arm_loss=0.008488 imbalance=0.002097 batch_time_s=0.3479
|
| 93 |
+
eval_batch=88 loss=0.017042 left_arm_loss=0.033084 right_arm_loss=0.001001 imbalance=0.032082 batch_time_s=0.3755
|
| 94 |
+
eval_batch=89 loss=0.016117 left_arm_loss=0.029051 right_arm_loss=0.003184 imbalance=0.025867 batch_time_s=0.3429
|
| 95 |
+
eval_batch=90 loss=0.001942 left_arm_loss=0.003463 right_arm_loss=0.000421 imbalance=0.003043 batch_time_s=0.2972
|
| 96 |
+
eval_batch=91 loss=0.001894 left_arm_loss=0.003272 right_arm_loss=0.000516 imbalance=0.002756 batch_time_s=0.2248
|
| 97 |
+
eval_batch=92 loss=0.001436 left_arm_loss=0.002526 right_arm_loss=0.000346 imbalance=0.002180 batch_time_s=0.2841
|
| 98 |
+
eval_batch=93 loss=0.008758 left_arm_loss=0.016516 right_arm_loss=0.001000 imbalance=0.015516 batch_time_s=0.2259
|
| 99 |
+
eval_batch=94 loss=0.003155 left_arm_loss=0.004276 right_arm_loss=0.002035 imbalance=0.002241 batch_time_s=0.2262
|
| 100 |
+
eval_batch=95 loss=0.008810 left_arm_loss=0.003654 right_arm_loss=0.013966 imbalance=0.010312 batch_time_s=0.3757
|
| 101 |
+
eval_batch=96 loss=0.007168 left_arm_loss=0.008067 right_arm_loss=0.006269 imbalance=0.001798 batch_time_s=0.3337
|
| 102 |
+
eval_batch=97 loss=0.015145 left_arm_loss=0.027200 right_arm_loss=0.003091 imbalance=0.024109 batch_time_s=0.3449
|
| 103 |
+
eval_batch=98 loss=0.035565 left_arm_loss=0.066956 right_arm_loss=0.004173 imbalance=0.062783 batch_time_s=0.2216
|
| 104 |
+
eval_batch=99 loss=0.021572 left_arm_loss=0.039440 right_arm_loss=0.003703 imbalance=0.035737 batch_time_s=0.3681
|
| 105 |
+
eval_batch=100 loss=0.021406 left_arm_loss=0.036665 right_arm_loss=0.006146 imbalance=0.030518 batch_time_s=0.3594
|
| 106 |
+
config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 107 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/10000
|
| 108 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 109 |
+
num_batches: 100
|
| 110 |
+
mean_val_loss: 0.022168
|
| 111 |
+
std_val_loss: 0.024902
|
| 112 |
+
mean_left_arm_loss: 0.030184
|
| 113 |
+
std_left_arm_loss: 0.043653
|
| 114 |
+
mean_right_arm_loss: 0.014151
|
| 115 |
+
std_right_arm_loss: 0.029382
|
| 116 |
+
mean_left_joint_loss: 0.032356
|
| 117 |
+
std_left_joint_loss: 0.048977
|
| 118 |
+
mean_left_gripper_loss: 0.014984
|
| 119 |
+
std_left_gripper_loss: 0.037395
|
| 120 |
+
mean_right_joint_loss: 0.014888
|
| 121 |
+
std_right_joint_loss: 0.032582
|
| 122 |
+
mean_right_gripper_loss: 0.008996
|
| 123 |
+
std_right_gripper_loss: 0.025757
|
| 124 |
+
mean_left_right_imbalance: 0.033825
|
| 125 |
+
std_left_right_imbalance: 0.046586
|
| 126 |
+
per_batch_timing_seconds: mean=0.3248 std=0.0893 min=0.2203 max=0.7969
|
| 127 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 128 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 129 |
+
weight_loading_missing_keys: []
|
| 130 |
+
weight_loading_unexpected_keys: []
|
| 131 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.027761 left_arm_mae=0.027813 right_arm_mae=0.027708 imbalance_mae=0.000105 batch_time_s=0.4093
|
| 132 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.013821 left_arm_mae=0.015602 right_arm_mae=0.012041 imbalance_mae=0.003561 batch_time_s=0.2600
|
| 133 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.017503 left_arm_mae=0.019663 right_arm_mae=0.015342 imbalance_mae=0.004321 batch_time_s=0.2638
|
| 134 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.025637 left_arm_mae=0.023152 right_arm_mae=0.028121 imbalance_mae=0.004969 batch_time_s=0.2750
|
| 135 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.027783 left_arm_mae=0.028539 right_arm_mae=0.027028 imbalance_mae=0.001512 batch_time_s=0.4241
|
| 136 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.026692 left_arm_mae=0.044013 right_arm_mae=0.009371 imbalance_mae=0.034641 batch_time_s=0.2679
|
| 137 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.027037 left_arm_mae=0.046942 right_arm_mae=0.007132 imbalance_mae=0.039809 batch_time_s=0.3080
|
| 138 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.024824 left_arm_mae=0.042787 right_arm_mae=0.006862 imbalance_mae=0.035925 batch_time_s=0.3591
|
| 139 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.034854 left_arm_mae=0.062094 right_arm_mae=0.007614 imbalance_mae=0.054480 batch_time_s=0.3635
|
| 140 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.043676 left_arm_mae=0.080232 right_arm_mae=0.007120 imbalance_mae=0.073112 batch_time_s=0.3643
|
| 141 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.030659 left_arm_mae=0.054766 right_arm_mae=0.006553 imbalance_mae=0.048212 batch_time_s=0.3975
|
| 142 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.037423 left_arm_mae=0.066644 right_arm_mae=0.008201 imbalance_mae=0.058443 batch_time_s=0.2758
|
| 143 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.033165 left_arm_mae=0.058826 right_arm_mae=0.007504 imbalance_mae=0.051322 batch_time_s=0.3565
|
| 144 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.023418 left_arm_mae=0.036001 right_arm_mae=0.010836 imbalance_mae=0.025165 batch_time_s=0.3043
|
| 145 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.039746 left_arm_mae=0.018612 right_arm_mae=0.060879 imbalance_mae=0.042267 batch_time_s=0.2737
|
| 146 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.034426 left_arm_mae=0.020315 right_arm_mae=0.048536 imbalance_mae=0.028221 batch_time_s=0.2826
|
| 147 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 148 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.029277
|
| 149 |
+
sample_eval_num_steps_4_std_masked_mae: 0.007579
|
| 150 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.040375
|
| 151 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.019190
|
| 152 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.018178
|
| 153 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.015856
|
| 154 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.043636
|
| 155 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.022278
|
| 156 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.017546
|
| 157 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.013485
|
| 158 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.018908
|
| 159 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.017028
|
| 160 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.013066
|
| 161 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.016678
|
| 162 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.031629
|
| 163 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.022404
|
| 164 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3241 std=0.0551 min=0.2600 max=0.4241
|
| 165 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.030055 left_arm_mae=0.029251 right_arm_mae=0.030859 imbalance_mae=0.001608 batch_time_s=0.4774
|
| 166 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.018086 left_arm_mae=0.019810 right_arm_mae=0.016363 imbalance_mae=0.003447 batch_time_s=0.3371
|
| 167 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.020473 left_arm_mae=0.021246 right_arm_mae=0.019700 imbalance_mae=0.001546 batch_time_s=0.5100
|
| 168 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.027667 left_arm_mae=0.024581 right_arm_mae=0.030754 imbalance_mae=0.006173 batch_time_s=0.4261
|
| 169 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.029556 left_arm_mae=0.029850 right_arm_mae=0.029262 imbalance_mae=0.000588 batch_time_s=0.3332
|
| 170 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.026114 left_arm_mae=0.043516 right_arm_mae=0.008712 imbalance_mae=0.034804 batch_time_s=0.3380
|
| 171 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.027823 left_arm_mae=0.048532 right_arm_mae=0.007113 imbalance_mae=0.041419 batch_time_s=0.4218
|
| 172 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.024256 left_arm_mae=0.040880 right_arm_mae=0.007631 imbalance_mae=0.033249 batch_time_s=0.4172
|
| 173 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.033590 left_arm_mae=0.060695 right_arm_mae=0.006485 imbalance_mae=0.054211 batch_time_s=0.4333
|
| 174 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.043233 left_arm_mae=0.079897 right_arm_mae=0.006570 imbalance_mae=0.073327 batch_time_s=0.3751
|
| 175 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.031220 left_arm_mae=0.055826 right_arm_mae=0.006614 imbalance_mae=0.049212 batch_time_s=0.4710
|
| 176 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.037113 left_arm_mae=0.066601 right_arm_mae=0.007624 imbalance_mae=0.058976 batch_time_s=0.3541
|
| 177 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.034603 left_arm_mae=0.062063 right_arm_mae=0.007144 imbalance_mae=0.054919 batch_time_s=0.4295
|
| 178 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.024069 left_arm_mae=0.037691 right_arm_mae=0.010447 imbalance_mae=0.027245 batch_time_s=0.3363
|
| 179 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.040480 left_arm_mae=0.016929 right_arm_mae=0.064030 imbalance_mae=0.047101 batch_time_s=0.3608
|
| 180 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.035514 left_arm_mae=0.019780 right_arm_mae=0.051249 imbalance_mae=0.031470 batch_time_s=0.4725
|
| 181 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 182 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.030241
|
| 183 |
+
sample_eval_num_steps_10_std_masked_mae: 0.006740
|
| 184 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.041072
|
| 185 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.018866
|
| 186 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.019410
|
| 187 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.017031
|
| 188 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.044817
|
| 189 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.022046
|
| 190 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.014857
|
| 191 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.014376
|
| 192 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.020279
|
| 193 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.018425
|
| 194 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.013323
|
| 195 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.014475
|
| 196 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.032456
|
| 197 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.022935
|
| 198 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.4058 std=0.0569 min=0.3332 max=0.5100
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_2000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_parallel_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.025787 left_arm_loss=0.021923 right_arm_loss=0.029651 imbalance=0.007728 batch_time_s=1.3050
|
| 7 |
+
eval_batch=2 loss=0.010885 left_arm_loss=0.011649 right_arm_loss=0.010121 imbalance=0.001528 batch_time_s=0.2327
|
| 8 |
+
eval_batch=3 loss=0.011956 left_arm_loss=0.016623 right_arm_loss=0.007290 imbalance=0.009332 batch_time_s=0.5065
|
| 9 |
+
eval_batch=4 loss=0.038901 left_arm_loss=0.042096 right_arm_loss=0.035706 imbalance=0.006391 batch_time_s=0.3083
|
| 10 |
+
eval_batch=5 loss=0.022632 left_arm_loss=0.029108 right_arm_loss=0.016157 imbalance=0.012951 batch_time_s=0.4952
|
| 11 |
+
eval_batch=6 loss=0.035525 left_arm_loss=0.067873 right_arm_loss=0.003178 imbalance=0.064695 batch_time_s=0.5699
|
| 12 |
+
eval_batch=7 loss=0.037493 left_arm_loss=0.072243 right_arm_loss=0.002743 imbalance=0.069500 batch_time_s=0.3339
|
| 13 |
+
eval_batch=8 loss=0.011528 left_arm_loss=0.021023 right_arm_loss=0.002032 imbalance=0.018991 batch_time_s=0.3246
|
| 14 |
+
eval_batch=9 loss=0.014947 left_arm_loss=0.028066 right_arm_loss=0.001828 imbalance=0.026238 batch_time_s=0.2926
|
| 15 |
+
eval_batch=10 loss=0.023378 left_arm_loss=0.045036 right_arm_loss=0.001720 imbalance=0.043316 batch_time_s=0.5283
|
| 16 |
+
eval_batch=11 loss=0.025311 left_arm_loss=0.047408 right_arm_loss=0.003213 imbalance=0.044196 batch_time_s=0.4648
|
| 17 |
+
eval_batch=12 loss=0.022664 left_arm_loss=0.043080 right_arm_loss=0.002247 imbalance=0.040833 batch_time_s=0.2914
|
| 18 |
+
eval_batch=13 loss=0.043299 left_arm_loss=0.083316 right_arm_loss=0.003283 imbalance=0.080034 batch_time_s=0.2490
|
| 19 |
+
eval_batch=14 loss=0.028448 left_arm_loss=0.049884 right_arm_loss=0.007012 imbalance=0.042872 batch_time_s=0.3239
|
| 20 |
+
eval_batch=15 loss=0.055534 left_arm_loss=0.023412 right_arm_loss=0.087656 imbalance=0.064244 batch_time_s=0.7896
|
| 21 |
+
eval_batch=16 loss=0.070242 left_arm_loss=0.037843 right_arm_loss=0.102640 imbalance=0.064797 batch_time_s=0.3277
|
| 22 |
+
eval_batch=17 loss=0.034091 left_arm_loss=0.061349 right_arm_loss=0.006834 imbalance=0.054514 batch_time_s=0.3386
|
| 23 |
+
eval_batch=18 loss=0.048450 left_arm_loss=0.065674 right_arm_loss=0.031225 imbalance=0.034449 batch_time_s=0.2716
|
| 24 |
+
eval_batch=19 loss=0.020858 left_arm_loss=0.026401 right_arm_loss=0.015315 imbalance=0.011086 batch_time_s=0.2662
|
| 25 |
+
eval_batch=20 loss=0.012802 left_arm_loss=0.017406 right_arm_loss=0.008198 imbalance=0.009208 batch_time_s=0.3161
|
| 26 |
+
eval_batch=21 loss=0.022067 left_arm_loss=0.035582 right_arm_loss=0.008551 imbalance=0.027031 batch_time_s=0.2446
|
| 27 |
+
eval_batch=22 loss=0.052524 left_arm_loss=0.058496 right_arm_loss=0.046553 imbalance=0.011943 batch_time_s=0.3242
|
| 28 |
+
eval_batch=23 loss=0.049664 left_arm_loss=0.082497 right_arm_loss=0.016830 imbalance=0.065667 batch_time_s=0.3345
|
| 29 |
+
eval_batch=24 loss=0.057649 left_arm_loss=0.109523 right_arm_loss=0.005776 imbalance=0.103747 batch_time_s=0.3507
|
| 30 |
+
eval_batch=25 loss=0.065660 left_arm_loss=0.129855 right_arm_loss=0.001465 imbalance=0.128389 batch_time_s=0.3145
|
| 31 |
+
eval_batch=26 loss=0.030339 left_arm_loss=0.056315 right_arm_loss=0.004364 imbalance=0.051951 batch_time_s=0.5504
|
| 32 |
+
eval_batch=27 loss=0.026639 left_arm_loss=0.048688 right_arm_loss=0.004590 imbalance=0.044098 batch_time_s=0.2753
|
| 33 |
+
eval_batch=28 loss=0.027996 left_arm_loss=0.054026 right_arm_loss=0.001966 imbalance=0.052060 batch_time_s=0.3143
|
| 34 |
+
eval_batch=29 loss=0.035882 left_arm_loss=0.069171 right_arm_loss=0.002594 imbalance=0.066576 batch_time_s=0.2392
|
| 35 |
+
eval_batch=30 loss=0.053704 left_arm_loss=0.104880 right_arm_loss=0.002527 imbalance=0.102353 batch_time_s=0.2710
|
| 36 |
+
eval_batch=31 loss=0.081458 left_arm_loss=0.154924 right_arm_loss=0.007991 imbalance=0.146933 batch_time_s=0.3473
|
| 37 |
+
eval_batch=32 loss=0.070487 left_arm_loss=0.072677 right_arm_loss=0.068297 imbalance=0.004380 batch_time_s=0.3377
|
| 38 |
+
eval_batch=33 loss=0.046639 left_arm_loss=0.018259 right_arm_loss=0.075019 imbalance=0.056760 batch_time_s=0.3076
|
| 39 |
+
eval_batch=34 loss=0.085334 left_arm_loss=0.123811 right_arm_loss=0.046856 imbalance=0.076955 batch_time_s=0.2470
|
| 40 |
+
eval_batch=35 loss=0.043193 left_arm_loss=0.075120 right_arm_loss=0.011267 imbalance=0.063853 batch_time_s=0.2781
|
| 41 |
+
eval_batch=36 loss=0.024055 left_arm_loss=0.014381 right_arm_loss=0.033729 imbalance=0.019349 batch_time_s=0.3140
|
| 42 |
+
eval_batch=37 loss=0.015806 left_arm_loss=0.021006 right_arm_loss=0.010606 imbalance=0.010401 batch_time_s=0.3179
|
| 43 |
+
eval_batch=38 loss=0.046615 left_arm_loss=0.061180 right_arm_loss=0.032049 imbalance=0.029131 batch_time_s=0.3286
|
| 44 |
+
eval_batch=39 loss=0.054128 left_arm_loss=0.033725 right_arm_loss=0.074530 imbalance=0.040805 batch_time_s=0.3452
|
| 45 |
+
eval_batch=40 loss=0.022496 left_arm_loss=0.022509 right_arm_loss=0.022484 imbalance=0.000026 batch_time_s=0.2541
|
| 46 |
+
eval_batch=41 loss=0.050047 left_arm_loss=0.097146 right_arm_loss=0.002948 imbalance=0.094197 batch_time_s=0.3104
|
| 47 |
+
eval_batch=42 loss=0.024861 left_arm_loss=0.046637 right_arm_loss=0.003085 imbalance=0.043553 batch_time_s=0.3127
|
| 48 |
+
eval_batch=43 loss=0.013173 left_arm_loss=0.023176 right_arm_loss=0.003170 imbalance=0.020006 batch_time_s=0.3346
|
| 49 |
+
eval_batch=44 loss=0.013327 left_arm_loss=0.024117 right_arm_loss=0.002537 imbalance=0.021580 batch_time_s=0.3632
|
| 50 |
+
eval_batch=45 loss=0.016324 left_arm_loss=0.029968 right_arm_loss=0.002681 imbalance=0.027287 batch_time_s=0.3180
|
| 51 |
+
eval_batch=46 loss=0.028118 left_arm_loss=0.051257 right_arm_loss=0.004978 imbalance=0.046279 batch_time_s=0.4321
|
| 52 |
+
eval_batch=47 loss=0.106531 left_arm_loss=0.017094 right_arm_loss=0.195969 imbalance=0.178875 batch_time_s=0.5261
|
| 53 |
+
eval_batch=48 loss=0.120483 left_arm_loss=0.010918 right_arm_loss=0.230047 imbalance=0.219129 batch_time_s=0.3434
|
| 54 |
+
eval_batch=49 loss=0.026319 left_arm_loss=0.006001 right_arm_loss=0.046636 imbalance=0.040635 batch_time_s=0.3443
|
| 55 |
+
eval_batch=50 loss=0.091088 left_arm_loss=0.043066 right_arm_loss=0.139110 imbalance=0.096045 batch_time_s=0.4223
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/2000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.039947
|
| 61 |
+
std_val_loss: 0.025053
|
| 62 |
+
mean_left_arm_loss: 0.050148
|
| 63 |
+
std_left_arm_loss: 0.033233
|
| 64 |
+
mean_right_arm_loss: 0.029745
|
| 65 |
+
std_right_arm_loss: 0.047860
|
| 66 |
+
mean_left_joint_loss: 0.051925
|
| 67 |
+
std_left_joint_loss: 0.036277
|
| 68 |
+
mean_left_gripper_loss: 0.037711
|
| 69 |
+
std_left_gripper_loss: 0.077017
|
| 70 |
+
mean_right_joint_loss: 0.030139
|
| 71 |
+
std_right_joint_loss: 0.051862
|
| 72 |
+
mean_right_gripper_loss: 0.026984
|
| 73 |
+
std_right_gripper_loss: 0.065713
|
| 74 |
+
mean_left_right_imbalance: 0.051938
|
| 75 |
+
std_left_right_imbalance: 0.044701
|
| 76 |
+
per_batch_timing_seconds: mean=0.3708 std=0.1690 min=0.2327 max=1.3050
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.061882 left_arm_mae=0.057364 right_arm_mae=0.066401 imbalance_mae=0.009037 batch_time_s=0.3858
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.041649 left_arm_mae=0.049056 right_arm_mae=0.034243 imbalance_mae=0.014814 batch_time_s=0.3355
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.043529 left_arm_mae=0.052206 right_arm_mae=0.034851 imbalance_mae=0.017355 batch_time_s=0.2794
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.056773 left_arm_mae=0.064577 right_arm_mae=0.048968 imbalance_mae=0.015609 batch_time_s=0.2793
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.049480 left_arm_mae=0.055472 right_arm_mae=0.043487 imbalance_mae=0.011986 batch_time_s=0.3373
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.073431 left_arm_mae=0.128902 right_arm_mae=0.017959 imbalance_mae=0.110943 batch_time_s=0.3862
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.076275 left_arm_mae=0.134920 right_arm_mae=0.017629 imbalance_mae=0.117291 batch_time_s=0.3713
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.042675 left_arm_mae=0.068631 right_arm_mae=0.016719 imbalance_mae=0.051913 batch_time_s=0.2806
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.047099 left_arm_mae=0.078409 right_arm_mae=0.015790 imbalance_mae=0.062619 batch_time_s=0.3906
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.062623 left_arm_mae=0.107672 right_arm_mae=0.017573 imbalance_mae=0.090099 batch_time_s=0.3369
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.058652 left_arm_mae=0.098428 right_arm_mae=0.018877 imbalance_mae=0.079552 batch_time_s=0.3893
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.051733 left_arm_mae=0.085662 right_arm_mae=0.017805 imbalance_mae=0.067856 batch_time_s=0.4135
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.073952 left_arm_mae=0.130081 right_arm_mae=0.017823 imbalance_mae=0.112258 batch_time_s=0.3486
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.064349 left_arm_mae=0.101978 right_arm_mae=0.026721 imbalance_mae=0.075257 batch_time_s=0.2832
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.067566 left_arm_mae=0.030355 right_arm_mae=0.104778 imbalance_mae=0.074423 batch_time_s=0.7256
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.086088 left_arm_mae=0.052028 right_arm_mae=0.120148 imbalance_mae=0.068119 batch_time_s=0.3804
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.059860
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.012924
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.080984
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.031604
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.038736
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.031293
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.086197
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.035912
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.044490
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.062755
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.039304
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.030982
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.034761
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.051397
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.061196
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.036442
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3702 std=0.1017 min=0.2793 max=0.7256
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.068575 left_arm_mae=0.066392 right_arm_mae=0.070757 imbalance_mae=0.004365 batch_time_s=0.4156
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.048682 left_arm_mae=0.056914 right_arm_mae=0.040451 imbalance_mae=0.016462 batch_time_s=0.3402
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.048330 left_arm_mae=0.056728 right_arm_mae=0.039932 imbalance_mae=0.016797 batch_time_s=0.6590
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.064731 left_arm_mae=0.072759 right_arm_mae=0.056703 imbalance_mae=0.016055 batch_time_s=0.4853
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.056433 left_arm_mae=0.061980 right_arm_mae=0.050886 imbalance_mae=0.011094 batch_time_s=0.4784
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.079709 left_arm_mae=0.137447 right_arm_mae=0.021970 imbalance_mae=0.115477 batch_time_s=0.3479
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.079619 left_arm_mae=0.139576 right_arm_mae=0.019663 imbalance_mae=0.119913 batch_time_s=0.4953
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.047182 left_arm_mae=0.076524 right_arm_mae=0.017840 imbalance_mae=0.058684 batch_time_s=0.4351
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.053413 left_arm_mae=0.088859 right_arm_mae=0.017968 imbalance_mae=0.070891 batch_time_s=0.6540
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.066754 left_arm_mae=0.114514 right_arm_mae=0.018994 imbalance_mae=0.095520 batch_time_s=0.3876
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.064689 left_arm_mae=0.108810 right_arm_mae=0.020569 imbalance_mae=0.088241 batch_time_s=0.4600
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.060080 left_arm_mae=0.098145 right_arm_mae=0.022016 imbalance_mae=0.076129 batch_time_s=0.4352
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.079265 left_arm_mae=0.137559 right_arm_mae=0.020971 imbalance_mae=0.116587 batch_time_s=0.3373
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.071031 left_arm_mae=0.110774 right_arm_mae=0.031288 imbalance_mae=0.079487 batch_time_s=0.4683
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.074507 left_arm_mae=0.037228 right_arm_mae=0.111785 imbalance_mae=0.074557 batch_time_s=0.4737
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.091350 left_arm_mae=0.055550 right_arm_mae=0.127150 imbalance_mae=0.071600 batch_time_s=0.4467
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.065897
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.012628
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.088735
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.032010
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.043059
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.032823
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.094654
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.036668
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.047298
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.064660
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.043769
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.032862
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.038089
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.049635
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.064491
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.038643
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.4575 std=0.0902 min=0.3373 max=0.6590
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/handover_packed_parallel_10k_val_5000.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_eval config=pi05_twin_handover_256_packed_parallel_pytorch_10k checkpoint=/workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000 repo_id=lsnu/twin_handover_256_val
|
| 2 |
+
eval_loader batch_size=16 num_batches=50 num_workers=0
|
| 3 |
+
teacher_forced_eval_seed: 123
|
| 4 |
+
sample_eval enabled=True batch_size=16 num_batches=16 num_steps=[4, 10] seed=321
|
| 5 |
+
weight_loading missing=0 unexpected=0 device=cuda:0
|
| 6 |
+
eval_batch=1 loss=0.018009 left_arm_loss=0.018792 right_arm_loss=0.017225 imbalance=0.001567 batch_time_s=1.7875
|
| 7 |
+
eval_batch=2 loss=0.003388 left_arm_loss=0.002589 right_arm_loss=0.004187 imbalance=0.001598 batch_time_s=0.4062
|
| 8 |
+
eval_batch=3 loss=0.003306 left_arm_loss=0.002658 right_arm_loss=0.003954 imbalance=0.001296 batch_time_s=0.5074
|
| 9 |
+
eval_batch=4 loss=0.017967 left_arm_loss=0.019657 right_arm_loss=0.016276 imbalance=0.003381 batch_time_s=0.4412
|
| 10 |
+
eval_batch=5 loss=0.012909 left_arm_loss=0.015657 right_arm_loss=0.010161 imbalance=0.005496 batch_time_s=0.4610
|
| 11 |
+
eval_batch=6 loss=0.012707 left_arm_loss=0.023879 right_arm_loss=0.001535 imbalance=0.022344 batch_time_s=0.5589
|
| 12 |
+
eval_batch=7 loss=0.012281 left_arm_loss=0.023433 right_arm_loss=0.001129 imbalance=0.022304 batch_time_s=0.3979
|
| 13 |
+
eval_batch=8 loss=0.010313 left_arm_loss=0.019642 right_arm_loss=0.000985 imbalance=0.018657 batch_time_s=0.2939
|
| 14 |
+
eval_batch=9 loss=0.011270 left_arm_loss=0.021697 right_arm_loss=0.000842 imbalance=0.020855 batch_time_s=0.2766
|
| 15 |
+
eval_batch=10 loss=0.020419 left_arm_loss=0.040029 right_arm_loss=0.000809 imbalance=0.039219 batch_time_s=0.2618
|
| 16 |
+
eval_batch=11 loss=0.012979 left_arm_loss=0.024547 right_arm_loss=0.001411 imbalance=0.023136 batch_time_s=0.2342
|
| 17 |
+
eval_batch=12 loss=0.016370 left_arm_loss=0.031587 right_arm_loss=0.001153 imbalance=0.030434 batch_time_s=0.2544
|
| 18 |
+
eval_batch=13 loss=0.022673 left_arm_loss=0.043847 right_arm_loss=0.001498 imbalance=0.042349 batch_time_s=0.3947
|
| 19 |
+
eval_batch=14 loss=0.015649 left_arm_loss=0.013524 right_arm_loss=0.017774 imbalance=0.004250 batch_time_s=0.3622
|
| 20 |
+
eval_batch=15 loss=0.065092 left_arm_loss=0.016442 right_arm_loss=0.113742 imbalance=0.097301 batch_time_s=0.3778
|
| 21 |
+
eval_batch=16 loss=0.031027 left_arm_loss=0.014831 right_arm_loss=0.047224 imbalance=0.032393 batch_time_s=0.2350
|
| 22 |
+
eval_batch=17 loss=0.020677 left_arm_loss=0.037752 right_arm_loss=0.003602 imbalance=0.034149 batch_time_s=0.2326
|
| 23 |
+
eval_batch=18 loss=0.032304 left_arm_loss=0.042944 right_arm_loss=0.021663 imbalance=0.021281 batch_time_s=0.2283
|
| 24 |
+
eval_batch=19 loss=0.010371 left_arm_loss=0.016259 right_arm_loss=0.004484 imbalance=0.011775 batch_time_s=0.3932
|
| 25 |
+
eval_batch=20 loss=0.015657 left_arm_loss=0.026673 right_arm_loss=0.004640 imbalance=0.022033 batch_time_s=0.4344
|
| 26 |
+
eval_batch=21 loss=0.073863 left_arm_loss=0.143820 right_arm_loss=0.003905 imbalance=0.139915 batch_time_s=0.3016
|
| 27 |
+
eval_batch=22 loss=0.086733 left_arm_loss=0.138835 right_arm_loss=0.034632 imbalance=0.104203 batch_time_s=0.3656
|
| 28 |
+
eval_batch=23 loss=0.041098 left_arm_loss=0.072591 right_arm_loss=0.009606 imbalance=0.062984 batch_time_s=0.2442
|
| 29 |
+
eval_batch=24 loss=0.083534 left_arm_loss=0.164134 right_arm_loss=0.002933 imbalance=0.161201 batch_time_s=0.3228
|
| 30 |
+
eval_batch=25 loss=0.067565 left_arm_loss=0.134226 right_arm_loss=0.000903 imbalance=0.133323 batch_time_s=0.4508
|
| 31 |
+
eval_batch=26 loss=0.030208 left_arm_loss=0.057778 right_arm_loss=0.002639 imbalance=0.055139 batch_time_s=0.3326
|
| 32 |
+
eval_batch=27 loss=0.029988 left_arm_loss=0.055316 right_arm_loss=0.004661 imbalance=0.050655 batch_time_s=0.2515
|
| 33 |
+
eval_batch=28 loss=0.017679 left_arm_loss=0.034427 right_arm_loss=0.000931 imbalance=0.033496 batch_time_s=0.3445
|
| 34 |
+
eval_batch=29 loss=0.028188 left_arm_loss=0.054125 right_arm_loss=0.002251 imbalance=0.051874 batch_time_s=0.3502
|
| 35 |
+
eval_batch=30 loss=0.025111 left_arm_loss=0.046639 right_arm_loss=0.003583 imbalance=0.043056 batch_time_s=0.4274
|
| 36 |
+
eval_batch=31 loss=0.047902 left_arm_loss=0.091445 right_arm_loss=0.004359 imbalance=0.087086 batch_time_s=0.2993
|
| 37 |
+
eval_batch=32 loss=0.034540 left_arm_loss=0.036401 right_arm_loss=0.032679 imbalance=0.003722 batch_time_s=0.3326
|
| 38 |
+
eval_batch=33 loss=0.030009 left_arm_loss=0.011000 right_arm_loss=0.049019 imbalance=0.038019 batch_time_s=0.3898
|
| 39 |
+
eval_batch=34 loss=0.064066 left_arm_loss=0.109818 right_arm_loss=0.018313 imbalance=0.091505 batch_time_s=0.3321
|
| 40 |
+
eval_batch=35 loss=0.038442 left_arm_loss=0.072379 right_arm_loss=0.004506 imbalance=0.067873 batch_time_s=0.4945
|
| 41 |
+
eval_batch=36 loss=0.015525 left_arm_loss=0.012302 right_arm_loss=0.018747 imbalance=0.006445 batch_time_s=0.3318
|
| 42 |
+
eval_batch=37 loss=0.004400 left_arm_loss=0.005982 right_arm_loss=0.002817 imbalance=0.003166 batch_time_s=0.2853
|
| 43 |
+
eval_batch=38 loss=0.033808 left_arm_loss=0.038027 right_arm_loss=0.029589 imbalance=0.008438 batch_time_s=0.3567
|
| 44 |
+
eval_batch=39 loss=0.031964 left_arm_loss=0.013754 right_arm_loss=0.050174 imbalance=0.036420 batch_time_s=0.2974
|
| 45 |
+
eval_batch=40 loss=0.014522 left_arm_loss=0.017470 right_arm_loss=0.011574 imbalance=0.005896 batch_time_s=0.3888
|
| 46 |
+
eval_batch=41 loss=0.024863 left_arm_loss=0.048452 right_arm_loss=0.001273 imbalance=0.047179 batch_time_s=0.4214
|
| 47 |
+
eval_batch=42 loss=0.012502 left_arm_loss=0.023855 right_arm_loss=0.001148 imbalance=0.022707 batch_time_s=0.3489
|
| 48 |
+
eval_batch=43 loss=0.004550 left_arm_loss=0.007728 right_arm_loss=0.001372 imbalance=0.006356 batch_time_s=0.3647
|
| 49 |
+
eval_batch=44 loss=0.003732 left_arm_loss=0.006069 right_arm_loss=0.001396 imbalance=0.004672 batch_time_s=0.2821
|
| 50 |
+
eval_batch=45 loss=0.006992 left_arm_loss=0.012467 right_arm_loss=0.001518 imbalance=0.010949 batch_time_s=0.3792
|
| 51 |
+
eval_batch=46 loss=0.022667 left_arm_loss=0.043763 right_arm_loss=0.001571 imbalance=0.042192 batch_time_s=0.2396
|
| 52 |
+
eval_batch=47 loss=0.026646 left_arm_loss=0.008901 right_arm_loss=0.044391 imbalance=0.035490 batch_time_s=0.2305
|
| 53 |
+
eval_batch=48 loss=0.032550 left_arm_loss=0.005242 right_arm_loss=0.059858 imbalance=0.054616 batch_time_s=0.3562
|
| 54 |
+
eval_batch=49 loss=0.007825 left_arm_loss=0.002985 right_arm_loss=0.012665 imbalance=0.009680 batch_time_s=0.2352
|
| 55 |
+
eval_batch=50 loss=0.060185 left_arm_loss=0.031356 right_arm_loss=0.089014 imbalance=0.057658 batch_time_s=0.2872
|
| 56 |
+
config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 57 |
+
checkpoint_path: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/handover_packed_parallel_10k/5000
|
| 58 |
+
repo_id_used: lsnu/twin_handover_256_val
|
| 59 |
+
num_batches: 50
|
| 60 |
+
mean_val_loss: 0.027340
|
| 61 |
+
std_val_loss: 0.020897
|
| 62 |
+
mean_left_arm_loss: 0.039155
|
| 63 |
+
std_left_arm_loss: 0.038641
|
| 64 |
+
mean_right_arm_loss: 0.015526
|
| 65 |
+
std_right_arm_loss: 0.023413
|
| 66 |
+
mean_left_joint_loss: 0.042035
|
| 67 |
+
std_left_joint_loss: 0.043377
|
| 68 |
+
mean_left_gripper_loss: 0.018994
|
| 69 |
+
std_left_gripper_loss: 0.032843
|
| 70 |
+
mean_right_joint_loss: 0.015753
|
| 71 |
+
std_right_joint_loss: 0.024564
|
| 72 |
+
mean_right_gripper_loss: 0.013938
|
| 73 |
+
std_right_gripper_loss: 0.029304
|
| 74 |
+
mean_left_right_imbalance: 0.038635
|
| 75 |
+
std_left_right_imbalance: 0.037436
|
| 76 |
+
per_batch_timing_seconds: mean=0.3717 std=0.2172 min=0.2283 max=1.7875
|
| 77 |
+
active_mask_dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 78 |
+
masked_dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31]
|
| 79 |
+
weight_loading_missing_keys: []
|
| 80 |
+
weight_loading_unexpected_keys: []
|
| 81 |
+
sample_eval_batch=1 num_steps=4 masked_mae=0.050586 left_arm_mae=0.058916 right_arm_mae=0.042257 imbalance_mae=0.016659 batch_time_s=0.3724
|
| 82 |
+
sample_eval_batch=2 num_steps=4 masked_mae=0.022248 left_arm_mae=0.021135 right_arm_mae=0.023362 imbalance_mae=0.002226 batch_time_s=0.3071
|
| 83 |
+
sample_eval_batch=3 num_steps=4 masked_mae=0.023393 left_arm_mae=0.020391 right_arm_mae=0.026394 imbalance_mae=0.006003 batch_time_s=0.3356
|
| 84 |
+
sample_eval_batch=4 num_steps=4 masked_mae=0.035006 left_arm_mae=0.031920 right_arm_mae=0.038093 imbalance_mae=0.006173 batch_time_s=0.3073
|
| 85 |
+
sample_eval_batch=5 num_steps=4 masked_mae=0.033634 left_arm_mae=0.037647 right_arm_mae=0.029620 imbalance_mae=0.008027 batch_time_s=0.4116
|
| 86 |
+
sample_eval_batch=6 num_steps=4 masked_mae=0.037616 left_arm_mae=0.063739 right_arm_mae=0.011493 imbalance_mae=0.052246 batch_time_s=0.2947
|
| 87 |
+
sample_eval_batch=7 num_steps=4 masked_mae=0.034674 left_arm_mae=0.057874 right_arm_mae=0.011474 imbalance_mae=0.046401 batch_time_s=0.3792
|
| 88 |
+
sample_eval_batch=8 num_steps=4 masked_mae=0.032207 left_arm_mae=0.053714 right_arm_mae=0.010699 imbalance_mae=0.043015 batch_time_s=0.4709
|
| 89 |
+
sample_eval_batch=9 num_steps=4 masked_mae=0.044335 left_arm_mae=0.077539 right_arm_mae=0.011131 imbalance_mae=0.066409 batch_time_s=0.3545
|
| 90 |
+
sample_eval_batch=10 num_steps=4 masked_mae=0.051304 left_arm_mae=0.091093 right_arm_mae=0.011515 imbalance_mae=0.079578 batch_time_s=0.2719
|
| 91 |
+
sample_eval_batch=11 num_steps=4 masked_mae=0.032892 left_arm_mae=0.055199 right_arm_mae=0.010585 imbalance_mae=0.044614 batch_time_s=0.2832
|
| 92 |
+
sample_eval_batch=12 num_steps=4 masked_mae=0.040746 left_arm_mae=0.070150 right_arm_mae=0.011341 imbalance_mae=0.058809 batch_time_s=0.3939
|
| 93 |
+
sample_eval_batch=13 num_steps=4 masked_mae=0.040115 left_arm_mae=0.068278 right_arm_mae=0.011951 imbalance_mae=0.056326 batch_time_s=0.4705
|
| 94 |
+
sample_eval_batch=14 num_steps=4 masked_mae=0.035901 left_arm_mae=0.049350 right_arm_mae=0.022452 imbalance_mae=0.026898 batch_time_s=0.5485
|
| 95 |
+
sample_eval_batch=15 num_steps=4 masked_mae=0.080623 left_arm_mae=0.025484 right_arm_mae=0.135762 imbalance_mae=0.110278 batch_time_s=0.5356
|
| 96 |
+
sample_eval_batch=16 num_steps=4 masked_mae=0.056104 left_arm_mae=0.028460 right_arm_mae=0.083749 imbalance_mae=0.055290 batch_time_s=0.4407
|
| 97 |
+
sample_eval_num_steps_4_num_batches: 16
|
| 98 |
+
sample_eval_num_steps_4_mean_masked_mae: 0.040712
|
| 99 |
+
sample_eval_num_steps_4_std_masked_mae: 0.013646
|
| 100 |
+
sample_eval_num_steps_4_mean_left_arm_mae: 0.050681
|
| 101 |
+
sample_eval_num_steps_4_std_left_arm_mae: 0.020624
|
| 102 |
+
sample_eval_num_steps_4_mean_right_arm_mae: 0.030742
|
| 103 |
+
sample_eval_num_steps_4_std_right_arm_mae: 0.032790
|
| 104 |
+
sample_eval_num_steps_4_mean_left_joint_mae: 0.053976
|
| 105 |
+
sample_eval_num_steps_4_std_left_joint_mae: 0.024153
|
| 106 |
+
sample_eval_num_steps_4_mean_left_gripper_mae: 0.027611
|
| 107 |
+
sample_eval_num_steps_4_std_left_gripper_mae: 0.024580
|
| 108 |
+
sample_eval_num_steps_4_mean_right_joint_mae: 0.032227
|
| 109 |
+
sample_eval_num_steps_4_std_right_joint_mae: 0.036350
|
| 110 |
+
sample_eval_num_steps_4_mean_right_gripper_mae: 0.020349
|
| 111 |
+
sample_eval_num_steps_4_std_right_gripper_mae: 0.017496
|
| 112 |
+
sample_eval_num_steps_4_mean_left_right_imbalance_mae: 0.042435
|
| 113 |
+
sample_eval_num_steps_4_std_left_right_imbalance_mae: 0.029207
|
| 114 |
+
sample_eval_num_steps_4_per_batch_timing_seconds: mean=0.3861 std=0.0848 min=0.2719 max=0.5485
|
| 115 |
+
sample_eval_batch=1 num_steps=10 masked_mae=0.060244 left_arm_mae=0.069755 right_arm_mae=0.050732 imbalance_mae=0.019023 batch_time_s=0.3931
|
| 116 |
+
sample_eval_batch=2 num_steps=10 masked_mae=0.028194 left_arm_mae=0.026266 right_arm_mae=0.030122 imbalance_mae=0.003856 batch_time_s=0.4060
|
| 117 |
+
sample_eval_batch=3 num_steps=10 masked_mae=0.029302 left_arm_mae=0.026488 right_arm_mae=0.032115 imbalance_mae=0.005627 batch_time_s=0.6280
|
| 118 |
+
sample_eval_batch=4 num_steps=10 masked_mae=0.040353 left_arm_mae=0.038823 right_arm_mae=0.041882 imbalance_mae=0.003059 batch_time_s=0.5683
|
| 119 |
+
sample_eval_batch=5 num_steps=10 masked_mae=0.037448 left_arm_mae=0.040207 right_arm_mae=0.034689 imbalance_mae=0.005518 batch_time_s=0.4537
|
| 120 |
+
sample_eval_batch=6 num_steps=10 masked_mae=0.041892 left_arm_mae=0.069450 right_arm_mae=0.014334 imbalance_mae=0.055116 batch_time_s=0.5177
|
| 121 |
+
sample_eval_batch=7 num_steps=10 masked_mae=0.037873 left_arm_mae=0.061853 right_arm_mae=0.013892 imbalance_mae=0.047961 batch_time_s=0.3831
|
| 122 |
+
sample_eval_batch=8 num_steps=10 masked_mae=0.035303 left_arm_mae=0.058263 right_arm_mae=0.012343 imbalance_mae=0.045920 batch_time_s=0.3624
|
| 123 |
+
sample_eval_batch=9 num_steps=10 masked_mae=0.049224 left_arm_mae=0.084585 right_arm_mae=0.013863 imbalance_mae=0.070723 batch_time_s=0.4046
|
| 124 |
+
sample_eval_batch=10 num_steps=10 masked_mae=0.053856 left_arm_mae=0.092990 right_arm_mae=0.014723 imbalance_mae=0.078267 batch_time_s=0.3373
|
| 125 |
+
sample_eval_batch=11 num_steps=10 masked_mae=0.036063 left_arm_mae=0.058790 right_arm_mae=0.013336 imbalance_mae=0.045454 batch_time_s=0.4558
|
| 126 |
+
sample_eval_batch=12 num_steps=10 masked_mae=0.043667 left_arm_mae=0.073829 right_arm_mae=0.013505 imbalance_mae=0.060324 batch_time_s=0.3940
|
| 127 |
+
sample_eval_batch=13 num_steps=10 masked_mae=0.044050 left_arm_mae=0.071945 right_arm_mae=0.016154 imbalance_mae=0.055791 batch_time_s=0.5080
|
| 128 |
+
sample_eval_batch=14 num_steps=10 masked_mae=0.040370 left_arm_mae=0.054512 right_arm_mae=0.026228 imbalance_mae=0.028284 batch_time_s=0.5988
|
| 129 |
+
sample_eval_batch=15 num_steps=10 masked_mae=0.080254 left_arm_mae=0.023710 right_arm_mae=0.136797 imbalance_mae=0.113086 batch_time_s=0.4224
|
| 130 |
+
sample_eval_batch=16 num_steps=10 masked_mae=0.058699 left_arm_mae=0.028788 right_arm_mae=0.088609 imbalance_mae=0.059822 batch_time_s=0.4455
|
| 131 |
+
sample_eval_num_steps_10_num_batches: 16
|
| 132 |
+
sample_eval_num_steps_10_mean_masked_mae: 0.044799
|
| 133 |
+
sample_eval_num_steps_10_std_masked_mae: 0.012807
|
| 134 |
+
sample_eval_num_steps_10_mean_left_arm_mae: 0.055016
|
| 135 |
+
sample_eval_num_steps_10_std_left_arm_mae: 0.021278
|
| 136 |
+
sample_eval_num_steps_10_mean_right_arm_mae: 0.034583
|
| 137 |
+
sample_eval_num_steps_10_std_right_arm_mae: 0.032757
|
| 138 |
+
sample_eval_num_steps_10_mean_left_joint_mae: 0.059296
|
| 139 |
+
sample_eval_num_steps_10_std_left_joint_mae: 0.025068
|
| 140 |
+
sample_eval_num_steps_10_mean_left_gripper_mae: 0.025058
|
| 141 |
+
sample_eval_num_steps_10_std_left_gripper_mae: 0.027173
|
| 142 |
+
sample_eval_num_steps_10_mean_right_joint_mae: 0.035777
|
| 143 |
+
sample_eval_num_steps_10_std_right_joint_mae: 0.036454
|
| 144 |
+
sample_eval_num_steps_10_mean_right_gripper_mae: 0.026224
|
| 145 |
+
sample_eval_num_steps_10_std_right_gripper_mae: 0.016890
|
| 146 |
+
sample_eval_num_steps_10_mean_left_right_imbalance_mae: 0.043614
|
| 147 |
+
sample_eval_num_steps_10_std_left_right_imbalance_mae: 0.030178
|
| 148 |
+
sample_eval_num_steps_10_per_batch_timing_seconds: mean=0.4549 std=0.0835 min=0.3373 max=0.6280
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/smoke_baseline_10k_diag.log
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
W0309 15:40:17.171000 3586 torch/distributed/run.py:766]
|
| 2 |
+
W0309 15:40:17.171000 3586 torch/distributed/run.py:766] *****************************************
|
| 3 |
+
W0309 15:40:17.171000 3586 torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
W0309 15:40:17.171000 3586 torch/distributed/run.py:766] *****************************************
|
| 5 |
+
15:41:17.850 [I] Created experiment checkpoint directory: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/smoke_baseline_10k_diag (3655:train_pytorch.py:505)
|
| 6 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 7 |
+
warnings.warn( # warn only once
|
| 8 |
+
[rank0]:[W309 15:41:18.330229924 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 9 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 10 |
+
warnings.warn( # warn only once
|
| 11 |
+
[rank2]:[W309 15:41:18.361924667 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 12 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 13 |
+
warnings.warn( # warn only once
|
| 14 |
+
[rank3]:[W309 15:41:18.083889614 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 15 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 16 |
+
warnings.warn( # warn only once
|
| 17 |
+
[rank1]:[W309 15:41:19.988503311 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 18 |
+
15:41:20.805 [I] Using batch size per GPU: 4 (total batch size across 4 GPUs: 16) (3655:train_pytorch.py:524)
|
| 19 |
+
15:41:20.957 [I] Loaded norm stats from /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/lsnu/twin_handover_256_train (3655:config.py:234)
|
| 20 |
+
15:41:20.960 [I] data_config: DataConfig(repo_id='lsnu/twin_handover_256_train', asset_id='lsnu/twin_handover_256_train', norm_stats={'state': NormStats(mean=array([ 0.40321857, 0.17899239, -0.07588876, -2.06326795, -0.46418607,
|
| 21 |
+
1.79356563, 0.70229131, 0.48194093, 0.93952829, 0.86693275,
|
| 22 |
+
-1.03168762, -1.9056077 , -0.53421056, 1.87584054, 2.36738205,
|
| 23 |
+
0.91249251]), std=array([0.73344636, 0.47653052, 0.72710407, 0.42399687, 0.63613892,
|
| 24 |
+
0.61144608, 1.11724186, 0.49967375, 0.86981195, 0.75071597,
|
| 25 |
+
0.90787333, 0.35008711, 0.51183224, 0.36600712, 0.56947577,
|
| 26 |
+
0.28257725]), q01=array([-1.52408956, -1.32446341, -1.91092197, -2.89885788, -1.66315554,
|
| 27 |
+
0.59010215, -2.27611645, 0. , -1.77352981, -1.62131719,
|
| 28 |
+
-1.77092851, -2.19172778, -2.03159353, 0.55409113, 0.79255736,
|
| 29 |
+
0. ]), q99=array([ 2.16638614, 1.38857444, 1.93436338, -0.88548369, 1.39976143,
|
| 30 |
+
2.99162304, 2.8194857 , 0.9998 , 1.46557211, 1.74660106,
|
| 31 |
+
1.58644652, -0.87876934, 2.25910752, 2.54628449, 2.89347284,
|
| 32 |
+
0.9998 ])), 'actions': NormStats(mean=array([ 0.05879939, -0.00704042, -0.02719213, -0.07685276, -0.07520971,
|
| 33 |
+
-0.00498583, 0.03577602, 0.48164892, 0.06564316, 0.06023132,
|
| 34 |
+
-0.10068271, -0.09547432, -0.0526481 , 0.08205888, 0.13954687,
|
| 35 |
+
0.88333535]), std=array([0.18337056, 0.28128958, 0.18525195, 0.29767084, 0.22944973,
|
| 36 |
+
0.40312037, 0.3896611 , 0.49966311, 0.21938531, 0.16883859,
|
| 37 |
+
0.20206179, 0.14864719, 0.12629333, 0.15546791, 0.23423795,
|
| 38 |
+
0.32102022]), q01=array([-0.34140511, -0.71597991, -0.55301429, -0.8233152 , -0.68097536,
|
| 39 |
+
-0.87723451, -0.86000918, 0. , -0.53261366, -0.49289397,
|
| 40 |
+
-0.48524564, -0.35752607, -0.42426748, -0.18230745, -0.09212705,
|
| 41 |
+
0. ]), q99=array([0.55444025, 0.69361174, 0.44115428, 0.550829 , 0.49707318,
|
| 42 |
+
0.68353445, 0.82907713, 0.9998 , 0.42654409, 0.44255511,
|
| 43 |
+
0.4114292 , 0.01550327, 0.38038206, 0.71452535, 0.62808441,
|
| 44 |
+
0.9998 ]))}, repack_transforms=Group(inputs=[RepackTransform(structure={'images': {'cam_high': 'front_image', 'cam_left_wrist': 'wrist_left_image', 'cam_right_wrist': 'wrist_right_image'}, 'state': 'state', 'actions': 'action', 'prompt': 'task'})], outputs=()), data_transforms=Group(inputs=[AlohaInputs(adapt_to_pi=False)], outputs=[]), model_transforms=Group(inputs=[InjectDefaultPrompt(prompt=None), ResizeImages(height=224, width=224), TokenizePrompt(tokenizer=<openpi.models.tokenizer.PaligemmaTokenizer object at 0x7f1efa6ddb50>, discrete_state_input=True), PackPerArmBlocks(real_arm_dims=(8, 8), block_dims=(16, 16))], outputs=[UnpackPerArmBlocks(real_arm_dims=(8, 8), block_dims=(16, 16))]), use_quantile_norm=True, action_sequence_keys=('action',), prompt_from_task=False, rlds_data_dir=None, action_space=None, datasets=()) (3655:data_loader.py:283)
|
| 45 |
+
15:41:20.969 [I] Using existing local LeRobot dataset mirror for lsnu/twin_handover_256_train: /workspace/lerobot/lsnu/twin_handover_256_train (3655:data_loader.py:149)
|
| 46 |
+
15:41:24.542 [I] local_batch_size: 4 (3655:data_loader.py:364)
|
| 47 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 48 |
+
warnings.warn( # warn only once
|
| 49 |
+
15:42:35.770 [I] Enabled gradient checkpointing for PI0Pytorch model (3655:pi0_pytorch.py:150)
|
| 50 |
+
15:42:35.771 [I] Enabled gradient checkpointing for memory optimization (3655:train_pytorch.py:596)
|
| 51 |
+
15:42:35.773 [I] Step 0 (after_model_creation): GPU memory - allocated: 7.47GB, reserved: 7.48GB, free: 0.01GB, peak_allocated: 7.47GB, peak_reserved: 7.48GB | DDP: rank=0, world_size=4 (3655:train_pytorch.py:465)
|
| 52 |
+
15:42:35.940 [I] Loading weights from: /workspace/checkpoints/pi05_base_single_pytorch (3655:train_pytorch.py:625)
|
| 53 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 54 |
+
self.pid = os.fork()
|
| 55 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 56 |
+
self.pid = os.fork()
|
| 57 |
+
15:42:38.116 [I] Weight loading missing key count: 0 (3655:train_pytorch.py:629)
|
| 58 |
+
15:42:38.117 [I] Weight loading missing keys: set() (3655:train_pytorch.py:630)
|
| 59 |
+
15:42:38.118 [I] Weight loading unexpected key count: 0 (3655:train_pytorch.py:631)
|
| 60 |
+
15:42:38.118 [I] Weight loading unexpected keys: [] (3655:train_pytorch.py:632)
|
| 61 |
+
15:42:38.118 [I] Loaded PyTorch weights from /workspace/checkpoints/pi05_base_single_pytorch (3655:train_pytorch.py:633)
|
| 62 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 63 |
+
self.pid = os.fork()
|
| 64 |
+
15:42:38.122 [I] Running on: 9a96de7d560b | world_size=4 (3655:train_pytorch.py:673)
|
| 65 |
+
15:42:38.122 [I] Training config: batch_size=16, effective_batch_size=4, num_train_steps=20 (3655:train_pytorch.py:674)
|
| 66 |
+
15:42:38.123 [I] Memory optimizations: gradient_checkpointing=True (3655:train_pytorch.py:677)
|
| 67 |
+
15:42:38.123 [I] DDP settings: find_unused_parameters=False, gradient_as_bucket_view=True, static_graph=True (3655:train_pytorch.py:678)
|
| 68 |
+
15:42:38.124 [I] LR schedule: warmup=500, peak_lr=2.50e-05, decay_steps=10000, end_lr=2.50e-06 (3655:train_pytorch.py:679)
|
| 69 |
+
15:42:38.124 [I] Optimizer: AdamW, weight_decay=1e-10, clip_norm=1.0 (3655:train_pytorch.py:682)
|
| 70 |
+
15:42:38.124 [I] EMA is not supported for PyTorch training (3655:train_pytorch.py:685)
|
| 71 |
+
15:42:38.125 [I] Training precision: bfloat16 (3655:train_pytorch.py:686)
|
| 72 |
+
15:42:38.129 [I] Resolved config name: pi05_twin_handover_256_packed_baseline_pytorch_10k (3655:train_pytorch.py:280)
|
| 73 |
+
15:42:38.129 [I] Dataset repo_id: lsnu/twin_handover_256_train (3655:train_pytorch.py:281)
|
| 74 |
+
15:42:38.129 [I] Norm-stats file path: /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (3655:train_pytorch.py:282)
|
| 75 |
+
15:42:38.129 [I] Norm-stats summary: {'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (3655:train_pytorch.py:283)
|
| 76 |
+
15:42:38.130 [I] Checkpoint source path: /workspace/checkpoints/pi05_base_single_pytorch (3655:train_pytorch.py:284)
|
| 77 |
+
15:42:38.130 [I] Model type: baseline (3655:train_pytorch.py:285)
|
| 78 |
+
15:42:38.130 [I] Packed transforms active: True (3655:train_pytorch.py:286)
|
| 79 |
+
15:42:38.130 [I] World size: 4 (3655:train_pytorch.py:287)
|
| 80 |
+
15:42:38.130 [I] Batch size: local=4, global=16 (3655:train_pytorch.py:288)
|
| 81 |
+
15:42:38.131 [I] num_workers: 8 (3655:train_pytorch.py:289)
|
| 82 |
+
15:42:38.131 [I] Precision: bfloat16 (3655:train_pytorch.py:290)
|
| 83 |
+
15:42:38.131 [I] LR schedule summary: warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (3655:train_pytorch.py:291)
|
| 84 |
+
15:42:38.131 [I] Save/log intervals: save_interval=20, log_interval=5 (3655:train_pytorch.py:298)
|
| 85 |
+
15:42:38.132 [I] Action-loss mask: (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (3655:train_pytorch.py:299)
|
| 86 |
+
15:42:38.132 [I] Active mask dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (3655:train_pytorch.py:300)
|
| 87 |
+
15:42:38.132 [I] Masked dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (3655:train_pytorch.py:301)
|
| 88 |
+
15:42:38.132 [I] Gradient bucket diagnostics: action_in_proj, action_out_proj, shared_expert (3655:train_pytorch.py:694)
|
| 89 |
+
|
| 90 |
+
self.pid = os.fork()
|
| 91 |
+
15:42:43.978 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (3655:train_pytorch.py:799)
|
| 92 |
+
15:42:43.979 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (3655:train_pytorch.py:803)
|
| 93 |
+
15:42:43.979 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (3655:train_pytorch.py:806)
|
| 94 |
+
15:42:43.979 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (3655:train_pytorch.py:807)
|
| 95 |
+
15:42:43.980 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (3655:train_pytorch.py:810)
|
| 96 |
+
15:42:43.982 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (3655:train_pytorch.py:813)
|
| 97 |
+
15:42:44.012 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (3655:train_pytorch.py:817)
|
| 98 |
+
15:42:44.012 [I] debug_step=1 lr=4.99e-08 grad_norm=15.9656 data_time=2.1447s step_time=3.5958s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (3655:train_pytorch.py:822)
|
| 99 |
+
15:42:44.012 [I] debug_step=1 grad_shared_expert=15.5493 grad_action_in_proj=0.4919 grad_action_out_proj=2.1574 (3655:train_pytorch.py:830)
|
| 100 |
+
|
| 101 |
+
15:42:44.710 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (3655:train_pytorch.py:803)
|
| 102 |
+
15:42:44.711 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (3655:train_pytorch.py:806)
|
| 103 |
+
15:42:44.711 [I] debug_step=2 state_stats min=-1.0000 max=1.0004 mean=0.0430 std=0.4223 (3655:train_pytorch.py:807)
|
| 104 |
+
15:42:44.711 [I] debug_step=2 action_stats min=-1.0000 max=1.0071 mean=0.0532 std=0.4394 (3655:train_pytorch.py:810)
|
| 105 |
+
15:42:44.712 [I] debug_step=2 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (3655:train_pytorch.py:813)
|
| 106 |
+
15:42:44.713 [I] debug_step=2 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (3655:train_pytorch.py:817)
|
| 107 |
+
15:42:44.713 [I] debug_step=2 lr=9.98e-08 grad_norm=7.5566 data_time=0.2466s step_time=0.5634s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (3655:train_pytorch.py:822)
|
| 108 |
+
15:42:44.713 [I] debug_step=2 grad_shared_expert=7.0884 grad_action_in_proj=0.2225 grad_action_out_proj=2.2163 (3655:train_pytorch.py:830)
|
| 109 |
+
|
| 110 |
+
15:42:45.322 [I] debug_step=3 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (3655:train_pytorch.py:803)
|
| 111 |
+
15:42:45.322 [I] debug_step=3 prompt_token_lengths=[74, 68, 72, 73] (3655:train_pytorch.py:806)
|
| 112 |
+
15:42:45.322 [I] debug_step=3 state_stats min=-1.1677 max=1.0004 mean=0.0099 std=0.5093 (3655:train_pytorch.py:807)
|
| 113 |
+
15:42:45.322 [I] debug_step=3 action_stats min=-1.1487 max=1.1439 mean=0.0173 std=0.4079 (3655:train_pytorch.py:810)
|
| 114 |
+
15:42:45.323 [I] debug_step=3 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (3655:train_pytorch.py:813)
|
| 115 |
+
15:42:45.323 [I] debug_step=3 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (3655:train_pytorch.py:817)
|
| 116 |
+
15:42:45.323 [I] debug_step=3 lr=1.50e-07 grad_norm=10.4303 data_time=0.0950s step_time=0.5166s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (3655:train_pytorch.py:822)
|
| 117 |
+
15:42:45.324 [I] debug_step=3 grad_shared_expert=9.9546 grad_action_in_proj=0.3685 grad_action_out_proj=2.4023 (3655:train_pytorch.py:830)
|
| 118 |
+
|
| 119 |
+
15:42:45.904 [I] debug_step=4 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (3655:train_pytorch.py:803)
|
| 120 |
+
15:42:45.904 [I] debug_step=4 prompt_token_lengths=[75, 73, 76, 71] (3655:train_pytorch.py:806)
|
| 121 |
+
15:42:45.905 [I] debug_step=4 state_stats min=-1.0000 max=1.0708 mean=0.0711 std=0.4551 (3655:train_pytorch.py:807)
|
| 122 |
+
15:42:45.905 [I] debug_step=4 action_stats min=-1.0000 max=1.4460 mean=0.0674 std=0.4311 (3655:train_pytorch.py:810)
|
| 123 |
+
15:42:45.905 [I] debug_step=4 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (3655:train_pytorch.py:813)
|
| 124 |
+
15:42:45.906 [I] debug_step=4 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (3655:train_pytorch.py:817)
|
| 125 |
+
15:42:45.906 [I] debug_step=4 lr=2.00e-07 grad_norm=13.0902 data_time=0.0833s step_time=0.4993s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (3655:train_pytorch.py:822)
|
| 126 |
+
15:42:45.906 [I] debug_step=4 grad_shared_expert=12.6485 grad_action_in_proj=0.3687 grad_action_out_proj=2.2604 (3655:train_pytorch.py:830)
|
| 127 |
+
|
| 128 |
+
15:42:46.563 [I] debug_step=5 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (3655:train_pytorch.py:803)
|
| 129 |
+
15:42:46.564 [I] debug_step=5 prompt_token_lengths=[73, 75, 70, 73] (3655:train_pytorch.py:806)
|
| 130 |
+
15:42:46.565 [I] debug_step=5 state_stats min=-1.0000 max=1.0004 mean=0.0188 std=0.4734 (3655:train_pytorch.py:807)
|
| 131 |
+
15:42:46.565 [I] debug_step=5 action_stats min=-1.0000 max=1.0647 mean=0.0147 std=0.3985 (3655:train_pytorch.py:810)
|
| 132 |
+
15:42:46.566 [I] debug_step=5 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (3655:train_pytorch.py:813)
|
| 133 |
+
15:42:46.566 [I] debug_step=5 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (3655:train_pytorch.py:817)
|
| 134 |
+
15:42:46.567 [I] debug_step=5 lr=2.50e-07 grad_norm=21.1458 data_time=0.1041s step_time=0.5550s gpu_mem_allocated=28.49GB gpu_mem_reserved=35.24GB gpu_mem_max_allocated=35.23GB gpu_mem_max_reserved=35.24GB (3655:train_pytorch.py:822)
|
| 135 |
+
15:42:46.567 [I] debug_step=5 grad_shared_expert=20.4420 grad_action_in_proj=0.7223 grad_action_out_proj=2.2568 (3655:train_pytorch.py:830)
|
| 136 |
+
15:42:46.568 [I] step=5 loss=1.2624 smoothed_loss=1.3447 lr=1.50e-07 grad_norm=13.6377 step_time=1.1460s data_time=0.5347s it/s=0.592 eta_to_20=25.3s max_cuda_memory=35.23GB grad_action_in_proj=0.7223 grad_action_out_proj=2.2568 grad_shared_expert=20.4420 (3655:train_pytorch.py:850)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 141 |
+
warnings.warn( # warn only once
|
| 142 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 143 |
+
warnings.warn( # warn only once
|
| 144 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 145 |
+
warnings.warn( # warn only once
|
| 146 |
+
15:44:34.506 [I] Saved checkpoint at step 20 -> /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_baseline_pytorch_10k/smoke_baseline_10k_diag/20 (3655:train_pytorch.py:350)
|
| 147 |
+
|
| 148 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 149 |
+
warnings.warn( # warn only once
|
artifacts/twin_handover_packed_parallelization_10k_20260309/run_logs/smoke_parallel_10k_diag.log
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
W0309 15:46:21.273000 6578 torch/distributed/run.py:766]
|
| 2 |
+
W0309 15:46:21.273000 6578 torch/distributed/run.py:766] *****************************************
|
| 3 |
+
W0309 15:46:21.273000 6578 torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
W0309 15:46:21.273000 6578 torch/distributed/run.py:766] *****************************************
|
| 5 |
+
15:47:11.286 [I] Created experiment checkpoint directory: /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/smoke_parallel_10k_diag (6647:train_pytorch.py:505)
|
| 6 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 7 |
+
warnings.warn( # warn only once
|
| 8 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 9 |
+
warnings.warn( # warn only once
|
| 10 |
+
[rank2]:[W309 15:47:11.762262237 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 11 |
+
[rank0]:[W309 15:47:11.772293922 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 12 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 13 |
+
warnings.warn( # warn only once
|
| 14 |
+
[rank1]:[W309 15:47:12.078834637 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 15 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 16 |
+
warnings.warn( # warn only once
|
| 17 |
+
[rank3]:[W309 15:47:13.952599935 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
|
| 18 |
+
15:47:14.872 [I] Using batch size per GPU: 4 (total batch size across 4 GPUs: 16) (6647:train_pytorch.py:524)
|
| 19 |
+
15:47:15.088 [I] Loaded norm stats from /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/lsnu/twin_handover_256_train (6647:config.py:234)
|
| 20 |
+
15:47:15.090 [I] data_config: DataConfig(repo_id='lsnu/twin_handover_256_train', asset_id='lsnu/twin_handover_256_train', norm_stats={'state': NormStats(mean=array([ 0.40321857, 0.17899239, -0.07588876, -2.06326795, -0.46418607,
|
| 21 |
+
1.79356563, 0.70229131, 0.48194093, 0.93952829, 0.86693275,
|
| 22 |
+
-1.03168762, -1.9056077 , -0.53421056, 1.87584054, 2.36738205,
|
| 23 |
+
0.91249251]), std=array([0.73344636, 0.47653052, 0.72710407, 0.42399687, 0.63613892,
|
| 24 |
+
0.61144608, 1.11724186, 0.49967375, 0.86981195, 0.75071597,
|
| 25 |
+
0.90787333, 0.35008711, 0.51183224, 0.36600712, 0.56947577,
|
| 26 |
+
0.28257725]), q01=array([-1.52408956, -1.32446341, -1.91092197, -2.89885788, -1.66315554,
|
| 27 |
+
0.59010215, -2.27611645, 0. , -1.77352981, -1.62131719,
|
| 28 |
+
-1.77092851, -2.19172778, -2.03159353, 0.55409113, 0.79255736,
|
| 29 |
+
0. ]), q99=array([ 2.16638614, 1.38857444, 1.93436338, -0.88548369, 1.39976143,
|
| 30 |
+
2.99162304, 2.8194857 , 0.9998 , 1.46557211, 1.74660106,
|
| 31 |
+
1.58644652, -0.87876934, 2.25910752, 2.54628449, 2.89347284,
|
| 32 |
+
0.9998 ])), 'actions': NormStats(mean=array([ 0.05879939, -0.00704042, -0.02719213, -0.07685276, -0.07520971,
|
| 33 |
+
-0.00498583, 0.03577602, 0.48164892, 0.06564316, 0.06023132,
|
| 34 |
+
-0.10068271, -0.09547432, -0.0526481 , 0.08205888, 0.13954687,
|
| 35 |
+
0.88333535]), std=array([0.18337056, 0.28128958, 0.18525195, 0.29767084, 0.22944973,
|
| 36 |
+
0.40312037, 0.3896611 , 0.49966311, 0.21938531, 0.16883859,
|
| 37 |
+
0.20206179, 0.14864719, 0.12629333, 0.15546791, 0.23423795,
|
| 38 |
+
0.32102022]), q01=array([-0.34140511, -0.71597991, -0.55301429, -0.8233152 , -0.68097536,
|
| 39 |
+
-0.87723451, -0.86000918, 0. , -0.53261366, -0.49289397,
|
| 40 |
+
-0.48524564, -0.35752607, -0.42426748, -0.18230745, -0.09212705,
|
| 41 |
+
0. ]), q99=array([0.55444025, 0.69361174, 0.44115428, 0.550829 , 0.49707318,
|
| 42 |
+
0.68353445, 0.82907713, 0.9998 , 0.42654409, 0.44255511,
|
| 43 |
+
0.4114292 , 0.01550327, 0.38038206, 0.71452535, 0.62808441,
|
| 44 |
+
0.9998 ]))}, repack_transforms=Group(inputs=[RepackTransform(structure={'images': {'cam_high': 'front_image', 'cam_left_wrist': 'wrist_left_image', 'cam_right_wrist': 'wrist_right_image'}, 'state': 'state', 'actions': 'action', 'prompt': 'task'})], outputs=()), data_transforms=Group(inputs=[AlohaInputs(adapt_to_pi=False)], outputs=[]), model_transforms=Group(inputs=[InjectDefaultPrompt(prompt=None), ResizeImages(height=224, width=224), TokenizePrompt(tokenizer=<openpi.models.tokenizer.PaligemmaTokenizer object at 0x7e18350d3550>, discrete_state_input=True), PackPerArmBlocks(real_arm_dims=(8, 8), block_dims=(16, 16))], outputs=[UnpackPerArmBlocks(real_arm_dims=(8, 8), block_dims=(16, 16))]), use_quantile_norm=True, action_sequence_keys=('action',), prompt_from_task=False, rlds_data_dir=None, action_space=None, datasets=()) (6647:data_loader.py:283)
|
| 45 |
+
15:47:15.124 [I] Using existing local LeRobot dataset mirror for lsnu/twin_handover_256_train: /workspace/lerobot/lsnu/twin_handover_256_train (6647:data_loader.py:149)
|
| 46 |
+
15:47:21.449 [I] local_batch_size: 4 (6647:data_loader.py:364)
|
| 47 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 48 |
+
warnings.warn( # warn only once
|
| 49 |
+
15:50:36.938 [I] Enabled gradient checkpointing for PI0Pytorch model (6647:pi0_pytorch.py:150)
|
| 50 |
+
15:50:36.949 [I] Enabled gradient checkpointing for memory optimization (6647:train_pytorch.py:596)
|
| 51 |
+
15:50:36.951 [I] Step 0 (after_model_creation): GPU memory - allocated: 7.48GB, reserved: 7.48GB, free: 0.00GB, peak_allocated: 7.48GB, peak_reserved: 7.48GB | DDP: rank=0, world_size=4 (6647:train_pytorch.py:465)
|
| 52 |
+
15:51:05.826 [I] Loading weights from: /workspace/checkpoints/pi05_base_parallel_packed_from_single (6647:train_pytorch.py:625)
|
| 53 |
+
15:51:08.127 [I] Weight loading missing key count: 0 (6647:train_pytorch.py:629)
|
| 54 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 55 |
+
self.pid = os.fork()
|
| 56 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 57 |
+
self.pid = os.fork()
|
| 58 |
+
/usr/lib/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
|
| 59 |
+
self.pid = os.fork()
|
| 60 |
+
15:51:08.133 [I] Weight loading missing keys: set() (6647:train_pytorch.py:630)
|
| 61 |
+
15:51:08.134 [I] Weight loading unexpected key count: 0 (6647:train_pytorch.py:631)
|
| 62 |
+
15:51:08.135 [I] Weight loading unexpected keys: [] (6647:train_pytorch.py:632)
|
| 63 |
+
15:51:08.135 [I] Loaded PyTorch weights from /workspace/checkpoints/pi05_base_parallel_packed_from_single (6647:train_pytorch.py:633)
|
| 64 |
+
15:51:08.138 [I] Running on: 9a96de7d560b | world_size=4 (6647:train_pytorch.py:673)
|
| 65 |
+
15:51:08.139 [I] Training config: batch_size=16, effective_batch_size=4, num_train_steps=20 (6647:train_pytorch.py:674)
|
| 66 |
+
15:51:08.139 [I] Memory optimizations: gradient_checkpointing=True (6647:train_pytorch.py:677)
|
| 67 |
+
15:51:08.140 [I] DDP settings: find_unused_parameters=False, gradient_as_bucket_view=True, static_graph=True (6647:train_pytorch.py:678)
|
| 68 |
+
15:51:08.140 [I] LR schedule: warmup=500, peak_lr=2.50e-05, decay_steps=10000, end_lr=2.50e-06 (6647:train_pytorch.py:679)
|
| 69 |
+
15:51:08.140 [I] Optimizer: AdamW, weight_decay=1e-10, clip_norm=1.0 (6647:train_pytorch.py:682)
|
| 70 |
+
15:51:08.140 [I] EMA is not supported for PyTorch training (6647:train_pytorch.py:685)
|
| 71 |
+
15:51:08.140 [I] Training precision: bfloat16 (6647:train_pytorch.py:686)
|
| 72 |
+
15:51:08.162 [I] Resolved config name: pi05_twin_handover_256_packed_parallel_pytorch_10k (6647:train_pytorch.py:280)
|
| 73 |
+
15:51:08.162 [I] Dataset repo_id: lsnu/twin_handover_256_train (6647:train_pytorch.py:281)
|
| 74 |
+
15:51:08.163 [I] Norm-stats file path: /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_parallel_pytorch_10k/lsnu/twin_handover_256_train/norm_stats.json (6647:train_pytorch.py:282)
|
| 75 |
+
15:51:08.163 [I] Norm-stats summary: {'keys': ['actions', 'state'], 'state_mean_len': 16, 'state_std_len': 16, 'actions_mean_len': 16, 'actions_std_len': 16} (6647:train_pytorch.py:283)
|
| 76 |
+
15:51:08.163 [I] Checkpoint source path: /workspace/checkpoints/pi05_base_parallel_packed_from_single (6647:train_pytorch.py:284)
|
| 77 |
+
15:51:08.163 [I] Model type: parallel (6647:train_pytorch.py:285)
|
| 78 |
+
15:51:08.164 [I] Packed transforms active: True (6647:train_pytorch.py:286)
|
| 79 |
+
15:51:08.164 [I] World size: 4 (6647:train_pytorch.py:287)
|
| 80 |
+
15:51:08.164 [I] Batch size: local=4, global=16 (6647:train_pytorch.py:288)
|
| 81 |
+
15:51:08.164 [I] num_workers: 8 (6647:train_pytorch.py:289)
|
| 82 |
+
15:51:08.164 [I] Precision: bfloat16 (6647:train_pytorch.py:290)
|
| 83 |
+
15:51:08.165 [I] LR schedule summary: warmup_steps=500, peak_lr=2.50e-05, decay_steps=10000, decay_lr=2.50e-06 (6647:train_pytorch.py:291)
|
| 84 |
+
15:51:08.165 [I] Save/log intervals: save_interval=20, log_interval=5 (6647:train_pytorch.py:298)
|
| 85 |
+
15:51:08.165 [I] Action-loss mask: (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) (6647:train_pytorch.py:299)
|
| 86 |
+
15:51:08.166 [I] Active mask dims: [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] (6647:train_pytorch.py:300)
|
| 87 |
+
15:51:08.166 [I] Masked dims: [8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] (6647:train_pytorch.py:301)
|
| 88 |
+
15:51:08.166 [I] Gradient bucket diagnostics: action_in_proj_arms, arm_token_fuse, action_out_proj_arms, shared_expert (6647:train_pytorch.py:694)
|
| 89 |
+
|
| 90 |
+
self.pid = os.fork()
|
| 91 |
+
15:51:15.420 [I] debug_step=1 observation.state shape=(4, 32) dtype=torch.float64 actions shape=(4, 16, 32) dtype=torch.float32 (6647:train_pytorch.py:799)
|
| 92 |
+
15:51:15.420 [I] debug_step=1 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (6647:train_pytorch.py:803)
|
| 93 |
+
15:51:15.421 [I] debug_step=1 prompt_token_lengths=[74, 72, 76, 78] (6647:train_pytorch.py:806)
|
| 94 |
+
15:51:15.421 [I] debug_step=1 state_stats min=-1.0000 max=1.0004 mean=0.0715 std=0.4362 (6647:train_pytorch.py:807)
|
| 95 |
+
15:51:15.421 [I] debug_step=1 action_stats min=-1.0000 max=1.0947 mean=0.0331 std=0.4134 (6647:train_pytorch.py:810)
|
| 96 |
+
15:51:15.422 [I] debug_step=1 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (6647:train_pytorch.py:813)
|
| 97 |
+
15:51:15.440 [I] debug_step=1 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (6647:train_pytorch.py:817)
|
| 98 |
+
15:51:15.440 [I] debug_step=1 lr=4.99e-08 grad_norm=16.1250 data_time=2.8420s step_time=4.3963s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (6647:train_pytorch.py:822)
|
| 99 |
+
15:51:15.441 [I] debug_step=1 grad_shared_expert=15.5090 grad_action_in_proj_arms=0.5665 grad_arm_token_fuse=2.6833 grad_action_out_proj_arms=2.1581 (6647:train_pytorch.py:830)
|
| 100 |
+
|
| 101 |
+
15:51:16.328 [I] debug_step=2 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (6647:train_pytorch.py:803)
|
| 102 |
+
15:51:16.328 [I] debug_step=2 prompt_token_lengths=[79, 76, 69, 69] (6647:train_pytorch.py:806)
|
| 103 |
+
15:51:16.329 [I] debug_step=2 state_stats min=-1.0000 max=1.0004 mean=0.0430 std=0.4223 (6647:train_pytorch.py:807)
|
| 104 |
+
15:51:16.329 [I] debug_step=2 action_stats min=-1.0000 max=1.0071 mean=0.0532 std=0.4394 (6647:train_pytorch.py:810)
|
| 105 |
+
15:51:16.330 [I] debug_step=2 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (6647:train_pytorch.py:813)
|
| 106 |
+
15:51:16.330 [I] debug_step=2 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (6647:train_pytorch.py:817)
|
| 107 |
+
15:51:16.331 [I] debug_step=2 lr=9.98e-08 grad_norm=7.6511 data_time=0.2351s step_time=0.6776s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (6647:train_pytorch.py:822)
|
| 108 |
+
15:51:16.331 [I] debug_step=2 grad_shared_expert=7.1020 grad_action_in_proj_arms=0.2685 grad_arm_token_fuse=1.0830 grad_action_out_proj_arms=2.2163 (6647:train_pytorch.py:830)
|
| 109 |
+
|
| 110 |
+
15:51:17.133 [I] debug_step=3 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (6647:train_pytorch.py:803)
|
| 111 |
+
15:51:17.134 [I] debug_step=3 prompt_token_lengths=[74, 68, 72, 73] (6647:train_pytorch.py:806)
|
| 112 |
+
15:51:17.135 [I] debug_step=3 state_stats min=-1.1677 max=1.0004 mean=0.0099 std=0.5093 (6647:train_pytorch.py:807)
|
| 113 |
+
15:51:17.135 [I] debug_step=3 action_stats min=-1.1487 max=1.1439 mean=0.0173 std=0.4079 (6647:train_pytorch.py:810)
|
| 114 |
+
15:51:17.136 [I] debug_step=3 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (6647:train_pytorch.py:813)
|
| 115 |
+
15:51:17.136 [I] debug_step=3 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (6647:train_pytorch.py:817)
|
| 116 |
+
15:51:17.137 [I] debug_step=3 lr=1.50e-07 grad_norm=10.7520 data_time=0.1342s step_time=0.6718s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (6647:train_pytorch.py:822)
|
| 117 |
+
15:51:17.137 [I] debug_step=3 grad_shared_expert=10.0588 grad_action_in_proj_arms=0.4205 grad_arm_token_fuse=2.1222 grad_action_out_proj_arms=2.4053 (6647:train_pytorch.py:830)
|
| 118 |
+
|
| 119 |
+
15:51:17.815 [I] debug_step=4 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (6647:train_pytorch.py:803)
|
| 120 |
+
15:51:17.816 [I] debug_step=4 prompt_token_lengths=[75, 73, 76, 71] (6647:train_pytorch.py:806)
|
| 121 |
+
15:51:17.817 [I] debug_step=4 state_stats min=-1.0000 max=1.0708 mean=0.0711 std=0.4551 (6647:train_pytorch.py:807)
|
| 122 |
+
15:51:17.817 [I] debug_step=4 action_stats min=-1.0000 max=1.4460 mean=0.0674 std=0.4311 (6647:train_pytorch.py:810)
|
| 123 |
+
15:51:17.817 [I] debug_step=4 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (6647:train_pytorch.py:813)
|
| 124 |
+
15:51:17.818 [I] debug_step=4 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (6647:train_pytorch.py:817)
|
| 125 |
+
15:51:17.818 [I] debug_step=4 lr=2.00e-07 grad_norm=13.1805 data_time=0.1481s step_time=0.5340s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (6647:train_pytorch.py:822)
|
| 126 |
+
15:51:17.818 [I] debug_step=4 grad_shared_expert=12.6101 grad_action_in_proj_arms=0.4385 grad_arm_token_fuse=1.8988 grad_action_out_proj_arms=2.2621 (6647:train_pytorch.py:830)
|
| 127 |
+
|
| 128 |
+
15:51:18.417 [I] debug_step=5 image_keys=['base_0_rgb', 'left_wrist_0_rgb', 'right_wrist_0_rgb'] image_shapes={'base_0_rgb': (4, 3, 224, 224), 'left_wrist_0_rgb': (4, 3, 224, 224), 'right_wrist_0_rgb': (4, 3, 224, 224)} (6647:train_pytorch.py:803)
|
| 129 |
+
15:51:18.418 [I] debug_step=5 prompt_token_lengths=[73, 75, 70, 73] (6647:train_pytorch.py:806)
|
| 130 |
+
15:51:18.419 [I] debug_step=5 state_stats min=-1.0000 max=1.0004 mean=0.0188 std=0.4734 (6647:train_pytorch.py:807)
|
| 131 |
+
15:51:18.419 [I] debug_step=5 action_stats min=-1.0000 max=1.0647 mean=0.0147 std=0.3985 (6647:train_pytorch.py:810)
|
| 132 |
+
15:51:18.419 [I] debug_step=5 state_nonzero_counts_8d_blocks=[32, 0, 32, 0] action_nonzero_counts_8d_blocks=[512, 0, 512, 0] (6647:train_pytorch.py:813)
|
| 133 |
+
15:51:18.420 [I] debug_step=5 masked_dims=[8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31] active_dims=[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] masked_zero_counts state=64 actions=1024 (6647:train_pytorch.py:817)
|
| 134 |
+
15:51:18.420 [I] debug_step=5 lr=2.50e-07 grad_norm=21.7086 data_time=0.0873s step_time=0.5143s gpu_mem_allocated=28.53GB gpu_mem_reserved=35.28GB gpu_mem_max_allocated=35.27GB gpu_mem_max_reserved=35.28GB (6647:train_pytorch.py:822)
|
| 135 |
+
15:51:18.421 [I] debug_step=5 grad_shared_expert=20.5760 grad_action_in_proj_arms=0.8192 grad_arm_token_fuse=4.1698 grad_action_out_proj_arms=2.2565 (6647:train_pytorch.py:830)
|
| 136 |
+
15:51:18.421 [I] step=5 loss=1.2618 smoothed_loss=1.3450 lr=1.50e-07 grad_norm=13.8835 step_time=1.3588s data_time=0.6894s it/s=0.486 eta_to_20=30.8s max_cuda_memory=35.27GB grad_action_in_proj_arms=0.8192 grad_action_out_proj_arms=2.2565 grad_arm_token_fuse=4.1698 grad_shared_expert=20.5760 (6647:train_pytorch.py:850)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 141 |
+
warnings.warn( # warn only once
|
| 142 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 143 |
+
warnings.warn( # warn only once
|
| 144 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 145 |
+
warnings.warn( # warn only once
|
| 146 |
+
15:53:26.238 [I] Saved checkpoint at step 20 -> /workspace/pi05tests-openpi-multiarm/openpi/checkpoints/pi05_twin_handover_256_packed_parallel_pytorch_10k/smoke_parallel_10k_diag/20 (6647:train_pytorch.py:350)
|
| 147 |
+
|
| 148 |
+
/workspace/pi05tests-openpi-multiarm/openpi/.venv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
|
| 149 |
+
warnings.warn( # warn only once
|
artifacts/twin_handover_packed_parallelization_10k_20260309/sanity_checks/inspect_twin_packed_batch_handover_train.log
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config_name: pi05_twin_handover_256_packed_baseline_pytorch_2k
|
| 2 |
+
repo_id: lsnu/twin_handover_256_train
|
| 3 |
+
sample_index: 0
|
| 4 |
+
norm_stats_path: /workspace/pi05tests-openpi-multiarm/openpi/assets/pi05_twin_handover_256_packed_baseline_pytorch_2k/lsnu/twin_handover_256_train/norm_stats.json
|
| 5 |
+
norm_stats_keys: ['actions', 'state']
|
| 6 |
+
norm_stats_lengths: state_mean=16 state_std=16 action_mean=16 action_std=16
|
| 7 |
+
block_boundaries: [0:8] [8:16] [16:24] [24:32]
|
| 8 |
+
raw_state_16d_shape: (16,)
|
| 9 |
+
raw_state_16d:
|
| 10 |
+
[ 7.1883e-07 1.7515e-01 -5.6890e-06 -8.7299e-01 -6.3130e-06 1.2216e+00
|
| 11 |
+
7.8540e-01 1.0000e+00 1.1957e-06 1.7514e-01 -9.2062e-07 -8.7312e-01
|
| 12 |
+
1.6098e-05 1.2216e+00 7.8539e-01 1.0000e+00]
|
| 13 |
+
raw_actions_16d_shape: (16, 16)
|
| 14 |
+
raw_actions_16d:
|
| 15 |
+
[[ 2.3842e-05 -8.2493e-04 -5.7220e-05 3.9577e-04 2.8610e-05 7.8201e-04
|
| 16 |
+
-1.2398e-04 1.0000e+00 9.5367e-05 4.0293e-03 9.5367e-06 7.2479e-04
|
| 17 |
+
1.8120e-04 -1.4305e-05 -2.2411e-04 1.0000e+00]
|
| 18 |
+
[ 5.0068e-04 -1.5645e-02 2.6083e-03 -5.5575e-02 1.8883e-03 2.5430e-02
|
| 19 |
+
-1.9326e-02 1.0000e+00 2.7800e-02 2.4877e-02 -2.7924e-02 -2.7843e-02
|
| 20 |
+
-1.6832e-02 1.0629e-02 3.8543e-02 1.0000e+00]
|
| 21 |
+
[ 1.7738e-03 -7.6041e-02 8.9645e-03 -1.7257e-01 6.0558e-03 8.7943e-02
|
| 22 |
+
-6.4831e-02 1.0000e+00 9.2287e-02 5.8761e-02 -9.3136e-02 -7.6413e-02
|
| 23 |
+
-5.3630e-02 4.2353e-02 1.2606e-01 1.0000e+00]
|
| 24 |
+
[ 3.2425e-03 -1.3747e-01 1.5845e-02 -3.1527e-01 1.0653e-02 1.6477e-01
|
| 25 |
+
-1.1840e-01 1.0000e+00 1.7036e-01 1.0629e-01 -1.7153e-01 -1.4015e-01
|
| 26 |
+
-9.7461e-02 7.8468e-02 2.3009e-01 1.0000e+00]
|
| 27 |
+
[ 5.5885e-03 -2.1545e-01 2.4767e-02 -4.6663e-01 1.6103e-02 2.4452e-01
|
| 28 |
+
-1.7446e-01 1.0000e+00 2.5305e-01 1.5107e-01 -2.5392e-01 -2.1260e-01
|
| 29 |
+
-1.4490e-01 1.1766e-01 3.4122e-01 1.0000e+00]
|
| 30 |
+
[ 6.1035e-03 -2.8390e-01 3.3288e-02 -6.1909e-01 2.1739e-02 3.2683e-01
|
| 31 |
+
-2.3199e-01 1.0000e+00 3.3677e-01 1.9970e-01 -3.3804e-01 -2.8173e-01
|
| 32 |
+
-1.9161e-01 1.5831e-01 4.5282e-01 1.0000e+00]
|
| 33 |
+
[ 9.3937e-03 -3.1736e-01 3.8815e-02 -7.2264e-01 2.9097e-02 3.8407e-01
|
| 34 |
+
-2.9788e-01 1.0000e+00 3.9431e-01 2.3764e-01 -3.9650e-01 -3.2045e-01
|
| 35 |
+
-2.2884e-01 1.8487e-01 5.3961e-01 1.0000e+00]
|
| 36 |
+
[ 1.1177e-02 -3.3051e-01 4.2367e-02 -7.4072e-01 3.5295e-02 4.0234e-01
|
| 37 |
+
-3.4810e-01 1.0000e+00 4.1353e-01 2.4687e-01 -4.1600e-01 -3.4033e-01
|
| 38 |
+
-2.4390e-01 1.9067e-01 5.7513e-01 1.0000e+00]
|
| 39 |
+
[ 1.2674e-02 -3.1841e-01 4.3559e-02 -7.5366e-01 3.7665e-02 4.1035e-01
|
| 40 |
+
-3.7488e-01 1.0000e+00 4.2095e-01 2.5672e-01 -4.2238e-01 -3.4335e-01
|
| 41 |
+
-2.4950e-01 1.9567e-01 5.8634e-01 1.0000e+00]
|
| 42 |
+
[ 1.5645e-02 -3.0324e-01 4.3592e-02 -7.4167e-01 4.2624e-02 4.1367e-01
|
| 43 |
+
-4.1199e-01 1.0000e+00 4.2353e-01 2.6254e-01 -4.2444e-01 -3.4899e-01
|
| 44 |
+
-2.5064e-01 1.9762e-01 5.8977e-01 1.0000e+00]
|
| 45 |
+
[ 1.6398e-02 -2.9560e-01 4.2553e-02 -7.3503e-01 4.5595e-02 4.1383e-01
|
| 46 |
+
-4.3354e-01 1.0000e+00 4.2382e-01 2.5776e-01 -4.2612e-01 -3.5491e-01
|
| 47 |
+
-2.5177e-01 1.9462e-01 5.9134e-01 1.0000e+00]
|
| 48 |
+
[ 2.0757e-02 -2.9058e-01 4.2739e-02 -7.3133e-01 4.6840e-02 4.1339e-01
|
| 49 |
+
-4.5310e-01 1.0000e+00 4.2468e-01 2.5057e-01 -4.2498e-01 -3.4835e-01
|
| 50 |
+
-2.5149e-01 2.0029e-01 5.9138e-01 1.0000e+00]
|
| 51 |
+
[ 2.3303e-02 -2.7753e-01 4.1437e-02 -7.2254e-01 4.8075e-02 4.1380e-01
|
| 52 |
+
-4.7155e-01 1.0000e+00 4.2468e-01 2.5254e-01 -4.2522e-01 -3.4195e-01
|
| 53 |
+
-2.5130e-01 1.9623e-01 5.9127e-01 1.0000e+00]
|
| 54 |
+
[ 2.7924e-02 -2.5505e-01 4.0684e-02 -7.0069e-01 5.3768e-02 4.1076e-01
|
| 55 |
+
-5.1048e-01 1.0000e+00 4.2446e-01 2.5574e-01 -4.2656e-01 -3.5101e-01
|
| 56 |
+
-2.5181e-01 1.9645e-01 5.9101e-01 1.0000e+00]
|
| 57 |
+
[ 3.2401e-02 -2.4053e-01 4.1451e-02 -6.8364e-01 5.6882e-02 4.1132e-01
|
| 58 |
+
-5.4158e-01 1.0000e+00 4.2435e-01 2.5109e-01 -4.2632e-01 -3.5082e-01
|
| 59 |
+
-2.5095e-01 1.9805e-01 5.9107e-01 1.0000e+00]
|
| 60 |
+
[ 3.4809e-02 -2.2431e-01 4.0565e-02 -6.7288e-01 5.6076e-02 4.0839e-01
|
| 61 |
+
-5.6400e-01 1.0000e+00 4.2504e-01 2.5486e-01 -4.2588e-01 -3.4874e-01
|
| 62 |
+
-2.5139e-01 1.9783e-01 5.9183e-01 1.0000e+00]]
|
| 63 |
+
normalized_state_16d_shape: (16,)
|
| 64 |
+
normalized_state_16d:
|
| 65 |
+
[-0.174 0.1055 -0.0061 1.0124 0.086 -0.4741 0.2016 1.0004 0.0951
|
| 66 |
+
0.0668 0.0549 1.0086 -0.053 -0.3299 -1.0068 1.0004]
|
| 67 |
+
normalized_actions_16d_shape: (16, 16)
|
| 68 |
+
normalized_actions_16d:
|
| 69 |
+
[[-0.2378 0.0147 0.1124 0.1989 0.1562 0.1251 0.0182 1.0004 0.1108
|
| 70 |
+
0.0624 0.0823 0.9208 0.055 -0.5935 -0.7448 1.0004]
|
| 71 |
+
[-0.2367 -0.0063 0.1178 0.1174 0.1593 0.1567 -0.0046 1.0004 0.1686
|
| 72 |
+
0.107 0.02 0.7676 0.0127 -0.5697 -0.6371 1.0004]
|
| 73 |
+
[-0.2338 -0.092 0.1305 -0.0529 0.1664 0.2368 -0.0585 1.0004 0.303
|
| 74 |
+
0.1794 -0.1254 0.5072 -0.0788 -0.499 -0.3941 1.0004]
|
| 75 |
+
[-0.2306 -0.1792 0.1444 -0.2606 0.1742 0.3352 -0.1219 1.0004 0.4658
|
| 76 |
+
0.2811 -0.3003 0.1655 -0.1877 -0.4185 -0.1052 1.0004]
|
| 77 |
+
[-0.2253 -0.2898 0.1623 -0.4809 0.1834 0.4374 -0.1883 1.0004 0.6382
|
| 78 |
+
0.3768 -0.484 -0.223 -0.3056 -0.3311 0.2034 1.0004]
|
| 79 |
+
[-0.2242 -0.3869 0.1795 -0.7028 0.193 0.5429 -0.2564 1.0004 0.8128
|
| 80 |
+
0.4808 -0.6717 -0.5936 -0.4217 -0.2404 0.5133 1.0004]
|
| 81 |
+
[-0.2168 -0.4344 0.1906 -0.8535 0.2055 0.6163 -0.3344 1.0004 0.9328
|
| 82 |
+
0.5619 -0.8021 -0.8012 -0.5143 -0.1812 0.7543 1.0004]
|
| 83 |
+
[-0.2129 -0.4531 0.1977 -0.8798 0.216 0.6397 -0.3939 1.0004 0.9729
|
| 84 |
+
0.5816 -0.8455 -0.9078 -0.5517 -0.1682 0.8529 1.0004]
|
| 85 |
+
[-0.2095 -0.4359 0.2001 -0.8986 0.2201 0.6499 -0.4256 1.0004 0.9883
|
| 86 |
+
0.6027 -0.8598 -0.924 -0.5656 -0.1571 0.8841 1.0004]
|
| 87 |
+
[-0.2029 -0.4144 0.2002 -0.8812 0.2285 0.6542 -0.4695 1.0004 0.9937
|
| 88 |
+
0.6151 -0.8644 -0.9542 -0.5684 -0.1527 0.8936 1.0004]
|
| 89 |
+
[-0.2012 -0.4035 0.1981 -0.8715 0.2335 0.6544 -0.495 1.0004 0.9943
|
| 90 |
+
0.6049 -0.8681 -0.986 -0.5713 -0.1594 0.8979 1.0004]
|
| 91 |
+
[-0.1915 -0.3964 0.1985 -0.8661 0.2356 0.6538 -0.5182 1.0004 0.9961
|
| 92 |
+
0.5895 -0.8656 -0.9508 -0.5705 -0.1468 0.8981 1.0004]
|
| 93 |
+
[-0.1858 -0.3779 0.1959 -0.8533 0.2377 0.6544 -0.54 1.0004 0.9961
|
| 94 |
+
0.5937 -0.8661 -0.9165 -0.5701 -0.1558 0.8978 1.0004]
|
| 95 |
+
[-0.1755 -0.346 0.1944 -0.8215 0.2474 0.6505 -0.5861 1.0004 0.9956
|
| 96 |
+
0.6006 -0.8691 -0.9651 -0.5713 -0.1554 0.897 1.0004]
|
| 97 |
+
[-0.1655 -0.3254 0.1959 -0.7967 0.2527 0.6512 -0.623 1.0004 0.9954
|
| 98 |
+
0.5907 -0.8686 -0.9641 -0.5692 -0.1518 0.8972 1.0004]
|
| 99 |
+
[-0.1601 -0.3024 0.1941 -0.7811 0.2513 0.6474 -0.6495 1.0004 0.9969
|
| 100 |
+
0.5987 -0.8676 -0.9529 -0.5703 -0.1523 0.8993 1.0004]]
|
| 101 |
+
packed_state_32d_shape: (32,)
|
| 102 |
+
packed_state_32d:
|
| 103 |
+
[-0.174 0.1055 -0.0061 1.0124 0.086 -0.4741 0.2016 1.0004 0.
|
| 104 |
+
0. 0. 0. 0. 0. 0. 0. 0.0951 0.0668
|
| 105 |
+
0.0549 1.0086 -0.053 -0.3299 -1.0068 1.0004 0. 0. 0.
|
| 106 |
+
0. 0. 0. 0. 0. ]
|
| 107 |
+
packed_actions_32d_shape: (16, 32)
|
| 108 |
+
packed_actions_32d:
|
| 109 |
+
[[-0.2378 0.0147 0.1124 0.1989 0.1562 0.1251 0.0182 1.0004 0.
|
| 110 |
+
0. 0. 0. 0. 0. 0. 0. 0.1108 0.0624
|
| 111 |
+
0.0823 0.9208 0.055 -0.5935 -0.7448 1.0004 0. 0. 0.
|
| 112 |
+
0. 0. 0. 0. 0. ]
|
| 113 |
+
[-0.2367 -0.0063 0.1178 0.1174 0.1593 0.1567 -0.0046 1.0004 0.
|
| 114 |
+
0. 0. 0. 0. 0. 0. 0. 0.1686 0.107
|
| 115 |
+
0.02 0.7676 0.0127 -0.5697 -0.6371 1.0004 0. 0. 0.
|
| 116 |
+
0. 0. 0. 0. 0. ]
|
| 117 |
+
[-0.2338 -0.092 0.1305 -0.0529 0.1664 0.2368 -0.0585 1.0004 0.
|
| 118 |
+
0. 0. 0. 0. 0. 0. 0. 0.303 0.1794
|
| 119 |
+
-0.1254 0.5072 -0.0788 -0.499 -0.3941 1.0004 0. 0. 0.
|
| 120 |
+
0. 0. 0. 0. 0. ]
|
| 121 |
+
[-0.2306 -0.1792 0.1444 -0.2606 0.1742 0.3352 -0.1219 1.0004 0.
|
| 122 |
+
0. 0. 0. 0. 0. 0. 0. 0.4658 0.2811
|
| 123 |
+
-0.3003 0.1655 -0.1877 -0.4185 -0.1052 1.0004 0. 0. 0.
|
| 124 |
+
0. 0. 0. 0. 0. ]
|
| 125 |
+
[-0.2253 -0.2898 0.1623 -0.4809 0.1834 0.4374 -0.1883 1.0004 0.
|
| 126 |
+
0. 0. 0. 0. 0. 0. 0. 0.6382 0.3768
|
| 127 |
+
-0.484 -0.223 -0.3056 -0.3311 0.2034 1.0004 0. 0. 0.
|
| 128 |
+
0. 0. 0. 0. 0. ]
|
| 129 |
+
[-0.2242 -0.3869 0.1795 -0.7028 0.193 0.5429 -0.2564 1.0004 0.
|
| 130 |
+
0. 0. 0. 0. 0. 0. 0. 0.8128 0.4808
|
| 131 |
+
-0.6717 -0.5936 -0.4217 -0.2404 0.5133 1.0004 0. 0. 0.
|
| 132 |
+
0. 0. 0. 0. 0. ]
|
| 133 |
+
[-0.2168 -0.4344 0.1906 -0.8535 0.2055 0.6163 -0.3344 1.0004 0.
|
| 134 |
+
0. 0. 0. 0. 0. 0. 0. 0.9328 0.5619
|
| 135 |
+
-0.8021 -0.8012 -0.5143 -0.1812 0.7543 1.0004 0. 0. 0.
|
| 136 |
+
0. 0. 0. 0. 0. ]
|
| 137 |
+
[-0.2129 -0.4531 0.1977 -0.8798 0.216 0.6397 -0.3939 1.0004 0.
|
| 138 |
+
0. 0. 0. 0. 0. 0. 0. 0.9729 0.5816
|
| 139 |
+
-0.8455 -0.9078 -0.5517 -0.1682 0.8529 1.0004 0. 0. 0.
|
| 140 |
+
0. 0. 0. 0. 0. ]
|
| 141 |
+
[-0.2095 -0.4359 0.2001 -0.8986 0.2201 0.6499 -0.4256 1.0004 0.
|
| 142 |
+
0. 0. 0. 0. 0. 0. 0. 0.9883 0.6027
|
| 143 |
+
-0.8598 -0.924 -0.5656 -0.1571 0.8841 1.0004 0. 0. 0.
|
| 144 |
+
0. 0. 0. 0. 0. ]
|
| 145 |
+
[-0.2029 -0.4144 0.2002 -0.8812 0.2285 0.6542 -0.4695 1.0004 0.
|
| 146 |
+
0. 0. 0. 0. 0. 0. 0. 0.9937 0.6151
|
| 147 |
+
-0.8644 -0.9542 -0.5684 -0.1527 0.8936 1.0004 0. 0. 0.
|
| 148 |
+
0. 0. 0. 0. 0. ]
|
| 149 |
+
[-0.2012 -0.4035 0.1981 -0.8715 0.2335 0.6544 -0.495 1.0004 0.
|
| 150 |
+
0. 0. 0. 0. 0. 0. 0. 0.9943 0.6049
|
| 151 |
+
-0.8681 -0.986 -0.5713 -0.1594 0.8979 1.0004 0. 0. 0.
|
| 152 |
+
0. 0. 0. 0. 0. ]
|
| 153 |
+
[-0.1915 -0.3964 0.1985 -0.8661 0.2356 0.6538 -0.5182 1.0004 0.
|
| 154 |
+
0. 0. 0. 0. 0. 0. 0. 0.9961 0.5895
|
| 155 |
+
-0.8656 -0.9508 -0.5705 -0.1468 0.8981 1.0004 0. 0. 0.
|
| 156 |
+
0. 0. 0. 0. 0. ]
|
| 157 |
+
[-0.1858 -0.3779 0.1959 -0.8533 0.2377 0.6544 -0.54 1.0004 0.
|
| 158 |
+
0. 0. 0. 0. 0. 0. 0. 0.9961 0.5937
|
| 159 |
+
-0.8661 -0.9165 -0.5701 -0.1558 0.8978 1.0004 0. 0. 0.
|
| 160 |
+
0. 0. 0. 0. 0. ]
|
| 161 |
+
[-0.1755 -0.346 0.1944 -0.8215 0.2474 0.6505 -0.5861 1.0004 0.
|
| 162 |
+
0. 0. 0. 0. 0. 0. 0. 0.9956 0.6006
|
| 163 |
+
-0.8691 -0.9651 -0.5713 -0.1554 0.897 1.0004 0. 0. 0.
|
| 164 |
+
0. 0. 0. 0. 0. ]
|
| 165 |
+
[-0.1655 -0.3254 0.1959 -0.7967 0.2527 0.6512 -0.623 1.0004 0.
|
| 166 |
+
0. 0. 0. 0. 0. 0. 0. 0.9954 0.5907
|
| 167 |
+
-0.8686 -0.9641 -0.5692 -0.1518 0.8972 1.0004 0. 0. 0.
|
| 168 |
+
0. 0. 0. 0. 0. ]
|
| 169 |
+
[-0.1601 -0.3024 0.1941 -0.7811 0.2513 0.6474 -0.6495 1.0004 0.
|
| 170 |
+
0. 0. 0. 0. 0. 0. 0. 0.9969 0.5987
|
| 171 |
+
-0.8676 -0.9529 -0.5703 -0.1523 0.8993 1.0004 0. 0. 0.
|
| 172 |
+
0. 0. 0. 0. 0. ]]
|
| 173 |
+
state_padded_zero_count: 16 / 16
|
| 174 |
+
actions_padded_zero_count: 256 / 256
|
| 175 |
+
state_padded_exact_zero: True
|
| 176 |
+
actions_padded_exact_zero: True
|
artifacts/twin_handover_packed_parallelization_10k_20260309/sanity_checks/warmstart_equivalence_10k.log
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
starting_warmstart_equivalence baseline_config=pi05_twin_handover_256_packed_baseline_pytorch_10k parallel_config=pi05_twin_handover_256_packed_parallel_pytorch_10k repo_id=lsnu/twin_handover_256_train
|
| 2 |
+
loaded_eval_dataloader
|
| 3 |
+
loaded_reference_batch
|
| 4 |
+
loading_model config=pi05_twin_handover_256_packed_baseline_pytorch_10k checkpoint=/workspace/checkpoints/pi05_base_single_pytorch
|
| 5 |
+
running_forward config=pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 6 |
+
finished_forward config=pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 7 |
+
loading_model config=pi05_twin_handover_256_packed_parallel_pytorch_10k checkpoint=/workspace/checkpoints/pi05_base_parallel_packed_from_single
|
| 8 |
+
running_forward config=pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 9 |
+
finished_forward config=pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 10 |
+
baseline_config_name: pi05_twin_handover_256_packed_baseline_pytorch_10k
|
| 11 |
+
parallel_config_name: pi05_twin_handover_256_packed_parallel_pytorch_10k
|
| 12 |
+
repo_id_used: lsnu/twin_handover_256_train
|
| 13 |
+
baseline_ckpt: /workspace/checkpoints/pi05_base_single_pytorch
|
| 14 |
+
parallel_ckpt: /workspace/checkpoints/pi05_base_parallel_packed_from_single
|
| 15 |
+
batch_size: 4
|
| 16 |
+
eval_seed: 777
|
| 17 |
+
tolerance: 1e-06
|
| 18 |
+
baseline_missing_keys: []
|
| 19 |
+
baseline_unexpected_keys: []
|
| 20 |
+
parallel_missing_keys: []
|
| 21 |
+
parallel_unexpected_keys: []
|
| 22 |
+
input_projection_max_abs_diff: 0.00122881
|
| 23 |
+
input_projection_mean_abs_diff: 0.00015435
|
| 24 |
+
loss_max_abs_diff: 0.90186501
|
| 25 |
+
loss_mean_abs_diff: 0.04585753
|
| 26 |
+
baseline_masked_loss: 1.00531137
|
| 27 |
+
parallel_masked_loss: 1.00929189
|
| 28 |
+
masked_loss_abs_diff: 0.00398052
|
| 29 |
+
warmstart_equivalent: False
|