CHYang25 commited on
Commit
a0c218a
·
verified ·
1 Parent(s): 50407c1

Delete 2026.04.05

Browse files
Files changed (44) hide show
  1. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/config.yaml +0 -168
  2. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/hydra.yaml +0 -156
  3. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/overrides.yaml +0 -2
  4. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0000-test_success_rate=0.300.ckpt +0 -3
  5. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0010-test_success_rate=0.260.ckpt +0 -3
  6. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0020-test_success_rate=0.300.ckpt +0 -3
  7. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0030-test_success_rate=0.280.ckpt +0 -3
  8. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0040-test_success_rate=0.240.ckpt +0 -3
  9. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/latest.ckpt +0 -3
  10. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/logs.json.txt +0 -0
  11. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/train.log +0 -11
  12. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug-internal.log +0 -16
  13. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug.log +0 -27
  14. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/config.yaml +0 -250
  15. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/output.log +0 -3
  16. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/wandb-metadata.json +0 -57
  17. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/wandb-summary.json +0 -1
  18. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-core.log +0 -14
  19. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-internal.log +0 -16
  20. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug.log +0 -27
  21. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/run-4ns1dtmm.wandb +0 -3
  22. 2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/wandb-resume.json +0 -1
  23. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/config.yaml +0 -168
  24. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/hydra.yaml +0 -156
  25. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/overrides.yaml +0 -2
  26. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0000-test_success_rate=0.260.ckpt +0 -3
  27. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0010-test_success_rate=0.380.ckpt +0 -3
  28. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0020-test_success_rate=0.160.ckpt +0 -3
  29. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0030-test_success_rate=0.360.ckpt +0 -3
  30. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0040-test_success_rate=0.300.ckpt +0 -3
  31. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/latest.ckpt +0 -3
  32. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/logs.json.txt +0 -0
  33. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/train.log +0 -11
  34. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug-internal.log +0 -16
  35. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug.log +0 -27
  36. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/config.yaml +0 -250
  37. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/output.log +0 -3
  38. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/wandb-metadata.json +0 -57
  39. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/wandb-summary.json +0 -1
  40. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-core.log +0 -14
  41. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-internal.log +0 -16
  42. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug.log +0 -27
  43. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/run-gsxv5o2x.wandb +0 -3
  44. 2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/wandb-resume.json +0 -1
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/config.yaml DELETED
@@ -1,168 +0,0 @@
1
- name: train_bdpo_unet_lowdim
2
- _target_: llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace
3
- obs_dim: ${task.obs_dim}
4
- action_dim: ${task.action_dim}
5
- task_name: ${task.name}
6
- exp_name: default
7
- horizon: 1
8
- n_obs_steps: 1
9
- n_action_steps: 1
10
- n_latency_steps: 0
11
- past_action_visible: false
12
- policy:
13
- _target_: llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy
14
- model:
15
- _target_: llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D
16
- input_dim: ${task.action_dim}
17
- local_cond_dim: null
18
- global_cond_dim: '${eval: ${task.obs_dim}*${n_obs_steps}}'
19
- diffusion_step_embed_dim: 256
20
- down_dims:
21
- - 256
22
- - 512
23
- - 1024
24
- kernel_size: 5
25
- n_groups: 8
26
- cond_predict_scale: true
27
- noise_scheduler:
28
- _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
29
- num_train_timesteps: 100
30
- beta_start: 0.0001
31
- beta_end: 0.02
32
- beta_schedule: squaredcos_cap_v2
33
- variance_type: fixed_small
34
- clip_sample: true
35
- prediction_type: epsilon
36
- obs_dim: ${obs_dim}
37
- action_dim: ${action_dim}
38
- horizon: ${horizon}
39
- n_obs_steps: ${n_obs_steps}
40
- n_action_steps: ${n_action_steps}
41
- hidden_size:
42
- - 256
43
- - 256
44
- activation: relu
45
- gamma: ${task.env_runner.discount}
46
- tau: 0.005
47
- eta: 1.0
48
- rho: 1.0
49
- target_reduction: min
50
- max_q_backup: false
51
- actor_update_interval: 1
52
- value_warmup_steps: 1000
53
- num_eval_sample_steps: 100
54
- act_limit: null
55
- squash_action: false
56
- init_weight: 0.01
57
- checkpoint_path: null
58
- behavior_checkpoint_path: data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0055-val_loss=0.215.ckpt
59
- init_actor_from_behavior: true
60
- dataloader:
61
- batch_size: 256
62
- num_workers: 0
63
- shuffle: true
64
- pin_memory: false
65
- persistent_workers: false
66
- val_dataloader:
67
- batch_size: 256
68
- num_workers: 0
69
- shuffle: true
70
- pin_memory: false
71
- persistent_workers: false
72
- optimizer:
73
- _target_: torch.optim.Adam
74
- lr: 0.0001
75
- betas:
76
- - 0.9
77
- - 0.999
78
- eps: 1.0e-08
79
- weight_decay: 0.0
80
- critic_optimizer:
81
- _target_: torch.optim.Adam
82
- lr: 0.0001
83
- betas:
84
- - 0.9
85
- - 0.999
86
- eps: 1.0e-08
87
- weight_decay: 0.0
88
- value_optimizer:
89
- _target_: torch.optim.Adam
90
- lr: 0.0001
91
- betas:
92
- - 0.9
93
- - 0.999
94
- eps: 1.0e-08
95
- weight_decay: 0.0
96
- training:
97
- device: cuda:0
98
- seed: 45
99
- debug: false
100
- resume: false
101
- lr_scheduler: constant
102
- lr_warmup_steps: 500
103
- num_epochs: 201
104
- gradient_accumulate_every: 1
105
- grad_norm_clip: 5.0
106
- rollout_every: 10
107
- checkpoint_every: 10
108
- val_every: 1
109
- sample_every: 10
110
- sample_max_batch: 128
111
- max_train_steps: null
112
- max_val_steps: null
113
- tqdm_interval_sec: 1.0
114
- logging:
115
- project: ${task.name}-bdpo-training
116
- resume: true
117
- mode: online
118
- name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
119
- tags:
120
- - ${name}
121
- - ${task_name}
122
- - ${exp_name}
123
- id: null
124
- group: null
125
- checkpoint:
126
- topk:
127
- monitor_key: test_success_rate
128
- mode: max
129
- k: 5
130
- format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
131
- save_last_ckpt: true
132
- save_last_snapshot: false
133
- multi_run:
134
- run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
135
- wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
136
- task:
137
- name: adroit-hand-hammer-v1
138
- obs_dim: 46
139
- action_dim: 26
140
- env_runner:
141
- _target_: llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner
142
- env_name: llf-adroit-adroit-hand-hammer-v1
143
- n_train: 10
144
- n_test: 50
145
- n_envs: 10
146
- max_steps: 150
147
- n_obs_steps: ${n_obs_steps}
148
- n_action_steps: ${n_action_steps}
149
- instruction_type: b
150
- feedback_type:
151
- - hp
152
- - hn
153
- - fp
154
- visual: false
155
- discount: 0.99
156
- dataset:
157
- _target_: llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset
158
- data_path: datasets/adroit-hand-hammer-v1-general-reward-trans.pt
159
- data_path2: datasets/adroit-hand-hammer-v1-reward-trans.pt
160
- horizon: ${horizon}
161
- pad_before: ${eval:'${n_obs_steps}-1'}
162
- pad_after: ${eval:'${n_action_steps}-1'}
163
- obs_eef_target: true
164
- use_manual_normalizer: false
165
- val_ratio: 0.1
166
- dummy_normalizer: false
167
- instructor:
168
- _target_: llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/hydra.yaml DELETED
@@ -1,156 +0,0 @@
1
- hydra:
2
- run:
3
- dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
- sweep:
5
- dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
- subdir: ${hydra.job.num}
7
- launcher:
8
- _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
- sweeper:
10
- _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
- max_batch_size: null
12
- params: null
13
- help:
14
- app_name: ${hydra.job.name}
15
- header: '${hydra.help.app_name} is powered by Hydra.
16
-
17
- '
18
- footer: 'Powered by Hydra (https://hydra.cc)
19
-
20
- Use --hydra-help to view Hydra specific help
21
-
22
- '
23
- template: '${hydra.help.header}
24
-
25
- == Configuration groups ==
26
-
27
- Compose your configuration from those groups (group=option)
28
-
29
-
30
- $APP_CONFIG_GROUPS
31
-
32
-
33
- == Config ==
34
-
35
- Override anything in the config (foo.bar=value)
36
-
37
-
38
- $CONFIG
39
-
40
-
41
- ${hydra.help.footer}
42
-
43
- '
44
- hydra_help:
45
- template: 'Hydra (${hydra.runtime.version})
46
-
47
- See https://hydra.cc for more info.
48
-
49
-
50
- == Flags ==
51
-
52
- $FLAGS_HELP
53
-
54
-
55
- == Configuration groups ==
56
-
57
- Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
- to command line)
59
-
60
-
61
- $HYDRA_CONFIG_GROUPS
62
-
63
-
64
- Use ''--cfg hydra'' to Show the Hydra config.
65
-
66
- '
67
- hydra_help: ???
68
- hydra_logging:
69
- version: 1
70
- formatters:
71
- simple:
72
- format: '[%(asctime)s][HYDRA] %(message)s'
73
- handlers:
74
- console:
75
- class: logging.StreamHandler
76
- formatter: simple
77
- stream: ext://sys.stdout
78
- root:
79
- level: INFO
80
- handlers:
81
- - console
82
- loggers:
83
- logging_example:
84
- level: DEBUG
85
- disable_existing_loggers: false
86
- job_logging:
87
- version: 1
88
- formatters:
89
- simple:
90
- format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
- handlers:
92
- console:
93
- class: logging.StreamHandler
94
- formatter: simple
95
- stream: ext://sys.stdout
96
- file:
97
- class: logging.FileHandler
98
- formatter: simple
99
- filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
- root:
101
- level: INFO
102
- handlers:
103
- - console
104
- - file
105
- disable_existing_loggers: false
106
- env: {}
107
- mode: RUN
108
- searchpath: []
109
- callbacks: {}
110
- output_subdir: .hydra
111
- overrides:
112
- hydra:
113
- - hydra.mode=RUN
114
- task:
115
- - training.seed=45
116
- - policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0055-val_loss\=0.215.ckpt
117
- job:
118
- name: train
119
- chdir: null
120
- override_dirname: policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0055-val_loss\=0.215.ckpt,training.seed=45
121
- id: ???
122
- num: ???
123
- config_name: bdpo_adroit-hand-hammer-v1_gen.yaml
124
- env_set: {}
125
- env_copy: []
126
- config:
127
- override_dirname:
128
- kv_sep: '='
129
- item_sep: ','
130
- exclude_keys: []
131
- runtime:
132
- version: 1.2.0
133
- version_base: '1.2'
134
- cwd: /tmp2/chyang/workspace/LLM-BC
135
- config_sources:
136
- - path: hydra.conf
137
- schema: pkg
138
- provider: hydra
139
- - path: /tmp2/chyang/workspace/LLM-BC/config/main_table
140
- schema: file
141
- provider: main
142
- - path: ''
143
- schema: structured
144
- provider: schema
145
- output_dir: /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
146
- choices:
147
- hydra/env: default
148
- hydra/callbacks: null
149
- hydra/job_logging: default
150
- hydra/hydra_logging: default
151
- hydra/hydra_help: default
152
- hydra/help: default
153
- hydra/sweeper: basic
154
- hydra/launcher: basic
155
- hydra/output: default
156
- verbose: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/overrides.yaml DELETED
@@ -1,2 +0,0 @@
1
- - training.seed=45
2
- - policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0055-val_loss\=0.215.ckpt
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0000-test_success_rate=0.300.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab983ad24178337d87c0786bb0806f9c1e503ce9a96576fb997bda65bd9667a9
3
- size 1055778720
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0010-test_success_rate=0.260.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:148ceda9d6dff960ca2ccff4be27c56d5dea10412bde3cf8fe4aa0359a6dc3f4
3
- size 1055778720
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0020-test_success_rate=0.300.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb55fb904569722c484398ec42b8417508de98906fc26a333cef9597ced2b5c
3
- size 1580582693
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0030-test_success_rate=0.280.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f30cc47b5a51be6dff657cf4e9e5e4a92082bc6ab97bab90d998a05f6d5b350c
3
- size 1580582693
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0040-test_success_rate=0.240.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa91a0661f2df1acaf2739892f21c5286c28e660bf6d8e20b3faa3865b64cb98
3
- size 1580582693
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/latest.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ffdd359d8d361b7fafd12f3c0ede60a66c242b6b00122391dbf09fcee8119b
3
- size 1580582693
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/logs.json.txt DELETED
The diff for this file is too large to render. See raw diff
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/train.log DELETED
@@ -1,11 +0,0 @@
1
- [2026-04-05 19:47:22,077][numexpr.utils][INFO] - Note: NumExpr detected 20 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
2
- [2026-04-05 19:47:22,077][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
3
- [2026-04-05 19:47:22,750][llmbc.model.diffusion.conditional_unet1d][INFO] - number of parameters: 6.558467e+07
4
- [2026-04-05 19:47:28,683][llmbc.model.diffusion.conditional_unet1d][INFO] - number of parameters: 6.558467e+07
5
- [2026-04-05 19:47:32,007][OpenGL.acceleratesupport][WARNING] - Incompatible version of OpenGL_accelerate found, need at least (3, 1, 10) found (3, 1, 9)
6
- [2026-04-05 19:47:32,007][OpenGL.acceleratesupport][INFO] - No OpenGL_accelerate module loaded: Old version of OpenGL_accelerate
7
- [2026-04-05 19:47:36,329][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
8
- [2026-04-05 19:47:36,339][absl][INFO] - MuJoCo library version is: 2.3.7
9
- [2026-04-05 19:47:36,507][datasets][INFO] - PyTorch version 2.2.2 available.
10
- [2026-04-05 19:47:36,508][datasets][INFO] - TensorFlow version 2.15.1 available.
11
- [2026-04-05 19:47:36,509][datasets][INFO] - JAX version 0.4.30 available.
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug-internal.log DELETED
@@ -1,16 +0,0 @@
1
- {"time":"2026-04-05T19:47:37.711575744+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
- {"time":"2026-04-05T19:47:37.711598693+08:00","level":"INFO","msg":"created symlink","path":"/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-core.log"}
3
- {"time":"2026-04-05T19:47:37.820659395+08:00","level":"INFO","msg":"created new stream","id":"4ns1dtmm"}
4
- {"time":"2026-04-05T19:47:37.820714248+08:00","level":"INFO","msg":"stream: started","id":"4ns1dtmm"}
5
- {"time":"2026-04-05T19:47:37.820739406+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"4ns1dtmm"}}
6
- {"time":"2026-04-05T19:47:37.820815969+08:00","level":"INFO","msg":"sender: started","stream_id":"4ns1dtmm"}
7
- {"time":"2026-04-05T19:47:37.820853585+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"4ns1dtmm"}}
8
- {"time":"2026-04-05T19:47:38.804274688+08:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2026-04-06T14:06:45.702933515+08:00","level":"INFO","msg":"stream: closing","id":"4ns1dtmm"}
10
- {"time":"2026-04-06T14:06:45.702982796+08:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2026-04-06T14:06:45.703854988+08:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2026-04-06T14:06:49.250748912+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
- {"time":"2026-04-06T14:06:49.546933574+08:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"4ns1dtmm"}}
14
- {"time":"2026-04-06T14:06:49.546991191+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"4ns1dtmm"}}
15
- {"time":"2026-04-06T14:06:49.547007286+08:00","level":"INFO","msg":"sender: closed","stream_id":"4ns1dtmm"}
16
- {"time":"2026-04-06T14:06:49.547065389+08:00","level":"INFO","msg":"stream: closed","id":"4ns1dtmm"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Configure stats pid to 337747
3
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from /home/chyang/.config/wandb/settings
4
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from /tmp2/chyang/workspace/LLM-BC/wandb/settings
5
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/tmp2/chyang/workspace/LLM-BC/train.py', 'program': '/tmp2/chyang/workspace/LLM-BC/./train.py'}
8
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:_log_setup():533] Logging user logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug.log
10
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:_log_setup():534] Logging internal logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-internal.log
11
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:init():619] calling init triggers
12
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {'name': 'train_bdpo_unet_lowdim', '_target_': 'llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace', 'obs_dim': 46, 'action_dim': 26, 'task_name': 'adroit-hand-hammer-v1', 'exp_name': 'default', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'policy': {'_target_': 'llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy', 'model': {'_target_': 'llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D', 'input_dim': 26, 'local_cond_dim': None, 'global_cond_dim': 46, 'diffusion_step_embed_dim': 256, 'down_dims': [256, 512, 1024], 'kernel_size': 5, 'n_groups': 8, 'cond_predict_scale': True}, 'noise_scheduler': {'_target_': 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler', 'num_train_timesteps': 100, 'beta_start': 0.0001, 'beta_end': 0.02, 'beta_schedule': 'squaredcos_cap_v2', 'variance_type': 'fixed_small', 'clip_sample': True, 'prediction_type': 'epsilon'}, 'obs_dim': 46, 'action_dim': 26, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'hidden_size': [256, 256], 'activation': 'relu', 'gamma': 0.99, 'tau': 0.005, 'eta': 1.0, 'rho': 1.0, 'target_reduction': 'min', 'max_q_backup': False, 'actor_update_interval': 1, 'value_warmup_steps': 1000, 'num_eval_sample_steps': 100, 'act_limit': None, 'squash_action': False, 'init_weight': 0.01, 'checkpoint_path': None, 'behavior_checkpoint_path': 'data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0055-val_loss=0.215.ckpt', 'init_actor_from_behavior': True}, 'dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'critic_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'value_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'training': {'device': 'cuda:0', 'seed': 45, 'debug': False, 'resume': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 500, 'num_epochs': 201, 'gradient_accumulate_every': 1, 'grad_norm_clip': 5.0, 'rollout_every': 10, 'checkpoint_every': 10, 'val_every': 1, 'sample_every': 10, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'adroit-hand-hammer-v1-bdpo-training', 'resume': True, 'mode': 'online', 'name': '2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'tags': ['train_bdpo_unet_lowdim', 'adroit-hand-hammer-v1', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'wandb_name_base': '2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}, 'task': {'name': 'adroit-hand-hammer-v1', 'obs_dim': 46, 'action_dim': 26, 'env_runner': {'_target_': 'llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner', 'env_name': 'llf-adroit-adroit-hand-hammer-v1', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 150, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.99}, 'dataset': {'_target_': 'llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset', 'data_path': 'datasets/adroit-hand-hammer-v1-general-reward-trans.pt', 'data_path2': 'datasets/adroit-hand-hammer-v1-reward-trans.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': False}, 'instructor': {'_target_': 'llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor'}}}
14
- 2026-04-05 19:47:37,704 INFO MainThread:337747 [wandb_init.py:init():669] starting backend
15
- 2026-04-05 19:47:37,704 INFO MainThread:337747 [wandb_init.py:init():673] sending inform_init request
16
- 2026-04-05 19:47:37,706 INFO MainThread:337747 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2026-04-05 19:47:37,707 INFO MainThread:337747 [wandb_init.py:init():686] backend started and connected
18
- 2026-04-05 19:47:37,722 INFO MainThread:337747 [wandb_init.py:init():781] updated telemetry
19
- 2026-04-05 19:47:37,839 INFO MainThread:337747 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2026-04-05 19:47:38,798 INFO MainThread:337747 [wandb_init.py:init():867] starting run threads in backend
21
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_console_start():2451] atexit reg
22
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2389] Redirects installed.
25
- 2026-04-05 19:47:39,357 INFO MainThread:337747 [wandb_init.py:init():911] run started, returning control to user process
26
- 2026-04-05 19:47:39,358 INFO MainThread:337747 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}
27
- 2026-04-06 14:06:45,703 WARNING MsgRouterThr:337747 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/config.yaml DELETED
@@ -1,250 +0,0 @@
1
- _target_:
2
- value: llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace
3
- _wandb:
4
- value:
5
- cli_version: 0.18.6
6
- m: []
7
- python_version: 3.9.21
8
- t:
9
- "1":
10
- - 1
11
- - 2
12
- - 3
13
- - 5
14
- - 11
15
- - 12
16
- - 41
17
- - 49
18
- - 50
19
- - 51
20
- - 53
21
- - 55
22
- - 71
23
- - 83
24
- - 95
25
- - 98
26
- - 100
27
- - 105
28
- "2":
29
- - 1
30
- - 2
31
- - 3
32
- - 5
33
- - 11
34
- - 12
35
- - 41
36
- - 49
37
- - 50
38
- - 51
39
- - 53
40
- - 55
41
- - 71
42
- - 83
43
- - 95
44
- - 98
45
- - 100
46
- - 105
47
- "3":
48
- - 13
49
- - 15
50
- - 16
51
- - 23
52
- - 55
53
- - 61
54
- "4": 3.9.21
55
- "5": 0.18.6
56
- "6": 4.47.1
57
- "8":
58
- - 5
59
- "12": 0.18.6
60
- "13": linux-x86_64
61
- action_dim:
62
- value: 26
63
- checkpoint:
64
- value:
65
- save_last_ckpt: true
66
- save_last_snapshot: false
67
- topk:
68
- format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
69
- k: 5
70
- mode: max
71
- monitor_key: test_success_rate
72
- critic_optimizer:
73
- value:
74
- _target_: torch.optim.Adam
75
- betas:
76
- - 0.9
77
- - 0.999
78
- eps: 1e-08
79
- lr: 0.0001
80
- weight_decay: 0
81
- dataloader:
82
- value:
83
- batch_size: 256
84
- num_workers: 0
85
- persistent_workers: false
86
- pin_memory: false
87
- shuffle: true
88
- exp_name:
89
- value: default
90
- horizon:
91
- value: 1
92
- logging:
93
- value:
94
- group: null
95
- id: null
96
- mode: online
97
- name: 2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
98
- project: adroit-hand-hammer-v1-bdpo-training
99
- resume: true
100
- tags:
101
- - train_bdpo_unet_lowdim
102
- - adroit-hand-hammer-v1
103
- - default
104
- multi_run:
105
- value:
106
- run_dir: data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
107
- wandb_name_base: 2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
108
- n_action_steps:
109
- value: 1
110
- n_latency_steps:
111
- value: 0
112
- n_obs_steps:
113
- value: 1
114
- name:
115
- value: train_bdpo_unet_lowdim
116
- obs_dim:
117
- value: 46
118
- optimizer:
119
- value:
120
- _target_: torch.optim.Adam
121
- betas:
122
- - 0.9
123
- - 0.999
124
- eps: 1e-08
125
- lr: 0.0001
126
- weight_decay: 0
127
- output_dir:
128
- value: /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
129
- past_action_visible:
130
- value: false
131
- policy:
132
- value:
133
- _target_: llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy
134
- act_limit: null
135
- action_dim: 26
136
- activation: relu
137
- actor_update_interval: 1
138
- behavior_checkpoint_path: data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0055-val_loss=0.215.ckpt
139
- checkpoint_path: null
140
- eta: 1
141
- gamma: 0.99
142
- hidden_size:
143
- - 256
144
- - 256
145
- horizon: 1
146
- init_actor_from_behavior: true
147
- init_weight: 0.01
148
- max_q_backup: false
149
- model:
150
- _target_: llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D
151
- cond_predict_scale: true
152
- diffusion_step_embed_dim: 256
153
- down_dims:
154
- - 256
155
- - 512
156
- - 1024
157
- global_cond_dim: 46
158
- input_dim: 26
159
- kernel_size: 5
160
- local_cond_dim: null
161
- n_groups: 8
162
- n_action_steps: 1
163
- n_obs_steps: 1
164
- noise_scheduler:
165
- _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
166
- beta_end: 0.02
167
- beta_schedule: squaredcos_cap_v2
168
- beta_start: 0.0001
169
- clip_sample: true
170
- num_train_timesteps: 100
171
- prediction_type: epsilon
172
- variance_type: fixed_small
173
- num_eval_sample_steps: 100
174
- obs_dim: 46
175
- rho: 1
176
- squash_action: false
177
- target_reduction: min
178
- tau: 0.005
179
- value_warmup_steps: 1000
180
- task:
181
- value:
182
- action_dim: 26
183
- dataset:
184
- _target_: llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset
185
- data_path: datasets/adroit-hand-hammer-v1-general-reward-trans.pt
186
- data_path2: datasets/adroit-hand-hammer-v1-reward-trans.pt
187
- dummy_normalizer: false
188
- horizon: 1
189
- obs_eef_target: true
190
- pad_after: 0
191
- pad_before: 0
192
- use_manual_normalizer: false
193
- val_ratio: 0.1
194
- env_runner:
195
- _target_: llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner
196
- discount: 0.99
197
- env_name: llf-adroit-adroit-hand-hammer-v1
198
- feedback_type:
199
- - hp
200
- - hn
201
- - fp
202
- instruction_type: b
203
- max_steps: 150
204
- n_action_steps: 1
205
- n_envs: 10
206
- n_obs_steps: 1
207
- n_test: 50
208
- n_train: 10
209
- visual: false
210
- instructor:
211
- _target_: llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor
212
- name: adroit-hand-hammer-v1
213
- obs_dim: 46
214
- task_name:
215
- value: adroit-hand-hammer-v1
216
- training:
217
- value:
218
- checkpoint_every: 10
219
- debug: false
220
- device: cuda:0
221
- grad_norm_clip: 5
222
- gradient_accumulate_every: 1
223
- lr_scheduler: constant
224
- lr_warmup_steps: 500
225
- max_train_steps: null
226
- max_val_steps: null
227
- num_epochs: 201
228
- resume: false
229
- rollout_every: 10
230
- sample_every: 10
231
- sample_max_batch: 128
232
- seed: 45
233
- tqdm_interval_sec: 1
234
- val_every: 1
235
- val_dataloader:
236
- value:
237
- batch_size: 256
238
- num_workers: 0
239
- persistent_workers: false
240
- pin_memory: false
241
- shuffle: true
242
- value_optimizer:
243
- value:
244
- _target_: torch.optim.Adam
245
- betas:
246
- - 0.9
247
- - 0.999
248
- eps: 1e-08
249
- lr: 0.0001
250
- weight_decay: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/output.log DELETED
@@ -1,3 +0,0 @@
1
- Eval AdroitHandLowdimRunner 1/6: 0%| | 0/150 [00:00<?, ?it/s]/tmp2/chyang/workspace/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
2
- obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
3
-
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/wandb-metadata.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-90-generic-x86_64-with-glibc2.35",
3
- "python": "3.9.21",
4
- "startedAt": "2026-04-05T11:47:37.707689Z",
5
- "args": [
6
- "--config-path",
7
- "config/main_table",
8
- "--config-name",
9
- "bdpo_adroit-hand-hammer-v1_gen.yaml",
10
- "training.seed=45",
11
- "policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\\=0055-val_loss\\=0.215.ckpt"
12
- ],
13
- "program": "/tmp2/chyang/workspace/LLM-BC/./train.py",
14
- "codePath": "train.py",
15
- "git": {
16
- "remote": "https://github.com/CHYang25/LLM-BC.git",
17
- "commit": "a983d49eed9d3b7c64b22a85eb6908c8694405af"
18
- },
19
- "email": "chris920325@gmail.com",
20
- "root": "/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1",
21
- "host": "rllab1012",
22
- "username": "chyang",
23
- "executable": "/home/chyang/miniconda3/envs/llm-bc/bin/python3",
24
- "codePathLocal": "train.py",
25
- "cpu_count": 10,
26
- "cpu_count_logical": 20,
27
- "gpu": "NVIDIA GeForce RTX 4070 Ti",
28
- "gpu_count": 2,
29
- "disk": {
30
- "/": {
31
- "total": "982820896768",
32
- "used": "414740176896"
33
- }
34
- },
35
- "memory": {
36
- "total": "134736023552"
37
- },
38
- "cpu": {
39
- "count": 10,
40
- "countLogical": 20
41
- },
42
- "gpu_nvidia": [
43
- {
44
- "name": "NVIDIA GeForce RTX 4070 Ti",
45
- "memoryTotal": "12878610432",
46
- "cudaCores": 7680,
47
- "architecture": "Ada"
48
- },
49
- {
50
- "name": "NVIDIA GeForce RTX 4070 Ti",
51
- "memoryTotal": "12878610432",
52
- "cudaCores": 7680,
53
- "architecture": "Ada"
54
- }
55
- ],
56
- "cudaVersion": "13.0"
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"stats/bootstrap_mean":-132.64468383789062,"stats/t_mean":48.97916793823242,"test/mean_score":-1.2400550559828623,"_wandb":{"runtime":65947},"stats/done_mean":0.010416666977107525,"loss/v1":11.482786178588867,"loss/q2":371.92071533203125,"stats/reward_mean":0.5011574029922485,"train/success_rate":0,"global_step":17687,"loss/v":26.77777862548828,"test/success_rate":0,"train_loss":5260.534036042,"_runtime":65947.995279549,"epoch":200,"loss/q":769.3199462890625,"lr":0.0001,"val_loss":5150.252891975641,"train/mean_score":-1.2464290659121704,"loss/actor_bootstrap":-132.64468383789062,"loss/q1":397.3992614746094,"_timestamp":1.775455588772426e+09,"loss/actor":133.28306579589844,"_step":17687,"train/cumulative_reward":-103.11810478313211,"loss/v2":15.29499340057373,"test/cumulative_reward":-102.2961005109052,"loss/actor_kl":0.6383782029151917,"stats/kl_mean":0.6383782029151917}
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2026-04-05T19:47:37.00983905+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpol07t4o4/port-337747.txt","pid":337747,"debug":false,"disable-analytics":false}
2
- {"time":"2026-04-05T19:47:37.009863037+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2026-04-05T19:47:37.027426037+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":337747}
4
- {"time":"2026-04-05T19:47:37.027459487+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45777,"Zone":""}}
5
- {"time":"2026-04-05T19:47:37.206359992+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51182"}
6
- {"time":"2026-04-05T19:47:37.71135652+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"4ns1dtmm","id":"127.0.0.1:51182"}
7
- {"time":"2026-04-05T19:47:37.820725434+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"4ns1dtmm","id":"127.0.0.1:51182"}
8
- {"time":"2026-04-06T14:06:45.702830394+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51182"}
9
- {"time":"2026-04-06T14:06:45.703001947+08:00","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2026-04-06T14:06:45.702969429+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:51182"}
11
- {"time":"2026-04-06T14:06:45.70315781+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:51182"}
12
- {"time":"2026-04-06T14:06:49.547222645+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51182"}
13
- {"time":"2026-04-06T14:06:49.547299852+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51182"}
14
- {"time":"2026-04-06T14:06:49.547332331+08:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-internal.log DELETED
@@ -1,16 +0,0 @@
1
- {"time":"2026-04-05T19:47:37.711575744+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
- {"time":"2026-04-05T19:47:37.711598693+08:00","level":"INFO","msg":"created symlink","path":"/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-core.log"}
3
- {"time":"2026-04-05T19:47:37.820659395+08:00","level":"INFO","msg":"created new stream","id":"4ns1dtmm"}
4
- {"time":"2026-04-05T19:47:37.820714248+08:00","level":"INFO","msg":"stream: started","id":"4ns1dtmm"}
5
- {"time":"2026-04-05T19:47:37.820739406+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"4ns1dtmm"}}
6
- {"time":"2026-04-05T19:47:37.820815969+08:00","level":"INFO","msg":"sender: started","stream_id":"4ns1dtmm"}
7
- {"time":"2026-04-05T19:47:37.820853585+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"4ns1dtmm"}}
8
- {"time":"2026-04-05T19:47:38.804274688+08:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2026-04-06T14:06:45.702933515+08:00","level":"INFO","msg":"stream: closing","id":"4ns1dtmm"}
10
- {"time":"2026-04-06T14:06:45.702982796+08:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2026-04-06T14:06:45.703854988+08:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2026-04-06T14:06:49.250748912+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
- {"time":"2026-04-06T14:06:49.546933574+08:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"4ns1dtmm"}}
14
- {"time":"2026-04-06T14:06:49.546991191+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"4ns1dtmm"}}
15
- {"time":"2026-04-06T14:06:49.547007286+08:00","level":"INFO","msg":"sender: closed","stream_id":"4ns1dtmm"}
16
- {"time":"2026-04-06T14:06:49.547065389+08:00","level":"INFO","msg":"stream: closed","id":"4ns1dtmm"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Configure stats pid to 337747
3
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from /home/chyang/.config/wandb/settings
4
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from /tmp2/chyang/workspace/LLM-BC/wandb/settings
5
- 2026-04-05 19:47:37,702 INFO MainThread:337747 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/tmp2/chyang/workspace/LLM-BC/train.py', 'program': '/tmp2/chyang/workspace/LLM-BC/./train.py'}
8
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:_log_setup():533] Logging user logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug.log
10
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:_log_setup():534] Logging internal logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/logs/debug-internal.log
11
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:init():619] calling init triggers
12
- 2026-04-05 19:47:37,703 INFO MainThread:337747 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {'name': 'train_bdpo_unet_lowdim', '_target_': 'llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace', 'obs_dim': 46, 'action_dim': 26, 'task_name': 'adroit-hand-hammer-v1', 'exp_name': 'default', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'policy': {'_target_': 'llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy', 'model': {'_target_': 'llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D', 'input_dim': 26, 'local_cond_dim': None, 'global_cond_dim': 46, 'diffusion_step_embed_dim': 256, 'down_dims': [256, 512, 1024], 'kernel_size': 5, 'n_groups': 8, 'cond_predict_scale': True}, 'noise_scheduler': {'_target_': 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler', 'num_train_timesteps': 100, 'beta_start': 0.0001, 'beta_end': 0.02, 'beta_schedule': 'squaredcos_cap_v2', 'variance_type': 'fixed_small', 'clip_sample': True, 'prediction_type': 'epsilon'}, 'obs_dim': 46, 'action_dim': 26, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'hidden_size': [256, 256], 'activation': 'relu', 'gamma': 0.99, 'tau': 0.005, 'eta': 1.0, 'rho': 1.0, 'target_reduction': 'min', 'max_q_backup': False, 'actor_update_interval': 1, 'value_warmup_steps': 1000, 'num_eval_sample_steps': 100, 'act_limit': None, 'squash_action': False, 'init_weight': 0.01, 'checkpoint_path': None, 'behavior_checkpoint_path': 'data/outputs/2026.04.04/12.00.56_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0055-val_loss=0.215.ckpt', 'init_actor_from_behavior': True}, 'dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'critic_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'value_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'training': {'device': 'cuda:0', 'seed': 45, 'debug': False, 'resume': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 500, 'num_epochs': 201, 'gradient_accumulate_every': 1, 'grad_norm_clip': 5.0, 'rollout_every': 10, 'checkpoint_every': 10, 'val_every': 1, 'sample_every': 10, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'adroit-hand-hammer-v1-bdpo-training', 'resume': True, 'mode': 'online', 'name': '2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'tags': ['train_bdpo_unet_lowdim', 'adroit-hand-hammer-v1', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'wandb_name_base': '2026.04.05-19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}, 'task': {'name': 'adroit-hand-hammer-v1', 'obs_dim': 46, 'action_dim': 26, 'env_runner': {'_target_': 'llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner', 'env_name': 'llf-adroit-adroit-hand-hammer-v1', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 150, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.99}, 'dataset': {'_target_': 'llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset', 'data_path': 'datasets/adroit-hand-hammer-v1-general-reward-trans.pt', 'data_path2': 'datasets/adroit-hand-hammer-v1-reward-trans.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': False}, 'instructor': {'_target_': 'llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor'}}}
14
- 2026-04-05 19:47:37,704 INFO MainThread:337747 [wandb_init.py:init():669] starting backend
15
- 2026-04-05 19:47:37,704 INFO MainThread:337747 [wandb_init.py:init():673] sending inform_init request
16
- 2026-04-05 19:47:37,706 INFO MainThread:337747 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2026-04-05 19:47:37,707 INFO MainThread:337747 [wandb_init.py:init():686] backend started and connected
18
- 2026-04-05 19:47:37,722 INFO MainThread:337747 [wandb_init.py:init():781] updated telemetry
19
- 2026-04-05 19:47:37,839 INFO MainThread:337747 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2026-04-05 19:47:38,798 INFO MainThread:337747 [wandb_init.py:init():867] starting run threads in backend
21
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_console_start():2451] atexit reg
22
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
- 2026-04-05 19:47:39,355 INFO MainThread:337747 [wandb_run.py:_redirect():2389] Redirects installed.
25
- 2026-04-05 19:47:39,357 INFO MainThread:337747 [wandb_init.py:init():911] run started, returning control to user process
26
- 2026-04-05 19:47:39,358 INFO MainThread:337747 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}
27
- 2026-04-06 14:06:45,703 WARNING MsgRouterThr:337747 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_194737-4ns1dtmm/run-4ns1dtmm.wandb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa79c191fcbc824eb60ac297cc1c938864b9b4b3cc644464f6875d37f27750c1
3
- size 51380340
 
 
 
 
2026.04.05/19.47.17_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/wandb-resume.json DELETED
@@ -1 +0,0 @@
1
- {"run_id": "4ns1dtmm"}
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/config.yaml DELETED
@@ -1,168 +0,0 @@
1
- name: train_bdpo_unet_lowdim
2
- _target_: llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace
3
- obs_dim: ${task.obs_dim}
4
- action_dim: ${task.action_dim}
5
- task_name: ${task.name}
6
- exp_name: default
7
- horizon: 1
8
- n_obs_steps: 1
9
- n_action_steps: 1
10
- n_latency_steps: 0
11
- past_action_visible: false
12
- policy:
13
- _target_: llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy
14
- model:
15
- _target_: llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D
16
- input_dim: ${task.action_dim}
17
- local_cond_dim: null
18
- global_cond_dim: '${eval: ${task.obs_dim}*${n_obs_steps}}'
19
- diffusion_step_embed_dim: 256
20
- down_dims:
21
- - 256
22
- - 512
23
- - 1024
24
- kernel_size: 5
25
- n_groups: 8
26
- cond_predict_scale: true
27
- noise_scheduler:
28
- _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
29
- num_train_timesteps: 100
30
- beta_start: 0.0001
31
- beta_end: 0.02
32
- beta_schedule: squaredcos_cap_v2
33
- variance_type: fixed_small
34
- clip_sample: true
35
- prediction_type: epsilon
36
- obs_dim: ${obs_dim}
37
- action_dim: ${action_dim}
38
- horizon: ${horizon}
39
- n_obs_steps: ${n_obs_steps}
40
- n_action_steps: ${n_action_steps}
41
- hidden_size:
42
- - 256
43
- - 256
44
- activation: relu
45
- gamma: ${task.env_runner.discount}
46
- tau: 0.005
47
- eta: 1.0
48
- rho: 1.0
49
- target_reduction: min
50
- max_q_backup: false
51
- actor_update_interval: 1
52
- value_warmup_steps: 1000
53
- num_eval_sample_steps: 100
54
- act_limit: null
55
- squash_action: false
56
- init_weight: 0.01
57
- checkpoint_path: null
58
- behavior_checkpoint_path: data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0095-val_loss=0.199.ckpt
59
- init_actor_from_behavior: true
60
- dataloader:
61
- batch_size: 256
62
- num_workers: 0
63
- shuffle: true
64
- pin_memory: false
65
- persistent_workers: false
66
- val_dataloader:
67
- batch_size: 256
68
- num_workers: 0
69
- shuffle: true
70
- pin_memory: false
71
- persistent_workers: false
72
- optimizer:
73
- _target_: torch.optim.Adam
74
- lr: 0.0001
75
- betas:
76
- - 0.9
77
- - 0.999
78
- eps: 1.0e-08
79
- weight_decay: 0.0
80
- critic_optimizer:
81
- _target_: torch.optim.Adam
82
- lr: 0.0001
83
- betas:
84
- - 0.9
85
- - 0.999
86
- eps: 1.0e-08
87
- weight_decay: 0.0
88
- value_optimizer:
89
- _target_: torch.optim.Adam
90
- lr: 0.0001
91
- betas:
92
- - 0.9
93
- - 0.999
94
- eps: 1.0e-08
95
- weight_decay: 0.0
96
- training:
97
- device: cuda:0
98
- seed: 46
99
- debug: false
100
- resume: false
101
- lr_scheduler: constant
102
- lr_warmup_steps: 500
103
- num_epochs: 201
104
- gradient_accumulate_every: 1
105
- grad_norm_clip: 5.0
106
- rollout_every: 10
107
- checkpoint_every: 10
108
- val_every: 1
109
- sample_every: 10
110
- sample_max_batch: 128
111
- max_train_steps: null
112
- max_val_steps: null
113
- tqdm_interval_sec: 1.0
114
- logging:
115
- project: ${task.name}-bdpo-training
116
- resume: true
117
- mode: online
118
- name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
119
- tags:
120
- - ${name}
121
- - ${task_name}
122
- - ${exp_name}
123
- id: null
124
- group: null
125
- checkpoint:
126
- topk:
127
- monitor_key: test_success_rate
128
- mode: max
129
- k: 5
130
- format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
131
- save_last_ckpt: true
132
- save_last_snapshot: false
133
- multi_run:
134
- run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
135
- wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
136
- task:
137
- name: adroit-hand-hammer-v1
138
- obs_dim: 46
139
- action_dim: 26
140
- env_runner:
141
- _target_: llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner
142
- env_name: llf-adroit-adroit-hand-hammer-v1
143
- n_train: 10
144
- n_test: 50
145
- n_envs: 10
146
- max_steps: 150
147
- n_obs_steps: ${n_obs_steps}
148
- n_action_steps: ${n_action_steps}
149
- instruction_type: b
150
- feedback_type:
151
- - hp
152
- - hn
153
- - fp
154
- visual: false
155
- discount: 0.99
156
- dataset:
157
- _target_: llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset
158
- data_path: datasets/adroit-hand-hammer-v1-general-reward-trans.pt
159
- data_path2: datasets/adroit-hand-hammer-v1-reward-trans.pt
160
- horizon: ${horizon}
161
- pad_before: ${eval:'${n_obs_steps}-1'}
162
- pad_after: ${eval:'${n_action_steps}-1'}
163
- obs_eef_target: true
164
- use_manual_normalizer: false
165
- val_ratio: 0.1
166
- dummy_normalizer: false
167
- instructor:
168
- _target_: llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/hydra.yaml DELETED
@@ -1,156 +0,0 @@
1
- hydra:
2
- run:
3
- dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
- sweep:
5
- dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
- subdir: ${hydra.job.num}
7
- launcher:
8
- _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
- sweeper:
10
- _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
- max_batch_size: null
12
- params: null
13
- help:
14
- app_name: ${hydra.job.name}
15
- header: '${hydra.help.app_name} is powered by Hydra.
16
-
17
- '
18
- footer: 'Powered by Hydra (https://hydra.cc)
19
-
20
- Use --hydra-help to view Hydra specific help
21
-
22
- '
23
- template: '${hydra.help.header}
24
-
25
- == Configuration groups ==
26
-
27
- Compose your configuration from those groups (group=option)
28
-
29
-
30
- $APP_CONFIG_GROUPS
31
-
32
-
33
- == Config ==
34
-
35
- Override anything in the config (foo.bar=value)
36
-
37
-
38
- $CONFIG
39
-
40
-
41
- ${hydra.help.footer}
42
-
43
- '
44
- hydra_help:
45
- template: 'Hydra (${hydra.runtime.version})
46
-
47
- See https://hydra.cc for more info.
48
-
49
-
50
- == Flags ==
51
-
52
- $FLAGS_HELP
53
-
54
-
55
- == Configuration groups ==
56
-
57
- Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
- to command line)
59
-
60
-
61
- $HYDRA_CONFIG_GROUPS
62
-
63
-
64
- Use ''--cfg hydra'' to Show the Hydra config.
65
-
66
- '
67
- hydra_help: ???
68
- hydra_logging:
69
- version: 1
70
- formatters:
71
- simple:
72
- format: '[%(asctime)s][HYDRA] %(message)s'
73
- handlers:
74
- console:
75
- class: logging.StreamHandler
76
- formatter: simple
77
- stream: ext://sys.stdout
78
- root:
79
- level: INFO
80
- handlers:
81
- - console
82
- loggers:
83
- logging_example:
84
- level: DEBUG
85
- disable_existing_loggers: false
86
- job_logging:
87
- version: 1
88
- formatters:
89
- simple:
90
- format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
- handlers:
92
- console:
93
- class: logging.StreamHandler
94
- formatter: simple
95
- stream: ext://sys.stdout
96
- file:
97
- class: logging.FileHandler
98
- formatter: simple
99
- filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
- root:
101
- level: INFO
102
- handlers:
103
- - console
104
- - file
105
- disable_existing_loggers: false
106
- env: {}
107
- mode: RUN
108
- searchpath: []
109
- callbacks: {}
110
- output_subdir: .hydra
111
- overrides:
112
- hydra:
113
- - hydra.mode=RUN
114
- task:
115
- - training.seed=46
116
- - policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0095-val_loss\=0.199.ckpt
117
- job:
118
- name: train
119
- chdir: null
120
- override_dirname: policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0095-val_loss\=0.199.ckpt,training.seed=46
121
- id: ???
122
- num: ???
123
- config_name: bdpo_adroit-hand-hammer-v1_gen.yaml
124
- env_set: {}
125
- env_copy: []
126
- config:
127
- override_dirname:
128
- kv_sep: '='
129
- item_sep: ','
130
- exclude_keys: []
131
- runtime:
132
- version: 1.2.0
133
- version_base: '1.2'
134
- cwd: /tmp2/chyang/workspace/LLM-BC
135
- config_sources:
136
- - path: hydra.conf
137
- schema: pkg
138
- provider: hydra
139
- - path: /tmp2/chyang/workspace/LLM-BC/config/main_table
140
- schema: file
141
- provider: main
142
- - path: ''
143
- schema: structured
144
- provider: schema
145
- output_dir: /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
146
- choices:
147
- hydra/env: default
148
- hydra/callbacks: null
149
- hydra/job_logging: default
150
- hydra/hydra_logging: default
151
- hydra/hydra_help: default
152
- hydra/help: default
153
- hydra/sweeper: basic
154
- hydra/launcher: basic
155
- hydra/output: default
156
- verbose: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/.hydra/overrides.yaml DELETED
@@ -1,2 +0,0 @@
1
- - training.seed=46
2
- - policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\=0095-val_loss\=0.199.ckpt
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0000-test_success_rate=0.260.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:47079e0e4d5b09e26e06694447130ac329307e1f6b535571b9b94c1d2280f0f7
3
- size 1055778720
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0010-test_success_rate=0.380.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:40bbe1b7b98ea2387dba707176f101aebba65d970e3e460dc02953a66d671fc8
3
- size 1055778720
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0020-test_success_rate=0.160.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2480d5100d2f36645ffe097e9c3cb3d8c8dab192e369784b5c61b3cad8a6d23b
3
- size 1580582693
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0030-test_success_rate=0.360.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:919839d7af703a49b8938af2dda0f44953ac2eb5ed3829aaa72751afec860800
3
- size 1580582693
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0040-test_success_rate=0.300.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b4bf2991dfc5b6c2eefa93a5ba4a9b1c5bfde1595f9542aa60e767780e758b2
3
- size 1580582693
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/checkpoints/latest.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeaadcac1a542e9eb381a8982b030d9d0f4a0b4bb752fa4dac6d97b92a05c79e
3
- size 1580582693
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/logs.json.txt DELETED
The diff for this file is too large to render. See raw diff
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/train.log DELETED
@@ -1,11 +0,0 @@
1
- [2026-04-05 20:14:52,028][numexpr.utils][INFO] - Note: NumExpr detected 20 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
2
- [2026-04-05 20:14:52,028][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
3
- [2026-04-05 20:14:52,509][llmbc.model.diffusion.conditional_unet1d][INFO] - number of parameters: 6.558467e+07
4
- [2026-04-05 20:14:56,299][llmbc.model.diffusion.conditional_unet1d][INFO] - number of parameters: 6.558467e+07
5
- [2026-04-05 20:14:59,375][OpenGL.acceleratesupport][WARNING] - Incompatible version of OpenGL_accelerate found, need at least (3, 1, 10) found (3, 1, 9)
6
- [2026-04-05 20:14:59,375][OpenGL.acceleratesupport][INFO] - No OpenGL_accelerate module loaded: Old version of OpenGL_accelerate
7
- [2026-04-05 20:15:01,473][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
8
- [2026-04-05 20:15:01,479][absl][INFO] - MuJoCo library version is: 2.3.7
9
- [2026-04-05 20:15:01,580][datasets][INFO] - PyTorch version 2.2.2 available.
10
- [2026-04-05 20:15:01,581][datasets][INFO] - TensorFlow version 2.15.1 available.
11
- [2026-04-05 20:15:01,581][datasets][INFO] - JAX version 0.4.30 available.
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug-internal.log DELETED
@@ -1,16 +0,0 @@
1
- {"time":"2026-04-05T20:15:02.675207147+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
- {"time":"2026-04-05T20:15:02.675236072+08:00","level":"INFO","msg":"created symlink","path":"/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-core.log"}
3
- {"time":"2026-04-05T20:15:02.783639967+08:00","level":"INFO","msg":"created new stream","id":"gsxv5o2x"}
4
- {"time":"2026-04-05T20:15:02.783682009+08:00","level":"INFO","msg":"stream: started","id":"gsxv5o2x"}
5
- {"time":"2026-04-05T20:15:02.783719383+08:00","level":"INFO","msg":"sender: started","stream_id":"gsxv5o2x"}
6
- {"time":"2026-04-05T20:15:02.783704075+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"gsxv5o2x"}}
7
- {"time":"2026-04-05T20:15:02.783701477+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"gsxv5o2x"}}
8
- {"time":"2026-04-05T20:15:03.681306658+08:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2026-04-06T14:21:13.810659156+08:00","level":"INFO","msg":"stream: closing","id":"gsxv5o2x"}
10
- {"time":"2026-04-06T14:21:13.81070755+08:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2026-04-06T14:21:13.811296491+08:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2026-04-06T14:21:15.381579169+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
- {"time":"2026-04-06T14:21:15.820068161+08:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"gsxv5o2x"}}
14
- {"time":"2026-04-06T14:21:15.820142874+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"gsxv5o2x"}}
15
- {"time":"2026-04-06T14:21:15.820175329+08:00","level":"INFO","msg":"sender: closed","stream_id":"gsxv5o2x"}
16
- {"time":"2026-04-06T14:21:15.820335911+08:00","level":"INFO","msg":"stream: closed","id":"gsxv5o2x"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Configure stats pid to 349699
3
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from /home/chyang/.config/wandb/settings
4
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from /tmp2/chyang/workspace/LLM-BC/wandb/settings
5
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/tmp2/chyang/workspace/LLM-BC/train.py', 'program': '/tmp2/chyang/workspace/LLM-BC/./train.py'}
8
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:_log_setup():533] Logging user logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug.log
10
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:_log_setup():534] Logging internal logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-internal.log
11
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:init():619] calling init triggers
12
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {'name': 'train_bdpo_unet_lowdim', '_target_': 'llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace', 'obs_dim': 46, 'action_dim': 26, 'task_name': 'adroit-hand-hammer-v1', 'exp_name': 'default', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'policy': {'_target_': 'llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy', 'model': {'_target_': 'llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D', 'input_dim': 26, 'local_cond_dim': None, 'global_cond_dim': 46, 'diffusion_step_embed_dim': 256, 'down_dims': [256, 512, 1024], 'kernel_size': 5, 'n_groups': 8, 'cond_predict_scale': True}, 'noise_scheduler': {'_target_': 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler', 'num_train_timesteps': 100, 'beta_start': 0.0001, 'beta_end': 0.02, 'beta_schedule': 'squaredcos_cap_v2', 'variance_type': 'fixed_small', 'clip_sample': True, 'prediction_type': 'epsilon'}, 'obs_dim': 46, 'action_dim': 26, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'hidden_size': [256, 256], 'activation': 'relu', 'gamma': 0.99, 'tau': 0.005, 'eta': 1.0, 'rho': 1.0, 'target_reduction': 'min', 'max_q_backup': False, 'actor_update_interval': 1, 'value_warmup_steps': 1000, 'num_eval_sample_steps': 100, 'act_limit': None, 'squash_action': False, 'init_weight': 0.01, 'checkpoint_path': None, 'behavior_checkpoint_path': 'data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0095-val_loss=0.199.ckpt', 'init_actor_from_behavior': True}, 'dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'critic_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'value_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'training': {'device': 'cuda:0', 'seed': 46, 'debug': False, 'resume': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 500, 'num_epochs': 201, 'gradient_accumulate_every': 1, 'grad_norm_clip': 5.0, 'rollout_every': 10, 'checkpoint_every': 10, 'val_every': 1, 'sample_every': 10, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'adroit-hand-hammer-v1-bdpo-training', 'resume': True, 'mode': 'online', 'name': '2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'tags': ['train_bdpo_unet_lowdim', 'adroit-hand-hammer-v1', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'wandb_name_base': '2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}, 'task': {'name': 'adroit-hand-hammer-v1', 'obs_dim': 46, 'action_dim': 26, 'env_runner': {'_target_': 'llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner', 'env_name': 'llf-adroit-adroit-hand-hammer-v1', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 150, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.99}, 'dataset': {'_target_': 'llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset', 'data_path': 'datasets/adroit-hand-hammer-v1-general-reward-trans.pt', 'data_path2': 'datasets/adroit-hand-hammer-v1-reward-trans.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': False}, 'instructor': {'_target_': 'llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor'}}}
14
- 2026-04-05 20:15:02,669 INFO MainThread:349699 [wandb_init.py:init():669] starting backend
15
- 2026-04-05 20:15:02,669 INFO MainThread:349699 [wandb_init.py:init():673] sending inform_init request
16
- 2026-04-05 20:15:02,671 INFO MainThread:349699 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2026-04-05 20:15:02,671 INFO MainThread:349699 [wandb_init.py:init():686] backend started and connected
18
- 2026-04-05 20:15:02,678 INFO MainThread:349699 [wandb_init.py:init():781] updated telemetry
19
- 2026-04-05 20:15:02,741 INFO MainThread:349699 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2026-04-05 20:15:03,675 INFO MainThread:349699 [wandb_init.py:init():867] starting run threads in backend
21
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_console_start():2451] atexit reg
22
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2389] Redirects installed.
25
- 2026-04-05 20:15:04,116 INFO MainThread:349699 [wandb_init.py:init():911] run started, returning control to user process
26
- 2026-04-05 20:15:04,117 INFO MainThread:349699 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}
27
- 2026-04-06 14:21:13,810 WARNING MsgRouterThr:349699 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/config.yaml DELETED
@@ -1,250 +0,0 @@
1
- _target_:
2
- value: llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace
3
- _wandb:
4
- value:
5
- cli_version: 0.18.6
6
- m: []
7
- python_version: 3.9.21
8
- t:
9
- "1":
10
- - 1
11
- - 2
12
- - 3
13
- - 5
14
- - 11
15
- - 12
16
- - 41
17
- - 49
18
- - 50
19
- - 51
20
- - 53
21
- - 55
22
- - 71
23
- - 83
24
- - 95
25
- - 98
26
- - 100
27
- - 105
28
- "2":
29
- - 1
30
- - 2
31
- - 3
32
- - 5
33
- - 11
34
- - 12
35
- - 41
36
- - 49
37
- - 50
38
- - 51
39
- - 53
40
- - 55
41
- - 71
42
- - 83
43
- - 95
44
- - 98
45
- - 100
46
- - 105
47
- "3":
48
- - 13
49
- - 15
50
- - 16
51
- - 23
52
- - 55
53
- - 61
54
- "4": 3.9.21
55
- "5": 0.18.6
56
- "6": 4.47.1
57
- "8":
58
- - 5
59
- "12": 0.18.6
60
- "13": linux-x86_64
61
- action_dim:
62
- value: 26
63
- checkpoint:
64
- value:
65
- save_last_ckpt: true
66
- save_last_snapshot: false
67
- topk:
68
- format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
69
- k: 5
70
- mode: max
71
- monitor_key: test_success_rate
72
- critic_optimizer:
73
- value:
74
- _target_: torch.optim.Adam
75
- betas:
76
- - 0.9
77
- - 0.999
78
- eps: 1e-08
79
- lr: 0.0001
80
- weight_decay: 0
81
- dataloader:
82
- value:
83
- batch_size: 256
84
- num_workers: 0
85
- persistent_workers: false
86
- pin_memory: false
87
- shuffle: true
88
- exp_name:
89
- value: default
90
- horizon:
91
- value: 1
92
- logging:
93
- value:
94
- group: null
95
- id: null
96
- mode: online
97
- name: 2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
98
- project: adroit-hand-hammer-v1-bdpo-training
99
- resume: true
100
- tags:
101
- - train_bdpo_unet_lowdim
102
- - adroit-hand-hammer-v1
103
- - default
104
- multi_run:
105
- value:
106
- run_dir: data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
107
- wandb_name_base: 2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
108
- n_action_steps:
109
- value: 1
110
- n_latency_steps:
111
- value: 0
112
- n_obs_steps:
113
- value: 1
114
- name:
115
- value: train_bdpo_unet_lowdim
116
- obs_dim:
117
- value: 46
118
- optimizer:
119
- value:
120
- _target_: torch.optim.Adam
121
- betas:
122
- - 0.9
123
- - 0.999
124
- eps: 1e-08
125
- lr: 0.0001
126
- weight_decay: 0
127
- output_dir:
128
- value: /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1
129
- past_action_visible:
130
- value: false
131
- policy:
132
- value:
133
- _target_: llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy
134
- act_limit: null
135
- action_dim: 26
136
- activation: relu
137
- actor_update_interval: 1
138
- behavior_checkpoint_path: data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0095-val_loss=0.199.ckpt
139
- checkpoint_path: null
140
- eta: 1
141
- gamma: 0.99
142
- hidden_size:
143
- - 256
144
- - 256
145
- horizon: 1
146
- init_actor_from_behavior: true
147
- init_weight: 0.01
148
- max_q_backup: false
149
- model:
150
- _target_: llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D
151
- cond_predict_scale: true
152
- diffusion_step_embed_dim: 256
153
- down_dims:
154
- - 256
155
- - 512
156
- - 1024
157
- global_cond_dim: 46
158
- input_dim: 26
159
- kernel_size: 5
160
- local_cond_dim: null
161
- n_groups: 8
162
- n_action_steps: 1
163
- n_obs_steps: 1
164
- noise_scheduler:
165
- _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler
166
- beta_end: 0.02
167
- beta_schedule: squaredcos_cap_v2
168
- beta_start: 0.0001
169
- clip_sample: true
170
- num_train_timesteps: 100
171
- prediction_type: epsilon
172
- variance_type: fixed_small
173
- num_eval_sample_steps: 100
174
- obs_dim: 46
175
- rho: 1
176
- squash_action: false
177
- target_reduction: min
178
- tau: 0.005
179
- value_warmup_steps: 1000
180
- task:
181
- value:
182
- action_dim: 26
183
- dataset:
184
- _target_: llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset
185
- data_path: datasets/adroit-hand-hammer-v1-general-reward-trans.pt
186
- data_path2: datasets/adroit-hand-hammer-v1-reward-trans.pt
187
- dummy_normalizer: false
188
- horizon: 1
189
- obs_eef_target: true
190
- pad_after: 0
191
- pad_before: 0
192
- use_manual_normalizer: false
193
- val_ratio: 0.1
194
- env_runner:
195
- _target_: llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner
196
- discount: 0.99
197
- env_name: llf-adroit-adroit-hand-hammer-v1
198
- feedback_type:
199
- - hp
200
- - hn
201
- - fp
202
- instruction_type: b
203
- max_steps: 150
204
- n_action_steps: 1
205
- n_envs: 10
206
- n_obs_steps: 1
207
- n_test: 50
208
- n_train: 10
209
- visual: false
210
- instructor:
211
- _target_: llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor
212
- name: adroit-hand-hammer-v1
213
- obs_dim: 46
214
- task_name:
215
- value: adroit-hand-hammer-v1
216
- training:
217
- value:
218
- checkpoint_every: 10
219
- debug: false
220
- device: cuda:0
221
- grad_norm_clip: 5
222
- gradient_accumulate_every: 1
223
- lr_scheduler: constant
224
- lr_warmup_steps: 500
225
- max_train_steps: null
226
- max_val_steps: null
227
- num_epochs: 201
228
- resume: false
229
- rollout_every: 10
230
- sample_every: 10
231
- sample_max_batch: 128
232
- seed: 46
233
- tqdm_interval_sec: 1
234
- val_every: 1
235
- val_dataloader:
236
- value:
237
- batch_size: 256
238
- num_workers: 0
239
- persistent_workers: false
240
- pin_memory: false
241
- shuffle: true
242
- value_optimizer:
243
- value:
244
- _target_: torch.optim.Adam
245
- betas:
246
- - 0.9
247
- - 0.999
248
- eps: 1e-08
249
- lr: 0.0001
250
- weight_decay: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/output.log DELETED
@@ -1,3 +0,0 @@
1
- Eval AdroitHandLowdimRunner 1/6: 0%| | 0/150 [00:00<?, ?it/s]/tmp2/chyang/workspace/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
2
- obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
3
-
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/wandb-metadata.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "os": "Linux-6.8.0-90-generic-x86_64-with-glibc2.35",
3
- "python": "3.9.21",
4
- "startedAt": "2026-04-05T12:15:02.672124Z",
5
- "args": [
6
- "--config-path",
7
- "config/main_table",
8
- "--config-name",
9
- "bdpo_adroit-hand-hammer-v1_gen.yaml",
10
- "training.seed=46",
11
- "policy.behavior_checkpoint_path=data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch\\=0095-val_loss\\=0.199.ckpt"
12
- ],
13
- "program": "/tmp2/chyang/workspace/LLM-BC/./train.py",
14
- "codePath": "train.py",
15
- "git": {
16
- "remote": "https://github.com/CHYang25/LLM-BC.git",
17
- "commit": "a983d49eed9d3b7c64b22a85eb6908c8694405af"
18
- },
19
- "email": "chris920325@gmail.com",
20
- "root": "/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1",
21
- "host": "rllab1012",
22
- "username": "chyang",
23
- "executable": "/home/chyang/miniconda3/envs/llm-bc/bin/python3",
24
- "codePathLocal": "train.py",
25
- "cpu_count": 10,
26
- "cpu_count_logical": 20,
27
- "gpu": "NVIDIA GeForce RTX 4070 Ti",
28
- "gpu_count": 2,
29
- "disk": {
30
- "/": {
31
- "total": "982820896768",
32
- "used": "414742159360"
33
- }
34
- },
35
- "memory": {
36
- "total": "134736023552"
37
- },
38
- "cpu": {
39
- "count": 10,
40
- "countLogical": 20
41
- },
42
- "gpu_nvidia": [
43
- {
44
- "name": "NVIDIA GeForce RTX 4070 Ti",
45
- "memoryTotal": "12878610432",
46
- "cudaCores": 7680,
47
- "architecture": "Ada"
48
- },
49
- {
50
- "name": "NVIDIA GeForce RTX 4070 Ti",
51
- "memoryTotal": "12878610432",
52
- "cudaCores": 7680,
53
- "architecture": "Ada"
54
- }
55
- ],
56
- "cudaVersion": "13.0"
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"global_step":17486,"loss/q2":43288.23046875,"train/mean_score":-1.2233818703918429,"_runtime":65171.13856882,"train_loss":50942.46326462976,"loss/q1":43442.93359375,"loss/v":584.6388549804688,"stats/done_mean":0.018518518656492233,"test/cumulative_reward":-100.61537228953237,"stats/bootstrap_mean":-411.8333435058594,"loss/v1":303.723388671875,"test/mean_score":-1.214229753178261,"_wandb":{"runtime":65171},"lr":0.0001,"loss/actor_kl":1.2818808555603027,"epoch":200,"loss/q":86731.1640625,"_step":17486,"stats/t_mean":49.76852035522461,"_timestamp":1.775456456854072e+09,"val_loss":54167.66971343214,"loss/v2":280.91546630859375,"train/cumulative_reward":-101.41718616077296,"train/success_rate":0,"test/success_rate":0,"stats/kl_mean":1.2818808555603027,"stats/reward_mean":0.48456788063049316,"loss/actor_bootstrap":-411.8333435058594,"loss/actor":413.1152038574219}
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2026-04-05T20:15:02.000521617+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp4tsm4dtw/port-349699.txt","pid":349699,"debug":false,"disable-analytics":false}
2
- {"time":"2026-04-05T20:15:02.000551014+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2026-04-05T20:15:02.004632764+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":41859,"Zone":""}}
4
- {"time":"2026-04-05T20:15:02.004700387+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":349699}
5
- {"time":"2026-04-05T20:15:02.197125426+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:45888"}
6
- {"time":"2026-04-05T20:15:02.674902878+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"gsxv5o2x","id":"127.0.0.1:45888"}
7
- {"time":"2026-04-05T20:15:02.783690422+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"gsxv5o2x","id":"127.0.0.1:45888"}
8
- {"time":"2026-04-06T14:21:13.810553531+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:45888"}
9
- {"time":"2026-04-06T14:21:13.810680085+08:00","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2026-04-06T14:21:13.810660834+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:45888"}
11
- {"time":"2026-04-06T14:21:13.810854796+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:45888"}
12
- {"time":"2026-04-06T14:21:15.820439179+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:45888"}
13
- {"time":"2026-04-06T14:21:15.820478972+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:45888"}
14
- {"time":"2026-04-06T14:21:15.820503484+08:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-internal.log DELETED
@@ -1,16 +0,0 @@
1
- {"time":"2026-04-05T20:15:02.675207147+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
- {"time":"2026-04-05T20:15:02.675236072+08:00","level":"INFO","msg":"created symlink","path":"/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-core.log"}
3
- {"time":"2026-04-05T20:15:02.783639967+08:00","level":"INFO","msg":"created new stream","id":"gsxv5o2x"}
4
- {"time":"2026-04-05T20:15:02.783682009+08:00","level":"INFO","msg":"stream: started","id":"gsxv5o2x"}
5
- {"time":"2026-04-05T20:15:02.783719383+08:00","level":"INFO","msg":"sender: started","stream_id":"gsxv5o2x"}
6
- {"time":"2026-04-05T20:15:02.783704075+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"gsxv5o2x"}}
7
- {"time":"2026-04-05T20:15:02.783701477+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"gsxv5o2x"}}
8
- {"time":"2026-04-05T20:15:03.681306658+08:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2026-04-06T14:21:13.810659156+08:00","level":"INFO","msg":"stream: closing","id":"gsxv5o2x"}
10
- {"time":"2026-04-06T14:21:13.81070755+08:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2026-04-06T14:21:13.811296491+08:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2026-04-06T14:21:15.381579169+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
13
- {"time":"2026-04-06T14:21:15.820068161+08:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"gsxv5o2x"}}
14
- {"time":"2026-04-06T14:21:15.820142874+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"gsxv5o2x"}}
15
- {"time":"2026-04-06T14:21:15.820175329+08:00","level":"INFO","msg":"sender: closed","stream_id":"gsxv5o2x"}
16
- {"time":"2026-04-06T14:21:15.820335911+08:00","level":"INFO","msg":"stream: closed","id":"gsxv5o2x"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Configure stats pid to 349699
3
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from /home/chyang/.config/wandb/settings
4
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from /tmp2/chyang/workspace/LLM-BC/wandb/settings
5
- 2026-04-05 20:15:02,667 INFO MainThread:349699 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/tmp2/chyang/workspace/LLM-BC/train.py', 'program': '/tmp2/chyang/workspace/LLM-BC/./train.py'}
8
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:_log_setup():533] Logging user logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug.log
10
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:_log_setup():534] Logging internal logs to /tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/logs/debug-internal.log
11
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:init():619] calling init triggers
12
- 2026-04-05 20:15:02,668 INFO MainThread:349699 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {'name': 'train_bdpo_unet_lowdim', '_target_': 'llmbc.workspace.train_bdpo_unet_lowdim_workspace.TrainBDPOUnetLowdimWorkspace', 'obs_dim': 46, 'action_dim': 26, 'task_name': 'adroit-hand-hammer-v1', 'exp_name': 'default', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'policy': {'_target_': 'llmbc.policy.bdpo_unet_lowdim_policy.BDPOUnetLowdimPolicy', 'model': {'_target_': 'llmbc.model.diffusion.conditional_unet1d.ConditionalUnet1D', 'input_dim': 26, 'local_cond_dim': None, 'global_cond_dim': 46, 'diffusion_step_embed_dim': 256, 'down_dims': [256, 512, 1024], 'kernel_size': 5, 'n_groups': 8, 'cond_predict_scale': True}, 'noise_scheduler': {'_target_': 'diffusers.schedulers.scheduling_ddpm.DDPMScheduler', 'num_train_timesteps': 100, 'beta_start': 0.0001, 'beta_end': 0.02, 'beta_schedule': 'squaredcos_cap_v2', 'variance_type': 'fixed_small', 'clip_sample': True, 'prediction_type': 'epsilon'}, 'obs_dim': 46, 'action_dim': 26, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'hidden_size': [256, 256], 'activation': 'relu', 'gamma': 0.99, 'tau': 0.005, 'eta': 1.0, 'rho': 1.0, 'target_reduction': 'min', 'max_q_backup': False, 'actor_update_interval': 1, 'value_warmup_steps': 1000, 'num_eval_sample_steps': 100, 'act_limit': None, 'squash_action': False, 'init_weight': 0.01, 'checkpoint_path': None, 'behavior_checkpoint_path': 'data/outputs/2026.04.04/12.01.08_train_diffusion_unet_lowdim_adroit-hand-hammer-v1/checkpoints/epoch=0095-val_loss=0.199.ckpt', 'init_actor_from_behavior': True}, 'dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 256, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'critic_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'value_optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0}, 'training': {'device': 'cuda:0', 'seed': 46, 'debug': False, 'resume': False, 'lr_scheduler': 'constant', 'lr_warmup_steps': 500, 'num_epochs': 201, 'gradient_accumulate_every': 1, 'grad_norm_clip': 5.0, 'rollout_every': 10, 'checkpoint_every': 10, 'val_every': 1, 'sample_every': 10, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'adroit-hand-hammer-v1-bdpo-training', 'resume': True, 'mode': 'online', 'name': '2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'tags': ['train_bdpo_unet_lowdim', 'adroit-hand-hammer-v1', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1', 'wandb_name_base': '2026.04.05-20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}, 'task': {'name': 'adroit-hand-hammer-v1', 'obs_dim': 46, 'action_dim': 26, 'env_runner': {'_target_': 'llmbc.env_runner.adroit_lowdim_runner.AdroitHandLowdimRunner', 'env_name': 'llf-adroit-adroit-hand-hammer-v1', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 150, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.99}, 'dataset': {'_target_': 'llmbc.dataset.adroit_lowdim_dataset.AdroitHandLowdimDataset', 'data_path': 'datasets/adroit-hand-hammer-v1-general-reward-trans.pt', 'data_path2': 'datasets/adroit-hand-hammer-v1-reward-trans.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': False}, 'instructor': {'_target_': 'llmbc.translator.instructor.adroit_instructor.adroit_hand_hammer_v1_instructor.AdroitHandHammerV1Instructor'}}}
14
- 2026-04-05 20:15:02,669 INFO MainThread:349699 [wandb_init.py:init():669] starting backend
15
- 2026-04-05 20:15:02,669 INFO MainThread:349699 [wandb_init.py:init():673] sending inform_init request
16
- 2026-04-05 20:15:02,671 INFO MainThread:349699 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2026-04-05 20:15:02,671 INFO MainThread:349699 [wandb_init.py:init():686] backend started and connected
18
- 2026-04-05 20:15:02,678 INFO MainThread:349699 [wandb_init.py:init():781] updated telemetry
19
- 2026-04-05 20:15:02,741 INFO MainThread:349699 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2026-04-05 20:15:03,675 INFO MainThread:349699 [wandb_init.py:init():867] starting run threads in backend
21
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_console_start():2451] atexit reg
22
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
- 2026-04-05 20:15:04,113 INFO MainThread:349699 [wandb_run.py:_redirect():2389] Redirects installed.
25
- 2026-04-05 20:15:04,116 INFO MainThread:349699 [wandb_init.py:init():911] run started, returning control to user process
26
- 2026-04-05 20:15:04,117 INFO MainThread:349699 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/tmp2/chyang/workspace/LLM-BC/data/outputs/2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1'}
27
- 2026-04-06 14:21:13,810 WARNING MsgRouterThr:349699 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/run-20260405_201502-gsxv5o2x/run-gsxv5o2x.wandb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:27db059839fc26b03a18c0800b0320f5557ef864b844cf691c21c91647888d78
3
- size 50732328
 
 
 
 
2026.04.05/20.14.49_train_bdpo_unet_lowdim_adroit-hand-hammer-v1/wandb/wandb-resume.json DELETED
@@ -1 +0,0 @@
1
- {"run_id": "gsxv5o2x"}