urdshals commited on
Commit
bed89f5
·
verified ·
1 Parent(s): 885a048

Upload folder using huggingface_hub

Browse files
test_seed_702457/action_logprobs.pth.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695903588dbed91bd74ec8326ad5c532be76e55cf181ecae1de0185dcd1e70b6
3
+ size 296288
test_seed_702457/action_probs.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d557eec872f508d788339b16284ecd663b6ff563436b40ca0c710a06b80a8c2
3
+ size 125429
test_seed_702457/checkpoints.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbd23d49431441731fb491f53280ef47bfc1abd313569f332cf9f57258003299
3
+ size 11680131
test_seed_702457/config.cfg ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rl_action=train
2
+ model_type=impala
3
+ lr=5e-05
4
+ discount_rate=0.99
5
+ num_rollout_steps=64
6
+ grad_acc_per_chunk=4
7
+ num_rollout_chunks=1
8
+ cheese_loc=any
9
+ env_layout=open
10
+ alpha=0.0
11
+ env_size=13
12
+ num_levels=9600
13
+ compile=True
14
+ use_prev_action=False
15
+ weight_restrictions=None
16
+ weight_restrictions_invert=False
17
+ use_bf16=False
18
+ use_wandb=True
19
+ no_tqdm=False
20
+ seed=702457
21
+ mask_type=first_episode
22
+ ckpt_dir=jaxgmg_fleet_test
23
+ vis_average_state=False
24
+ trim_episodes=False
25
+ num_total_env_steps=9830400
26
+ eval_every=1
27
+ eff_horizon=None
28
+ optim=adam
29
+ env_rule=None
30
+ env_rule_mixture=None
31
+ hf_user=urdshals
32
+ hf_collection=davidquarel/jaxgmg
33
+ use_hf=True
34
+ num_hf_uploads=1
35
+ use_log=True
36
+ log_optimizer_state=False
37
+ resume=None
38
+ resume_id=None
39
+ resume_optim=False
40
+ checkpoint=test_seed_702457
41
+ wandb_project=jaxgmg_fleet_test
42
+ eval_schedule=0:1
43
+ render_sixel=False
44
+ sixel_idx=60
45
+ live_monitor=False
46
+ run_id=0
47
+ seed_formula=None
48
+ deterministic=True
49
+ penalize_time=False
50
+ f_str_ckpt=test_seed_702457
51
+ duplication_factor=-1
52
+ smoke=False
53
+ ntfy=david_jaxgmg
54
+ num_chains=6
55
+ num_draws=3000
56
+ num_steps_bw_draws=1
57
+ on_policy=True
58
+ llc_nbeta=3000
59
+ localization=10
60
+ exact_solver_each_draw=False
61
+ llc_optimizer=sgld
62
+ iw_clip_eps=None
63
+ rmsprop_burnin_steps=20
64
+ llc_data_file=llc_scan_open_reinforce.pkl
65
+ llc_checkpoint_index=None
66
+ llc_checkpoint_number=None
67
+ sink=None
68
+ repo_id=davidquarel/jaxgmg_ckpt_zip
69
+ use_shuffled_checkpoints=False
70
+ force_re_download=False
71
+ off_distribution_data=False
72
+ evaluate_every_position=False
73
+ num_prev_actions=1
74
+ eff_acc_steps=4
75
+ chunk_size=9600
76
+ env_steps_per_microbatch=153600
77
+ ckpt_path=jaxgmg_fleet_test/test_seed_702457
78
+ env_steps_per_loop=614400
79
+ total_loops=16
test_seed_702457/config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha": 0.0,
3
+ "checkpoint": "test_seed_702457",
4
+ "cheese_loc": "any",
5
+ "chunk_size": 9600,
6
+ "ckpt_dir": "jaxgmg_fleet_test",
7
+ "ckpt_path": "jaxgmg_fleet_test/test_seed_702457",
8
+ "compile": true,
9
+ "deterministic": true,
10
+ "discount_rate": 0.99,
11
+ "duplication_factor": -1,
12
+ "eff_acc_steps": 4,
13
+ "eff_horizon": null,
14
+ "env_layout": "open",
15
+ "env_rule": null,
16
+ "env_rule_mixture": null,
17
+ "env_size": 13,
18
+ "env_steps_per_loop": 614400,
19
+ "env_steps_per_microbatch": 153600,
20
+ "eval_every": 1,
21
+ "eval_schedule": "0:1",
22
+ "evaluate_every_position": false,
23
+ "exact_solver_each_draw": false,
24
+ "f_str_ckpt": "test_seed_702457",
25
+ "force_re_download": false,
26
+ "grad_acc_per_chunk": 4,
27
+ "hf_collection": "davidquarel/jaxgmg",
28
+ "hf_user": "urdshals",
29
+ "iw_clip_eps": null,
30
+ "live_monitor": false,
31
+ "llc_checkpoint_index": null,
32
+ "llc_checkpoint_number": null,
33
+ "llc_data_file": "llc_scan_open_reinforce.pkl",
34
+ "llc_nbeta": 3000,
35
+ "llc_optimizer": "sgld",
36
+ "localization": 10,
37
+ "log_optimizer_state": false,
38
+ "lr": 5e-05,
39
+ "mask_type": "first_episode",
40
+ "model_type": "impala",
41
+ "no_tqdm": false,
42
+ "ntfy": "david_jaxgmg",
43
+ "num_chains": 6,
44
+ "num_draws": 3000,
45
+ "num_hf_uploads": 1,
46
+ "num_levels": 9600,
47
+ "num_prev_actions": 1,
48
+ "num_rollout_chunks": 1,
49
+ "num_rollout_steps": 64,
50
+ "num_steps_bw_draws": 1,
51
+ "num_total_env_steps": 9830400,
52
+ "off_distribution_data": false,
53
+ "on_policy": true,
54
+ "optim": "adam",
55
+ "penalize_time": false,
56
+ "render_sixel": false,
57
+ "repo_id": "davidquarel/jaxgmg_ckpt_zip",
58
+ "resume": null,
59
+ "resume_id": null,
60
+ "resume_optim": false,
61
+ "rl_action": "train",
62
+ "rmsprop_burnin_steps": 20,
63
+ "run_id": 0,
64
+ "seed": 702457,
65
+ "seed_formula": null,
66
+ "sink": null,
67
+ "sixel_idx": 60,
68
+ "smoke": false,
69
+ "total_loops": 16,
70
+ "trim_episodes": false,
71
+ "use_bf16": false,
72
+ "use_hf": true,
73
+ "use_log": true,
74
+ "use_prev_action": false,
75
+ "use_shuffled_checkpoints": false,
76
+ "use_wandb": true,
77
+ "vis_average_state": false,
78
+ "wandb_project": "jaxgmg_fleet_test",
79
+ "weight_restrictions": null,
80
+ "weight_restrictions_invert": false
81
+ }
test_seed_702457/eval.jsonl ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2026-03-19T17:21:29.514808Z", "loop": 0, "env_steps": 0, "exact_value_avg": 0.1466498076915741, "regret_dist": 0.766993522644043, "regret_other_env": 0.6786674857139587, "regret_corner": 0.766993522644043, "regret_row": 0.6969188451766968, "regret_any": 0.6786674857139587, "regret_bot": 0.7575015425682068, "value_dist": 0.1466498076915741, "value_other_env": 0.26028770208358765, "value_corner": 0.1466498076915741, "value_row": 0.22929459810256958, "value_any": 0.26028770208358765, "value_bot": 0.16871191561222076, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_00.png", "entropy": 1.3857083320617676}
2
+ {"ts": "2026-03-19T17:21:30.515727Z", "loop": 1, "env_steps": 614400, "exact_value_avg": 0.15038195252418518, "regret_dist": 0.7632613778114319, "regret_other_env": 0.6786831021308899, "regret_corner": 0.7632613778114319, "regret_row": 0.69448322057724, "regret_any": 0.6786831021308899, "regret_bot": 0.7595480680465698, "value_dist": 0.15038195252418518, "value_other_env": 0.2602720856666565, "value_corner": 0.15038195252418518, "value_row": 0.23173025250434875, "value_any": 0.2602720856666565, "value_bot": 0.1666654348373413, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_01.png", "entropy": 1.3857067823410034}
3
+ {"ts": "2026-03-19T17:21:31.421204Z", "loop": 2, "env_steps": 1228800, "exact_value_avg": 0.1542574018239975, "regret_dist": 0.759385883808136, "regret_other_env": 0.6787131428718567, "regret_corner": 0.759385883808136, "regret_row": 0.6920261979103088, "regret_any": 0.6787131428718567, "regret_bot": 0.7616009712219238, "value_dist": 0.1542574018239975, "value_other_env": 0.2602420747280121, "value_corner": 0.1542574018239975, "value_row": 0.2341872602701187, "value_any": 0.2602420747280121, "value_bot": 0.1646125316619873, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_02.png", "entropy": 1.3856995105743408}
4
+ {"ts": "2026-03-19T17:21:32.326929Z", "loop": 3, "env_steps": 1843200, "exact_value_avg": 0.15816651284694672, "regret_dist": 0.7554767727851868, "regret_other_env": 0.6787505149841309, "regret_corner": 0.7554767727851868, "regret_row": 0.6895408630371094, "regret_any": 0.6787505149841309, "regret_bot": 0.7636478543281555, "value_dist": 0.15816651284694672, "value_other_env": 0.26020464301109314, "value_corner": 0.15816651284694672, "value_row": 0.23667263984680176, "value_any": 0.26020464301109314, "value_bot": 0.16256560385227203, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_03.png", "entropy": 1.3856860399246216}
5
+ {"ts": "2026-03-19T17:21:33.237299Z", "loop": 4, "env_steps": 2457600, "exact_value_avg": 0.162040114402771, "regret_dist": 0.7516032457351685, "regret_other_env": 0.6788009405136108, "regret_corner": 0.7516032457351685, "regret_row": 0.6871009469032288, "regret_any": 0.6788009405136108, "regret_bot": 0.765644371509552, "value_dist": 0.162040114402771, "value_other_env": 0.26015421748161316, "value_corner": 0.162040114402771, "value_row": 0.23911258578300476, "value_any": 0.26015421748161316, "value_bot": 0.16056908667087555, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_04.png", "entropy": 1.3856663703918457}
6
+ {"ts": "2026-03-19T17:21:34.153507Z", "loop": 5, "env_steps": 3072000, "exact_value_avg": 0.16590137779712677, "regret_dist": 0.7477418780326843, "regret_other_env": 0.6788832545280457, "regret_corner": 0.7477418780326843, "regret_row": 0.6847108602523804, "regret_any": 0.6788832545280457, "regret_bot": 0.7676279544830322, "value_dist": 0.16590137779712677, "value_other_env": 0.26007187366485596, "value_corner": 0.16590137779712677, "value_row": 0.241502583026886, "value_any": 0.26007187366485596, "value_bot": 0.1585855484008789, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_05.png", "entropy": 1.3856405019760132}
7
+ {"ts": "2026-03-19T17:21:35.106227Z", "loop": 6, "env_steps": 3686400, "exact_value_avg": 0.16984908282756805, "regret_dist": 0.7437942028045654, "regret_other_env": 0.6789911389350891, "regret_corner": 0.7437942028045654, "regret_row": 0.6823408007621765, "regret_any": 0.6789911389350891, "regret_bot": 0.7696192860603333, "value_dist": 0.16984908282756805, "value_other_env": 0.2599640190601349, "value_corner": 0.16984908282756805, "value_row": 0.24387268722057343, "value_any": 0.2599640190601349, "value_bot": 0.15659412741661072, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_06.png", "entropy": 1.3856083154678345}
8
+ {"ts": "2026-03-19T17:21:36.032458Z", "loop": 7, "env_steps": 4300800, "exact_value_avg": 0.17389361560344696, "regret_dist": 0.7397496700286865, "regret_other_env": 0.6791173219680786, "regret_corner": 0.7397496700286865, "regret_row": 0.6799774765968323, "regret_any": 0.6791173219680786, "regret_bot": 0.7716079950332642, "value_dist": 0.17389361560344696, "value_other_env": 0.259837806224823, "value_corner": 0.17389361560344696, "value_row": 0.2462359219789505, "value_any": 0.259837806224823, "value_bot": 0.15460550785064697, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_07.png", "entropy": 1.3855693340301514}
9
+ {"ts": "2026-03-19T17:21:36.944709Z", "loop": 8, "env_steps": 4915200, "exact_value_avg": 0.17807503044605255, "regret_dist": 0.7355682849884033, "regret_other_env": 0.6792623400688171, "regret_corner": 0.7355682849884033, "regret_row": 0.677588164806366, "regret_any": 0.6792623400688171, "regret_bot": 0.7736051678657532, "value_dist": 0.17807503044605255, "value_other_env": 0.2596927881240845, "value_corner": 0.17807503044605255, "value_row": 0.24862535297870636, "value_any": 0.2596927881240845, "value_bot": 0.152608260512352, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_08.png", "entropy": 1.3855232000350952}
10
+ {"ts": "2026-03-19T17:21:37.865418Z", "loop": 9, "env_steps": 5529600, "exact_value_avg": 0.18242613971233368, "regret_dist": 0.7312171459197998, "regret_other_env": 0.679425060749054, "regret_corner": 0.7312171459197998, "regret_row": 0.6751810312271118, "regret_any": 0.679425060749054, "regret_bot": 0.7756032347679138, "value_dist": 0.18242613971233368, "value_other_env": 0.2595300078392029, "value_corner": 0.18242613971233368, "value_row": 0.25103244185447693, "value_any": 0.2595300078392029, "value_bot": 0.15061026811599731, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_09.png", "entropy": 1.3854701519012451}
11
+ {"ts": "2026-03-19T17:21:38.768846Z", "loop": 10, "env_steps": 6144000, "exact_value_avg": 0.18694233894348145, "regret_dist": 0.726701021194458, "regret_other_env": 0.6796091198921204, "regret_corner": 0.726701021194458, "regret_row": 0.6727761626243591, "regret_any": 0.6796091198921204, "regret_bot": 0.7776013612747192, "value_dist": 0.18694233894348145, "value_other_env": 0.2593460977077484, "value_corner": 0.18694233894348145, "value_row": 0.2534373700618744, "value_any": 0.2593460977077484, "value_bot": 0.14861206710338593, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_10.png", "entropy": 1.3854094743728638}
12
+ {"ts": "2026-03-19T17:21:39.712262Z", "loop": 11, "env_steps": 6758400, "exact_value_avg": 0.19159384071826935, "regret_dist": 0.7220494747161865, "regret_other_env": 0.6798242330551147, "regret_corner": 0.7220494747161865, "regret_row": 0.670360803604126, "regret_any": 0.6798242330551147, "regret_bot": 0.7796393036842346, "value_dist": 0.19159384071826935, "value_other_env": 0.25913089513778687, "value_corner": 0.19159384071826935, "value_row": 0.25585266947746277, "value_any": 0.25913089513778687, "value_bot": 0.1465742141008377, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_11.png", "entropy": 1.3853397369384766}
13
+ {"ts": "2026-03-19T17:21:40.626622Z", "loop": 12, "env_steps": 7372800, "exact_value_avg": 0.19639050960540771, "regret_dist": 0.717252790927887, "regret_other_env": 0.6800767183303833, "regret_corner": 0.717252790927887, "regret_row": 0.6679074764251709, "regret_any": 0.6800767183303833, "regret_bot": 0.781746506690979, "value_dist": 0.19639050960540771, "value_other_env": 0.2588783800601959, "value_corner": 0.19639050960540771, "value_row": 0.25830599665641785, "value_any": 0.2588783800601959, "value_bot": 0.14446696639060974, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_12.png", "entropy": 1.3852595090866089}
14
+ {"ts": "2026-03-19T17:21:41.558303Z", "loop": 13, "env_steps": 7987200, "exact_value_avg": 0.20135627686977386, "regret_dist": 0.7122870683670044, "regret_other_env": 0.6803653836250305, "regret_corner": 0.7122870683670044, "regret_row": 0.6654231548309326, "regret_any": 0.6803653836250305, "regret_bot": 0.7839027047157288, "value_dist": 0.20135627686977386, "value_other_env": 0.2585897445678711, "value_corner": 0.20135627686977386, "value_row": 0.26079028844833374, "value_any": 0.2585897445678711, "value_bot": 0.14231079816818237, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_13.png", "entropy": 1.3851679563522339}
15
+ {"ts": "2026-03-19T17:21:42.483732Z", "loop": 14, "env_steps": 8601600, "exact_value_avg": 0.20654135942459106, "regret_dist": 0.7071019411087036, "regret_other_env": 0.6806955337524414, "regret_corner": 0.7071019411087036, "regret_row": 0.6628521680831909, "regret_any": 0.6806955337524414, "regret_bot": 0.78614741563797, "value_dist": 0.20654135942459106, "value_other_env": 0.2582596242427826, "value_corner": 0.20654135942459106, "value_row": 0.2633613348007202, "value_any": 0.2582596242427826, "value_bot": 0.14006610214710236, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_14.png", "entropy": 1.3850629329681396}
16
+ {"ts": "2026-03-19T17:21:43.413873Z", "loop": 15, "env_steps": 9216000, "exact_value_avg": 0.2119634449481964, "regret_dist": 0.7016798853874207, "regret_other_env": 0.6810704469680786, "regret_corner": 0.7016798853874207, "regret_row": 0.6601826548576355, "regret_any": 0.6810704469680786, "regret_bot": 0.7884900569915771, "value_dist": 0.2119634449481964, "value_other_env": 0.2578847110271454, "value_corner": 0.2119634449481964, "value_row": 0.26603084802627563, "value_any": 0.2578847110271454, "value_bot": 0.1377233862876892, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_15.png", "entropy": 1.3849424123764038}
test_seed_702457/eval.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-19T17:21:29.514808Z] Loop 0: Reg: 0.7670, AltReg: 0.6787, CorReg: 0.7670, TopReg: 0.6969, AllReg: 0.6787, BotRReg: 0.7575, OV: 0.1466, AltV: 0.2603, CorV: 0.1466, TopV: 0.2293, BotRV: 0.1687, AllV: 0.2603
2
+ [2026-03-19T17:21:30.515727Z] Loop 1: Reg: 0.7633, AltReg: 0.6787, CorReg: 0.7633, TopReg: 0.6945, AllReg: 0.6787, BotRReg: 0.7595, OV: 0.1504, AltV: 0.2603, CorV: 0.1504, TopV: 0.2317, BotRV: 0.1667, AllV: 0.2603
3
+ [2026-03-19T17:21:31.421204Z] Loop 2: Reg: 0.7594, AltReg: 0.6787, CorReg: 0.7594, TopReg: 0.6920, AllReg: 0.6787, BotRReg: 0.7616, OV: 0.1543, AltV: 0.2602, CorV: 0.1543, TopV: 0.2342, BotRV: 0.1646, AllV: 0.2602
4
+ [2026-03-19T17:21:32.326929Z] Loop 3: Reg: 0.7555, AltReg: 0.6788, CorReg: 0.7555, TopReg: 0.6895, AllReg: 0.6788, BotRReg: 0.7636, OV: 0.1582, AltV: 0.2602, CorV: 0.1582, TopV: 0.2367, BotRV: 0.1626, AllV: 0.2602
5
+ [2026-03-19T17:21:33.237299Z] Loop 4: Reg: 0.7516, AltReg: 0.6788, CorReg: 0.7516, TopReg: 0.6871, AllReg: 0.6788, BotRReg: 0.7656, OV: 0.1620, AltV: 0.2602, CorV: 0.1620, TopV: 0.2391, BotRV: 0.1606, AllV: 0.2602
6
+ [2026-03-19T17:21:34.153507Z] Loop 5: Reg: 0.7477, AltReg: 0.6789, CorReg: 0.7477, TopReg: 0.6847, AllReg: 0.6789, BotRReg: 0.7676, OV: 0.1659, AltV: 0.2601, CorV: 0.1659, TopV: 0.2415, BotRV: 0.1586, AllV: 0.2601
7
+ [2026-03-19T17:21:35.106227Z] Loop 6: Reg: 0.7438, AltReg: 0.6790, CorReg: 0.7438, TopReg: 0.6823, AllReg: 0.6790, BotRReg: 0.7696, OV: 0.1698, AltV: 0.2600, CorV: 0.1698, TopV: 0.2439, BotRV: 0.1566, AllV: 0.2600
8
+ [2026-03-19T17:21:36.032458Z] Loop 7: Reg: 0.7397, AltReg: 0.6791, CorReg: 0.7397, TopReg: 0.6800, AllReg: 0.6791, BotRReg: 0.7716, OV: 0.1739, AltV: 0.2598, CorV: 0.1739, TopV: 0.2462, BotRV: 0.1546, AllV: 0.2598
9
+ [2026-03-19T17:21:36.944709Z] Loop 8: Reg: 0.7356, AltReg: 0.6793, CorReg: 0.7356, TopReg: 0.6776, AllReg: 0.6793, BotRReg: 0.7736, OV: 0.1781, AltV: 0.2597, CorV: 0.1781, TopV: 0.2486, BotRV: 0.1526, AllV: 0.2597
10
+ [2026-03-19T17:21:37.865418Z] Loop 9: Reg: 0.7312, AltReg: 0.6794, CorReg: 0.7312, TopReg: 0.6752, AllReg: 0.6794, BotRReg: 0.7756, OV: 0.1824, AltV: 0.2595, CorV: 0.1824, TopV: 0.2510, BotRV: 0.1506, AllV: 0.2595
11
+ [2026-03-19T17:21:38.768846Z] Loop 10: Reg: 0.7267, AltReg: 0.6796, CorReg: 0.7267, TopReg: 0.6728, AllReg: 0.6796, BotRReg: 0.7776, OV: 0.1869, AltV: 0.2593, CorV: 0.1869, TopV: 0.2534, BotRV: 0.1486, AllV: 0.2593
12
+ [2026-03-19T17:21:39.712262Z] Loop 11: Reg: 0.7220, AltReg: 0.6798, CorReg: 0.7220, TopReg: 0.6704, AllReg: 0.6798, BotRReg: 0.7796, OV: 0.1916, AltV: 0.2591, CorV: 0.1916, TopV: 0.2559, BotRV: 0.1466, AllV: 0.2591
13
+ [2026-03-19T17:21:40.626622Z] Loop 12: Reg: 0.7173, AltReg: 0.6801, CorReg: 0.7173, TopReg: 0.6679, AllReg: 0.6801, BotRReg: 0.7817, OV: 0.1964, AltV: 0.2589, CorV: 0.1964, TopV: 0.2583, BotRV: 0.1445, AllV: 0.2589
14
+ [2026-03-19T17:21:41.558303Z] Loop 13: Reg: 0.7123, AltReg: 0.6804, CorReg: 0.7123, TopReg: 0.6654, AllReg: 0.6804, BotRReg: 0.7839, OV: 0.2014, AltV: 0.2586, CorV: 0.2014, TopV: 0.2608, BotRV: 0.1423, AllV: 0.2586
15
+ [2026-03-19T17:21:42.483732Z] Loop 14: Reg: 0.7071, AltReg: 0.6807, CorReg: 0.7071, TopReg: 0.6629, AllReg: 0.6807, BotRReg: 0.7861, OV: 0.2065, AltV: 0.2583, CorV: 0.2065, TopV: 0.2634, BotRV: 0.1401, AllV: 0.2583
16
+ [2026-03-19T17:21:43.413873Z] Loop 15: Reg: 0.7017, AltReg: 0.6811, CorReg: 0.7017, TopReg: 0.6602, AllReg: 0.6811, BotRReg: 0.7885, OV: 0.2120, AltV: 0.2579, CorV: 0.2120, TopV: 0.2660, BotRV: 0.1377, AllV: 0.2579
test_seed_702457/latest_train.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ts": "2026-03-19T17:21:43.505798Z",
3
+ "loop": 15,
4
+ "env_steps": 9830399,
5
+ "loss": 0.021627891808748245,
6
+ "avg_return": 0.11401231586933136,
7
+ "regret": 0.7016798853874207,
8
+ "entropy": 1.3849334716796875,
9
+ "lr": 5e-05,
10
+ "discount_rate": 0.99,
11
+ "num_rollout_steps": 64
12
+ }
test_seed_702457/setup.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "rl_action": "train",
4
+ "model_type": "impala",
5
+ "lr": 5e-05,
6
+ "discount_rate": 0.99,
7
+ "num_rollout_steps": 64,
8
+ "grad_acc_per_chunk": 4,
9
+ "num_rollout_chunks": 1,
10
+ "cheese_loc": "any",
11
+ "env_layout": "open",
12
+ "alpha": 0.0,
13
+ "env_size": 13,
14
+ "num_levels": 9600,
15
+ "compile": true,
16
+ "use_prev_action": false,
17
+ "weight_restrictions": null,
18
+ "weight_restrictions_invert": false,
19
+ "use_bf16": false,
20
+ "use_wandb": true,
21
+ "no_tqdm": false,
22
+ "seed": 702457,
23
+ "mask_type": "first_episode",
24
+ "ckpt_dir": "jaxgmg_fleet_test",
25
+ "vis_average_state": false,
26
+ "trim_episodes": false,
27
+ "num_total_env_steps": 9830400,
28
+ "eval_every": 1,
29
+ "eff_horizon": null,
30
+ "optim": "adam",
31
+ "env_rule": null,
32
+ "env_rule_mixture": null,
33
+ "hf_user": "urdshals",
34
+ "hf_collection": "davidquarel/jaxgmg",
35
+ "use_hf": true,
36
+ "num_hf_uploads": 1,
37
+ "use_log": true,
38
+ "log_optimizer_state": false,
39
+ "resume": null,
40
+ "resume_id": null,
41
+ "resume_optim": false,
42
+ "checkpoint": "test_seed_702457",
43
+ "wandb_project": "jaxgmg_fleet_test",
44
+ "eval_schedule": "0:1",
45
+ "render_sixel": false,
46
+ "sixel_idx": 60,
47
+ "live_monitor": false,
48
+ "run_id": 0,
49
+ "seed_formula": null,
50
+ "deterministic": true,
51
+ "penalize_time": false,
52
+ "f_str_ckpt": "test_seed_702457",
53
+ "duplication_factor": -1,
54
+ "smoke": false,
55
+ "ntfy": "david_jaxgmg",
56
+ "num_chains": 6,
57
+ "num_draws": 3000,
58
+ "num_steps_bw_draws": 1,
59
+ "on_policy": true,
60
+ "llc_nbeta": 3000,
61
+ "localization": 10,
62
+ "exact_solver_each_draw": false,
63
+ "llc_optimizer": "sgld",
64
+ "iw_clip_eps": null,
65
+ "rmsprop_burnin_steps": 20,
66
+ "llc_data_file": "llc_scan_open_reinforce.pkl",
67
+ "llc_checkpoint_index": null,
68
+ "llc_checkpoint_number": null,
69
+ "sink": null,
70
+ "repo_id": "davidquarel/jaxgmg_ckpt_zip",
71
+ "use_shuffled_checkpoints": false,
72
+ "force_re_download": false,
73
+ "off_distribution_data": false,
74
+ "evaluate_every_position": false,
75
+ "num_prev_actions": 1,
76
+ "eff_acc_steps": 4,
77
+ "chunk_size": 9600,
78
+ "env_steps_per_microbatch": 153600,
79
+ "ckpt_path": "jaxgmg_fleet_test/test_seed_702457",
80
+ "env_steps_per_loop": 614400,
81
+ "total_loops": 16
82
+ },
83
+ "setup": {
84
+ "start_time_utc": "2026-03-19T17:20:51.761066Z",
85
+ "seed": 702457,
86
+ "device": "cuda",
87
+ "python_version": "3.11.11",
88
+ "torch_version": "2.10.0+cu128",
89
+ "jax_version": "0.6.2",
90
+ "hostname": "28c60cd91d95",
91
+ "platform": "Linux-6.14.0-35-generic-x86_64-with-glibc2.35",
92
+ "git_sha": null,
93
+ "git_branch": null,
94
+ "use_wandb": true
95
+ }
96
+ }
test_seed_702457/setup.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Start: 2026-03-19T17:20:51.761066Z
2
+ Seed: 702457
3
+ Device: cuda
4
+ Python: 3.11.11 | Torch: 2.10.0+cu128 | JAX: 0.6.2
5
+ Host: 28c60cd91d95
6
+ Platform: Linux-6.14.0-35-generic-x86_64-with-glibc2.35
7
+ Git: branch=None sha=None
8
+ use_wandb: True
test_seed_702457/state_counts.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75970e4cc6062e2ae422277f46c08859ab9d8db2ff32fe5544189665fa74a0ba
3
+ size 116288
test_seed_702457/train.jsonl ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2026-03-19T17:21:29.652829Z", "loop": 0, "env_steps": 614399, "loss": 0.015351004898548126, "avg_return": 0.08953062444925308, "regret": 0.766993522644043, "entropy": 1.3857018947601318, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
2
+ {"ts": "2026-03-19T17:21:30.564293Z", "loop": 1, "env_steps": 1228799, "loss": 0.014568308368325233, "avg_return": 0.08772362023591995, "regret": 0.7632613778114319, "entropy": 1.3856998682022095, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
3
+ {"ts": "2026-03-19T17:21:31.466618Z", "loop": 2, "env_steps": 1843199, "loss": 0.01513539906591177, "avg_return": 0.08583012968301773, "regret": 0.759385883808136, "entropy": 1.3856923580169678, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
4
+ {"ts": "2026-03-19T17:21:32.376689Z", "loop": 3, "env_steps": 2457599, "loss": 0.02005593106150627, "avg_return": 0.09247411042451859, "regret": 0.7554767727851868, "entropy": 1.3856785297393799, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
5
+ {"ts": "2026-03-19T17:21:33.292418Z", "loop": 4, "env_steps": 3071999, "loss": 0.016897454857826233, "avg_return": 0.09452583640813828, "regret": 0.7516032457351685, "entropy": 1.3856585025787354, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
6
+ {"ts": "2026-03-19T17:21:34.242449Z", "loop": 5, "env_steps": 3686399, "loss": 0.016774149611592293, "avg_return": 0.0945819616317749, "regret": 0.7477418780326843, "entropy": 1.3856326341629028, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
7
+ {"ts": "2026-03-19T17:21:35.172895Z", "loop": 6, "env_steps": 4300799, "loss": 0.018350157886743546, "avg_return": 0.09880296885967255, "regret": 0.7437942028045654, "entropy": 1.385600209236145, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
8
+ {"ts": "2026-03-19T17:21:36.085630Z", "loop": 7, "env_steps": 4915199, "loss": 0.018662534654140472, "avg_return": 0.09848998486995697, "regret": 0.7397496700286865, "entropy": 1.3855609893798828, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
9
+ {"ts": "2026-03-19T17:21:37.007675Z", "loop": 8, "env_steps": 5529599, "loss": 0.017929529771208763, "avg_return": 0.10079097002744675, "regret": 0.7355682849884033, "entropy": 1.3855141401290894, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
10
+ {"ts": "2026-03-19T17:21:37.910989Z", "loop": 9, "env_steps": 6143999, "loss": 0.018253426998853683, "avg_return": 0.09943941235542297, "regret": 0.7312171459197998, "entropy": 1.3854602575302124, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
11
+ {"ts": "2026-03-19T17:21:38.853387Z", "loop": 10, "env_steps": 6758399, "loss": 0.021026184782385826, "avg_return": 0.10836489498615265, "regret": 0.726701021194458, "entropy": 1.3853987455368042, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
12
+ {"ts": "2026-03-19T17:21:39.769091Z", "loop": 11, "env_steps": 7372799, "loss": 0.022691909223794937, "avg_return": 0.10879600793123245, "regret": 0.7220494747161865, "entropy": 1.3853288888931274, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
13
+ {"ts": "2026-03-19T17:21:40.701901Z", "loop": 12, "env_steps": 7987199, "loss": 0.019339917227625847, "avg_return": 0.10545758903026581, "regret": 0.717252790927887, "entropy": 1.3852492570877075, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
14
+ {"ts": "2026-03-19T17:21:41.624686Z", "loop": 13, "env_steps": 8601599, "loss": 0.020299173891544342, "avg_return": 0.11108823120594025, "regret": 0.7122870683670044, "entropy": 1.385157823562622, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
15
+ {"ts": "2026-03-19T17:21:42.557072Z", "loop": 14, "env_steps": 9215999, "loss": 0.01998845487833023, "avg_return": 0.1091562807559967, "regret": 0.7071019411087036, "entropy": 1.3850531578063965, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
16
+ {"ts": "2026-03-19T17:21:43.505798Z", "loop": 15, "env_steps": 9830399, "loss": 0.021627891808748245, "avg_return": 0.11401231586933136, "regret": 0.7016798853874207, "entropy": 1.3849334716796875, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
test_seed_702457/train.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-19T17:21:29.652829Z] Loop 0: Loss: 1.54e-02, Avg Return: 0.090, Regret: 0.7670, Entropy: 1.3857
2
+ [2026-03-19T17:21:30.564293Z] Loop 1: Loss: 1.46e-02, Avg Return: 0.088, Regret: 0.7633, Entropy: 1.3857
3
+ [2026-03-19T17:21:31.466618Z] Loop 2: Loss: 1.51e-02, Avg Return: 0.086, Regret: 0.7594, Entropy: 1.3857
4
+ [2026-03-19T17:21:32.376689Z] Loop 3: Loss: 2.01e-02, Avg Return: 0.092, Regret: 0.7555, Entropy: 1.3857
5
+ [2026-03-19T17:21:33.292418Z] Loop 4: Loss: 1.69e-02, Avg Return: 0.095, Regret: 0.7516, Entropy: 1.3857
6
+ [2026-03-19T17:21:34.242449Z] Loop 5: Loss: 1.68e-02, Avg Return: 0.095, Regret: 0.7477, Entropy: 1.3856
7
+ [2026-03-19T17:21:35.172895Z] Loop 6: Loss: 1.84e-02, Avg Return: 0.099, Regret: 0.7438, Entropy: 1.3856
8
+ [2026-03-19T17:21:36.085630Z] Loop 7: Loss: 1.87e-02, Avg Return: 0.098, Regret: 0.7397, Entropy: 1.3856
9
+ [2026-03-19T17:21:37.007675Z] Loop 8: Loss: 1.79e-02, Avg Return: 0.101, Regret: 0.7356, Entropy: 1.3855
10
+ [2026-03-19T17:21:37.910989Z] Loop 9: Loss: 1.83e-02, Avg Return: 0.099, Regret: 0.7312, Entropy: 1.3855
11
+ [2026-03-19T17:21:38.853387Z] Loop 10: Loss: 2.10e-02, Avg Return: 0.108, Regret: 0.7267, Entropy: 1.3854
12
+ [2026-03-19T17:21:39.769091Z] Loop 11: Loss: 2.27e-02, Avg Return: 0.109, Regret: 0.7220, Entropy: 1.3853
13
+ [2026-03-19T17:21:40.701901Z] Loop 12: Loss: 1.93e-02, Avg Return: 0.105, Regret: 0.7173, Entropy: 1.3852
14
+ [2026-03-19T17:21:41.624686Z] Loop 13: Loss: 2.03e-02, Avg Return: 0.111, Regret: 0.7123, Entropy: 1.3852
15
+ [2026-03-19T17:21:42.557072Z] Loop 14: Loss: 2.00e-02, Avg Return: 0.109, Regret: 0.7071, Entropy: 1.3851
16
+ [2026-03-19T17:21:43.505798Z] Loop 15: Loss: 2.16e-02, Avg Return: 0.114, Regret: 0.7017, Entropy: 1.3849