Upload folder using huggingface_hub
Browse files- test_seed_702457/action_logprobs.pth.gz +3 -0
- test_seed_702457/action_probs.tar.gz +3 -0
- test_seed_702457/checkpoints.tar.gz +3 -0
- test_seed_702457/config.cfg +79 -0
- test_seed_702457/config.json +81 -0
- test_seed_702457/eval.jsonl +16 -0
- test_seed_702457/eval.log +16 -0
- test_seed_702457/latest_train.json +12 -0
- test_seed_702457/setup.json +96 -0
- test_seed_702457/setup.txt +8 -0
- test_seed_702457/state_counts.npy +3 -0
- test_seed_702457/train.jsonl +16 -0
- test_seed_702457/train.log +16 -0
test_seed_702457/action_logprobs.pth.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:695903588dbed91bd74ec8326ad5c532be76e55cf181ecae1de0185dcd1e70b6
|
| 3 |
+
size 296288
|
test_seed_702457/action_probs.tar.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d557eec872f508d788339b16284ecd663b6ff563436b40ca0c710a06b80a8c2
|
| 3 |
+
size 125429
|
test_seed_702457/checkpoints.tar.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbd23d49431441731fb491f53280ef47bfc1abd313569f332cf9f57258003299
|
| 3 |
+
size 11680131
|
test_seed_702457/config.cfg
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
rl_action=train
|
| 2 |
+
model_type=impala
|
| 3 |
+
lr=5e-05
|
| 4 |
+
discount_rate=0.99
|
| 5 |
+
num_rollout_steps=64
|
| 6 |
+
grad_acc_per_chunk=4
|
| 7 |
+
num_rollout_chunks=1
|
| 8 |
+
cheese_loc=any
|
| 9 |
+
env_layout=open
|
| 10 |
+
alpha=0.0
|
| 11 |
+
env_size=13
|
| 12 |
+
num_levels=9600
|
| 13 |
+
compile=True
|
| 14 |
+
use_prev_action=False
|
| 15 |
+
weight_restrictions=None
|
| 16 |
+
weight_restrictions_invert=False
|
| 17 |
+
use_bf16=False
|
| 18 |
+
use_wandb=True
|
| 19 |
+
no_tqdm=False
|
| 20 |
+
seed=702457
|
| 21 |
+
mask_type=first_episode
|
| 22 |
+
ckpt_dir=jaxgmg_fleet_test
|
| 23 |
+
vis_average_state=False
|
| 24 |
+
trim_episodes=False
|
| 25 |
+
num_total_env_steps=9830400
|
| 26 |
+
eval_every=1
|
| 27 |
+
eff_horizon=None
|
| 28 |
+
optim=adam
|
| 29 |
+
env_rule=None
|
| 30 |
+
env_rule_mixture=None
|
| 31 |
+
hf_user=urdshals
|
| 32 |
+
hf_collection=davidquarel/jaxgmg
|
| 33 |
+
use_hf=True
|
| 34 |
+
num_hf_uploads=1
|
| 35 |
+
use_log=True
|
| 36 |
+
log_optimizer_state=False
|
| 37 |
+
resume=None
|
| 38 |
+
resume_id=None
|
| 39 |
+
resume_optim=False
|
| 40 |
+
checkpoint=test_seed_702457
|
| 41 |
+
wandb_project=jaxgmg_fleet_test
|
| 42 |
+
eval_schedule=0:1
|
| 43 |
+
render_sixel=False
|
| 44 |
+
sixel_idx=60
|
| 45 |
+
live_monitor=False
|
| 46 |
+
run_id=0
|
| 47 |
+
seed_formula=None
|
| 48 |
+
deterministic=True
|
| 49 |
+
penalize_time=False
|
| 50 |
+
f_str_ckpt=test_seed_702457
|
| 51 |
+
duplication_factor=-1
|
| 52 |
+
smoke=False
|
| 53 |
+
ntfy=david_jaxgmg
|
| 54 |
+
num_chains=6
|
| 55 |
+
num_draws=3000
|
| 56 |
+
num_steps_bw_draws=1
|
| 57 |
+
on_policy=True
|
| 58 |
+
llc_nbeta=3000
|
| 59 |
+
localization=10
|
| 60 |
+
exact_solver_each_draw=False
|
| 61 |
+
llc_optimizer=sgld
|
| 62 |
+
iw_clip_eps=None
|
| 63 |
+
rmsprop_burnin_steps=20
|
| 64 |
+
llc_data_file=llc_scan_open_reinforce.pkl
|
| 65 |
+
llc_checkpoint_index=None
|
| 66 |
+
llc_checkpoint_number=None
|
| 67 |
+
sink=None
|
| 68 |
+
repo_id=davidquarel/jaxgmg_ckpt_zip
|
| 69 |
+
use_shuffled_checkpoints=False
|
| 70 |
+
force_re_download=False
|
| 71 |
+
off_distribution_data=False
|
| 72 |
+
evaluate_every_position=False
|
| 73 |
+
num_prev_actions=1
|
| 74 |
+
eff_acc_steps=4
|
| 75 |
+
chunk_size=9600
|
| 76 |
+
env_steps_per_microbatch=153600
|
| 77 |
+
ckpt_path=jaxgmg_fleet_test/test_seed_702457
|
| 78 |
+
env_steps_per_loop=614400
|
| 79 |
+
total_loops=16
|
test_seed_702457/config.json
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha": 0.0,
|
| 3 |
+
"checkpoint": "test_seed_702457",
|
| 4 |
+
"cheese_loc": "any",
|
| 5 |
+
"chunk_size": 9600,
|
| 6 |
+
"ckpt_dir": "jaxgmg_fleet_test",
|
| 7 |
+
"ckpt_path": "jaxgmg_fleet_test/test_seed_702457",
|
| 8 |
+
"compile": true,
|
| 9 |
+
"deterministic": true,
|
| 10 |
+
"discount_rate": 0.99,
|
| 11 |
+
"duplication_factor": -1,
|
| 12 |
+
"eff_acc_steps": 4,
|
| 13 |
+
"eff_horizon": null,
|
| 14 |
+
"env_layout": "open",
|
| 15 |
+
"env_rule": null,
|
| 16 |
+
"env_rule_mixture": null,
|
| 17 |
+
"env_size": 13,
|
| 18 |
+
"env_steps_per_loop": 614400,
|
| 19 |
+
"env_steps_per_microbatch": 153600,
|
| 20 |
+
"eval_every": 1,
|
| 21 |
+
"eval_schedule": "0:1",
|
| 22 |
+
"evaluate_every_position": false,
|
| 23 |
+
"exact_solver_each_draw": false,
|
| 24 |
+
"f_str_ckpt": "test_seed_702457",
|
| 25 |
+
"force_re_download": false,
|
| 26 |
+
"grad_acc_per_chunk": 4,
|
| 27 |
+
"hf_collection": "davidquarel/jaxgmg",
|
| 28 |
+
"hf_user": "urdshals",
|
| 29 |
+
"iw_clip_eps": null,
|
| 30 |
+
"live_monitor": false,
|
| 31 |
+
"llc_checkpoint_index": null,
|
| 32 |
+
"llc_checkpoint_number": null,
|
| 33 |
+
"llc_data_file": "llc_scan_open_reinforce.pkl",
|
| 34 |
+
"llc_nbeta": 3000,
|
| 35 |
+
"llc_optimizer": "sgld",
|
| 36 |
+
"localization": 10,
|
| 37 |
+
"log_optimizer_state": false,
|
| 38 |
+
"lr": 5e-05,
|
| 39 |
+
"mask_type": "first_episode",
|
| 40 |
+
"model_type": "impala",
|
| 41 |
+
"no_tqdm": false,
|
| 42 |
+
"ntfy": "david_jaxgmg",
|
| 43 |
+
"num_chains": 6,
|
| 44 |
+
"num_draws": 3000,
|
| 45 |
+
"num_hf_uploads": 1,
|
| 46 |
+
"num_levels": 9600,
|
| 47 |
+
"num_prev_actions": 1,
|
| 48 |
+
"num_rollout_chunks": 1,
|
| 49 |
+
"num_rollout_steps": 64,
|
| 50 |
+
"num_steps_bw_draws": 1,
|
| 51 |
+
"num_total_env_steps": 9830400,
|
| 52 |
+
"off_distribution_data": false,
|
| 53 |
+
"on_policy": true,
|
| 54 |
+
"optim": "adam",
|
| 55 |
+
"penalize_time": false,
|
| 56 |
+
"render_sixel": false,
|
| 57 |
+
"repo_id": "davidquarel/jaxgmg_ckpt_zip",
|
| 58 |
+
"resume": null,
|
| 59 |
+
"resume_id": null,
|
| 60 |
+
"resume_optim": false,
|
| 61 |
+
"rl_action": "train",
|
| 62 |
+
"rmsprop_burnin_steps": 20,
|
| 63 |
+
"run_id": 0,
|
| 64 |
+
"seed": 702457,
|
| 65 |
+
"seed_formula": null,
|
| 66 |
+
"sink": null,
|
| 67 |
+
"sixel_idx": 60,
|
| 68 |
+
"smoke": false,
|
| 69 |
+
"total_loops": 16,
|
| 70 |
+
"trim_episodes": false,
|
| 71 |
+
"use_bf16": false,
|
| 72 |
+
"use_hf": true,
|
| 73 |
+
"use_log": true,
|
| 74 |
+
"use_prev_action": false,
|
| 75 |
+
"use_shuffled_checkpoints": false,
|
| 76 |
+
"use_wandb": true,
|
| 77 |
+
"vis_average_state": false,
|
| 78 |
+
"wandb_project": "jaxgmg_fleet_test",
|
| 79 |
+
"weight_restrictions": null,
|
| 80 |
+
"weight_restrictions_invert": false
|
| 81 |
+
}
|
test_seed_702457/eval.jsonl
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"ts": "2026-03-19T17:21:29.514808Z", "loop": 0, "env_steps": 0, "exact_value_avg": 0.1466498076915741, "regret_dist": 0.766993522644043, "regret_other_env": 0.6786674857139587, "regret_corner": 0.766993522644043, "regret_row": 0.6969188451766968, "regret_any": 0.6786674857139587, "regret_bot": 0.7575015425682068, "value_dist": 0.1466498076915741, "value_other_env": 0.26028770208358765, "value_corner": 0.1466498076915741, "value_row": 0.22929459810256958, "value_any": 0.26028770208358765, "value_bot": 0.16871191561222076, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_00.png", "entropy": 1.3857083320617676}
|
| 2 |
+
{"ts": "2026-03-19T17:21:30.515727Z", "loop": 1, "env_steps": 614400, "exact_value_avg": 0.15038195252418518, "regret_dist": 0.7632613778114319, "regret_other_env": 0.6786831021308899, "regret_corner": 0.7632613778114319, "regret_row": 0.69448322057724, "regret_any": 0.6786831021308899, "regret_bot": 0.7595480680465698, "value_dist": 0.15038195252418518, "value_other_env": 0.2602720856666565, "value_corner": 0.15038195252418518, "value_row": 0.23173025250434875, "value_any": 0.2602720856666565, "value_bot": 0.1666654348373413, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_01.png", "entropy": 1.3857067823410034}
|
| 3 |
+
{"ts": "2026-03-19T17:21:31.421204Z", "loop": 2, "env_steps": 1228800, "exact_value_avg": 0.1542574018239975, "regret_dist": 0.759385883808136, "regret_other_env": 0.6787131428718567, "regret_corner": 0.759385883808136, "regret_row": 0.6920261979103088, "regret_any": 0.6787131428718567, "regret_bot": 0.7616009712219238, "value_dist": 0.1542574018239975, "value_other_env": 0.2602420747280121, "value_corner": 0.1542574018239975, "value_row": 0.2341872602701187, "value_any": 0.2602420747280121, "value_bot": 0.1646125316619873, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_02.png", "entropy": 1.3856995105743408}
|
| 4 |
+
{"ts": "2026-03-19T17:21:32.326929Z", "loop": 3, "env_steps": 1843200, "exact_value_avg": 0.15816651284694672, "regret_dist": 0.7554767727851868, "regret_other_env": 0.6787505149841309, "regret_corner": 0.7554767727851868, "regret_row": 0.6895408630371094, "regret_any": 0.6787505149841309, "regret_bot": 0.7636478543281555, "value_dist": 0.15816651284694672, "value_other_env": 0.26020464301109314, "value_corner": 0.15816651284694672, "value_row": 0.23667263984680176, "value_any": 0.26020464301109314, "value_bot": 0.16256560385227203, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_03.png", "entropy": 1.3856860399246216}
|
| 5 |
+
{"ts": "2026-03-19T17:21:33.237299Z", "loop": 4, "env_steps": 2457600, "exact_value_avg": 0.162040114402771, "regret_dist": 0.7516032457351685, "regret_other_env": 0.6788009405136108, "regret_corner": 0.7516032457351685, "regret_row": 0.6871009469032288, "regret_any": 0.6788009405136108, "regret_bot": 0.765644371509552, "value_dist": 0.162040114402771, "value_other_env": 0.26015421748161316, "value_corner": 0.162040114402771, "value_row": 0.23911258578300476, "value_any": 0.26015421748161316, "value_bot": 0.16056908667087555, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_04.png", "entropy": 1.3856663703918457}
|
| 6 |
+
{"ts": "2026-03-19T17:21:34.153507Z", "loop": 5, "env_steps": 3072000, "exact_value_avg": 0.16590137779712677, "regret_dist": 0.7477418780326843, "regret_other_env": 0.6788832545280457, "regret_corner": 0.7477418780326843, "regret_row": 0.6847108602523804, "regret_any": 0.6788832545280457, "regret_bot": 0.7676279544830322, "value_dist": 0.16590137779712677, "value_other_env": 0.26007187366485596, "value_corner": 0.16590137779712677, "value_row": 0.241502583026886, "value_any": 0.26007187366485596, "value_bot": 0.1585855484008789, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_05.png", "entropy": 1.3856405019760132}
|
| 7 |
+
{"ts": "2026-03-19T17:21:35.106227Z", "loop": 6, "env_steps": 3686400, "exact_value_avg": 0.16984908282756805, "regret_dist": 0.7437942028045654, "regret_other_env": 0.6789911389350891, "regret_corner": 0.7437942028045654, "regret_row": 0.6823408007621765, "regret_any": 0.6789911389350891, "regret_bot": 0.7696192860603333, "value_dist": 0.16984908282756805, "value_other_env": 0.2599640190601349, "value_corner": 0.16984908282756805, "value_row": 0.24387268722057343, "value_any": 0.2599640190601349, "value_bot": 0.15659412741661072, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_06.png", "entropy": 1.3856083154678345}
|
| 8 |
+
{"ts": "2026-03-19T17:21:36.032458Z", "loop": 7, "env_steps": 4300800, "exact_value_avg": 0.17389361560344696, "regret_dist": 0.7397496700286865, "regret_other_env": 0.6791173219680786, "regret_corner": 0.7397496700286865, "regret_row": 0.6799774765968323, "regret_any": 0.6791173219680786, "regret_bot": 0.7716079950332642, "value_dist": 0.17389361560344696, "value_other_env": 0.259837806224823, "value_corner": 0.17389361560344696, "value_row": 0.2462359219789505, "value_any": 0.259837806224823, "value_bot": 0.15460550785064697, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_07.png", "entropy": 1.3855693340301514}
|
| 9 |
+
{"ts": "2026-03-19T17:21:36.944709Z", "loop": 8, "env_steps": 4915200, "exact_value_avg": 0.17807503044605255, "regret_dist": 0.7355682849884033, "regret_other_env": 0.6792623400688171, "regret_corner": 0.7355682849884033, "regret_row": 0.677588164806366, "regret_any": 0.6792623400688171, "regret_bot": 0.7736051678657532, "value_dist": 0.17807503044605255, "value_other_env": 0.2596927881240845, "value_corner": 0.17807503044605255, "value_row": 0.24862535297870636, "value_any": 0.2596927881240845, "value_bot": 0.152608260512352, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_08.png", "entropy": 1.3855232000350952}
|
| 10 |
+
{"ts": "2026-03-19T17:21:37.865418Z", "loop": 9, "env_steps": 5529600, "exact_value_avg": 0.18242613971233368, "regret_dist": 0.7312171459197998, "regret_other_env": 0.679425060749054, "regret_corner": 0.7312171459197998, "regret_row": 0.6751810312271118, "regret_any": 0.679425060749054, "regret_bot": 0.7756032347679138, "value_dist": 0.18242613971233368, "value_other_env": 0.2595300078392029, "value_corner": 0.18242613971233368, "value_row": 0.25103244185447693, "value_any": 0.2595300078392029, "value_bot": 0.15061026811599731, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_09.png", "entropy": 1.3854701519012451}
|
| 11 |
+
{"ts": "2026-03-19T17:21:38.768846Z", "loop": 10, "env_steps": 6144000, "exact_value_avg": 0.18694233894348145, "regret_dist": 0.726701021194458, "regret_other_env": 0.6796091198921204, "regret_corner": 0.726701021194458, "regret_row": 0.6727761626243591, "regret_any": 0.6796091198921204, "regret_bot": 0.7776013612747192, "value_dist": 0.18694233894348145, "value_other_env": 0.2593460977077484, "value_corner": 0.18694233894348145, "value_row": 0.2534373700618744, "value_any": 0.2593460977077484, "value_bot": 0.14861206710338593, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_10.png", "entropy": 1.3854094743728638}
|
| 12 |
+
{"ts": "2026-03-19T17:21:39.712262Z", "loop": 11, "env_steps": 6758400, "exact_value_avg": 0.19159384071826935, "regret_dist": 0.7220494747161865, "regret_other_env": 0.6798242330551147, "regret_corner": 0.7220494747161865, "regret_row": 0.670360803604126, "regret_any": 0.6798242330551147, "regret_bot": 0.7796393036842346, "value_dist": 0.19159384071826935, "value_other_env": 0.25913089513778687, "value_corner": 0.19159384071826935, "value_row": 0.25585266947746277, "value_any": 0.25913089513778687, "value_bot": 0.1465742141008377, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_11.png", "entropy": 1.3853397369384766}
|
| 13 |
+
{"ts": "2026-03-19T17:21:40.626622Z", "loop": 12, "env_steps": 7372800, "exact_value_avg": 0.19639050960540771, "regret_dist": 0.717252790927887, "regret_other_env": 0.6800767183303833, "regret_corner": 0.717252790927887, "regret_row": 0.6679074764251709, "regret_any": 0.6800767183303833, "regret_bot": 0.781746506690979, "value_dist": 0.19639050960540771, "value_other_env": 0.2588783800601959, "value_corner": 0.19639050960540771, "value_row": 0.25830599665641785, "value_any": 0.2588783800601959, "value_bot": 0.14446696639060974, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_12.png", "entropy": 1.3852595090866089}
|
| 14 |
+
{"ts": "2026-03-19T17:21:41.558303Z", "loop": 13, "env_steps": 7987200, "exact_value_avg": 0.20135627686977386, "regret_dist": 0.7122870683670044, "regret_other_env": 0.6803653836250305, "regret_corner": 0.7122870683670044, "regret_row": 0.6654231548309326, "regret_any": 0.6803653836250305, "regret_bot": 0.7839027047157288, "value_dist": 0.20135627686977386, "value_other_env": 0.2585897445678711, "value_corner": 0.20135627686977386, "value_row": 0.26079028844833374, "value_any": 0.2585897445678711, "value_bot": 0.14231079816818237, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_13.png", "entropy": 1.3851679563522339}
|
| 15 |
+
{"ts": "2026-03-19T17:21:42.483732Z", "loop": 14, "env_steps": 8601600, "exact_value_avg": 0.20654135942459106, "regret_dist": 0.7071019411087036, "regret_other_env": 0.6806955337524414, "regret_corner": 0.7071019411087036, "regret_row": 0.6628521680831909, "regret_any": 0.6806955337524414, "regret_bot": 0.78614741563797, "value_dist": 0.20654135942459106, "value_other_env": 0.2582596242427826, "value_corner": 0.20654135942459106, "value_row": 0.2633613348007202, "value_any": 0.2582596242427826, "value_bot": 0.14006610214710236, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_14.png", "entropy": 1.3850629329681396}
|
| 16 |
+
{"ts": "2026-03-19T17:21:43.413873Z", "loop": 15, "env_steps": 9216000, "exact_value_avg": 0.2119634449481964, "regret_dist": 0.7016798853874207, "regret_other_env": 0.6810704469680786, "regret_corner": 0.7016798853874207, "regret_row": 0.6601826548576355, "regret_any": 0.6810704469680786, "regret_bot": 0.7884900569915771, "value_dist": 0.2119634449481964, "value_other_env": 0.2578847110271454, "value_corner": 0.2119634449481964, "value_row": 0.26603084802627563, "value_any": 0.2578847110271454, "value_bot": 0.1377233862876892, "action_probs_image_path": "jaxgmg_fleet_test/test_seed_702457/action_probs/img_15.png", "entropy": 1.3849424123764038}
|
test_seed_702457/eval.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-03-19T17:21:29.514808Z] Loop 0: Reg: 0.7670, AltReg: 0.6787, CorReg: 0.7670, TopReg: 0.6969, AllReg: 0.6787, BotRReg: 0.7575, OV: 0.1466, AltV: 0.2603, CorV: 0.1466, TopV: 0.2293, BotRV: 0.1687, AllV: 0.2603
|
| 2 |
+
[2026-03-19T17:21:30.515727Z] Loop 1: Reg: 0.7633, AltReg: 0.6787, CorReg: 0.7633, TopReg: 0.6945, AllReg: 0.6787, BotRReg: 0.7595, OV: 0.1504, AltV: 0.2603, CorV: 0.1504, TopV: 0.2317, BotRV: 0.1667, AllV: 0.2603
|
| 3 |
+
[2026-03-19T17:21:31.421204Z] Loop 2: Reg: 0.7594, AltReg: 0.6787, CorReg: 0.7594, TopReg: 0.6920, AllReg: 0.6787, BotRReg: 0.7616, OV: 0.1543, AltV: 0.2602, CorV: 0.1543, TopV: 0.2342, BotRV: 0.1646, AllV: 0.2602
|
| 4 |
+
[2026-03-19T17:21:32.326929Z] Loop 3: Reg: 0.7555, AltReg: 0.6788, CorReg: 0.7555, TopReg: 0.6895, AllReg: 0.6788, BotRReg: 0.7636, OV: 0.1582, AltV: 0.2602, CorV: 0.1582, TopV: 0.2367, BotRV: 0.1626, AllV: 0.2602
|
| 5 |
+
[2026-03-19T17:21:33.237299Z] Loop 4: Reg: 0.7516, AltReg: 0.6788, CorReg: 0.7516, TopReg: 0.6871, AllReg: 0.6788, BotRReg: 0.7656, OV: 0.1620, AltV: 0.2602, CorV: 0.1620, TopV: 0.2391, BotRV: 0.1606, AllV: 0.2602
|
| 6 |
+
[2026-03-19T17:21:34.153507Z] Loop 5: Reg: 0.7477, AltReg: 0.6789, CorReg: 0.7477, TopReg: 0.6847, AllReg: 0.6789, BotRReg: 0.7676, OV: 0.1659, AltV: 0.2601, CorV: 0.1659, TopV: 0.2415, BotRV: 0.1586, AllV: 0.2601
|
| 7 |
+
[2026-03-19T17:21:35.106227Z] Loop 6: Reg: 0.7438, AltReg: 0.6790, CorReg: 0.7438, TopReg: 0.6823, AllReg: 0.6790, BotRReg: 0.7696, OV: 0.1698, AltV: 0.2600, CorV: 0.1698, TopV: 0.2439, BotRV: 0.1566, AllV: 0.2600
|
| 8 |
+
[2026-03-19T17:21:36.032458Z] Loop 7: Reg: 0.7397, AltReg: 0.6791, CorReg: 0.7397, TopReg: 0.6800, AllReg: 0.6791, BotRReg: 0.7716, OV: 0.1739, AltV: 0.2598, CorV: 0.1739, TopV: 0.2462, BotRV: 0.1546, AllV: 0.2598
|
| 9 |
+
[2026-03-19T17:21:36.944709Z] Loop 8: Reg: 0.7356, AltReg: 0.6793, CorReg: 0.7356, TopReg: 0.6776, AllReg: 0.6793, BotRReg: 0.7736, OV: 0.1781, AltV: 0.2597, CorV: 0.1781, TopV: 0.2486, BotRV: 0.1526, AllV: 0.2597
|
| 10 |
+
[2026-03-19T17:21:37.865418Z] Loop 9: Reg: 0.7312, AltReg: 0.6794, CorReg: 0.7312, TopReg: 0.6752, AllReg: 0.6794, BotRReg: 0.7756, OV: 0.1824, AltV: 0.2595, CorV: 0.1824, TopV: 0.2510, BotRV: 0.1506, AllV: 0.2595
|
| 11 |
+
[2026-03-19T17:21:38.768846Z] Loop 10: Reg: 0.7267, AltReg: 0.6796, CorReg: 0.7267, TopReg: 0.6728, AllReg: 0.6796, BotRReg: 0.7776, OV: 0.1869, AltV: 0.2593, CorV: 0.1869, TopV: 0.2534, BotRV: 0.1486, AllV: 0.2593
|
| 12 |
+
[2026-03-19T17:21:39.712262Z] Loop 11: Reg: 0.7220, AltReg: 0.6798, CorReg: 0.7220, TopReg: 0.6704, AllReg: 0.6798, BotRReg: 0.7796, OV: 0.1916, AltV: 0.2591, CorV: 0.1916, TopV: 0.2559, BotRV: 0.1466, AllV: 0.2591
|
| 13 |
+
[2026-03-19T17:21:40.626622Z] Loop 12: Reg: 0.7173, AltReg: 0.6801, CorReg: 0.7173, TopReg: 0.6679, AllReg: 0.6801, BotRReg: 0.7817, OV: 0.1964, AltV: 0.2589, CorV: 0.1964, TopV: 0.2583, BotRV: 0.1445, AllV: 0.2589
|
| 14 |
+
[2026-03-19T17:21:41.558303Z] Loop 13: Reg: 0.7123, AltReg: 0.6804, CorReg: 0.7123, TopReg: 0.6654, AllReg: 0.6804, BotRReg: 0.7839, OV: 0.2014, AltV: 0.2586, CorV: 0.2014, TopV: 0.2608, BotRV: 0.1423, AllV: 0.2586
|
| 15 |
+
[2026-03-19T17:21:42.483732Z] Loop 14: Reg: 0.7071, AltReg: 0.6807, CorReg: 0.7071, TopReg: 0.6629, AllReg: 0.6807, BotRReg: 0.7861, OV: 0.2065, AltV: 0.2583, CorV: 0.2065, TopV: 0.2634, BotRV: 0.1401, AllV: 0.2583
|
| 16 |
+
[2026-03-19T17:21:43.413873Z] Loop 15: Reg: 0.7017, AltReg: 0.6811, CorReg: 0.7017, TopReg: 0.6602, AllReg: 0.6811, BotRReg: 0.7885, OV: 0.2120, AltV: 0.2579, CorV: 0.2120, TopV: 0.2660, BotRV: 0.1377, AllV: 0.2579
|
test_seed_702457/latest_train.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"ts": "2026-03-19T17:21:43.505798Z",
|
| 3 |
+
"loop": 15,
|
| 4 |
+
"env_steps": 9830399,
|
| 5 |
+
"loss": 0.021627891808748245,
|
| 6 |
+
"avg_return": 0.11401231586933136,
|
| 7 |
+
"regret": 0.7016798853874207,
|
| 8 |
+
"entropy": 1.3849334716796875,
|
| 9 |
+
"lr": 5e-05,
|
| 10 |
+
"discount_rate": 0.99,
|
| 11 |
+
"num_rollout_steps": 64
|
| 12 |
+
}
|
test_seed_702457/setup.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"rl_action": "train",
|
| 4 |
+
"model_type": "impala",
|
| 5 |
+
"lr": 5e-05,
|
| 6 |
+
"discount_rate": 0.99,
|
| 7 |
+
"num_rollout_steps": 64,
|
| 8 |
+
"grad_acc_per_chunk": 4,
|
| 9 |
+
"num_rollout_chunks": 1,
|
| 10 |
+
"cheese_loc": "any",
|
| 11 |
+
"env_layout": "open",
|
| 12 |
+
"alpha": 0.0,
|
| 13 |
+
"env_size": 13,
|
| 14 |
+
"num_levels": 9600,
|
| 15 |
+
"compile": true,
|
| 16 |
+
"use_prev_action": false,
|
| 17 |
+
"weight_restrictions": null,
|
| 18 |
+
"weight_restrictions_invert": false,
|
| 19 |
+
"use_bf16": false,
|
| 20 |
+
"use_wandb": true,
|
| 21 |
+
"no_tqdm": false,
|
| 22 |
+
"seed": 702457,
|
| 23 |
+
"mask_type": "first_episode",
|
| 24 |
+
"ckpt_dir": "jaxgmg_fleet_test",
|
| 25 |
+
"vis_average_state": false,
|
| 26 |
+
"trim_episodes": false,
|
| 27 |
+
"num_total_env_steps": 9830400,
|
| 28 |
+
"eval_every": 1,
|
| 29 |
+
"eff_horizon": null,
|
| 30 |
+
"optim": "adam",
|
| 31 |
+
"env_rule": null,
|
| 32 |
+
"env_rule_mixture": null,
|
| 33 |
+
"hf_user": "urdshals",
|
| 34 |
+
"hf_collection": "davidquarel/jaxgmg",
|
| 35 |
+
"use_hf": true,
|
| 36 |
+
"num_hf_uploads": 1,
|
| 37 |
+
"use_log": true,
|
| 38 |
+
"log_optimizer_state": false,
|
| 39 |
+
"resume": null,
|
| 40 |
+
"resume_id": null,
|
| 41 |
+
"resume_optim": false,
|
| 42 |
+
"checkpoint": "test_seed_702457",
|
| 43 |
+
"wandb_project": "jaxgmg_fleet_test",
|
| 44 |
+
"eval_schedule": "0:1",
|
| 45 |
+
"render_sixel": false,
|
| 46 |
+
"sixel_idx": 60,
|
| 47 |
+
"live_monitor": false,
|
| 48 |
+
"run_id": 0,
|
| 49 |
+
"seed_formula": null,
|
| 50 |
+
"deterministic": true,
|
| 51 |
+
"penalize_time": false,
|
| 52 |
+
"f_str_ckpt": "test_seed_702457",
|
| 53 |
+
"duplication_factor": -1,
|
| 54 |
+
"smoke": false,
|
| 55 |
+
"ntfy": "david_jaxgmg",
|
| 56 |
+
"num_chains": 6,
|
| 57 |
+
"num_draws": 3000,
|
| 58 |
+
"num_steps_bw_draws": 1,
|
| 59 |
+
"on_policy": true,
|
| 60 |
+
"llc_nbeta": 3000,
|
| 61 |
+
"localization": 10,
|
| 62 |
+
"exact_solver_each_draw": false,
|
| 63 |
+
"llc_optimizer": "sgld",
|
| 64 |
+
"iw_clip_eps": null,
|
| 65 |
+
"rmsprop_burnin_steps": 20,
|
| 66 |
+
"llc_data_file": "llc_scan_open_reinforce.pkl",
|
| 67 |
+
"llc_checkpoint_index": null,
|
| 68 |
+
"llc_checkpoint_number": null,
|
| 69 |
+
"sink": null,
|
| 70 |
+
"repo_id": "davidquarel/jaxgmg_ckpt_zip",
|
| 71 |
+
"use_shuffled_checkpoints": false,
|
| 72 |
+
"force_re_download": false,
|
| 73 |
+
"off_distribution_data": false,
|
| 74 |
+
"evaluate_every_position": false,
|
| 75 |
+
"num_prev_actions": 1,
|
| 76 |
+
"eff_acc_steps": 4,
|
| 77 |
+
"chunk_size": 9600,
|
| 78 |
+
"env_steps_per_microbatch": 153600,
|
| 79 |
+
"ckpt_path": "jaxgmg_fleet_test/test_seed_702457",
|
| 80 |
+
"env_steps_per_loop": 614400,
|
| 81 |
+
"total_loops": 16
|
| 82 |
+
},
|
| 83 |
+
"setup": {
|
| 84 |
+
"start_time_utc": "2026-03-19T17:20:51.761066Z",
|
| 85 |
+
"seed": 702457,
|
| 86 |
+
"device": "cuda",
|
| 87 |
+
"python_version": "3.11.11",
|
| 88 |
+
"torch_version": "2.10.0+cu128",
|
| 89 |
+
"jax_version": "0.6.2",
|
| 90 |
+
"hostname": "28c60cd91d95",
|
| 91 |
+
"platform": "Linux-6.14.0-35-generic-x86_64-with-glibc2.35",
|
| 92 |
+
"git_sha": null,
|
| 93 |
+
"git_branch": null,
|
| 94 |
+
"use_wandb": true
|
| 95 |
+
}
|
| 96 |
+
}
|
test_seed_702457/setup.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Start: 2026-03-19T17:20:51.761066Z
|
| 2 |
+
Seed: 702457
|
| 3 |
+
Device: cuda
|
| 4 |
+
Python: 3.11.11 | Torch: 2.10.0+cu128 | JAX: 0.6.2
|
| 5 |
+
Host: 28c60cd91d95
|
| 6 |
+
Platform: Linux-6.14.0-35-generic-x86_64-with-glibc2.35
|
| 7 |
+
Git: branch=None sha=None
|
| 8 |
+
use_wandb: True
|
test_seed_702457/state_counts.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75970e4cc6062e2ae422277f46c08859ab9d8db2ff32fe5544189665fa74a0ba
|
| 3 |
+
size 116288
|
test_seed_702457/train.jsonl
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"ts": "2026-03-19T17:21:29.652829Z", "loop": 0, "env_steps": 614399, "loss": 0.015351004898548126, "avg_return": 0.08953062444925308, "regret": 0.766993522644043, "entropy": 1.3857018947601318, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 2 |
+
{"ts": "2026-03-19T17:21:30.564293Z", "loop": 1, "env_steps": 1228799, "loss": 0.014568308368325233, "avg_return": 0.08772362023591995, "regret": 0.7632613778114319, "entropy": 1.3856998682022095, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 3 |
+
{"ts": "2026-03-19T17:21:31.466618Z", "loop": 2, "env_steps": 1843199, "loss": 0.01513539906591177, "avg_return": 0.08583012968301773, "regret": 0.759385883808136, "entropy": 1.3856923580169678, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 4 |
+
{"ts": "2026-03-19T17:21:32.376689Z", "loop": 3, "env_steps": 2457599, "loss": 0.02005593106150627, "avg_return": 0.09247411042451859, "regret": 0.7554767727851868, "entropy": 1.3856785297393799, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 5 |
+
{"ts": "2026-03-19T17:21:33.292418Z", "loop": 4, "env_steps": 3071999, "loss": 0.016897454857826233, "avg_return": 0.09452583640813828, "regret": 0.7516032457351685, "entropy": 1.3856585025787354, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 6 |
+
{"ts": "2026-03-19T17:21:34.242449Z", "loop": 5, "env_steps": 3686399, "loss": 0.016774149611592293, "avg_return": 0.0945819616317749, "regret": 0.7477418780326843, "entropy": 1.3856326341629028, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 7 |
+
{"ts": "2026-03-19T17:21:35.172895Z", "loop": 6, "env_steps": 4300799, "loss": 0.018350157886743546, "avg_return": 0.09880296885967255, "regret": 0.7437942028045654, "entropy": 1.385600209236145, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 8 |
+
{"ts": "2026-03-19T17:21:36.085630Z", "loop": 7, "env_steps": 4915199, "loss": 0.018662534654140472, "avg_return": 0.09848998486995697, "regret": 0.7397496700286865, "entropy": 1.3855609893798828, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 9 |
+
{"ts": "2026-03-19T17:21:37.007675Z", "loop": 8, "env_steps": 5529599, "loss": 0.017929529771208763, "avg_return": 0.10079097002744675, "regret": 0.7355682849884033, "entropy": 1.3855141401290894, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 10 |
+
{"ts": "2026-03-19T17:21:37.910989Z", "loop": 9, "env_steps": 6143999, "loss": 0.018253426998853683, "avg_return": 0.09943941235542297, "regret": 0.7312171459197998, "entropy": 1.3854602575302124, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 11 |
+
{"ts": "2026-03-19T17:21:38.853387Z", "loop": 10, "env_steps": 6758399, "loss": 0.021026184782385826, "avg_return": 0.10836489498615265, "regret": 0.726701021194458, "entropy": 1.3853987455368042, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 12 |
+
{"ts": "2026-03-19T17:21:39.769091Z", "loop": 11, "env_steps": 7372799, "loss": 0.022691909223794937, "avg_return": 0.10879600793123245, "regret": 0.7220494747161865, "entropy": 1.3853288888931274, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 13 |
+
{"ts": "2026-03-19T17:21:40.701901Z", "loop": 12, "env_steps": 7987199, "loss": 0.019339917227625847, "avg_return": 0.10545758903026581, "regret": 0.717252790927887, "entropy": 1.3852492570877075, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 14 |
+
{"ts": "2026-03-19T17:21:41.624686Z", "loop": 13, "env_steps": 8601599, "loss": 0.020299173891544342, "avg_return": 0.11108823120594025, "regret": 0.7122870683670044, "entropy": 1.385157823562622, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 15 |
+
{"ts": "2026-03-19T17:21:42.557072Z", "loop": 14, "env_steps": 9215999, "loss": 0.01998845487833023, "avg_return": 0.1091562807559967, "regret": 0.7071019411087036, "entropy": 1.3850531578063965, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
| 16 |
+
{"ts": "2026-03-19T17:21:43.505798Z", "loop": 15, "env_steps": 9830399, "loss": 0.021627891808748245, "avg_return": 0.11401231586933136, "regret": 0.7016798853874207, "entropy": 1.3849334716796875, "lr": 5e-05, "discount_rate": 0.99, "num_rollout_steps": 64}
|
test_seed_702457/train.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-03-19T17:21:29.652829Z] Loop 0: Loss: 1.54e-02, Avg Return: 0.090, Regret: 0.7670, Entropy: 1.3857
|
| 2 |
+
[2026-03-19T17:21:30.564293Z] Loop 1: Loss: 1.46e-02, Avg Return: 0.088, Regret: 0.7633, Entropy: 1.3857
|
| 3 |
+
[2026-03-19T17:21:31.466618Z] Loop 2: Loss: 1.51e-02, Avg Return: 0.086, Regret: 0.7594, Entropy: 1.3857
|
| 4 |
+
[2026-03-19T17:21:32.376689Z] Loop 3: Loss: 2.01e-02, Avg Return: 0.092, Regret: 0.7555, Entropy: 1.3857
|
| 5 |
+
[2026-03-19T17:21:33.292418Z] Loop 4: Loss: 1.69e-02, Avg Return: 0.095, Regret: 0.7516, Entropy: 1.3857
|
| 6 |
+
[2026-03-19T17:21:34.242449Z] Loop 5: Loss: 1.68e-02, Avg Return: 0.095, Regret: 0.7477, Entropy: 1.3856
|
| 7 |
+
[2026-03-19T17:21:35.172895Z] Loop 6: Loss: 1.84e-02, Avg Return: 0.099, Regret: 0.7438, Entropy: 1.3856
|
| 8 |
+
[2026-03-19T17:21:36.085630Z] Loop 7: Loss: 1.87e-02, Avg Return: 0.098, Regret: 0.7397, Entropy: 1.3856
|
| 9 |
+
[2026-03-19T17:21:37.007675Z] Loop 8: Loss: 1.79e-02, Avg Return: 0.101, Regret: 0.7356, Entropy: 1.3855
|
| 10 |
+
[2026-03-19T17:21:37.910989Z] Loop 9: Loss: 1.83e-02, Avg Return: 0.099, Regret: 0.7312, Entropy: 1.3855
|
| 11 |
+
[2026-03-19T17:21:38.853387Z] Loop 10: Loss: 2.10e-02, Avg Return: 0.108, Regret: 0.7267, Entropy: 1.3854
|
| 12 |
+
[2026-03-19T17:21:39.769091Z] Loop 11: Loss: 2.27e-02, Avg Return: 0.109, Regret: 0.7220, Entropy: 1.3853
|
| 13 |
+
[2026-03-19T17:21:40.701901Z] Loop 12: Loss: 1.93e-02, Avg Return: 0.105, Regret: 0.7173, Entropy: 1.3852
|
| 14 |
+
[2026-03-19T17:21:41.624686Z] Loop 13: Loss: 2.03e-02, Avg Return: 0.111, Regret: 0.7123, Entropy: 1.3852
|
| 15 |
+
[2026-03-19T17:21:42.557072Z] Loop 14: Loss: 2.00e-02, Avg Return: 0.109, Regret: 0.7071, Entropy: 1.3851
|
| 16 |
+
[2026-03-19T17:21:43.505798Z] Loop 15: Loss: 2.16e-02, Avg Return: 0.114, Regret: 0.7017, Entropy: 1.3849
|