| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.026333113890717578, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 2.3684210526315787e-07, | |
| "loss": 0.1142, | |
| "lras/base_loss": 0.10167421974547323, | |
| "lras/critic_logp": -0.9331221472347808, | |
| "lras/eos_logp_mean": -0.5980340207461268, | |
| "lras/eos_logratio_mean": -0.052469537456636316, | |
| "lras/len_signal_mean": -0.10167421974547323, | |
| "lras/policy_logp": -0.8831515078396478, | |
| "lras/signal_mean": -0.049970643251954536, | |
| "lras/signal_std": 0.5512100008316339, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.052666227781435156, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.1118, | |
| "lras/base_loss": 0.09821037890214938, | |
| "lras/critic_logp": -0.9952976092397623, | |
| "lras/eos_logp_mean": -0.7277968910173513, | |
| "lras/eos_logratio_mean": -0.043992197626357664, | |
| "lras/len_signal_mean": -0.09821037890214938, | |
| "lras/policy_logp": -0.9570521209021733, | |
| "lras/signal_mean": -0.03824548757672638, | |
| "lras/signal_std": 0.5455936640966683, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07899934167215274, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 7.631578947368421e-07, | |
| "loss": 0.1178, | |
| "lras/base_loss": 0.10162280514559825, | |
| "lras/critic_logp": -0.9195498289467471, | |
| "lras/eos_logp_mean": -0.6399376740329898, | |
| "lras/eos_logratio_mean": -0.11855290308740223, | |
| "lras/len_signal_mean": -0.10162280514559825, | |
| "lras/policy_logp": -0.8777651083465754, | |
| "lras/signal_mean": -0.04178472033515842, | |
| "lras/signal_std": 0.5618033302482217, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10533245556287031, | |
| "grad_norm": 2.28125, | |
| "learning_rate": 1.0263157894736843e-06, | |
| "loss": 0.0924, | |
| "lras/base_loss": 0.08383929146366427, | |
| "lras/critic_logp": -0.9476410532435064, | |
| "lras/eos_logp_mean": -0.6351981242245529, | |
| "lras/eos_logratio_mean": -0.07531133993761614, | |
| "lras/len_signal_mean": -0.08383929146366427, | |
| "lras/policy_logp": -0.8975484069094307, | |
| "lras/signal_mean": -0.05009264715579568, | |
| "lras/signal_std": 0.5447276963386685, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1316655694535879, | |
| "grad_norm": 2.0, | |
| "learning_rate": 1.2894736842105264e-06, | |
| "loss": 0.1126, | |
| "lras/base_loss": 0.12420870506011852, | |
| "lras/critic_logp": -0.9581334099634796, | |
| "lras/eos_logp_mean": -0.6612339781131595, | |
| "lras/eos_logratio_mean": -0.058000585943227635, | |
| "lras/len_signal_mean": -0.12420870506011852, | |
| "lras/policy_logp": -0.9160325896369954, | |
| "lras/signal_mean": -0.042100816195298534, | |
| "lras/signal_std": 0.5634231527801603, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15799868334430547, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 1.5526315789473682e-06, | |
| "loss": 0.0985, | |
| "lras/base_loss": 0.10172083652432776, | |
| "lras/critic_logp": -0.9688242651130583, | |
| "lras/eos_logp_mean": -0.5217704355076421, | |
| "lras/eos_logratio_mean": -0.09221591723398888, | |
| "lras/len_signal_mean": -0.10172083652432776, | |
| "lras/policy_logp": -0.9193514281016277, | |
| "lras/signal_mean": -0.04947284637066594, | |
| "lras/signal_std": 0.559893012791872, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18433179723502305, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 1.8157894736842106e-06, | |
| "loss": 0.1206, | |
| "lras/base_loss": 0.0932533636863809, | |
| "lras/critic_logp": -0.9705762892193726, | |
| "lras/eos_logp_mean": -0.6604083704442019, | |
| "lras/eos_logratio_mean": -0.05219597876712214, | |
| "lras/len_signal_mean": -0.0932533636863809, | |
| "lras/policy_logp": -0.9210699695716034, | |
| "lras/signal_mean": -0.04950632269988121, | |
| "lras/signal_std": 0.5388945213519036, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.21066491112574062, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 1.999905072250599e-06, | |
| "loss": 0.1017, | |
| "lras/base_loss": 0.09924555227044038, | |
| "lras/critic_logp": -1.0084971192060963, | |
| "lras/eos_logp_mean": -0.7261479496373795, | |
| "lras/eos_logratio_mean": 0.005740139741101302, | |
| "lras/len_signal_mean": -0.09924555227044038, | |
| "lras/policy_logp": -0.9647895054685967, | |
| "lras/signal_mean": -0.04370760570421148, | |
| "lras/signal_std": 0.5396730012260378, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2369980250164582, | |
| "grad_norm": 3.15625, | |
| "learning_rate": 1.9982179691381198e-06, | |
| "loss": 0.0908, | |
| "lras/base_loss": 0.08481774836254771, | |
| "lras/critic_logp": -0.9895585100273292, | |
| "lras/eos_logp_mean": -0.6592542878817766, | |
| "lras/eos_logratio_mean": -0.027437633188674225, | |
| "lras/len_signal_mean": -0.08481774836254771, | |
| "lras/policy_logp": -0.9531850873636806, | |
| "lras/signal_mean": -0.036373416428257366, | |
| "lras/signal_std": 0.5869219893123955, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2633311389071758, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 1.994425456530222e-06, | |
| "loss": 0.1129, | |
| "lras/base_loss": 0.11199428366671782, | |
| "lras/critic_logp": -0.9438399925283527, | |
| "lras/eos_logp_mean": -0.8256492391199572, | |
| "lras/eos_logratio_mean": -0.02534918904711958, | |
| "lras/len_signal_mean": -0.11199428366671782, | |
| "lras/policy_logp": -0.8999046212004524, | |
| "lras/signal_mean": -0.043935376302829135, | |
| "lras/signal_std": 0.5621409649495035, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2896642527978934, | |
| "grad_norm": 2.40625, | |
| "learning_rate": 1.988535533473508e-06, | |
| "loss": 0.1024, | |
| "lras/base_loss": 0.10810597011150094, | |
| "lras/critic_logp": -0.9577996732205776, | |
| "lras/eos_logp_mean": -0.8592553051421419, | |
| "lras/eos_logratio_mean": -0.0717643543437589, | |
| "lras/len_signal_mean": -0.10810597011150094, | |
| "lras/policy_logp": -0.9219818851700149, | |
| "lras/signal_mean": -0.03581778571356334, | |
| "lras/signal_std": 0.5725194892380386, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.31599736668861095, | |
| "grad_norm": 3.015625, | |
| "learning_rate": 1.9805606228057916e-06, | |
| "loss": 0.1265, | |
| "lras/base_loss": 0.12835807931842283, | |
| "lras/critic_logp": -1.0095817961274958, | |
| "lras/eos_logp_mean": -0.7684085411019623, | |
| "lras/eos_logratio_mean": -0.08709225608035923, | |
| "lras/len_signal_mean": -0.12835807931842283, | |
| "lras/policy_logp": -0.9530015037296113, | |
| "lras/signal_mean": -0.05658029363644257, | |
| "lras/signal_std": 0.6249249442014844, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3423304805793285, | |
| "grad_norm": 2.859375, | |
| "learning_rate": 1.9705175449542357e-06, | |
| "loss": 0.1224, | |
| "lras/base_loss": 0.10125547938478122, | |
| "lras/critic_logp": -0.9600592509813903, | |
| "lras/eos_logp_mean": -1.0218269965684157, | |
| "lras/eos_logratio_mean": -0.11965790124522754, | |
| "lras/len_signal_mean": -0.10125547938478122, | |
| "lras/policy_logp": -0.9064808995336749, | |
| "lras/signal_mean": -0.05357835431948087, | |
| "lras/signal_std": 0.610074704233557, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3686635944700461, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 1.9584274824582526e-06, | |
| "loss": 0.1018, | |
| "lras/base_loss": 0.10054350115387933, | |
| "lras/critic_logp": -1.0451293905971037, | |
| "lras/eos_logp_mean": -0.9627558108069934, | |
| "lras/eos_logratio_mean": -0.045046407813788394, | |
| "lras/len_signal_mean": -0.10054350115387933, | |
| "lras/policy_logp": -0.9888114047770802, | |
| "lras/signal_mean": -0.05631798307189244, | |
| "lras/signal_std": 0.6387343685142696, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.39499670836076367, | |
| "grad_norm": 3.625, | |
| "learning_rate": 1.944315935291962e-06, | |
| "loss": 0.1035, | |
| "lras/base_loss": 0.12737492522719548, | |
| "lras/critic_logp": -0.9631433882083107, | |
| "lras/eos_logp_mean": -0.9592411807272583, | |
| "lras/eos_logratio_mean": -0.07466923534375383, | |
| "lras/len_signal_mean": -0.12737492522719548, | |
| "lras/policy_logp": -0.9105693695366387, | |
| "lras/signal_mean": -0.05257401920949931, | |
| "lras/signal_std": 0.5964867433533072, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.42132982225148125, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 1.9282126670804613e-06, | |
| "loss": 0.123, | |
| "lras/base_loss": 0.11884618600852263, | |
| "lras/critic_logp": -1.0371889562378134, | |
| "lras/eos_logp_mean": -1.3217710635391995, | |
| "lras/eos_logratio_mean": -0.1382402204500977, | |
| "lras/len_signal_mean": -0.11884618600852263, | |
| "lras/policy_logp": -0.9810781215265075, | |
| "lras/signal_mean": -0.056110837148049986, | |
| "lras/signal_std": 0.6808470624499023, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4476629361421988, | |
| "grad_norm": 4.6875, | |
| "learning_rate": 1.9101516423233367e-06, | |
| "loss": 0.1198, | |
| "lras/base_loss": 0.12323949924029876, | |
| "lras/critic_logp": -1.0210646749700396, | |
| "lras/eos_logp_mean": -1.4520364223513753, | |
| "lras/eos_logratio_mean": 0.058064849331276494, | |
| "lras/len_signal_mean": -0.12323949924029876, | |
| "lras/policy_logp": -0.9611373103043608, | |
| "lras/signal_mean": -0.05992736518949152, | |
| "lras/signal_std": 0.6614585957024246, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4739960500329164, | |
| "grad_norm": 4.875, | |
| "learning_rate": 1.8901709547578243e-06, | |
| "loss": 0.1146, | |
| "lras/base_loss": 0.09676948979840745, | |
| "lras/critic_logp": -1.0977452527910458, | |
| "lras/eos_logp_mean": -1.5375359531419235, | |
| "lras/eos_logratio_mean": -0.02415443646314088, | |
| "lras/len_signal_mean": -0.09676948979840745, | |
| "lras/policy_logp": -1.048075962383212, | |
| "lras/signal_mean": -0.04966929181947091, | |
| "lras/signal_std": 0.7021123599261045, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.500329163923634, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.868312747012715e-06, | |
| "loss": 0.1145, | |
| "lras/base_loss": 0.1299875703494763, | |
| "lras/critic_logp": -1.1543702971034613, | |
| "lras/eos_logp_mean": -1.7598466821829788, | |
| "lras/eos_logratio_mean": -0.09233958793920465, | |
| "lras/len_signal_mean": -0.1299875703494763, | |
| "lras/policy_logp": -1.089421026425288, | |
| "lras/signal_mean": -0.06494927172706769, | |
| "lras/signal_std": 0.7361256897449493, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5266622778143516, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 1.844623121722465e-06, | |
| "loss": 0.1085, | |
| "lras/base_loss": 0.10210197179476381, | |
| "lras/critic_logp": -1.166679017353766, | |
| "lras/eos_logp_mean": -2.256354816397652, | |
| "lras/eos_logratio_mean": 0.06535769480396994, | |
| "lras/len_signal_mean": -0.10210197179476381, | |
| "lras/policy_logp": -1.1016303254100095, | |
| "lras/signal_mean": -0.06504870085186205, | |
| "lras/signal_std": 0.700895220413804, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5529953917050692, | |
| "grad_norm": 6.625, | |
| "learning_rate": 1.8191520442889917e-06, | |
| "loss": 0.0873, | |
| "lras/base_loss": 0.0839455575478496, | |
| "lras/critic_logp": -1.16306316395247, | |
| "lras/eos_logp_mean": -2.341231356584467, | |
| "lras/eos_logratio_mean": -0.05382752762234304, | |
| "lras/len_signal_mean": -0.0839455575478496, | |
| "lras/policy_logp": -1.1152066120170876, | |
| "lras/signal_mean": -0.047856557965688676, | |
| "lras/signal_std": 0.714643185120076, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5793285055957867, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 1.7919532374962413e-06, | |
| "loss": 0.1069, | |
| "lras/base_loss": 0.10131343678513076, | |
| "lras/critic_logp": -1.1800464059677862, | |
| "lras/eos_logp_mean": -2.5432689307490364, | |
| "lras/eos_logratio_mean": -0.1375842320965603, | |
| "lras/len_signal_mean": -0.10131343678513076, | |
| "lras/policy_logp": -1.1335452714123295, | |
| "lras/signal_mean": -0.04650113194303708, | |
| "lras/signal_std": 0.7621932537294924, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6056616194865043, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.7630840681998065e-06, | |
| "loss": 0.0999, | |
| "lras/base_loss": 0.11961322827119147, | |
| "lras/critic_logp": -1.2542716302399035, | |
| "lras/eos_logp_mean": -3.1082507333368996, | |
| "lras/eos_logratio_mean": -0.18245783513411878, | |
| "lras/len_signal_mean": -0.11961322827119147, | |
| "lras/policy_logp": -1.1768926516372546, | |
| "lras/signal_mean": -0.07737897220920904, | |
| "lras/signal_std": 0.7640609228983521, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6319947333772219, | |
| "grad_norm": 5.375, | |
| "learning_rate": 1.7326054263305844e-06, | |
| "loss": 0.0977, | |
| "lras/base_loss": 0.14090983905480242, | |
| "lras/critic_logp": -1.3281353382264833, | |
| "lras/eos_logp_mean": -3.7832687627058474, | |
| "lras/eos_logratio_mean": -0.2914778530233889, | |
| "lras/len_signal_mean": -0.14090983905480242, | |
| "lras/policy_logp": -1.245892498020099, | |
| "lras/signal_mean": -0.08224283723857301, | |
| "lras/signal_std": 0.8179677240550518, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6583278472679395, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.7005815964676785e-06, | |
| "loss": 0.0713, | |
| "lras/base_loss": 0.09521725961567426, | |
| "lras/critic_logp": -1.3763090826428979, | |
| "lras/eos_logp_mean": -4.498324389662594, | |
| "lras/eos_logratio_mean": -0.13128667979035527, | |
| "lras/len_signal_mean": -0.09521725961567426, | |
| "lras/policy_logp": -1.3112279033000536, | |
| "lras/signal_mean": -0.06508118566841849, | |
| "lras/signal_std": 0.7666989624034614, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.684660961158657, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.6670801222514133e-06, | |
| "loss": 0.0811, | |
| "lras/base_loss": 0.04594566313608084, | |
| "lras/critic_logp": -1.4262544499332057, | |
| "lras/eos_logp_mean": -5.154077760595828, | |
| "lras/eos_logratio_mean": -0.11717722485773266, | |
| "lras/len_signal_mean": -0.04594566313608084, | |
| "lras/policy_logp": -1.3737510004653928, | |
| "lras/signal_mean": -0.05250344919726765, | |
| "lras/signal_std": 0.8633918762207031, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7109940750493746, | |
| "grad_norm": 7.53125, | |
| "learning_rate": 1.6321716639224433e-06, | |
| "loss": 0.0677, | |
| "lras/base_loss": 0.08778424562478904, | |
| "lras/critic_logp": -1.3960568453598623, | |
| "lras/eos_logp_mean": -5.789941209973767, | |
| "lras/eos_logratio_mean": -0.27142641189275307, | |
| "lras/len_signal_mean": -0.08778424562478904, | |
| "lras/policy_logp": -1.333653977581725, | |
| "lras/signal_mean": -0.06240286751914599, | |
| "lras/signal_std": 0.8300917990505695, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7373271889400922, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 1.5959298492874288e-06, | |
| "loss": 0.0679, | |
| "lras/base_loss": 0.09906132236574194, | |
| "lras/critic_logp": -1.498338274056929, | |
| "lras/eos_logp_mean": -5.556122765317559, | |
| "lras/eos_logratio_mean": -0.3260869638994336, | |
| "lras/len_signal_mean": -0.09906132236574194, | |
| "lras/policy_logp": -1.425540412903731, | |
| "lras/signal_mean": -0.07279786617986106, | |
| "lras/signal_std": 0.8526875531300903, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7636603028308098, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 1.558431118425614e-06, | |
| "loss": 0.0328, | |
| "lras/base_loss": 0.03676887405454181, | |
| "lras/critic_logp": -1.4990205292569576, | |
| "lras/eos_logp_mean": -6.733434393815696, | |
| "lras/eos_logratio_mean": 0.036905642971396445, | |
| "lras/len_signal_mean": -0.03676887405454181, | |
| "lras/policy_logp": -1.4447339227398688, | |
| "lras/signal_mean": -0.05428659613297363, | |
| "lras/signal_std": 0.912068764027208, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7899934167215273, | |
| "grad_norm": 7.125, | |
| "learning_rate": 1.5197545624638505e-06, | |
| "loss": 0.0699, | |
| "lras/base_loss": 0.07409377554431558, | |
| "lras/critic_logp": -1.5187017017601145, | |
| "lras/eos_logp_mean": -7.141052421624773, | |
| "lras/eos_logratio_mean": -0.07092250637360849, | |
| "lras/len_signal_mean": -0.07409377554431558, | |
| "lras/policy_logp": -1.4784312940628777, | |
| "lras/signal_mean": -0.04027039942419637, | |
| "lras/signal_std": 0.9233064419589937, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 1.4799817567601156e-06, | |
| "loss": 0.0349, | |
| "lras/base_loss": 0.07394144225254422, | |
| "lras/critic_logp": -1.569767797320449, | |
| "lras/eos_logp_mean": -8.250754566118122, | |
| "lras/eos_logratio_mean": -0.09081489420495928, | |
| "lras/len_signal_mean": -0.07394144225254422, | |
| "lras/policy_logp": -1.5029217528392145, | |
| "lras/signal_mean": -0.0668460627845576, | |
| "lras/signal_std": 0.9525773910805583, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8426596445029625, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 1.4391965888473703e-06, | |
| "loss": 0.0505, | |
| "lras/base_loss": 0.01940623640548438, | |
| "lras/critic_logp": -1.5741578373848195, | |
| "lras/eos_logp_mean": -8.720079303951934, | |
| "lras/eos_logratio_mean": -0.27737888786941767, | |
| "lras/len_signal_mean": -0.01940623640548438, | |
| "lras/policy_logp": -1.532193167358912, | |
| "lras/signal_mean": -0.04196467372728212, | |
| "lras/signal_std": 0.9471153903752565, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8689927583936801, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 1.39748508150065e-06, | |
| "loss": 0.0336, | |
| "lras/base_loss": 0.09266982418484986, | |
| "lras/critic_logp": -1.6629574067419906, | |
| "lras/eos_logp_mean": -9.290708424896001, | |
| "lras/eos_logratio_mean": -0.22771619798149914, | |
| "lras/len_signal_mean": -0.09266982418484986, | |
| "lras/policy_logp": -1.5927546886739972, | |
| "lras/signal_mean": -0.07020271456252122, | |
| "lras/signal_std": 0.9127426297403872, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8953258722843976, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 1.3549352113005727e-06, | |
| "loss": -0.0109, | |
| "lras/base_loss": -0.04638232409124612, | |
| "lras/critic_logp": -1.6357357667299923, | |
| "lras/eos_logp_mean": -9.180945594608783, | |
| "lras/eos_logratio_mean": 0.0466614278499037, | |
| "lras/len_signal_mean": 0.04638232409124612, | |
| "lras/policy_logp": -1.6036042739469465, | |
| "lras/signal_mean": -0.03213150173185709, | |
| "lras/signal_std": 0.8950126395560801, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9216589861751152, | |
| "grad_norm": 10.375, | |
| "learning_rate": 1.3116367230759414e-06, | |
| "loss": 0.0409, | |
| "lras/base_loss": 0.06272783552994951, | |
| "lras/critic_logp": -1.7237166816719136, | |
| "lras/eos_logp_mean": -9.778454429842531, | |
| "lras/eos_logratio_mean": -0.5281537286005914, | |
| "lras/len_signal_mean": -0.06272783552994951, | |
| "lras/policy_logp": -1.6541788693096684, | |
| "lras/signal_mean": -0.06953781113950352, | |
| "lras/signal_std": 0.9097115381620824, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9479921000658328, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 1.2676809406168133e-06, | |
| "loss": 0.0195, | |
| "lras/base_loss": 0.020540355570847168, | |
| "lras/critic_logp": -1.783623909266041, | |
| "lras/eos_logp_mean": -10.980530028697103, | |
| "lras/eos_logratio_mean": -0.20119506297633052, | |
| "lras/len_signal_mean": -0.020540355570847168, | |
| "lras/policy_logp": -1.737673381956658, | |
| "lras/signal_mean": -0.04595051322927215, | |
| "lras/signal_std": 0.9669136556796729, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9743252139565504, | |
| "grad_norm": 10.0, | |
| "learning_rate": 1.2231605740572765e-06, | |
| "loss": 0.0101, | |
| "lras/base_loss": -0.014017862822220195, | |
| "lras/critic_logp": -1.8771186804407587, | |
| "lras/eos_logp_mean": -10.722892824187875, | |
| "lras/eos_logratio_mean": -0.19163870057091117, | |
| "lras/len_signal_mean": 0.014017862822220195, | |
| "lras/policy_logp": -1.8190668903678329, | |
| "lras/signal_mean": -0.058051801457459584, | |
| "lras/signal_std": 0.9267905389890074, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 1.1781695243341932e-06, | |
| "loss": 0.0096, | |
| "lras/base_loss": 0.01787232184328903, | |
| "lras/critic_logp": -1.8658862981943412, | |
| "lras/eos_logp_mean": -11.325760680035902, | |
| "lras/eos_logratio_mean": -0.3614978581093825, | |
| "lras/len_signal_mean": -0.01787232184328903, | |
| "lras/policy_logp": -1.7924621608585636, | |
| "lras/signal_mean": -0.07342414031628625, | |
| "lras/signal_std": 0.9301170832835711, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0263331138907177, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 1.1328026851343365e-06, | |
| "loss": -0.0022, | |
| "lras/base_loss": -0.03552438716578763, | |
| "lras/critic_logp": -1.9353655989692151, | |
| "lras/eos_logp_mean": -12.03261490613222, | |
| "lras/eos_logratio_mean": -0.21478390959091484, | |
| "lras/len_signal_mean": 0.03552438716578763, | |
| "lras/policy_logp": -1.8756053114619102, | |
| "lras/signal_mean": -0.059760286731985225, | |
| "lras/signal_std": 0.9637264542281627, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0526662277814351, | |
| "grad_norm": 11.5, | |
| "learning_rate": 1.0871557427476583e-06, | |
| "loss": 0.014, | |
| "lras/base_loss": 0.020028115014429203, | |
| "lras/critic_logp": -2.0396122023801113, | |
| "lras/eos_logp_mean": -12.066804607212543, | |
| "lras/eos_logratio_mean": -0.6468546989664901, | |
| "lras/len_signal_mean": -0.020028115014429203, | |
| "lras/policy_logp": -1.990343654454398, | |
| "lras/signal_mean": -0.049268564877216176, | |
| "lras/signal_std": 0.9831893128342927, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0789993416721528, | |
| "grad_norm": 12.375, | |
| "learning_rate": 1.041324974248813e-06, | |
| "loss": 0.0192, | |
| "lras/base_loss": 0.03678358557954198, | |
| "lras/critic_logp": -2.008346917672788, | |
| "lras/eos_logp_mean": -13.69460350126028, | |
| "lras/eos_logratio_mean": -0.22998049389570951, | |
| "lras/len_signal_mean": -0.03678358557954198, | |
| "lras/policy_logp": -1.9487180784452243, | |
| "lras/signal_mean": -0.05962882687038085, | |
| "lras/signal_std": 1.0659304469823838, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1053324555628703, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 9.954070444326292e-07, | |
| "loss": 0.0331, | |
| "lras/base_loss": 0.04220464527315926, | |
| "lras/critic_logp": -2.0565851966669575, | |
| "lras/eos_logp_mean": -13.397886303812266, | |
| "lras/eos_logratio_mean": -0.2665590210468508, | |
| "lras/len_signal_mean": -0.04220464527315926, | |
| "lras/policy_logp": -2.001194008887519, | |
| "lras/signal_mean": -0.05539118589257863, | |
| "lras/signal_std": 1.02963876305148, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.131665569453588, | |
| "grad_norm": 11.875, | |
| "learning_rate": 9.49498801931804e-07, | |
| "loss": 0.0367, | |
| "lras/base_loss": -0.0027663632179610433, | |
| "lras/critic_logp": -2.06930486489408, | |
| "lras/eos_logp_mean": -14.120340882986785, | |
| "lras/eos_logratio_mean": -0.04804678615182638, | |
| "lras/len_signal_mean": 0.0027663632179610433, | |
| "lras/policy_logp": -1.9985212805169184, | |
| "lras/signal_mean": -0.0707835745968341, | |
| "lras/signal_std": 0.9479865215718746, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1579986833443054, | |
| "grad_norm": 14.6875, | |
| "learning_rate": 9.036970749468583e-07, | |
| "loss": 0.0175, | |
| "lras/base_loss": 0.044095185585319996, | |
| "lras/critic_logp": -2.247919617028651, | |
| "lras/eos_logp_mean": -14.632720437645911, | |
| "lras/eos_logratio_mean": -0.2282877266407013, | |
| "lras/len_signal_mean": -0.044095185585319996, | |
| "lras/policy_logp": -2.15015134356286, | |
| "lras/signal_mean": -0.09776828288686631, | |
| "lras/signal_std": 1.0318338803946971, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1843317972350231, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 8.580984670191846e-07, | |
| "loss": 0.0309, | |
| "lras/base_loss": -0.030644303339067848, | |
| "lras/critic_logp": -2.279695383551006, | |
| "lras/eos_logp_mean": -14.45384646076709, | |
| "lras/eos_logratio_mean": -0.08510959930717946, | |
| "lras/len_signal_mean": 0.030644303339067848, | |
| "lras/policy_logp": -2.239907492669972, | |
| "lras/signal_mean": -0.03978788317634428, | |
| "lras/signal_std": 1.1209653861820699, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.2106649111257406, | |
| "grad_norm": 14.125, | |
| "learning_rate": 8.127991532779401e-07, | |
| "loss": 0.0096, | |
| "lras/base_loss": -0.03979152666870504, | |
| "lras/critic_logp": -2.231798601824813, | |
| "lras/eos_logp_mean": -15.657092943787575, | |
| "lras/eos_logratio_mean": 0.09287480898201465, | |
| "lras/len_signal_mean": 0.03979152666870504, | |
| "lras/policy_logp": -2.1883991963309577, | |
| "lras/signal_mean": -0.04339941730130521, | |
| "lras/signal_std": 1.0946278177201747, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2369980250164583, | |
| "grad_norm": 14.375, | |
| "learning_rate": 7.678946775905323e-07, | |
| "loss": -0.003, | |
| "lras/base_loss": 0.08693211713980417, | |
| "lras/critic_logp": -2.4233703207256694, | |
| "lras/eos_logp_mean": -15.827413031458855, | |
| "lras/eos_logratio_mean": -0.17956097405403854, | |
| "lras/len_signal_mean": -0.08693211713980417, | |
| "lras/policy_logp": -2.349267315874182, | |
| "lras/signal_mean": -0.07410297757779284, | |
| "lras/signal_std": 1.134578407369554, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.2633311389071757, | |
| "grad_norm": 15.75, | |
| "learning_rate": 7.234797510445411e-07, | |
| "loss": -0.0003, | |
| "lras/base_loss": 0.025725822610547767, | |
| "lras/critic_logp": -2.434139209148783, | |
| "lras/eos_logp_mean": -15.722262739762664, | |
| "lras/eos_logratio_mean": -0.026692338287830353, | |
| "lras/len_signal_mean": -0.025725822610547767, | |
| "lras/policy_logp": -2.328056712114324, | |
| "lras/signal_mean": -0.10608248874414787, | |
| "lras/signal_std": 1.0992211825214326, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2896642527978934, | |
| "grad_norm": 15.3125, | |
| "learning_rate": 6.79648052186115e-07, | |
| "loss": 0.0369, | |
| "lras/base_loss": 0.03026081353018526, | |
| "lras/critic_logp": -2.110416657504174, | |
| "lras/eos_logp_mean": -15.254110097885132, | |
| "lras/eos_logratio_mean": -0.496195587515831, | |
| "lras/len_signal_mean": -0.03026081353018526, | |
| "lras/policy_logp": -2.0470811323319777, | |
| "lras/signal_mean": -0.06333551601408564, | |
| "lras/signal_std": 0.9533898154273629, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.315997366688611, | |
| "grad_norm": 15.8125, | |
| "learning_rate": 6.364920294361699e-07, | |
| "loss": 0.0575, | |
| "lras/base_loss": 0.06158552574343048, | |
| "lras/critic_logp": -2.224405850326711, | |
| "lras/eos_logp_mean": -15.108362324908375, | |
| "lras/eos_logratio_mean": -0.7382585784420371, | |
| "lras/len_signal_mean": -0.06158552574343048, | |
| "lras/policy_logp": -2.1515553726047054, | |
| "lras/signal_mean": -0.07285048099603358, | |
| "lras/signal_std": 0.9770862588658928, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3423304805793286, | |
| "grad_norm": 16.75, | |
| "learning_rate": 5.941027061011303e-07, | |
| "loss": -0.0174, | |
| "lras/base_loss": 0.0008343593450263143, | |
| "lras/critic_logp": -2.4317118576817474, | |
| "lras/eos_logp_mean": -15.597903436794876, | |
| "lras/eos_logratio_mean": -0.2150385939516127, | |
| "lras/len_signal_mean": -0.0008343593450263143, | |
| "lras/policy_logp": -2.3689128480731942, | |
| "lras/signal_mean": -0.06279901310225874, | |
| "lras/signal_std": 1.0831936337985097, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.368663594470046, | |
| "grad_norm": 15.9375, | |
| "learning_rate": 5.52569488389472e-07, | |
| "loss": 0.043, | |
| "lras/base_loss": 0.058240242666215636, | |
| "lras/critic_logp": -2.6131641746570837, | |
| "lras/eos_logp_mean": -16.068558446317912, | |
| "lras/eos_logratio_mean": -0.2740385436452925, | |
| "lras/len_signal_mean": -0.058240242666215636, | |
| "lras/policy_logp": -2.5551394695444665, | |
| "lras/signal_mean": -0.05802470178027107, | |
| "lras/signal_std": 1.149809922138229, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3949967083607637, | |
| "grad_norm": 17.0, | |
| "learning_rate": 5.11979976839002e-07, | |
| "loss": -0.0169, | |
| "lras/base_loss": -0.0021918164269663976, | |
| "lras/critic_logp": -2.3649934510763186, | |
| "lras/eos_logp_mean": -14.996331504732371, | |
| "lras/eos_logratio_mean": -0.14344595246948302, | |
| "lras/len_signal_mean": 0.0021918164269663976, | |
| "lras/policy_logp": -2.330086388513167, | |
| "lras/signal_mean": -0.0349070573574782, | |
| "lras/signal_std": 1.0876175165176392, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.4213298222514812, | |
| "grad_norm": 15.375, | |
| "learning_rate": 4.724197815525992e-07, | |
| "loss": 0.0197, | |
| "lras/base_loss": -0.0468383116327459, | |
| "lras/critic_logp": -2.53067398066888, | |
| "lras/eos_logp_mean": -16.535732762515543, | |
| "lras/eos_logratio_mean": -0.08761630833614617, | |
| "lras/len_signal_mean": 0.0468383116327459, | |
| "lras/policy_logp": -2.4693613863555597, | |
| "lras/signal_mean": -0.06131259949122595, | |
| "lras/signal_std": 1.1454029347747565, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4476629361421989, | |
| "grad_norm": 17.5, | |
| "learning_rate": 4.3397234163211484e-07, | |
| "loss": 0.0268, | |
| "lras/base_loss": 0.04140681747230701, | |
| "lras/critic_logp": -2.385503157051001, | |
| "lras/eos_logp_mean": -15.886599569767714, | |
| "lras/eos_logratio_mean": -0.3488292686641216, | |
| "lras/len_signal_mean": -0.04140681747230701, | |
| "lras/policy_logp": -2.322975989256461, | |
| "lras/signal_mean": -0.06252715005886085, | |
| "lras/signal_std": 1.037683429988101, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.4739960500329163, | |
| "grad_norm": 18.0, | |
| "learning_rate": 3.9671874919128125e-07, | |
| "loss": 0.0145, | |
| "lras/base_loss": -0.0025144488725345584, | |
| "lras/critic_logp": -2.5965433204335, | |
| "lras/eos_logp_mean": -16.94149838462472, | |
| "lras/eos_logratio_mean": -0.015818399004638196, | |
| "lras/len_signal_mean": 0.0025144488725345584, | |
| "lras/policy_logp": -2.5307474223972894, | |
| "lras/signal_mean": -0.0657959025496038, | |
| "lras/signal_std": 1.0758267390541731, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.500329163923634, | |
| "grad_norm": 15.3125, | |
| "learning_rate": 3.6073757831881244e-07, | |
| "loss": 0.0176, | |
| "lras/base_loss": 0.06200949700141791, | |
| "lras/critic_logp": -2.392406307298439, | |
| "lras/eos_logp_mean": -16.300534684956073, | |
| "lras/eos_logratio_mean": -0.30153655624017117, | |
| "lras/len_signal_mean": -0.06200949700141791, | |
| "lras/policy_logp": -2.3042945046805796, | |
| "lras/signal_mean": -0.08811182339869983, | |
| "lras/signal_std": 1.0404585162177682, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5266622778143515, | |
| "grad_norm": 16.875, | |
| "learning_rate": 3.261047193524439e-07, | |
| "loss": -0.0326, | |
| "lras/base_loss": -0.06760416808247101, | |
| "lras/critic_logp": -2.6577579113558363, | |
| "lras/eos_logp_mean": -17.370185589790346, | |
| "lras/eos_logratio_mean": 0.29878572942689063, | |
| "lras/len_signal_mean": 0.06760416808247101, | |
| "lras/policy_logp": -2.6247902024977887, | |
| "lras/signal_mean": -0.032967715641459255, | |
| "lras/signal_std": 1.2247103542089461, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5529953917050692, | |
| "grad_norm": 15.75, | |
| "learning_rate": 2.9289321881345254e-07, | |
| "loss": 0.0452, | |
| "lras/base_loss": -0.007348847654066048, | |
| "lras/critic_logp": -2.306881194473115, | |
| "lras/eos_logp_mean": -16.12271338701248, | |
| "lras/eos_logratio_mean": -0.11883539147675037, | |
| "lras/len_signal_mean": 0.007348847654066048, | |
| "lras/policy_logp": -2.2347532244045207, | |
| "lras/signal_mean": -0.07212796678582019, | |
| "lras/signal_std": 0.9801751000806689, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.5793285055957869, | |
| "grad_norm": 16.625, | |
| "learning_rate": 2.611731253392636e-07, | |
| "loss": 0.0156, | |
| "lras/base_loss": 0.012532747784280217, | |
| "lras/critic_logp": -2.4853958261376463, | |
| "lras/eos_logp_mean": -16.995042578876017, | |
| "lras/eos_logratio_mean": -0.21502913725562395, | |
| "lras/len_signal_mean": -0.012532747784280217, | |
| "lras/policy_logp": -2.4150401586631465, | |
| "lras/signal_mean": -0.07035568429843155, | |
| "lras/signal_std": 1.0984039671719075, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.6056616194865043, | |
| "grad_norm": 17.875, | |
| "learning_rate": 2.310113419391002e-07, | |
| "loss": 0.0125, | |
| "lras/base_loss": -0.013646831101505085, | |
| "lras/critic_logp": -2.456756533377968, | |
| "lras/eos_logp_mean": -16.79105181824416, | |
| "lras/eos_logratio_mean": 0.12275656980345957, | |
| "lras/len_signal_mean": 0.013646831101505085, | |
| "lras/policy_logp": -2.403605774764165, | |
| "lras/signal_mean": -0.05315075517672918, | |
| "lras/signal_std": 1.1066610222682356, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.6319947333772218, | |
| "grad_norm": 16.25, | |
| "learning_rate": 2.02471484884291e-07, | |
| "loss": 0.0119, | |
| "lras/base_loss": -0.047292615578044206, | |
| "lras/critic_logp": -2.546518089146452, | |
| "lras/eos_logp_mean": -17.33827044069767, | |
| "lras/eos_logratio_mean": 0.05550766550004482, | |
| "lras/len_signal_mean": 0.047292615578044206, | |
| "lras/policy_logp": -2.562370175782289, | |
| "lras/signal_mean": 0.015852110908139077, | |
| "lras/signal_std": 1.1021088421344758, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6583278472679395, | |
| "grad_norm": 15.875, | |
| "learning_rate": 1.756137495308594e-07, | |
| "loss": 0.0072, | |
| "lras/base_loss": -0.003694472834467888, | |
| "lras/critic_logp": -2.437133218209924, | |
| "lras/eos_logp_mean": -16.410118286311626, | |
| "lras/eos_logratio_mean": -0.3761923125013709, | |
| "lras/len_signal_mean": 0.003694472834467888, | |
| "lras/policy_logp": -2.3621272154242097, | |
| "lras/signal_mean": -0.07500599257311638, | |
| "lras/signal_std": 1.0659800309687852, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.6846609611586572, | |
| "grad_norm": 17.0, | |
| "learning_rate": 1.5049478335739883e-07, | |
| "loss": 0.0084, | |
| "lras/base_loss": -0.02764880711620208, | |
| "lras/critic_logp": -2.5047604143513587, | |
| "lras/eos_logp_mean": -16.866475162468852, | |
| "lras/eos_logratio_mean": -0.10876648616977036, | |
| "lras/len_signal_mean": 0.02764880711620208, | |
| "lras/policy_logp": -2.4860704024467766, | |
| "lras/signal_mean": -0.018689985024040467, | |
| "lras/signal_std": 1.1336077319458127, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7109940750493746, | |
| "grad_norm": 18.5, | |
| "learning_rate": 1.2716756648601856e-07, | |
| "loss": 0.017, | |
| "lras/base_loss": 0.031482654724823075, | |
| "lras/critic_logp": -2.4651034242143015, | |
| "lras/eos_logp_mean": -17.17294084727764, | |
| "lras/eos_logratio_mean": -0.16243524220772088, | |
| "lras/len_signal_mean": -0.031482654724823075, | |
| "lras/policy_logp": -2.384486557203078, | |
| "lras/signal_mean": -0.08061687196427163, | |
| "lras/signal_std": 1.076336015574634, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.737327188940092, | |
| "grad_norm": 18.25, | |
| "learning_rate": 1.0568129993836039e-07, | |
| "loss": -0.0159, | |
| "lras/base_loss": 0.027414782461710273, | |
| "lras/critic_logp": -2.5933194540201234, | |
| "lras/eos_logp_mean": -17.062600272521376, | |
| "lras/eos_logratio_mean": -0.24033235143870116, | |
| "lras/len_signal_mean": -0.027414782461710273, | |
| "lras/policy_logp": -2.536566412894524, | |
| "lras/signal_mean": -0.056753033917510444, | |
| "lras/signal_std": 1.1109898013994097, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.7636603028308098, | |
| "grad_norm": 17.75, | |
| "learning_rate": 8.608130186237328e-08, | |
| "loss": 0.0115, | |
| "lras/base_loss": -0.03821857803268358, | |
| "lras/critic_logp": -2.3734066799687032, | |
| "lras/eos_logp_mean": -16.351687154173852, | |
| "lras/eos_logratio_mean": 0.0580406597815454, | |
| "lras/len_signal_mean": 0.03821857803268358, | |
| "lras/policy_logp": -2.314815712059991, | |
| "lras/signal_mean": -0.058590953137746295, | |
| "lras/signal_std": 1.0151456581428646, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.7899934167215275, | |
| "grad_norm": 19.5, | |
| "learning_rate": 6.840891194872111e-08, | |
| "loss": 0.002, | |
| "lras/base_loss": -0.04417614057601895, | |
| "lras/critic_logp": -2.6200193790038826, | |
| "lras/eos_logp_mean": -16.932576566934586, | |
| "lras/eos_logratio_mean": 0.1384289343841374, | |
| "lras/len_signal_mean": 0.04417614057601895, | |
| "lras/policy_logp": -2.570919756222518, | |
| "lras/signal_mean": -0.049099608114942496, | |
| "lras/signal_std": 1.108743331208825, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.816326530612245, | |
| "grad_norm": 16.875, | |
| "learning_rate": 5.270140423842606e-08, | |
| "loss": -0.0075, | |
| "lras/base_loss": -0.04747567040612921, | |
| "lras/critic_logp": -2.5634090900972355, | |
| "lras/eos_logp_mean": -16.961749491095542, | |
| "lras/eos_logratio_mean": 0.2620813576504588, | |
| "lras/len_signal_mean": 0.04747567040612921, | |
| "lras/policy_logp": -2.502021145417211, | |
| "lras/signal_mean": -0.06138794084279452, | |
| "lras/signal_std": 1.0915549699217082, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.8426596445029624, | |
| "grad_norm": 16.5, | |
| "learning_rate": 3.899190850565115e-08, | |
| "loss": 0.0097, | |
| "lras/base_loss": 0.05106182043091394, | |
| "lras/critic_logp": -2.4543609573990937, | |
| "lras/eos_logp_mean": -15.569522052630782, | |
| "lras/eos_logratio_mean": -0.332894785143435, | |
| "lras/len_signal_mean": -0.05106182043091394, | |
| "lras/policy_logp": -2.367433876935615, | |
| "lras/signal_mean": -0.08692705947167448, | |
| "lras/signal_std": 1.041911705583334, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.86899275839368, | |
| "grad_norm": 17.625, | |
| "learning_rate": 2.7309340381436064e-08, | |
| "loss": 0.0118, | |
| "lras/base_loss": -0.03686453927366529, | |
| "lras/critic_logp": -2.454046491647108, | |
| "lras/eos_logp_mean": -17.108138289675118, | |
| "lras/eos_logratio_mean": -0.02865053452551365, | |
| "lras/len_signal_mean": 0.03686453927366529, | |
| "lras/policy_logp": -2.413266429991225, | |
| "lras/signal_mean": -0.04078007064555185, | |
| "lras/signal_std": 1.085499944910407, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.8953258722843978, | |
| "grad_norm": 18.0, | |
| "learning_rate": 1.7678340365772203e-08, | |
| "loss": 0.0382, | |
| "lras/base_loss": 0.0055879024948808365, | |
| "lras/critic_logp": -2.502583679419604, | |
| "lras/eos_logp_mean": -17.123634773492814, | |
| "lras/eos_logratio_mean": -0.4117021427722648, | |
| "lras/len_signal_mean": -0.0055879024948808365, | |
| "lras/policy_logp": -2.427938670433141, | |
| "lras/signal_mean": -0.07464500179640673, | |
| "lras/signal_std": 1.0575703646987678, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.9216589861751152, | |
| "grad_norm": 22.0, | |
| "learning_rate": 1.011922185664471e-08, | |
| "loss": -0.0027, | |
| "lras/base_loss": -0.10265924405830447, | |
| "lras/critic_logp": -2.6089221828032767, | |
| "lras/eos_logp_mean": -16.981125724315643, | |
| "lras/eos_logratio_mean": 0.19451139154843985, | |
| "lras/len_signal_mean": 0.10265924405830447, | |
| "lras/policy_logp": -2.554374224068253, | |
| "lras/signal_mean": -0.054547943455510464, | |
| "lras/signal_std": 1.1660822635516523, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.9479921000658327, | |
| "grad_norm": 18.0, | |
| "learning_rate": 4.647928305662851e-09, | |
| "loss": 0.0317, | |
| "lras/base_loss": 0.007404178951401263, | |
| "lras/critic_logp": -2.518398690186143, | |
| "lras/eos_logp_mean": -16.704739168286324, | |
| "lras/eos_logratio_mean": -0.32062844494357706, | |
| "lras/len_signal_mean": -0.007404178951401263, | |
| "lras/policy_logp": -2.4638789154207856, | |
| "lras/signal_mean": -0.054519774758635786, | |
| "lras/signal_std": 1.034599607810378, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.9743252139565504, | |
| "grad_norm": 17.875, | |
| "learning_rate": 1.2759995906392873e-09, | |
| "loss": 0.0181, | |
| "lras/base_loss": -0.07731338242592756, | |
| "lras/critic_logp": -2.6291819973882866, | |
| "lras/eos_logp_mean": -17.124461753666402, | |
| "lras/eos_logratio_mean": 0.06718716314062476, | |
| "lras/len_signal_mean": 0.07731338242592756, | |
| "lras/policy_logp": -2.6082319075483857, | |
| "lras/signal_mean": -0.02095007741266897, | |
| "lras/signal_std": 1.116696286201477, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 16.375, | |
| "learning_rate": 1.0547676048688892e-11, | |
| "loss": 0.039, | |
| "lras/base_loss": 0.03092502773954318, | |
| "lras/critic_logp": -2.568873341916912, | |
| "lras/eos_logp_mean": -16.902431547450714, | |
| "lras/eos_logratio_mean": -0.3201881614633096, | |
| "lras/len_signal_mean": -0.03092502773954318, | |
| "lras/policy_logp": -2.4733478146684633, | |
| "lras/signal_mean": -0.0955255111616805, | |
| "lras/signal_std": 1.0889918152720501, | |
| "step": 760 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 760, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |