| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9991431019708654, | |
| "eval_steps": 500, | |
| "global_step": 583, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3369.013916015625, | |
| "epoch": 0.001713796058269066, | |
| "grad_norm": 0.0729789063334465, | |
| "kl": 0.0, | |
| "learning_rate": 1.6949152542372882e-08, | |
| "loss": 0.0284, | |
| "reward": 0.3027777820825577, | |
| "reward_std": 0.18115540593862534, | |
| "rewards/accuracy_multibox_reward": 0.3027777820825577, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3575.9583740234375, | |
| "epoch": 0.003427592116538132, | |
| "grad_norm": 0.04535234719514847, | |
| "kl": 0.0, | |
| "learning_rate": 3.3898305084745764e-08, | |
| "loss": 0.0112, | |
| "reward": 0.19722223281860352, | |
| "reward_std": 0.006804139818996191, | |
| "rewards/accuracy_multibox_reward": 0.19722223281860352, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3752.8611450195312, | |
| "epoch": 0.005141388174807198, | |
| "grad_norm": 0.0870731994509697, | |
| "kl": 3.477931022644043e-05, | |
| "learning_rate": 5.0847457627118645e-08, | |
| "loss": 0.0209, | |
| "reward": 0.15833333600312471, | |
| "reward_std": 0.1156703345477581, | |
| "rewards/accuracy_multibox_reward": 0.15833333600312471, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3356.5972290039062, | |
| "epoch": 0.006855184233076264, | |
| "grad_norm": 0.15480031073093414, | |
| "kl": 3.68952751159668e-05, | |
| "learning_rate": 6.779661016949153e-08, | |
| "loss": 0.0328, | |
| "reward": 0.25555556267499924, | |
| "reward_std": 0.16539253294467926, | |
| "rewards/accuracy_multibox_reward": 0.25555556267499924, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3112.0556030273438, | |
| "epoch": 0.00856898029134533, | |
| "grad_norm": 0.08386220782995224, | |
| "kl": 2.740323543548584e-05, | |
| "learning_rate": 8.47457627118644e-08, | |
| "loss": 0.0205, | |
| "reward": 0.22222221922129393, | |
| "reward_std": 0.09246460720896721, | |
| "rewards/accuracy_multibox_reward": 0.22222221922129393, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3598.1666870117188, | |
| "epoch": 0.010282776349614395, | |
| "grad_norm": 0.10517380386590958, | |
| "kl": 4.082918167114258e-05, | |
| "learning_rate": 1.0169491525423729e-07, | |
| "loss": 0.019, | |
| "reward": 0.15555555932223797, | |
| "reward_std": 0.16550763323903084, | |
| "rewards/accuracy_multibox_reward": 0.15555555932223797, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2754.5139770507812, | |
| "epoch": 0.011996572407883462, | |
| "grad_norm": 0.17207880318164825, | |
| "kl": 4.1365623474121094e-05, | |
| "learning_rate": 1.1864406779661017e-07, | |
| "loss": -0.0011, | |
| "reward": 0.1999999973922968, | |
| "reward_std": 0.12247449532151222, | |
| "rewards/accuracy_multibox_reward": 0.1999999973922968, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3535.166748046875, | |
| "epoch": 0.013710368466152529, | |
| "grad_norm": 0.10689281672239304, | |
| "kl": 3.0279159545898438e-05, | |
| "learning_rate": 1.3559322033898305e-07, | |
| "loss": 0.0506, | |
| "reward": 0.4833333417773247, | |
| "reward_std": 0.35711392015218735, | |
| "rewards/accuracy_multibox_reward": 0.4833333417773247, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3265.8611450195312, | |
| "epoch": 0.015424164524421594, | |
| "grad_norm": 0.08282666653394699, | |
| "kl": 3.629922866821289e-05, | |
| "learning_rate": 1.5254237288135593e-07, | |
| "loss": 0.0397, | |
| "reward": 0.3027777932584286, | |
| "reward_std": 0.08879294991493225, | |
| "rewards/accuracy_multibox_reward": 0.3027777932584286, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3640.5416870117188, | |
| "epoch": 0.01713796058269066, | |
| "grad_norm": 0.2386394590139389, | |
| "kl": 4.100799560546875e-05, | |
| "learning_rate": 1.694915254237288e-07, | |
| "loss": 0.0168, | |
| "reward": 0.3000000063329935, | |
| "reward_std": 0.17865028232336044, | |
| "rewards/accuracy_multibox_reward": 0.3000000063329935, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3558.5694274902344, | |
| "epoch": 0.018851756640959727, | |
| "grad_norm": 0.14041270315647125, | |
| "kl": 3.7342309951782227e-05, | |
| "learning_rate": 1.8644067796610168e-07, | |
| "loss": -0.0266, | |
| "reward": 0.26666666753590107, | |
| "reward_std": 0.15563356783241034, | |
| "rewards/accuracy_multibox_reward": 0.26666666753590107, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3895.90283203125, | |
| "epoch": 0.02056555269922879, | |
| "grad_norm": 0.07892497628927231, | |
| "kl": 3.7789344787597656e-05, | |
| "learning_rate": 2.0338983050847458e-07, | |
| "loss": -0.0199, | |
| "reward": 0.08611110597848892, | |
| "reward_std": 0.006804138422012329, | |
| "rewards/accuracy_multibox_reward": 0.08611110597848892, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3267.6527709960938, | |
| "epoch": 0.022279348757497857, | |
| "grad_norm": 0.11310506612062454, | |
| "kl": 3.218650817871094e-05, | |
| "learning_rate": 2.2033898305084743e-07, | |
| "loss": -0.0589, | |
| "reward": 0.2388889081776142, | |
| "reward_std": 0.15712504088878632, | |
| "rewards/accuracy_multibox_reward": 0.2388889081776142, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3487.5555419921875, | |
| "epoch": 0.023993144815766924, | |
| "grad_norm": 0.13372649252414703, | |
| "kl": 3.540515899658203e-05, | |
| "learning_rate": 2.3728813559322033e-07, | |
| "loss": -0.0237, | |
| "reward": 0.34166667610406876, | |
| "reward_std": 0.07638109382241964, | |
| "rewards/accuracy_multibox_reward": 0.34166667610406876, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4123.027770996094, | |
| "epoch": 0.02570694087403599, | |
| "grad_norm": 0.03403710201382637, | |
| "kl": 3.981590270996094e-05, | |
| "learning_rate": 2.542372881355932e-07, | |
| "loss": 0.0072, | |
| "reward": 0.07777778059244156, | |
| "reward_std": 0.038968171924352646, | |
| "rewards/accuracy_multibox_reward": 0.07777778059244156, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2854.277801513672, | |
| "epoch": 0.027420736932305057, | |
| "grad_norm": 0.1481838822364807, | |
| "kl": 2.8446316719055176e-05, | |
| "learning_rate": 2.711864406779661e-07, | |
| "loss": 0.0689, | |
| "reward": 0.4513888955116272, | |
| "reward_std": 0.24150167778134346, | |
| "rewards/accuracy_multibox_reward": 0.4513888955116272, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3492.5556030273438, | |
| "epoch": 0.02913453299057412, | |
| "grad_norm": 0.11502959579229355, | |
| "kl": 3.3020973205566406e-05, | |
| "learning_rate": 2.88135593220339e-07, | |
| "loss": 0.0551, | |
| "reward": 0.2916666716337204, | |
| "reward_std": 0.17893299087882042, | |
| "rewards/accuracy_multibox_reward": 0.2916666716337204, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3877.1944580078125, | |
| "epoch": 0.030848329048843187, | |
| "grad_norm": 0.11807677894830704, | |
| "kl": 3.978610038757324e-05, | |
| "learning_rate": 3.0508474576271186e-07, | |
| "loss": 0.0539, | |
| "reward": 0.17222221195697784, | |
| "reward_std": 0.15248354524374008, | |
| "rewards/accuracy_multibox_reward": 0.17222221195697784, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3784.4584350585938, | |
| "epoch": 0.032562125107112254, | |
| "grad_norm": 0.061447624117136, | |
| "kl": 3.1322240829467773e-05, | |
| "learning_rate": 3.220338983050847e-07, | |
| "loss": 0.0116, | |
| "reward": 0.10833333805203438, | |
| "reward_std": 0.09728333353996277, | |
| "rewards/accuracy_multibox_reward": 0.10833333805203438, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4219.7362060546875, | |
| "epoch": 0.03427592116538132, | |
| "grad_norm": 0.07380050420761108, | |
| "kl": 3.9458274841308594e-05, | |
| "learning_rate": 3.389830508474576e-07, | |
| "loss": 0.028, | |
| "reward": 0.18888889625668526, | |
| "reward_std": 0.2043508067727089, | |
| "rewards/accuracy_multibox_reward": 0.18888889625668526, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3547.1944580078125, | |
| "epoch": 0.03598971722365039, | |
| "grad_norm": 0.1372532844543457, | |
| "kl": 4.184246063232422e-05, | |
| "learning_rate": 3.559322033898305e-07, | |
| "loss": 0.0396, | |
| "reward": 0.21944444626569748, | |
| "reward_std": 0.2384839840233326, | |
| "rewards/accuracy_multibox_reward": 0.21944444626569748, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3996.02783203125, | |
| "epoch": 0.037703513281919454, | |
| "grad_norm": 0.09319818019866943, | |
| "kl": 3.0308961868286133e-05, | |
| "learning_rate": 3.7288135593220336e-07, | |
| "loss": 0.0336, | |
| "reward": 0.09722222574055195, | |
| "reward_std": 0.14455072581768036, | |
| "rewards/accuracy_multibox_reward": 0.09722222574055195, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3764.5555419921875, | |
| "epoch": 0.03941730934018852, | |
| "grad_norm": 0.025350527837872505, | |
| "kl": 3.3408403396606445e-05, | |
| "learning_rate": 3.898305084745763e-07, | |
| "loss": 0.0014, | |
| "reward": 0.04722222313284874, | |
| "reward_std": 0.052086107432842255, | |
| "rewards/accuracy_multibox_reward": 0.04722222313284874, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3345.5556030273438, | |
| "epoch": 0.04113110539845758, | |
| "grad_norm": 0.1198243498802185, | |
| "kl": 3.853440284729004e-05, | |
| "learning_rate": 4.0677966101694916e-07, | |
| "loss": 0.0104, | |
| "reward": 0.3027777820825577, | |
| "reward_std": 0.0634455606341362, | |
| "rewards/accuracy_multibox_reward": 0.3027777820825577, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3292.611083984375, | |
| "epoch": 0.04284490145672665, | |
| "grad_norm": 0.11393000930547714, | |
| "kl": 3.6656856536865234e-05, | |
| "learning_rate": 4.23728813559322e-07, | |
| "loss": 0.0363, | |
| "reward": 0.12500000093132257, | |
| "reward_std": 0.12462865561246872, | |
| "rewards/accuracy_multibox_reward": 0.12500000093132257, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3794.4861450195312, | |
| "epoch": 0.044558697514995714, | |
| "grad_norm": 0.07714420557022095, | |
| "kl": 3.8564205169677734e-05, | |
| "learning_rate": 4.4067796610169486e-07, | |
| "loss": 0.0274, | |
| "reward": 0.180555559694767, | |
| "reward_std": 0.12167280167341232, | |
| "rewards/accuracy_multibox_reward": 0.180555559694767, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3858.5694580078125, | |
| "epoch": 0.04627249357326478, | |
| "grad_norm": 0.10176072269678116, | |
| "kl": 3.3855438232421875e-05, | |
| "learning_rate": 4.576271186440678e-07, | |
| "loss": 0.0345, | |
| "reward": 0.25555555149912834, | |
| "reward_std": 0.27371224481612444, | |
| "rewards/accuracy_multibox_reward": 0.25555555149912834, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2773.138916015625, | |
| "epoch": 0.04798628963153385, | |
| "grad_norm": 0.1292557567358017, | |
| "kl": 3.6776065826416016e-05, | |
| "learning_rate": 4.7457627118644066e-07, | |
| "loss": 0.0215, | |
| "reward": 0.28333333134651184, | |
| "reward_std": 0.12207041308283806, | |
| "rewards/accuracy_multibox_reward": 0.28333333134651184, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3680.1250610351562, | |
| "epoch": 0.049700085689802914, | |
| "grad_norm": 0.11318140476942062, | |
| "kl": 4.595518112182617e-05, | |
| "learning_rate": 4.915254237288136e-07, | |
| "loss": 0.0587, | |
| "reward": 0.2666666619479656, | |
| "reward_std": 0.2565731294453144, | |
| "rewards/accuracy_multibox_reward": 0.2666666619479656, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3128.0695190429688, | |
| "epoch": 0.05141388174807198, | |
| "grad_norm": 0.1526479423046112, | |
| "kl": 2.8625130653381348e-05, | |
| "learning_rate": 5.084745762711864e-07, | |
| "loss": -0.0127, | |
| "reward": 0.291666679084301, | |
| "reward_std": 0.26189908012747765, | |
| "rewards/accuracy_multibox_reward": 0.291666679084301, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3383.8055419921875, | |
| "epoch": 0.05312767780634105, | |
| "grad_norm": 0.15038132667541504, | |
| "kl": 3.081560134887695e-05, | |
| "learning_rate": 5.254237288135593e-07, | |
| "loss": -0.029, | |
| "reward": 0.2527777636423707, | |
| "reward_std": 0.1411241190508008, | |
| "rewards/accuracy_multibox_reward": 0.2527777636423707, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3549.22216796875, | |
| "epoch": 0.054841473864610114, | |
| "grad_norm": 0.15169385075569153, | |
| "kl": 2.8342008590698242e-05, | |
| "learning_rate": 5.423728813559322e-07, | |
| "loss": 0.0735, | |
| "reward": 0.28611112385988235, | |
| "reward_std": 0.3107793517410755, | |
| "rewards/accuracy_multibox_reward": 0.28611112385988235, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2948.8333435058594, | |
| "epoch": 0.056555269922879174, | |
| "grad_norm": 0.1284538060426712, | |
| "kl": 3.7476420402526855e-05, | |
| "learning_rate": 5.59322033898305e-07, | |
| "loss": 0.0196, | |
| "reward": 0.15555556491017342, | |
| "reward_std": 0.19845105707645416, | |
| "rewards/accuracy_multibox_reward": 0.15555556491017342, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3375.8750610351562, | |
| "epoch": 0.05826906598114824, | |
| "grad_norm": 0.11808554828166962, | |
| "kl": 1.1228024959564209e-05, | |
| "learning_rate": 5.76271186440678e-07, | |
| "loss": 0.0471, | |
| "reward": 0.2500000009313226, | |
| "reward_std": 0.13328944146633148, | |
| "rewards/accuracy_multibox_reward": 0.2500000009313226, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4247.138916015625, | |
| "epoch": 0.05998286203941731, | |
| "grad_norm": 0.056442879140377045, | |
| "kl": 3.167986869812012e-05, | |
| "learning_rate": 5.932203389830508e-07, | |
| "loss": 0.0015, | |
| "reward": 0.18333333916962147, | |
| "reward_std": 0.13392800837755203, | |
| "rewards/accuracy_multibox_reward": 0.18333333916962147, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3485.3889770507812, | |
| "epoch": 0.061696658097686374, | |
| "grad_norm": 0.1033148318529129, | |
| "kl": 5.3822994232177734e-05, | |
| "learning_rate": 6.101694915254237e-07, | |
| "loss": -0.0115, | |
| "reward": 0.013888888992369175, | |
| "reward_std": 0.03402068838477135, | |
| "rewards/accuracy_multibox_reward": 0.013888888992369175, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3763.7222900390625, | |
| "epoch": 0.06341045415595545, | |
| "grad_norm": 0.04204072803258896, | |
| "kl": 4.0531158447265625e-05, | |
| "learning_rate": 6.271186440677966e-07, | |
| "loss": 0.0091, | |
| "reward": 0.13333333656191826, | |
| "reward_std": 0.09525793045759201, | |
| "rewards/accuracy_multibox_reward": 0.13333333656191826, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4059.8472290039062, | |
| "epoch": 0.06512425021422451, | |
| "grad_norm": 0.06253790110349655, | |
| "kl": 2.1141022443771362e-05, | |
| "learning_rate": 6.440677966101694e-07, | |
| "loss": 0.0261, | |
| "reward": 0.1722222277894616, | |
| "reward_std": 0.18810024484992027, | |
| "rewards/accuracy_multibox_reward": 0.1722222277894616, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3860.0834350585938, | |
| "epoch": 0.06683804627249357, | |
| "grad_norm": 0.0565963052213192, | |
| "kl": 5.817413330078125e-05, | |
| "learning_rate": 6.610169491525423e-07, | |
| "loss": 0.0099, | |
| "reward": 0.19444445334374905, | |
| "reward_std": 0.1307150013744831, | |
| "rewards/accuracy_multibox_reward": 0.19444445334374905, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3351.6527709960938, | |
| "epoch": 0.06855184233076264, | |
| "grad_norm": 0.16113883256912231, | |
| "kl": 3.742426633834839e-05, | |
| "learning_rate": 6.779661016949152e-07, | |
| "loss": 0.0547, | |
| "reward": 0.28333333879709244, | |
| "reward_std": 0.08298853784799576, | |
| "rewards/accuracy_multibox_reward": 0.28333333879709244, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3132.7222900390625, | |
| "epoch": 0.0702656383890317, | |
| "grad_norm": 0.10562114417552948, | |
| "kl": 5.888938903808594e-05, | |
| "learning_rate": 6.949152542372881e-07, | |
| "loss": 0.0319, | |
| "reward": 0.38055556267499924, | |
| "reward_std": 0.2572016939520836, | |
| "rewards/accuracy_multibox_reward": 0.38055556267499924, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3273.5972900390625, | |
| "epoch": 0.07197943444730077, | |
| "grad_norm": 0.07645858079195023, | |
| "kl": 7.233023643493652e-05, | |
| "learning_rate": 7.11864406779661e-07, | |
| "loss": 0.0232, | |
| "reward": 0.2916666716337204, | |
| "reward_std": 0.1642507091164589, | |
| "rewards/accuracy_multibox_reward": 0.2916666716337204, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3467.3472290039062, | |
| "epoch": 0.07369323050556983, | |
| "grad_norm": 0.08606153726577759, | |
| "kl": 8.346140384674072e-05, | |
| "learning_rate": 7.288135593220338e-07, | |
| "loss": -0.0136, | |
| "reward": 0.2888888940215111, | |
| "reward_std": 0.1884133517742157, | |
| "rewards/accuracy_multibox_reward": 0.2888888940215111, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3642.0416870117188, | |
| "epoch": 0.07540702656383891, | |
| "grad_norm": 0.050211239606142044, | |
| "kl": 9.173154830932617e-05, | |
| "learning_rate": 7.457627118644067e-07, | |
| "loss": 0.0452, | |
| "reward": 0.06666666828095913, | |
| "reward_std": 0.09559708833694458, | |
| "rewards/accuracy_multibox_reward": 0.06666666828095913, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3848.5833129882812, | |
| "epoch": 0.07712082262210797, | |
| "grad_norm": 0.14496099948883057, | |
| "kl": 6.848946213722229e-05, | |
| "learning_rate": 7.627118644067796e-07, | |
| "loss": 0.0188, | |
| "reward": 0.20833333861082792, | |
| "reward_std": 0.22847873345017433, | |
| "rewards/accuracy_multibox_reward": 0.20833333861082792, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3372.5277709960938, | |
| "epoch": 0.07883461868037704, | |
| "grad_norm": 0.07342476397752762, | |
| "kl": 0.0002734661102294922, | |
| "learning_rate": 7.796610169491526e-07, | |
| "loss": -0.0032, | |
| "reward": 0.13888889085501432, | |
| "reward_std": 0.09025628957897425, | |
| "rewards/accuracy_multibox_reward": 0.13888889085501432, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3698.77783203125, | |
| "epoch": 0.0805484147386461, | |
| "grad_norm": 0.0904027596116066, | |
| "kl": 0.00021238625049591064, | |
| "learning_rate": 7.966101694915253e-07, | |
| "loss": -0.0142, | |
| "reward": 0.24444444943219423, | |
| "reward_std": 0.22922567278146744, | |
| "rewards/accuracy_multibox_reward": 0.24444444943219423, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3441.7083740234375, | |
| "epoch": 0.08226221079691516, | |
| "grad_norm": 0.04250216484069824, | |
| "kl": 0.00018003582954406738, | |
| "learning_rate": 8.135593220338983e-07, | |
| "loss": -0.0042, | |
| "reward": 0.28611111640930176, | |
| "reward_std": 0.006804134231060743, | |
| "rewards/accuracy_multibox_reward": 0.28611111640930176, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3176.0972900390625, | |
| "epoch": 0.08397600685518423, | |
| "grad_norm": 0.07988190650939941, | |
| "kl": 0.0003917217254638672, | |
| "learning_rate": 8.305084745762712e-07, | |
| "loss": 0.0069, | |
| "reward": 0.24600409343838692, | |
| "reward_std": 0.12324022501707077, | |
| "rewards/accuracy_multibox_reward": 0.24600409343838692, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3351.9166870117188, | |
| "epoch": 0.0856898029134533, | |
| "grad_norm": 0.10819848626852036, | |
| "kl": 0.0002573728561401367, | |
| "learning_rate": 8.47457627118644e-07, | |
| "loss": 0.0018, | |
| "reward": 0.1444444479420781, | |
| "reward_std": 0.12693162634968758, | |
| "rewards/accuracy_multibox_reward": 0.1444444479420781, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3545.0972900390625, | |
| "epoch": 0.08740359897172237, | |
| "grad_norm": 0.10281869769096375, | |
| "kl": 0.0002764463424682617, | |
| "learning_rate": 8.64406779661017e-07, | |
| "loss": 0.0657, | |
| "reward": 0.2361111156642437, | |
| "reward_std": 0.18883788585662842, | |
| "rewards/accuracy_multibox_reward": 0.2361111156642437, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3776.1804809570312, | |
| "epoch": 0.08911739502999143, | |
| "grad_norm": 0.10941097885370255, | |
| "kl": 0.00015738606452941895, | |
| "learning_rate": 8.813559322033897e-07, | |
| "loss": 0.0438, | |
| "reward": 0.23611111752688885, | |
| "reward_std": 0.23143858462572098, | |
| "rewards/accuracy_multibox_reward": 0.23611111752688885, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3732.0, | |
| "epoch": 0.0908311910882605, | |
| "grad_norm": 0.07426908612251282, | |
| "kl": 0.0001125335693359375, | |
| "learning_rate": 8.983050847457627e-07, | |
| "loss": 0.0082, | |
| "reward": 0.13055555894970894, | |
| "reward_std": 0.12927863001823425, | |
| "rewards/accuracy_multibox_reward": 0.13055555894970894, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4182.194519042969, | |
| "epoch": 0.09254498714652956, | |
| "grad_norm": 0.0753699541091919, | |
| "kl": 0.00020003318786621094, | |
| "learning_rate": 9.152542372881356e-07, | |
| "loss": 0.0146, | |
| "reward": 0.15555556677281857, | |
| "reward_std": 0.1384073393419385, | |
| "rewards/accuracy_multibox_reward": 0.15555556677281857, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4253.486083984375, | |
| "epoch": 0.09425878320479864, | |
| "grad_norm": 0.06538266688585281, | |
| "kl": 2.937018871307373e-05, | |
| "learning_rate": 9.322033898305083e-07, | |
| "loss": -0.0029, | |
| "reward": 0.07777778152376413, | |
| "reward_std": 0.08392801135778427, | |
| "rewards/accuracy_multibox_reward": 0.07777778152376413, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3653.4722290039062, | |
| "epoch": 0.0959725792630677, | |
| "grad_norm": 0.12083703279495239, | |
| "kl": 0.00012385845184326172, | |
| "learning_rate": 9.491525423728813e-07, | |
| "loss": 0.0753, | |
| "reward": 0.2083333358168602, | |
| "reward_std": 0.09066211432218552, | |
| "rewards/accuracy_multibox_reward": 0.2083333358168602, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3767.4306030273438, | |
| "epoch": 0.09768637532133675, | |
| "grad_norm": 0.09833119064569473, | |
| "kl": 0.00011223554611206055, | |
| "learning_rate": 9.661016949152542e-07, | |
| "loss": 0.0183, | |
| "reward": 0.16388890519738197, | |
| "reward_std": 0.17456060647964478, | |
| "rewards/accuracy_multibox_reward": 0.16388890519738197, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3878.541748046875, | |
| "epoch": 0.09940017137960583, | |
| "grad_norm": 0.07018587738275528, | |
| "kl": 0.0003782510757446289, | |
| "learning_rate": 9.830508474576272e-07, | |
| "loss": 0.0283, | |
| "reward": 0.12361111491918564, | |
| "reward_std": 0.18504582345485687, | |
| "rewards/accuracy_multibox_reward": 0.12361111491918564, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3661.5833740234375, | |
| "epoch": 0.10111396743787489, | |
| "grad_norm": 0.1790696680545807, | |
| "kl": 0.0031952857971191406, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0431, | |
| "reward": 0.202777786180377, | |
| "reward_std": 0.17676891759037971, | |
| "rewards/accuracy_multibox_reward": 0.202777786180377, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3969.0556640625, | |
| "epoch": 0.10282776349614396, | |
| "grad_norm": 0.0050058672204613686, | |
| "kl": 0.00023481249809265137, | |
| "learning_rate": 9.999919124237425e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3888.7500610351562, | |
| "epoch": 0.10454155955441302, | |
| "grad_norm": 0.04888669028878212, | |
| "kl": 4.138052463531494e-05, | |
| "learning_rate": 9.999676499856762e-07, | |
| "loss": 0.0118, | |
| "reward": 0.030555556528270245, | |
| "reward_std": 0.07484551891684532, | |
| "rewards/accuracy_multibox_reward": 0.030555556528270245, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3314.25, | |
| "epoch": 0.1062553556126821, | |
| "grad_norm": 0.07405786216259003, | |
| "kl": 0.00037682056427001953, | |
| "learning_rate": 9.999272135579094e-07, | |
| "loss": -0.013, | |
| "reward": 0.1250000037252903, | |
| "reward_std": 0.13671264052391052, | |
| "rewards/accuracy_multibox_reward": 0.1250000037252903, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4179.736145019531, | |
| "epoch": 0.10796915167095116, | |
| "grad_norm": 0.08928368240594864, | |
| "kl": 7.692351937294006e-05, | |
| "learning_rate": 9.998706045939205e-07, | |
| "loss": 0.038, | |
| "reward": 0.28611113503575325, | |
| "reward_std": 0.3344815857708454, | |
| "rewards/accuracy_multibox_reward": 0.28611113503575325, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3543.1943359375, | |
| "epoch": 0.10968294772922023, | |
| "grad_norm": 0.004529690835624933, | |
| "kl": 0.0002321302890777588, | |
| "learning_rate": 9.997978251285065e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3841.8472290039062, | |
| "epoch": 0.11139674378748929, | |
| "grad_norm": 0.0741211548447609, | |
| "kl": 0.0004911422729492188, | |
| "learning_rate": 9.997088777777095e-07, | |
| "loss": -0.0287, | |
| "reward": 0.20555556006729603, | |
| "reward_std": 0.05163978133350611, | |
| "rewards/accuracy_multibox_reward": 0.20555556006729603, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3522.8472290039062, | |
| "epoch": 0.11311053984575835, | |
| "grad_norm": 0.08505967259407043, | |
| "kl": 7.012486457824707e-05, | |
| "learning_rate": 9.99603765738723e-07, | |
| "loss": -0.0033, | |
| "reward": 0.1805555671453476, | |
| "reward_std": 0.047628968954086304, | |
| "rewards/accuracy_multibox_reward": 0.1805555671453476, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3747.5000610351562, | |
| "epoch": 0.11482433590402742, | |
| "grad_norm": 0.12990954518318176, | |
| "kl": 0.0002924986183643341, | |
| "learning_rate": 9.994824927897762e-07, | |
| "loss": 0.081, | |
| "reward": 0.30000000540167093, | |
| "reward_std": 0.2106725387275219, | |
| "rewards/accuracy_multibox_reward": 0.30000000540167093, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3701.9999389648438, | |
| "epoch": 0.11653813196229648, | |
| "grad_norm": 0.08377218246459961, | |
| "kl": 0.00019532442092895508, | |
| "learning_rate": 9.993450632899989e-07, | |
| "loss": 0.012, | |
| "reward": 0.10000000335276127, | |
| "reward_std": 0.14658837020397186, | |
| "rewards/accuracy_multibox_reward": 0.10000000335276127, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3993.6111450195312, | |
| "epoch": 0.11825192802056556, | |
| "grad_norm": 0.11144928634166718, | |
| "kl": 7.551908493041992e-05, | |
| "learning_rate": 9.99191482179265e-07, | |
| "loss": 0.0745, | |
| "reward": 0.30000001192092896, | |
| "reward_std": 0.14266664674505591, | |
| "rewards/accuracy_multibox_reward": 0.30000001192092896, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3512.90283203125, | |
| "epoch": 0.11996572407883462, | |
| "grad_norm": 0.07971691340208054, | |
| "kl": 0.0001417398452758789, | |
| "learning_rate": 9.99021754978014e-07, | |
| "loss": -0.0495, | |
| "reward": 0.35555558651685715, | |
| "reward_std": 0.06531809410080314, | |
| "rewards/accuracy_multibox_reward": 0.35555558651685715, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3704.638916015625, | |
| "epoch": 0.12167952013710369, | |
| "grad_norm": 0.09618542343378067, | |
| "kl": 0.0011048316955566406, | |
| "learning_rate": 9.988358877870534e-07, | |
| "loss": 0.0251, | |
| "reward": 0.1944444365799427, | |
| "reward_std": 0.1830725520849228, | |
| "rewards/accuracy_multibox_reward": 0.1944444365799427, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3004.2639770507812, | |
| "epoch": 0.12339331619537275, | |
| "grad_norm": 0.10384859889745712, | |
| "kl": 0.001033782958984375, | |
| "learning_rate": 9.986338872873393e-07, | |
| "loss": -0.0148, | |
| "reward": 0.31388891115784645, | |
| "reward_std": 0.09966091066598892, | |
| "rewards/accuracy_multibox_reward": 0.31388891115784645, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3841.6945190429688, | |
| "epoch": 0.12510711225364182, | |
| "grad_norm": 0.08576276898384094, | |
| "kl": 0.0003203153610229492, | |
| "learning_rate": 9.984157607397357e-07, | |
| "loss": -0.019, | |
| "reward": 0.25098040141165257, | |
| "reward_std": 0.16210347414016724, | |
| "rewards/accuracy_multibox_reward": 0.25098040141165257, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3088.4306030273438, | |
| "epoch": 0.1268209083119109, | |
| "grad_norm": 0.10984783619642258, | |
| "kl": 0.00015528500080108643, | |
| "learning_rate": 9.981815159847542e-07, | |
| "loss": 0.0622, | |
| "reward": 0.3500000163912773, | |
| "reward_std": 0.16940699890255928, | |
| "rewards/accuracy_multibox_reward": 0.3500000163912773, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3770.5972900390625, | |
| "epoch": 0.12853470437017994, | |
| "grad_norm": 0.05956293269991875, | |
| "kl": 0.000252649188041687, | |
| "learning_rate": 9.979311614422718e-07, | |
| "loss": 0.0233, | |
| "reward": 0.11666666809469461, | |
| "reward_std": 0.04656690079718828, | |
| "rewards/accuracy_multibox_reward": 0.11666666809469461, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2590.3194885253906, | |
| "epoch": 0.13024850042844902, | |
| "grad_norm": 0.11681970208883286, | |
| "kl": 0.00028908252716064453, | |
| "learning_rate": 9.976647061112284e-07, | |
| "loss": -0.0261, | |
| "reward": 0.31111111864447594, | |
| "reward_std": 0.13108112383633852, | |
| "rewards/accuracy_multibox_reward": 0.31111111864447594, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3896.77783203125, | |
| "epoch": 0.1319622964867181, | |
| "grad_norm": 0.04527441784739494, | |
| "kl": 0.00019824504852294922, | |
| "learning_rate": 9.973821595693026e-07, | |
| "loss": -0.0219, | |
| "reward": 0.12777778133749962, | |
| "reward_std": 0.06024641543626785, | |
| "rewards/accuracy_multibox_reward": 0.12777778133749962, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4062.3473510742188, | |
| "epoch": 0.13367609254498714, | |
| "grad_norm": 0.08603829145431519, | |
| "kl": 0.00020599365234375, | |
| "learning_rate": 9.970835319725696e-07, | |
| "loss": 0.0136, | |
| "reward": 0.11944445036351681, | |
| "reward_std": 0.12648530676960945, | |
| "rewards/accuracy_multibox_reward": 0.11944445036351681, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2974.8333740234375, | |
| "epoch": 0.1353898886032562, | |
| "grad_norm": 0.11471610516309738, | |
| "kl": 0.000255584716796875, | |
| "learning_rate": 9.967688340551327e-07, | |
| "loss": 0.0203, | |
| "reward": 0.1583333369344473, | |
| "reward_std": 0.15370185673236847, | |
| "rewards/accuracy_multibox_reward": 0.1583333369344473, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3748.3472900390625, | |
| "epoch": 0.13710368466152528, | |
| "grad_norm": 0.0530245341360569, | |
| "kl": 0.00018739700317382812, | |
| "learning_rate": 9.96438077128741e-07, | |
| "loss": 0.0181, | |
| "reward": 0.08333333767950535, | |
| "reward_std": 0.0924646146595478, | |
| "rewards/accuracy_multibox_reward": 0.08333333767950535, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3942.2222290039062, | |
| "epoch": 0.13881748071979436, | |
| "grad_norm": 0.033869773149490356, | |
| "kl": 0.00017734616994857788, | |
| "learning_rate": 9.960912730823802e-07, | |
| "loss": 0.003, | |
| "reward": 0.01666666753590107, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.01666666753590107, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3887.1527709960938, | |
| "epoch": 0.1405312767780634, | |
| "grad_norm": 0.051016539335250854, | |
| "kl": 0.0006910562515258789, | |
| "learning_rate": 9.95728434381847e-07, | |
| "loss": 0.0215, | |
| "reward": 0.10000000521540642, | |
| "reward_std": 0.13328944519162178, | |
| "rewards/accuracy_multibox_reward": 0.10000000521540642, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3243.7361450195312, | |
| "epoch": 0.14224507283633248, | |
| "grad_norm": 0.1145719587802887, | |
| "kl": 4.1833147406578064e-05, | |
| "learning_rate": 9.953495740692994e-07, | |
| "loss": 0.0295, | |
| "reward": 0.44444446358829737, | |
| "reward_std": 0.1346283107995987, | |
| "rewards/accuracy_multibox_reward": 0.44444446358829737, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3524.7777709960938, | |
| "epoch": 0.14395886889460155, | |
| "grad_norm": 0.06519696861505508, | |
| "kl": 0.00024712085723876953, | |
| "learning_rate": 9.949547057627897e-07, | |
| "loss": 0.0036, | |
| "reward": 0.23055556789040565, | |
| "reward_std": 0.10240122675895691, | |
| "rewards/accuracy_multibox_reward": 0.23055556789040565, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3696.7083740234375, | |
| "epoch": 0.1456726649528706, | |
| "grad_norm": 0.08743790537118912, | |
| "kl": 0.00013685226440429688, | |
| "learning_rate": 9.945438436557734e-07, | |
| "loss": -0.0275, | |
| "reward": 0.16111112013459206, | |
| "reward_std": 0.09327251464128494, | |
| "rewards/accuracy_multibox_reward": 0.16111112013459206, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3471.9584350585938, | |
| "epoch": 0.14738646101113967, | |
| "grad_norm": 0.12433793395757675, | |
| "kl": 0.00027489662170410156, | |
| "learning_rate": 9.941170025166e-07, | |
| "loss": 0.0183, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.08606629818677902, | |
| "rewards/accuracy_multibox_reward": 0.0833333358168602, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4222.527770996094, | |
| "epoch": 0.14910025706940874, | |
| "grad_norm": 0.047481805086135864, | |
| "kl": 0.00011542439460754395, | |
| "learning_rate": 9.93674197687982e-07, | |
| "loss": 0.0174, | |
| "reward": 0.05000000074505806, | |
| "reward_std": 0.09246460348367691, | |
| "rewards/accuracy_multibox_reward": 0.05000000074505806, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4146.972229003906, | |
| "epoch": 0.15081405312767782, | |
| "grad_norm": 0.05658392608165741, | |
| "kl": 0.00022298097610473633, | |
| "learning_rate": 9.932154450864423e-07, | |
| "loss": -0.0065, | |
| "reward": 0.2069444451481104, | |
| "reward_std": 0.06892023421823978, | |
| "rewards/accuracy_multibox_reward": 0.2069444451481104, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3308.5833435058594, | |
| "epoch": 0.15252784918594686, | |
| "grad_norm": 0.06159645691514015, | |
| "kl": 0.00022017955780029297, | |
| "learning_rate": 9.927407612017446e-07, | |
| "loss": -0.0002, | |
| "reward": 0.03333333507180214, | |
| "reward_std": 0.08164966106414795, | |
| "rewards/accuracy_multibox_reward": 0.03333333507180214, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3472.1806030273438, | |
| "epoch": 0.15424164524421594, | |
| "grad_norm": 0.14356717467308044, | |
| "kl": 0.00024110078811645508, | |
| "learning_rate": 9.92250163096298e-07, | |
| "loss": -0.0522, | |
| "reward": 0.2638888955116272, | |
| "reward_std": 0.19115756452083588, | |
| "rewards/accuracy_multibox_reward": 0.2638888955116272, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3022.2083740234375, | |
| "epoch": 0.155955441302485, | |
| "grad_norm": 0.13912639021873474, | |
| "kl": 0.0007758140563964844, | |
| "learning_rate": 9.91743668404545e-07, | |
| "loss": 0.0096, | |
| "reward": 0.08611111249774694, | |
| "reward_std": 0.16091181710362434, | |
| "rewards/accuracy_multibox_reward": 0.08611111249774694, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3657.7916870117188, | |
| "epoch": 0.15766923736075408, | |
| "grad_norm": 0.1251571923494339, | |
| "kl": 0.00020956993103027344, | |
| "learning_rate": 9.912212953323279e-07, | |
| "loss": 0.0008, | |
| "reward": 0.1972222402691841, | |
| "reward_std": 0.10793919768184423, | |
| "rewards/accuracy_multibox_reward": 0.1972222402691841, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3448.3055419921875, | |
| "epoch": 0.15938303341902313, | |
| "grad_norm": 0.09677375853061676, | |
| "kl": 0.00038814544677734375, | |
| "learning_rate": 9.906830626562331e-07, | |
| "loss": -0.0189, | |
| "reward": 0.21388888731598854, | |
| "reward_std": 0.1911335177719593, | |
| "rewards/accuracy_multibox_reward": 0.21388888731598854, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3558.236083984375, | |
| "epoch": 0.1610968294772922, | |
| "grad_norm": 0.07670744508504868, | |
| "kl": 0.00030541419982910156, | |
| "learning_rate": 9.90128989722918e-07, | |
| "loss": -0.0108, | |
| "reward": 0.2638889029622078, | |
| "reward_std": 0.06705055013298988, | |
| "rewards/accuracy_multibox_reward": 0.2638889029622078, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3916.7084350585938, | |
| "epoch": 0.16281062553556128, | |
| "grad_norm": 0.030671466141939163, | |
| "kl": 0.00013083219528198242, | |
| "learning_rate": 9.89559096448414e-07, | |
| "loss": 0.0106, | |
| "reward": 0.16944444924592972, | |
| "reward_std": 0.04270917922258377, | |
| "rewards/accuracy_multibox_reward": 0.16944444924592972, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3688.3889770507812, | |
| "epoch": 0.16452442159383032, | |
| "grad_norm": 0.06178181990981102, | |
| "kl": 0.00023984909057617188, | |
| "learning_rate": 9.889734033174114e-07, | |
| "loss": -0.0043, | |
| "reward": 0.15000000223517418, | |
| "reward_std": 0.09525793045759201, | |
| "rewards/accuracy_multibox_reward": 0.15000000223517418, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4247.6944580078125, | |
| "epoch": 0.1662382176520994, | |
| "grad_norm": 0.08675690740346909, | |
| "kl": 0.0004794597625732422, | |
| "learning_rate": 9.883719313825227e-07, | |
| "loss": 0.0155, | |
| "reward": 0.1756944451481104, | |
| "reward_std": 0.23645534738898277, | |
| "rewards/accuracy_multibox_reward": 0.1756944451481104, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3608.95849609375, | |
| "epoch": 0.16795201371036847, | |
| "grad_norm": 0.21770375967025757, | |
| "kl": 0.0022001266479492188, | |
| "learning_rate": 9.877547022635267e-07, | |
| "loss": 0.0198, | |
| "reward": 0.30000001564621925, | |
| "reward_std": 0.18772254511713982, | |
| "rewards/accuracy_multibox_reward": 0.30000001564621925, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4113.236145019531, | |
| "epoch": 0.16966580976863754, | |
| "grad_norm": 0.07856108248233795, | |
| "kl": 0.0003705024719238281, | |
| "learning_rate": 9.871217381465902e-07, | |
| "loss": 0.0191, | |
| "reward": 0.14444445073604584, | |
| "reward_std": 0.05960626155138016, | |
| "rewards/accuracy_multibox_reward": 0.14444445073604584, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3443.6389770507812, | |
| "epoch": 0.1713796058269066, | |
| "grad_norm": 0.08727405965328217, | |
| "kl": 0.00025773048400878906, | |
| "learning_rate": 9.864730617834712e-07, | |
| "loss": 0.013, | |
| "reward": 0.1694444539025426, | |
| "reward_std": 0.18434381857514381, | |
| "rewards/accuracy_multibox_reward": 0.1694444539025426, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3932.3333740234375, | |
| "epoch": 0.17309340188517566, | |
| "grad_norm": 0.03477713465690613, | |
| "kl": 9.900331497192383e-05, | |
| "learning_rate": 9.85808696490701e-07, | |
| "loss": -0.0078, | |
| "reward": 0.013888888992369175, | |
| "reward_std": 0.03402068838477135, | |
| "rewards/accuracy_multibox_reward": 0.013888888992369175, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3730.4166870117188, | |
| "epoch": 0.17480719794344474, | |
| "grad_norm": 0.11818379908800125, | |
| "kl": 0.0005511045455932617, | |
| "learning_rate": 9.851286661487463e-07, | |
| "loss": -0.0091, | |
| "reward": 0.3111111233010888, | |
| "reward_std": 0.19217968732118607, | |
| "rewards/accuracy_multibox_reward": 0.3111111233010888, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3841.1806640625, | |
| "epoch": 0.17652099400171378, | |
| "grad_norm": 0.09309526532888412, | |
| "kl": 0.00031375885009765625, | |
| "learning_rate": 9.844329952011504e-07, | |
| "loss": 0.0031, | |
| "reward": 0.3083333447575569, | |
| "reward_std": 0.165663858409971, | |
| "rewards/accuracy_multibox_reward": 0.3083333447575569, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4225.27783203125, | |
| "epoch": 0.17823479005998286, | |
| "grad_norm": 0.05423985421657562, | |
| "kl": 0.000133514404296875, | |
| "learning_rate": 9.837217086536547e-07, | |
| "loss": -0.0179, | |
| "reward": 0.04444444552063942, | |
| "reward_std": 0.08385797962546349, | |
| "rewards/accuracy_multibox_reward": 0.04444444552063942, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3186.4861450195312, | |
| "epoch": 0.17994858611825193, | |
| "grad_norm": 0.16297383606433868, | |
| "kl": 0.00027930736541748047, | |
| "learning_rate": 9.829948320733e-07, | |
| "loss": 0.0063, | |
| "reward": 0.19444444961845875, | |
| "reward_std": 0.1605059839785099, | |
| "rewards/accuracy_multibox_reward": 0.19444444961845875, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3781.1666870117188, | |
| "epoch": 0.181662382176521, | |
| "grad_norm": 0.08750122785568237, | |
| "kl": 0.00018417835235595703, | |
| "learning_rate": 9.822523915875077e-07, | |
| "loss": 0.0374, | |
| "reward": 0.2083333395421505, | |
| "reward_std": 0.1768389567732811, | |
| "rewards/accuracy_multibox_reward": 0.2083333395421505, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3983.5833740234375, | |
| "epoch": 0.18337617823479005, | |
| "grad_norm": 0.06607924401760101, | |
| "kl": 0.0003471970558166504, | |
| "learning_rate": 9.8149441388314e-07, | |
| "loss": 0.0093, | |
| "reward": 0.21111111342906952, | |
| "reward_std": 0.1445206105709076, | |
| "rewards/accuracy_multibox_reward": 0.21111111342906952, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4123.319519042969, | |
| "epoch": 0.18508997429305912, | |
| "grad_norm": 0.08298283070325851, | |
| "kl": 0.0002357959747314453, | |
| "learning_rate": 9.807209262055415e-07, | |
| "loss": 0.0309, | |
| "reward": 0.21111111715435982, | |
| "reward_std": 0.14038429781794548, | |
| "rewards/accuracy_multibox_reward": 0.21111111715435982, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3583.6111450195312, | |
| "epoch": 0.1868037703513282, | |
| "grad_norm": 0.03456662967801094, | |
| "kl": 0.00021064281463623047, | |
| "learning_rate": 9.799319563575593e-07, | |
| "loss": 0.0046, | |
| "reward": 0.06388889253139496, | |
| "reward_std": 0.049907319247722626, | |
| "rewards/accuracy_multibox_reward": 0.06388889253139496, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3675.6527709960938, | |
| "epoch": 0.18851756640959727, | |
| "grad_norm": 0.13920719921588898, | |
| "kl": 0.00023365020751953125, | |
| "learning_rate": 9.791275326985434e-07, | |
| "loss": 0.0161, | |
| "reward": 0.21666667237877846, | |
| "reward_std": 0.20940591394901276, | |
| "rewards/accuracy_multibox_reward": 0.21666667237877846, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3196.5277709960938, | |
| "epoch": 0.19023136246786632, | |
| "grad_norm": 0.13540855050086975, | |
| "kl": 0.00030612945556640625, | |
| "learning_rate": 9.783076841433279e-07, | |
| "loss": -0.0233, | |
| "reward": 0.14166666194796562, | |
| "reward_std": 0.09287043660879135, | |
| "rewards/accuracy_multibox_reward": 0.14166666194796562, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3594.8194580078125, | |
| "epoch": 0.1919451585261354, | |
| "grad_norm": 0.0719911977648735, | |
| "kl": 0.00022208690643310547, | |
| "learning_rate": 9.774724401611918e-07, | |
| "loss": 0.0338, | |
| "reward": 0.08055556006729603, | |
| "reward_std": 0.12617595866322517, | |
| "rewards/accuracy_multibox_reward": 0.08055556006729603, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3552.0001220703125, | |
| "epoch": 0.19365895458440446, | |
| "grad_norm": 0.09335467219352722, | |
| "kl": 7.987022399902344e-05, | |
| "learning_rate": 9.76621830774799e-07, | |
| "loss": 0.0115, | |
| "reward": 0.28333334624767303, | |
| "reward_std": 0.13514240086078644, | |
| "rewards/accuracy_multibox_reward": 0.28333334624767303, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2711.6111450195312, | |
| "epoch": 0.1953727506426735, | |
| "grad_norm": 0.10667743533849716, | |
| "kl": 0.00023448467254638672, | |
| "learning_rate": 9.757558865591196e-07, | |
| "loss": -0.0481, | |
| "reward": 0.11111111380159855, | |
| "reward_std": 0.08765210583806038, | |
| "rewards/accuracy_multibox_reward": 0.11111111380159855, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3084.0277709960938, | |
| "epoch": 0.19708654670094258, | |
| "grad_norm": 0.15260815620422363, | |
| "kl": 0.00034546852111816406, | |
| "learning_rate": 9.748746386403305e-07, | |
| "loss": 0.0344, | |
| "reward": 0.2777777835726738, | |
| "reward_std": 0.15382321178913116, | |
| "rewards/accuracy_multibox_reward": 0.2777777835726738, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3909.0278930664062, | |
| "epoch": 0.19880034275921166, | |
| "grad_norm": 0.034480199217796326, | |
| "kl": 0.00033986568450927734, | |
| "learning_rate": 9.739781186946978e-07, | |
| "loss": 0.0158, | |
| "reward": 0.09722222574055195, | |
| "reward_std": 0.08084797114133835, | |
| "rewards/accuracy_multibox_reward": 0.09722222574055195, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3500.1249389648438, | |
| "epoch": 0.20051413881748073, | |
| "grad_norm": 0.06379447132349014, | |
| "kl": 0.0004094243049621582, | |
| "learning_rate": 9.730663589474364e-07, | |
| "loss": 0.0217, | |
| "reward": 0.1472222153097391, | |
| "reward_std": 0.1438429169356823, | |
| "rewards/accuracy_multibox_reward": 0.1472222153097391, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3388.5, | |
| "epoch": 0.20222793487574978, | |
| "grad_norm": 0.09970454126596451, | |
| "kl": 0.00026702880859375, | |
| "learning_rate": 9.721393921715533e-07, | |
| "loss": 0.0199, | |
| "reward": 0.26388888247311115, | |
| "reward_std": 0.18143848329782486, | |
| "rewards/accuracy_multibox_reward": 0.26388888247311115, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3447.2222900390625, | |
| "epoch": 0.20394173093401885, | |
| "grad_norm": 0.05983908474445343, | |
| "kl": 0.00017380714416503906, | |
| "learning_rate": 9.711972516866678e-07, | |
| "loss": -0.007, | |
| "reward": 0.39444444328546524, | |
| "reward_std": 0.08901692926883698, | |
| "rewards/accuracy_multibox_reward": 0.39444444328546524, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3045.8195190429688, | |
| "epoch": 0.20565552699228792, | |
| "grad_norm": 0.12526124715805054, | |
| "kl": 0.0005441904067993164, | |
| "learning_rate": 9.70239971357816e-07, | |
| "loss": 0.0426, | |
| "reward": 0.2888888940215111, | |
| "reward_std": 0.23521282523870468, | |
| "rewards/accuracy_multibox_reward": 0.2888888940215111, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3838.25, | |
| "epoch": 0.207369323050557, | |
| "grad_norm": 0.1413683146238327, | |
| "kl": 0.00042748451232910156, | |
| "learning_rate": 9.692675855942318e-07, | |
| "loss": 0.0441, | |
| "reward": 0.25833334028720856, | |
| "reward_std": 0.2818593308329582, | |
| "rewards/accuracy_multibox_reward": 0.25833334028720856, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3934.34716796875, | |
| "epoch": 0.20908311910882604, | |
| "grad_norm": 0.07403361797332764, | |
| "kl": 0.0004477500915527344, | |
| "learning_rate": 9.682801293481108e-07, | |
| "loss": -0.0124, | |
| "reward": 0.03333333507180214, | |
| "reward_std": 0.08164966106414795, | |
| "rewards/accuracy_multibox_reward": 0.03333333507180214, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3495.5833129882812, | |
| "epoch": 0.21079691516709512, | |
| "grad_norm": 0.15298302471637726, | |
| "kl": 0.0009818077087402344, | |
| "learning_rate": 9.67277638113354e-07, | |
| "loss": 0.0353, | |
| "reward": 0.4194444566965103, | |
| "reward_std": 0.3589022159576416, | |
| "rewards/accuracy_multibox_reward": 0.4194444566965103, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3814.458251953125, | |
| "epoch": 0.2125107112253642, | |
| "grad_norm": 0.03969154506921768, | |
| "kl": 0.00025534629821777344, | |
| "learning_rate": 9.662601479242914e-07, | |
| "loss": 0.0059, | |
| "reward": 0.19444444961845875, | |
| "reward_std": 0.08526759967207909, | |
| "rewards/accuracy_multibox_reward": 0.19444444961845875, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3635.916748046875, | |
| "epoch": 0.21422450728363324, | |
| "grad_norm": 0.07392498850822449, | |
| "kl": 0.0003396272659301758, | |
| "learning_rate": 9.652276953543877e-07, | |
| "loss": -0.0594, | |
| "reward": 0.1722222287207842, | |
| "reward_std": 0.09986086189746857, | |
| "rewards/accuracy_multibox_reward": 0.1722222287207842, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3455.7083129882812, | |
| "epoch": 0.2159383033419023, | |
| "grad_norm": 0.07057610899209976, | |
| "kl": 0.0007448196411132812, | |
| "learning_rate": 9.641803175149264e-07, | |
| "loss": 0.0313, | |
| "reward": 0.19722223468124866, | |
| "reward_std": 0.08845379948616028, | |
| "rewards/accuracy_multibox_reward": 0.19722223468124866, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3872.0695190429688, | |
| "epoch": 0.21765209940017138, | |
| "grad_norm": 0.08014718443155289, | |
| "kl": 0.00054931640625, | |
| "learning_rate": 9.631180520536777e-07, | |
| "loss": 0.0275, | |
| "reward": 0.12777777947485447, | |
| "reward_std": 0.1775858923792839, | |
| "rewards/accuracy_multibox_reward": 0.12777777947485447, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4065.819580078125, | |
| "epoch": 0.21936589545844046, | |
| "grad_norm": 0.10742916166782379, | |
| "kl": 0.0011324882507324219, | |
| "learning_rate": 9.62040937153543e-07, | |
| "loss": 0.0039, | |
| "reward": 0.09722222574055195, | |
| "reward_std": 0.08084797114133835, | |
| "rewards/accuracy_multibox_reward": 0.09722222574055195, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3241.8194274902344, | |
| "epoch": 0.2210796915167095, | |
| "grad_norm": 0.04453056678175926, | |
| "kl": 0.0014786720275878906, | |
| "learning_rate": 9.60949011531184e-07, | |
| "loss": 0.0001, | |
| "reward": 0.2805555537343025, | |
| "reward_std": 0.03402068838477135, | |
| "rewards/accuracy_multibox_reward": 0.2805555537343025, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2679.9166564941406, | |
| "epoch": 0.22279348757497858, | |
| "grad_norm": 0.16622285544872284, | |
| "kl": 0.0011043548583984375, | |
| "learning_rate": 9.598423144356312e-07, | |
| "loss": -0.1009, | |
| "reward": 0.31111111491918564, | |
| "reward_std": 0.1034984439611435, | |
| "rewards/accuracy_multibox_reward": 0.31111111491918564, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4074.7222290039062, | |
| "epoch": 0.22450728363324765, | |
| "grad_norm": 0.05698971822857857, | |
| "kl": 0.0005660057067871094, | |
| "learning_rate": 9.587208856468713e-07, | |
| "loss": -0.0063, | |
| "reward": 0.10641026496887207, | |
| "reward_std": 0.09661635011434555, | |
| "rewards/accuracy_multibox_reward": 0.10641026496887207, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3492.5000610351562, | |
| "epoch": 0.2262210796915167, | |
| "grad_norm": 0.1345558613538742, | |
| "kl": 0.0007143020629882812, | |
| "learning_rate": 9.575847654744196e-07, | |
| "loss": -0.0399, | |
| "reward": 0.17222222685813904, | |
| "reward_std": 0.16550764068961143, | |
| "rewards/accuracy_multibox_reward": 0.17222222685813904, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3675.0554809570312, | |
| "epoch": 0.22793487574978577, | |
| "grad_norm": 0.1444007307291031, | |
| "kl": 0.0004534721374511719, | |
| "learning_rate": 9.564339947558697e-07, | |
| "loss": -0.0205, | |
| "reward": 0.19722222536802292, | |
| "reward_std": 0.2356591746211052, | |
| "rewards/accuracy_multibox_reward": 0.19722222536802292, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3074.5834350585938, | |
| "epoch": 0.22964867180805484, | |
| "grad_norm": 0.25425997376441956, | |
| "kl": 0.00079345703125, | |
| "learning_rate": 9.552686148554252e-07, | |
| "loss": -0.08, | |
| "reward": 0.2472222177311778, | |
| "reward_std": 0.15991497784852982, | |
| "rewards/accuracy_multibox_reward": 0.2472222177311778, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3254.5555419921875, | |
| "epoch": 0.23136246786632392, | |
| "grad_norm": 0.14068569242954254, | |
| "kl": 0.00110626220703125, | |
| "learning_rate": 9.540886676624145e-07, | |
| "loss": -0.019, | |
| "reward": 0.24722222238779068, | |
| "reward_std": 0.2142559178173542, | |
| "rewards/accuracy_multibox_reward": 0.24722222238779068, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3609.638916015625, | |
| "epoch": 0.23307626392459296, | |
| "grad_norm": 0.12296868860721588, | |
| "kl": 0.00060272216796875, | |
| "learning_rate": 9.528941955897839e-07, | |
| "loss": 0.0086, | |
| "reward": 0.3222222328186035, | |
| "reward_std": 0.1588597260415554, | |
| "rewards/accuracy_multibox_reward": 0.3222222328186035, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4187.416748046875, | |
| "epoch": 0.23479005998286204, | |
| "grad_norm": 0.10791420191526413, | |
| "kl": 0.0004839897155761719, | |
| "learning_rate": 9.516852415725732e-07, | |
| "loss": 0.041, | |
| "reward": 0.29444444365799427, | |
| "reward_std": 0.2758632116019726, | |
| "rewards/accuracy_multibox_reward": 0.29444444365799427, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3849.9722900390625, | |
| "epoch": 0.2365038560411311, | |
| "grad_norm": 0.07364249229431152, | |
| "kl": 0.00045490264892578125, | |
| "learning_rate": 9.504618490663726e-07, | |
| "loss": -0.0078, | |
| "reward": 0.13765432965010405, | |
| "reward_std": 0.15906352922320366, | |
| "rewards/accuracy_multibox_reward": 0.13765432965010405, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3574.138916015625, | |
| "epoch": 0.23821765209940018, | |
| "grad_norm": 0.037080395966768265, | |
| "kl": 0.0009355545043945312, | |
| "learning_rate": 9.492240620457606e-07, | |
| "loss": 0.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.1666666716337204, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3124.2777709960938, | |
| "epoch": 0.23993144815766923, | |
| "grad_norm": 0.2319633513689041, | |
| "kl": 0.0008628368377685547, | |
| "learning_rate": 9.479719250027239e-07, | |
| "loss": 0.083, | |
| "reward": 0.31388888880610466, | |
| "reward_std": 0.23057278245687485, | |
| "rewards/accuracy_multibox_reward": 0.31388888880610466, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3921.0, | |
| "epoch": 0.2416452442159383, | |
| "grad_norm": 0.028503138571977615, | |
| "kl": 0.00047016143798828125, | |
| "learning_rate": 9.467054829450571e-07, | |
| "loss": 0.0028, | |
| "reward": 0.01666666753590107, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.01666666753590107, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3927.0137939453125, | |
| "epoch": 0.24335904027420738, | |
| "grad_norm": 0.0805613249540329, | |
| "kl": 0.0003600120544433594, | |
| "learning_rate": 9.454247813947455e-07, | |
| "loss": 0.0259, | |
| "reward": 0.19722222536802292, | |
| "reward_std": 0.07484553009271622, | |
| "rewards/accuracy_multibox_reward": 0.19722222536802292, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4076.9862060546875, | |
| "epoch": 0.24507283633247642, | |
| "grad_norm": 0.005627193488180637, | |
| "kl": 0.00037097930908203125, | |
| "learning_rate": 9.441298663863289e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3977.263916015625, | |
| "epoch": 0.2467866323907455, | |
| "grad_norm": 0.0878337025642395, | |
| "kl": 0.0004087686538696289, | |
| "learning_rate": 9.428207844652466e-07, | |
| "loss": 0.0468, | |
| "reward": 0.1250000037252903, | |
| "reward_std": 0.12388112023472786, | |
| "rewards/accuracy_multibox_reward": 0.1250000037252903, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3565.1943969726562, | |
| "epoch": 0.24850042844901457, | |
| "grad_norm": 0.05937693268060684, | |
| "kl": 0.0004811286926269531, | |
| "learning_rate": 9.414975826861651e-07, | |
| "loss": -0.0061, | |
| "reward": 0.22500000335276127, | |
| "reward_std": 0.08845379576086998, | |
| "rewards/accuracy_multibox_reward": 0.22500000335276127, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3216.65283203125, | |
| "epoch": 0.25021422450728364, | |
| "grad_norm": 0.20345333218574524, | |
| "kl": 0.0004911422729492188, | |
| "learning_rate": 9.401603086112854e-07, | |
| "loss": 0.063, | |
| "reward": 0.29484126158058643, | |
| "reward_std": 0.29107866808772087, | |
| "rewards/accuracy_multibox_reward": 0.29484126158058643, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3823.6806640625, | |
| "epoch": 0.2519280205655527, | |
| "grad_norm": 0.013609305955469608, | |
| "kl": 0.0009183883666992188, | |
| "learning_rate": 9.388090103086343e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4169.472229003906, | |
| "epoch": 0.2536418166238218, | |
| "grad_norm": 0.05844099074602127, | |
| "kl": 0.000560760498046875, | |
| "learning_rate": 9.374437363503368e-07, | |
| "loss": 0.032, | |
| "reward": 0.05833332985639572, | |
| "reward_std": 0.04564354941248894, | |
| "rewards/accuracy_multibox_reward": 0.05833332985639572, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3436.222198486328, | |
| "epoch": 0.25535561268209084, | |
| "grad_norm": 0.06468667089939117, | |
| "kl": 0.00043582916259765625, | |
| "learning_rate": 9.360645358108695e-07, | |
| "loss": 0.009, | |
| "reward": 0.3083333298563957, | |
| "reward_std": 0.20225221663713455, | |
| "rewards/accuracy_multibox_reward": 0.3083333298563957, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3546.2083129882812, | |
| "epoch": 0.2570694087403599, | |
| "grad_norm": 0.04001326858997345, | |
| "kl": 0.0007069110870361328, | |
| "learning_rate": 9.34671458265297e-07, | |
| "loss": 0.0045, | |
| "reward": 0.20000000298023224, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.20000000298023224, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3814.1943969726562, | |
| "epoch": 0.258783204798629, | |
| "grad_norm": 0.05507076159119606, | |
| "kl": 0.0006656646728515625, | |
| "learning_rate": 9.332645537874899e-07, | |
| "loss": 0.0124, | |
| "reward": 0.03055555559694767, | |
| "reward_std": 0.047628965228796005, | |
| "rewards/accuracy_multibox_reward": 0.03055555559694767, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4172.277893066406, | |
| "epoch": 0.26049700085689803, | |
| "grad_norm": 0.033659715205430984, | |
| "kl": 0.0004649162292480469, | |
| "learning_rate": 9.318438729483249e-07, | |
| "loss": 0.0015, | |
| "reward": 0.11666666902601719, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.11666666902601719, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3776.013916015625, | |
| "epoch": 0.2622107969151671, | |
| "grad_norm": 0.14738063514232635, | |
| "kl": 0.0009088516235351562, | |
| "learning_rate": 9.304094668138669e-07, | |
| "loss": -0.0128, | |
| "reward": 0.15555555932223797, | |
| "reward_std": 0.177732203155756, | |
| "rewards/accuracy_multibox_reward": 0.15555555932223797, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3807.986083984375, | |
| "epoch": 0.2639245929734362, | |
| "grad_norm": 0.059987735003232956, | |
| "kl": 0.00044465065002441406, | |
| "learning_rate": 9.289613869435336e-07, | |
| "loss": 0.0282, | |
| "reward": 0.24444445967674255, | |
| "reward_std": 0.18720757588744164, | |
| "rewards/accuracy_multibox_reward": 0.24444445967674255, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4017.7361450195312, | |
| "epoch": 0.2656383890317052, | |
| "grad_norm": 0.1823851764202118, | |
| "kl": 0.000599980354309082, | |
| "learning_rate": 9.274996853882425e-07, | |
| "loss": -0.0125, | |
| "reward": 0.14583333767950535, | |
| "reward_std": 0.16549263149499893, | |
| "rewards/accuracy_multibox_reward": 0.14583333767950535, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3784.2362060546875, | |
| "epoch": 0.26735218508997427, | |
| "grad_norm": 0.14699044823646545, | |
| "kl": 0.0005002021789550781, | |
| "learning_rate": 9.260244146885391e-07, | |
| "loss": 0.0727, | |
| "reward": 0.21388889476656914, | |
| "reward_std": 0.13698505237698555, | |
| "rewards/accuracy_multibox_reward": 0.21388889476656914, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3605.1943969726562, | |
| "epoch": 0.26906598114824337, | |
| "grad_norm": 0.05055173486471176, | |
| "kl": 0.0008873939514160156, | |
| "learning_rate": 9.245356278727093e-07, | |
| "loss": 0.0167, | |
| "reward": 0.20555555820465088, | |
| "reward_std": 0.1159338504076004, | |
| "rewards/accuracy_multibox_reward": 0.20555555820465088, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3649.3193969726562, | |
| "epoch": 0.2707797772065124, | |
| "grad_norm": 0.04654080420732498, | |
| "kl": 0.0006418228149414062, | |
| "learning_rate": 9.230333784548726e-07, | |
| "loss": -0.0008, | |
| "reward": 0.0972222313284874, | |
| "reward_std": 0.006804139818996191, | |
| "rewards/accuracy_multibox_reward": 0.0972222313284874, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4109.624938964844, | |
| "epoch": 0.27249357326478146, | |
| "grad_norm": 0.0605013482272625, | |
| "kl": 0.0007648468017578125, | |
| "learning_rate": 9.215177204330587e-07, | |
| "loss": 0.0201, | |
| "reward": 0.02777777798473835, | |
| "reward_std": 0.06804138049483299, | |
| "rewards/accuracy_multibox_reward": 0.02777777798473835, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3737.375, | |
| "epoch": 0.27420736932305056, | |
| "grad_norm": 0.054639820009469986, | |
| "kl": 0.0006661415100097656, | |
| "learning_rate": 9.199887082872672e-07, | |
| "loss": 0.0063, | |
| "reward": 0.11666667088866234, | |
| "reward_std": 0.12247449159622192, | |
| "rewards/accuracy_multibox_reward": 0.11666667088866234, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3564.5694580078125, | |
| "epoch": 0.2759211653813196, | |
| "grad_norm": 0.16255205869674683, | |
| "kl": 0.000888824462890625, | |
| "learning_rate": 9.184463969775083e-07, | |
| "loss": 0.0504, | |
| "reward": 0.2611111216247082, | |
| "reward_std": 0.2880193069577217, | |
| "rewards/accuracy_multibox_reward": 0.2611111216247082, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3338.5000610351562, | |
| "epoch": 0.2776349614395887, | |
| "grad_norm": 0.08532693237066269, | |
| "kl": 0.0014801025390625, | |
| "learning_rate": 9.168908419418278e-07, | |
| "loss": 0.0123, | |
| "reward": 0.15555556304752827, | |
| "reward_std": 0.13840734213590622, | |
| "rewards/accuracy_multibox_reward": 0.15555556304752827, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3847.263916015625, | |
| "epoch": 0.27934875749785776, | |
| "grad_norm": 0.05799272283911705, | |
| "kl": 0.0011796951293945312, | |
| "learning_rate": 9.153220990943145e-07, | |
| "loss": 0.023, | |
| "reward": 0.21388890035450459, | |
| "reward_std": 0.1400935798883438, | |
| "rewards/accuracy_multibox_reward": 0.21388890035450459, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3339.611083984375, | |
| "epoch": 0.2810625535561268, | |
| "grad_norm": 0.05917629599571228, | |
| "kl": 0.0008940696716308594, | |
| "learning_rate": 9.137402248230903e-07, | |
| "loss": 0.0031, | |
| "reward": 0.23333333618938923, | |
| "reward_std": 0.09525793418288231, | |
| "rewards/accuracy_multibox_reward": 0.23333333618938923, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3567.8056030273438, | |
| "epoch": 0.2827763496143959, | |
| "grad_norm": 0.1181679219007492, | |
| "kl": 0.001010894775390625, | |
| "learning_rate": 9.121452759882831e-07, | |
| "loss": 0.0271, | |
| "reward": 0.3611111165955663, | |
| "reward_std": 0.2910319156944752, | |
| "rewards/accuracy_multibox_reward": 0.3611111165955663, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3669.1527709960938, | |
| "epoch": 0.28449014567266495, | |
| "grad_norm": 0.18609587848186493, | |
| "kl": 0.0011425018310546875, | |
| "learning_rate": 9.105373099199835e-07, | |
| "loss": 0.1031, | |
| "reward": 0.22777777351439, | |
| "reward_std": 0.26025062054395676, | |
| "rewards/accuracy_multibox_reward": 0.22777777351439, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3581.9445190429688, | |
| "epoch": 0.286203941730934, | |
| "grad_norm": 0.08257146924734116, | |
| "kl": 0.0012798309326171875, | |
| "learning_rate": 9.08916384416183e-07, | |
| "loss": 0.0034, | |
| "reward": 0.3027777746319771, | |
| "reward_std": 0.09511926025152206, | |
| "rewards/accuracy_multibox_reward": 0.3027777746319771, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3730.0555419921875, | |
| "epoch": 0.2879177377892031, | |
| "grad_norm": 0.10405819118022919, | |
| "kl": 0.0005664825439453125, | |
| "learning_rate": 9.072825577406981e-07, | |
| "loss": 0.0068, | |
| "reward": 0.24166667833924294, | |
| "reward_std": 0.18540507555007935, | |
| "rewards/accuracy_multibox_reward": 0.24166667833924294, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3390.2639770507812, | |
| "epoch": 0.28963153384747214, | |
| "grad_norm": 0.0679510086774826, | |
| "kl": 0.0014696121215820312, | |
| "learning_rate": 9.056358886210747e-07, | |
| "loss": 0.0196, | |
| "reward": 0.2499999962747097, | |
| "reward_std": 0.09559708088636398, | |
| "rewards/accuracy_multibox_reward": 0.2499999962747097, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3542.15283203125, | |
| "epoch": 0.2913453299057412, | |
| "grad_norm": 0.09599950164556503, | |
| "kl": 0.001415252685546875, | |
| "learning_rate": 9.039764362464775e-07, | |
| "loss": -0.0079, | |
| "reward": 0.25277778692543507, | |
| "reward_std": 0.15102478861808777, | |
| "rewards/accuracy_multibox_reward": 0.25277778692543507, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3400.8333740234375, | |
| "epoch": 0.2930591259640103, | |
| "grad_norm": 0.05047090724110603, | |
| "kl": 0.0017642974853515625, | |
| "learning_rate": 9.023042602655623e-07, | |
| "loss": -0.0254, | |
| "reward": 0.1944444626569748, | |
| "reward_std": 0.008606631308794022, | |
| "rewards/accuracy_multibox_reward": 0.1944444626569748, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3728.4861450195312, | |
| "epoch": 0.29477292202227934, | |
| "grad_norm": 0.07424576580524445, | |
| "kl": 0.0009732246398925781, | |
| "learning_rate": 9.00619420784333e-07, | |
| "loss": 0.0179, | |
| "reward": 0.2805555649101734, | |
| "reward_std": 0.1627657264471054, | |
| "rewards/accuracy_multibox_reward": 0.2805555649101734, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3089.416717529297, | |
| "epoch": 0.29648671808054844, | |
| "grad_norm": 0.14618317782878876, | |
| "kl": 0.0013365745544433594, | |
| "learning_rate": 8.989219783639795e-07, | |
| "loss": 0.0056, | |
| "reward": 0.12083333265036345, | |
| "reward_std": 0.13268069550395012, | |
| "rewards/accuracy_multibox_reward": 0.12083333265036345, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3497.9444580078125, | |
| "epoch": 0.2982005141388175, | |
| "grad_norm": 0.11303048580884933, | |
| "kl": 0.0022602081298828125, | |
| "learning_rate": 8.972119940187017e-07, | |
| "loss": 0.009, | |
| "reward": 0.21388890035450459, | |
| "reward_std": 0.16831958666443825, | |
| "rewards/accuracy_multibox_reward": 0.21388890035450459, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3958.5554809570312, | |
| "epoch": 0.29991431019708653, | |
| "grad_norm": 0.07563745975494385, | |
| "kl": 0.0026092529296875, | |
| "learning_rate": 8.95489529213517e-07, | |
| "loss": 0.0288, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.10954451560974121, | |
| "rewards/accuracy_multibox_reward": 0.10000000149011612, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3655.1111450195312, | |
| "epoch": 0.30162810625535563, | |
| "grad_norm": 0.0935165137052536, | |
| "kl": 0.0021076202392578125, | |
| "learning_rate": 8.93754645862049e-07, | |
| "loss": 0.0142, | |
| "reward": 0.19166667014360428, | |
| "reward_std": 0.10041580721735954, | |
| "rewards/accuracy_multibox_reward": 0.19166667014360428, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3986.9307250976562, | |
| "epoch": 0.3033419023136247, | |
| "grad_norm": 0.048015084117650986, | |
| "kl": 0.0013837814331054688, | |
| "learning_rate": 8.920074063243045e-07, | |
| "loss": 0.0054, | |
| "reward": 0.1472222302109003, | |
| "reward_std": 0.08845379576086998, | |
| "rewards/accuracy_multibox_reward": 0.1472222302109003, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3746.638916015625, | |
| "epoch": 0.3050556983718937, | |
| "grad_norm": 0.086827851831913, | |
| "kl": 0.0020017623901367188, | |
| "learning_rate": 8.902478734044297e-07, | |
| "loss": -0.0086, | |
| "reward": 0.15833333879709244, | |
| "reward_std": 0.04262732062488794, | |
| "rewards/accuracy_multibox_reward": 0.15833333879709244, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4156.5694580078125, | |
| "epoch": 0.3067694944301628, | |
| "grad_norm": 0.08786217868328094, | |
| "kl": 0.0014667510986328125, | |
| "learning_rate": 8.884761103484547e-07, | |
| "loss": 0.0081, | |
| "reward": 0.17777777649462223, | |
| "reward_std": 0.18305182829499245, | |
| "rewards/accuracy_multibox_reward": 0.17777777649462223, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2806.8472900390625, | |
| "epoch": 0.30848329048843187, | |
| "grad_norm": 0.08783501386642456, | |
| "kl": 0.00263214111328125, | |
| "learning_rate": 8.866921808420184e-07, | |
| "loss": 0.0215, | |
| "reward": 0.20000000298023224, | |
| "reward_std": 0.08646837202832103, | |
| "rewards/accuracy_multibox_reward": 0.20000000298023224, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3707.8194580078125, | |
| "epoch": 0.3101970865467009, | |
| "grad_norm": 0.1184110939502716, | |
| "kl": 0.002193450927734375, | |
| "learning_rate": 8.848961490080805e-07, | |
| "loss": 0.0136, | |
| "reward": 0.21388890035450459, | |
| "reward_std": 0.27392324805259705, | |
| "rewards/accuracy_multibox_reward": 0.21388890035450459, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3995.3749389648438, | |
| "epoch": 0.31191088260497, | |
| "grad_norm": 0.07299068570137024, | |
| "kl": 0.0027980804443359375, | |
| "learning_rate": 8.830880794046162e-07, | |
| "loss": 0.01, | |
| "reward": 0.06666666828095913, | |
| "reward_std": 0.0966021679341793, | |
| "rewards/accuracy_multibox_reward": 0.06666666828095913, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4063.0556030273438, | |
| "epoch": 0.31362467866323906, | |
| "grad_norm": 0.09112781286239624, | |
| "kl": 0.001811981201171875, | |
| "learning_rate": 8.81268037022296e-07, | |
| "loss": 0.0481, | |
| "reward": 0.11666667461395264, | |
| "reward_std": 0.10641203820705414, | |
| "rewards/accuracy_multibox_reward": 0.11666667461395264, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3543.3194580078125, | |
| "epoch": 0.31533847472150817, | |
| "grad_norm": 0.13286913931369781, | |
| "kl": 0.0023365020751953125, | |
| "learning_rate": 8.794360872821486e-07, | |
| "loss": 0.0053, | |
| "reward": 0.17500000540167093, | |
| "reward_std": 0.18949807435274124, | |
| "rewards/accuracy_multibox_reward": 0.17500000540167093, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3834.5557250976562, | |
| "epoch": 0.3170522707797772, | |
| "grad_norm": 0.11131037771701813, | |
| "kl": 0.00193023681640625, | |
| "learning_rate": 8.775922960332108e-07, | |
| "loss": 0.0645, | |
| "reward": 0.2152777798473835, | |
| "reward_std": 0.1455001193098724, | |
| "rewards/accuracy_multibox_reward": 0.2152777798473835, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3573.02783203125, | |
| "epoch": 0.31876606683804626, | |
| "grad_norm": 0.10231616348028183, | |
| "kl": 0.00202178955078125, | |
| "learning_rate": 8.757367295501594e-07, | |
| "loss": 0.0162, | |
| "reward": 0.22222222853451967, | |
| "reward_std": 0.22923456132411957, | |
| "rewards/accuracy_multibox_reward": 0.22222222853451967, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3228.9166259765625, | |
| "epoch": 0.32047986289631536, | |
| "grad_norm": 0.09195641428232193, | |
| "kl": 0.001346588134765625, | |
| "learning_rate": 8.738694545309298e-07, | |
| "loss": 0.037, | |
| "reward": 0.2694444451481104, | |
| "reward_std": 0.20644672587513924, | |
| "rewards/accuracy_multibox_reward": 0.2694444451481104, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4250.763916015625, | |
| "epoch": 0.3221936589545844, | |
| "grad_norm": 0.06196644902229309, | |
| "kl": 0.00168609619140625, | |
| "learning_rate": 8.719905380943182e-07, | |
| "loss": 0.0089, | |
| "reward": 0.13055555894970894, | |
| "reward_std": 0.14043273031711578, | |
| "rewards/accuracy_multibox_reward": 0.13055555894970894, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3956.4722900390625, | |
| "epoch": 0.32390745501285345, | |
| "grad_norm": 0.011679406277835369, | |
| "kl": 0.0022439956665039062, | |
| "learning_rate": 8.701000477775687e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3855.8056030273438, | |
| "epoch": 0.32562125107112255, | |
| "grad_norm": 0.17739200592041016, | |
| "kl": 0.0020618438720703125, | |
| "learning_rate": 8.681980515339463e-07, | |
| "loss": 0.0314, | |
| "reward": 0.2555555570870638, | |
| "reward_std": 0.22807863354682922, | |
| "rewards/accuracy_multibox_reward": 0.2555555570870638, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4054.9306640625, | |
| "epoch": 0.3273350471293916, | |
| "grad_norm": 0.061446595937013626, | |
| "kl": 0.0018014907836914062, | |
| "learning_rate": 8.662846177302938e-07, | |
| "loss": 0.0083, | |
| "reward": 0.20000000670552254, | |
| "reward_std": 0.13437210768461227, | |
| "rewards/accuracy_multibox_reward": 0.20000000670552254, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3487.916748046875, | |
| "epoch": 0.32904884318766064, | |
| "grad_norm": 0.13475653529167175, | |
| "kl": 0.009555816650390625, | |
| "learning_rate": 8.643598151445749e-07, | |
| "loss": -0.049, | |
| "reward": 0.19722223468124866, | |
| "reward_std": 0.09439631085842848, | |
| "rewards/accuracy_multibox_reward": 0.19722223468124866, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3126.625, | |
| "epoch": 0.33076263924592975, | |
| "grad_norm": 0.14124466478824615, | |
| "kl": 0.0047931671142578125, | |
| "learning_rate": 8.624237129634014e-07, | |
| "loss": 0.0147, | |
| "reward": 0.20555555820465088, | |
| "reward_std": 0.11993212252855301, | |
| "rewards/accuracy_multibox_reward": 0.20555555820465088, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3133.9583740234375, | |
| "epoch": 0.3324764353041988, | |
| "grad_norm": 0.15288712084293365, | |
| "kl": 0.0027637481689453125, | |
| "learning_rate": 8.604763807795471e-07, | |
| "loss": -0.0326, | |
| "reward": 0.3222222402691841, | |
| "reward_std": 0.15572724491357803, | |
| "rewards/accuracy_multibox_reward": 0.3222222402691841, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3596.5, | |
| "epoch": 0.3341902313624679, | |
| "grad_norm": 0.10813848674297333, | |
| "kl": 0.00366973876953125, | |
| "learning_rate": 8.58517888589445e-07, | |
| "loss": -0.0202, | |
| "reward": 0.12222221866250038, | |
| "reward_std": 0.18573712185025215, | |
| "rewards/accuracy_multibox_reward": 0.12222221866250038, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3624.0138549804688, | |
| "epoch": 0.33590402742073694, | |
| "grad_norm": 0.07153791189193726, | |
| "kl": 0.0025272369384765625, | |
| "learning_rate": 8.56548306790673e-07, | |
| "loss": -0.0018, | |
| "reward": 0.11111111380159855, | |
| "reward_std": 0.14944519102573395, | |
| "rewards/accuracy_multibox_reward": 0.11111111380159855, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3263.6527709960938, | |
| "epoch": 0.337617823479006, | |
| "grad_norm": 0.1408548802137375, | |
| "kl": 0.001537322998046875, | |
| "learning_rate": 8.54567706179422e-07, | |
| "loss": 0.0218, | |
| "reward": 0.3833333384245634, | |
| "reward_std": 0.2687872089445591, | |
| "rewards/accuracy_multibox_reward": 0.3833333384245634, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3826.5972900390625, | |
| "epoch": 0.3393316195372751, | |
| "grad_norm": 0.07099936157464981, | |
| "kl": 0.002452850341796875, | |
| "learning_rate": 8.525761579479519e-07, | |
| "loss": 0.0072, | |
| "reward": 0.17777778767049313, | |
| "reward_std": 0.12693162634968758, | |
| "rewards/accuracy_multibox_reward": 0.17777778767049313, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3684.7361450195312, | |
| "epoch": 0.34104541559554413, | |
| "grad_norm": 0.11462104320526123, | |
| "kl": 0.00241851806640625, | |
| "learning_rate": 8.505737336820326e-07, | |
| "loss": 0.011, | |
| "reward": 0.3388889078050852, | |
| "reward_std": 0.259547037538141, | |
| "rewards/accuracy_multibox_reward": 0.3388889078050852, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3224.5695190429688, | |
| "epoch": 0.3427592116538132, | |
| "grad_norm": 0.08289490640163422, | |
| "kl": 0.001834869384765625, | |
| "learning_rate": 8.485605053583704e-07, | |
| "loss": 0.021, | |
| "reward": 0.25833334028720856, | |
| "reward_std": 0.1391730047762394, | |
| "rewards/accuracy_multibox_reward": 0.25833334028720856, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3282.3194580078125, | |
| "epoch": 0.3444730077120823, | |
| "grad_norm": 0.030939659103751183, | |
| "kl": 0.0029296875, | |
| "learning_rate": 8.465365453420214e-07, | |
| "loss": 0.0179, | |
| "reward": 0.01666666753590107, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.01666666753590107, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3216.5, | |
| "epoch": 0.3461868037703513, | |
| "grad_norm": 0.08705315738916397, | |
| "kl": 0.0026617050170898438, | |
| "learning_rate": 8.445019263837897e-07, | |
| "loss": 0.062, | |
| "reward": 0.2222222238779068, | |
| "reward_std": 0.10107123851776123, | |
| "rewards/accuracy_multibox_reward": 0.2222222238779068, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3266.6528930664062, | |
| "epoch": 0.34790059982862037, | |
| "grad_norm": 0.13114742934703827, | |
| "kl": 0.0020885467529296875, | |
| "learning_rate": 8.42456721617613e-07, | |
| "loss": 0.1051, | |
| "reward": 0.4277777709066868, | |
| "reward_std": 0.2543094791471958, | |
| "rewards/accuracy_multibox_reward": 0.4277777709066868, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3588.4306640625, | |
| "epoch": 0.3496143958868895, | |
| "grad_norm": 0.06504377722740173, | |
| "kl": 0.0024700164794921875, | |
| "learning_rate": 8.404010045579339e-07, | |
| "loss": 0.0001, | |
| "reward": 0.1611111145466566, | |
| "reward_std": 0.13608276098966599, | |
| "rewards/accuracy_multibox_reward": 0.1611111145466566, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3283.638916015625, | |
| "epoch": 0.3513281919451585, | |
| "grad_norm": 0.11038251221179962, | |
| "kl": 0.0028858184814453125, | |
| "learning_rate": 8.383348490970566e-07, | |
| "loss": 0.041, | |
| "reward": 0.32500002160668373, | |
| "reward_std": 0.24852001667022705, | |
| "rewards/accuracy_multibox_reward": 0.32500002160668373, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3466.666748046875, | |
| "epoch": 0.35304198800342756, | |
| "grad_norm": 0.11361179500818253, | |
| "kl": 0.002765655517578125, | |
| "learning_rate": 8.362583295024916e-07, | |
| "loss": 0.025, | |
| "reward": 0.09444444626569748, | |
| "reward_std": 0.10377813130617142, | |
| "rewards/accuracy_multibox_reward": 0.09444444626569748, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3133.8333129882812, | |
| "epoch": 0.35475578406169667, | |
| "grad_norm": 0.10333088040351868, | |
| "kl": 0.002292633056640625, | |
| "learning_rate": 8.341715204142854e-07, | |
| "loss": 0.0017, | |
| "reward": 0.236111119389534, | |
| "reward_std": 0.034020692110061646, | |
| "rewards/accuracy_multibox_reward": 0.236111119389534, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3639.944580078125, | |
| "epoch": 0.3564695801199657, | |
| "grad_norm": 0.13132986426353455, | |
| "kl": 0.0026569366455078125, | |
| "learning_rate": 8.320744968423391e-07, | |
| "loss": 0.069, | |
| "reward": 0.23055557161569595, | |
| "reward_std": 0.23892851173877716, | |
| "rewards/accuracy_multibox_reward": 0.23055557161569595, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3531.75, | |
| "epoch": 0.3581833761782348, | |
| "grad_norm": 0.12211009114980698, | |
| "kl": 0.002147674560546875, | |
| "learning_rate": 8.299673341637108e-07, | |
| "loss": 0.0301, | |
| "reward": 0.3750000037252903, | |
| "reward_std": 0.27551666647195816, | |
| "rewards/accuracy_multibox_reward": 0.3750000037252903, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3299.0416870117188, | |
| "epoch": 0.35989717223650386, | |
| "grad_norm": 0.08455192297697067, | |
| "kl": 0.0027332305908203125, | |
| "learning_rate": 8.278501081199061e-07, | |
| "loss": 0.0338, | |
| "reward": 0.15000000596046448, | |
| "reward_std": 0.199572391808033, | |
| "rewards/accuracy_multibox_reward": 0.15000000596046448, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4096.597351074219, | |
| "epoch": 0.3616109682947729, | |
| "grad_norm": 0.0720938965678215, | |
| "kl": 0.00205230712890625, | |
| "learning_rate": 8.257228948141567e-07, | |
| "loss": 0.0007, | |
| "reward": 0.02777777798473835, | |
| "reward_std": 0.0680413767695427, | |
| "rewards/accuracy_multibox_reward": 0.02777777798473835, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3587.1390380859375, | |
| "epoch": 0.363324764353042, | |
| "grad_norm": 0.062211308628320694, | |
| "kl": 0.0028743743896484375, | |
| "learning_rate": 8.23585770708684e-07, | |
| "loss": 0.0283, | |
| "reward": 0.16805556090548635, | |
| "reward_std": 0.14394198171794415, | |
| "rewards/accuracy_multibox_reward": 0.16805556090548635, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3718.5694580078125, | |
| "epoch": 0.36503856041131105, | |
| "grad_norm": 0.09128718823194504, | |
| "kl": 0.002971649169921875, | |
| "learning_rate": 8.214388126219512e-07, | |
| "loss": 0.0502, | |
| "reward": 0.1388888917863369, | |
| "reward_std": 0.1525019183754921, | |
| "rewards/accuracy_multibox_reward": 0.1388888917863369, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3876.8056030273438, | |
| "epoch": 0.3667523564695801, | |
| "grad_norm": 0.07255463302135468, | |
| "kl": 0.004268646240234375, | |
| "learning_rate": 8.192820977259012e-07, | |
| "loss": 0.0197, | |
| "reward": 0.22499999776482582, | |
| "reward_std": 0.15090851671993732, | |
| "rewards/accuracy_multibox_reward": 0.22499999776482582, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3373.7500610351562, | |
| "epoch": 0.3684661525278492, | |
| "grad_norm": 0.05599533021450043, | |
| "kl": 0.00420379638671875, | |
| "learning_rate": 8.17115703543184e-07, | |
| "loss": -0.017, | |
| "reward": 0.32777778804302216, | |
| "reward_std": 0.09474550932645798, | |
| "rewards/accuracy_multibox_reward": 0.32777778804302216, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3926.6944580078125, | |
| "epoch": 0.37017994858611825, | |
| "grad_norm": 0.175499826669693, | |
| "kl": 0.0037384033203125, | |
| "learning_rate": 8.149397079443693e-07, | |
| "loss": -0.1117, | |
| "reward": 0.11944445222616196, | |
| "reward_std": 0.05844391882419586, | |
| "rewards/accuracy_multibox_reward": 0.11944445222616196, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3173.763885498047, | |
| "epoch": 0.3718937446443873, | |
| "grad_norm": 0.1027192547917366, | |
| "kl": 0.0028362274169921875, | |
| "learning_rate": 8.127541891451473e-07, | |
| "loss": -0.0078, | |
| "reward": 0.3277777973562479, | |
| "reward_std": 0.22094155102968216, | |
| "rewards/accuracy_multibox_reward": 0.3277777973562479, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3176.9166870117188, | |
| "epoch": 0.3736075407026564, | |
| "grad_norm": 0.12294210493564606, | |
| "kl": 0.003101348876953125, | |
| "learning_rate": 8.105592257035178e-07, | |
| "loss": -0.0192, | |
| "reward": 0.32222223840653896, | |
| "reward_std": 0.1513843908905983, | |
| "rewards/accuracy_multibox_reward": 0.32222223840653896, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3910.8056030273438, | |
| "epoch": 0.37532133676092544, | |
| "grad_norm": 0.06198888272047043, | |
| "kl": 0.00330352783203125, | |
| "learning_rate": 8.083548965169663e-07, | |
| "loss": 0.006, | |
| "reward": 0.213888892903924, | |
| "reward_std": 0.08845379576086998, | |
| "rewards/accuracy_multibox_reward": 0.213888892903924, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3725.0416259765625, | |
| "epoch": 0.37703513281919454, | |
| "grad_norm": 0.048343949019908905, | |
| "kl": 0.003200531005859375, | |
| "learning_rate": 8.061412808196279e-07, | |
| "loss": 0.0074, | |
| "reward": 0.08611110597848892, | |
| "reward_std": 0.04270917922258377, | |
| "rewards/accuracy_multibox_reward": 0.08611110597848892, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3947.6666870117188, | |
| "epoch": 0.3787489288774636, | |
| "grad_norm": 0.10289061814546585, | |
| "kl": 0.00290679931640625, | |
| "learning_rate": 8.039184581794389e-07, | |
| "loss": 0.0409, | |
| "reward": 0.20000000670552254, | |
| "reward_std": 0.14824195206165314, | |
| "rewards/accuracy_multibox_reward": 0.20000000670552254, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3527.9305419921875, | |
| "epoch": 0.38046272493573263, | |
| "grad_norm": 0.08110466599464417, | |
| "kl": 0.003814697265625, | |
| "learning_rate": 8.016865084952783e-07, | |
| "loss": -0.0129, | |
| "reward": 0.280555572360754, | |
| "reward_std": 0.15817855298519135, | |
| "rewards/accuracy_multibox_reward": 0.280555572360754, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3584.9583129882812, | |
| "epoch": 0.38217652099400173, | |
| "grad_norm": 0.24395205080509186, | |
| "kl": 0.0024871826171875, | |
| "learning_rate": 7.994455119940934e-07, | |
| "loss": 0.0415, | |
| "reward": 0.18333333730697632, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.18333333730697632, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3989.486083984375, | |
| "epoch": 0.3838903170522708, | |
| "grad_norm": 0.11237452179193497, | |
| "kl": 0.00478363037109375, | |
| "learning_rate": 7.971955492280181e-07, | |
| "loss": 0.0469, | |
| "reward": 0.19444444868713617, | |
| "reward_std": 0.24729109928011894, | |
| "rewards/accuracy_multibox_reward": 0.19444444868713617, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3525.9444885253906, | |
| "epoch": 0.3856041131105398, | |
| "grad_norm": 0.12232717871665955, | |
| "kl": 0.003505706787109375, | |
| "learning_rate": 7.949367010714766e-07, | |
| "loss": 0.0263, | |
| "reward": 0.19074074272066355, | |
| "reward_std": 0.2204592414200306, | |
| "rewards/accuracy_multibox_reward": 0.19074074272066355, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3373.3472290039062, | |
| "epoch": 0.3873179091688089, | |
| "grad_norm": 0.11432304233312607, | |
| "kl": 0.002788543701171875, | |
| "learning_rate": 7.926690487182766e-07, | |
| "loss": 0.0027, | |
| "reward": 0.27222223207354546, | |
| "reward_std": 0.19937064871191978, | |
| "rewards/accuracy_multibox_reward": 0.27222223207354546, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3713.2083129882812, | |
| "epoch": 0.389031705227078, | |
| "grad_norm": 0.1423819214105606, | |
| "kl": 0.0024089813232421875, | |
| "learning_rate": 7.903926736786907e-07, | |
| "loss": 0.0275, | |
| "reward": 0.3444444499909878, | |
| "reward_std": 0.4059048369526863, | |
| "rewards/accuracy_multibox_reward": 0.3444444499909878, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3779.2638549804688, | |
| "epoch": 0.390745501285347, | |
| "grad_norm": 0.13266809284687042, | |
| "kl": 0.0039196014404296875, | |
| "learning_rate": 7.881076577765265e-07, | |
| "loss": 0.0381, | |
| "reward": 0.30277777649462223, | |
| "reward_std": 0.2644210234284401, | |
| "rewards/accuracy_multibox_reward": 0.30277777649462223, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3463.65283203125, | |
| "epoch": 0.3924592973436161, | |
| "grad_norm": 0.07100173085927963, | |
| "kl": 0.004810333251953125, | |
| "learning_rate": 7.858140831461858e-07, | |
| "loss": 0.0341, | |
| "reward": 0.24722223728895187, | |
| "reward_std": 0.0907321497797966, | |
| "rewards/accuracy_multibox_reward": 0.24722223728895187, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3667.1527709960938, | |
| "epoch": 0.39417309340188517, | |
| "grad_norm": 0.11311890929937363, | |
| "kl": 0.00492095947265625, | |
| "learning_rate": 7.835120322297115e-07, | |
| "loss": 0.013, | |
| "reward": 0.2666666768491268, | |
| "reward_std": 0.1994203245267272, | |
| "rewards/accuracy_multibox_reward": 0.2666666768491268, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3937.7083740234375, | |
| "epoch": 0.39588688946015427, | |
| "grad_norm": 0.07955088466405869, | |
| "kl": 0.00560760498046875, | |
| "learning_rate": 7.812015877738252e-07, | |
| "loss": 0.0249, | |
| "reward": 0.14444444607943296, | |
| "reward_std": 0.1834559664130211, | |
| "rewards/accuracy_multibox_reward": 0.14444444607943296, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3412.041748046875, | |
| "epoch": 0.3976006855184233, | |
| "grad_norm": 0.0900406539440155, | |
| "kl": 0.003536224365234375, | |
| "learning_rate": 7.788828328269524e-07, | |
| "loss": -0.0264, | |
| "reward": 0.2972222324460745, | |
| "reward_std": 0.19229139387607574, | |
| "rewards/accuracy_multibox_reward": 0.2972222324460745, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2703.77783203125, | |
| "epoch": 0.39931448157669236, | |
| "grad_norm": 0.09732761979103088, | |
| "kl": 0.003452301025390625, | |
| "learning_rate": 7.765558507362374e-07, | |
| "loss": -0.0032, | |
| "reward": 0.3500000201165676, | |
| "reward_std": 0.09246460674330592, | |
| "rewards/accuracy_multibox_reward": 0.3500000201165676, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3878.6666870117188, | |
| "epoch": 0.40102827763496146, | |
| "grad_norm": 0.06457243114709854, | |
| "kl": 0.00704193115234375, | |
| "learning_rate": 7.742207251445473e-07, | |
| "loss": 0.0294, | |
| "reward": 0.1944444440305233, | |
| "reward_std": 0.10633628442883492, | |
| "rewards/accuracy_multibox_reward": 0.1944444440305233, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3859.2361450195312, | |
| "epoch": 0.4027420736932305, | |
| "grad_norm": 0.05687883496284485, | |
| "kl": 0.003997802734375, | |
| "learning_rate": 7.718775399874654e-07, | |
| "loss": -0.002, | |
| "reward": 0.15277777798473835, | |
| "reward_std": 0.13978423923254013, | |
| "rewards/accuracy_multibox_reward": 0.15277777798473835, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4131.47216796875, | |
| "epoch": 0.40445586975149955, | |
| "grad_norm": 0.10156913101673126, | |
| "kl": 0.007080078125, | |
| "learning_rate": 7.69526379490275e-07, | |
| "loss": 0.0404, | |
| "reward": 0.31111110374331474, | |
| "reward_std": 0.23365548998117447, | |
| "rewards/accuracy_multibox_reward": 0.31111110374331474, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3488.9444580078125, | |
| "epoch": 0.40616966580976865, | |
| "grad_norm": 0.15492533147335052, | |
| "kl": 0.00403594970703125, | |
| "learning_rate": 7.671673281649303e-07, | |
| "loss": -0.0192, | |
| "reward": 0.2972222277894616, | |
| "reward_std": 0.04762897174805403, | |
| "rewards/accuracy_multibox_reward": 0.2972222277894616, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4189.84716796875, | |
| "epoch": 0.4078834618680377, | |
| "grad_norm": 0.03800947219133377, | |
| "kl": 0.0063323974609375, | |
| "learning_rate": 7.648004708070207e-07, | |
| "loss": 0.0086, | |
| "reward": 0.05833332985639572, | |
| "reward_std": 0.06390096247196198, | |
| "rewards/accuracy_multibox_reward": 0.05833332985639572, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3855.0278930664062, | |
| "epoch": 0.40959725792630675, | |
| "grad_norm": 0.08446434140205383, | |
| "kl": 0.006084442138671875, | |
| "learning_rate": 7.624258924927209e-07, | |
| "loss": 0.0498, | |
| "reward": 0.21111111715435982, | |
| "reward_std": 0.13456526026129723, | |
| "rewards/accuracy_multibox_reward": 0.21111111715435982, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3175.236083984375, | |
| "epoch": 0.41131105398457585, | |
| "grad_norm": 0.09515467286109924, | |
| "kl": 0.00904083251953125, | |
| "learning_rate": 7.600436785757339e-07, | |
| "loss": 0.0261, | |
| "reward": 0.21666667237877846, | |
| "reward_std": 0.0924646146595478, | |
| "rewards/accuracy_multibox_reward": 0.21666667237877846, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3237.763916015625, | |
| "epoch": 0.4130248500428449, | |
| "grad_norm": 0.12149989604949951, | |
| "kl": 0.006256103515625, | |
| "learning_rate": 7.57653914684223e-07, | |
| "loss": 0.0306, | |
| "reward": 0.17500000726431608, | |
| "reward_std": 0.20691687241196632, | |
| "rewards/accuracy_multibox_reward": 0.17500000726431608, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3935.40283203125, | |
| "epoch": 0.414738646101114, | |
| "grad_norm": 0.06861577928066254, | |
| "kl": 0.004978179931640625, | |
| "learning_rate": 7.552566867177336e-07, | |
| "loss": 0.0022, | |
| "reward": 0.0694444477558136, | |
| "reward_std": 0.14115842059254646, | |
| "rewards/accuracy_multibox_reward": 0.0694444477558136, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3688.65283203125, | |
| "epoch": 0.41645244215938304, | |
| "grad_norm": 0.08823354542255402, | |
| "kl": 0.0063724517822265625, | |
| "learning_rate": 7.528520808441057e-07, | |
| "loss": -0.023, | |
| "reward": 0.1472222162410617, | |
| "reward_std": 0.2328973263502121, | |
| "rewards/accuracy_multibox_reward": 0.1472222162410617, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3868.375, | |
| "epoch": 0.4181662382176521, | |
| "grad_norm": 0.0955139696598053, | |
| "kl": 0.0059051513671875, | |
| "learning_rate": 7.504401834963763e-07, | |
| "loss": 0.0209, | |
| "reward": 0.19722223468124866, | |
| "reward_std": 0.08584140241146088, | |
| "rewards/accuracy_multibox_reward": 0.19722223468124866, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2261.611114501953, | |
| "epoch": 0.4198800342759212, | |
| "grad_norm": 0.20315730571746826, | |
| "kl": 0.01070404052734375, | |
| "learning_rate": 7.480210813696732e-07, | |
| "loss": 0.0614, | |
| "reward": 0.5000000074505806, | |
| "reward_std": 0.21589511632919312, | |
| "rewards/accuracy_multibox_reward": 0.5000000074505806, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3365.4722900390625, | |
| "epoch": 0.42159383033419023, | |
| "grad_norm": 0.06878823041915894, | |
| "kl": 0.005733489990234375, | |
| "learning_rate": 7.455948614180983e-07, | |
| "loss": 0.0011, | |
| "reward": 0.1805555708706379, | |
| "reward_std": 0.12961778044700623, | |
| "rewards/accuracy_multibox_reward": 0.1805555708706379, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4023.3611450195312, | |
| "epoch": 0.4233076263924593, | |
| "grad_norm": 0.13501141965389252, | |
| "kl": 0.005615234375, | |
| "learning_rate": 7.431616108516021e-07, | |
| "loss": 0.0354, | |
| "reward": 0.25000000186264515, | |
| "reward_std": 0.22590594366192818, | |
| "rewards/accuracy_multibox_reward": 0.25000000186264515, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3460.7083129882812, | |
| "epoch": 0.4250214224507284, | |
| "grad_norm": 0.14876386523246765, | |
| "kl": 0.00508880615234375, | |
| "learning_rate": 7.407214171328491e-07, | |
| "loss": -0.0241, | |
| "reward": 0.16111110616475344, | |
| "reward_std": 0.13549776002764702, | |
| "rewards/accuracy_multibox_reward": 0.16111110616475344, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3835.7777709960938, | |
| "epoch": 0.4267352185089974, | |
| "grad_norm": 0.0493805892765522, | |
| "kl": 0.005680084228515625, | |
| "learning_rate": 7.382743679740741e-07, | |
| "loss": 0.0214, | |
| "reward": 0.02777777798473835, | |
| "reward_std": 0.04303314909338951, | |
| "rewards/accuracy_multibox_reward": 0.02777777798473835, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3717.6527709960938, | |
| "epoch": 0.4284490145672665, | |
| "grad_norm": 0.011747607961297035, | |
| "kl": 0.00537872314453125, | |
| "learning_rate": 7.358205513339286e-07, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3550.0694580078125, | |
| "epoch": 0.4301628106255356, | |
| "grad_norm": 0.0789899155497551, | |
| "kl": 0.005947113037109375, | |
| "learning_rate": 7.333600554143203e-07, | |
| "loss": 0.0074, | |
| "reward": 0.158333333209157, | |
| "reward_std": 0.1312829628586769, | |
| "rewards/accuracy_multibox_reward": 0.158333333209157, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3679.5000610351562, | |
| "epoch": 0.4318766066838046, | |
| "grad_norm": 0.08612791448831558, | |
| "kl": 0.00604248046875, | |
| "learning_rate": 7.308929686572423e-07, | |
| "loss": -0.0342, | |
| "reward": 0.23055556416511536, | |
| "reward_std": 0.15883970633149147, | |
| "rewards/accuracy_multibox_reward": 0.23055556416511536, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3925.75, | |
| "epoch": 0.43359040274207367, | |
| "grad_norm": 0.06517502665519714, | |
| "kl": 0.00677490234375, | |
| "learning_rate": 7.284193797415932e-07, | |
| "loss": 0.007, | |
| "reward": 0.12777778040617704, | |
| "reward_std": 0.12381335347890854, | |
| "rewards/accuracy_multibox_reward": 0.12777778040617704, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3874.2083740234375, | |
| "epoch": 0.43530419880034277, | |
| "grad_norm": 0.06618792563676834, | |
| "kl": 0.006839752197265625, | |
| "learning_rate": 7.25939377579991e-07, | |
| "loss": 0.0262, | |
| "reward": 0.13333334028720856, | |
| "reward_std": 0.09559708833694458, | |
| "rewards/accuracy_multibox_reward": 0.13333334028720856, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3202.4027709960938, | |
| "epoch": 0.4370179948586118, | |
| "grad_norm": 0.07630536705255508, | |
| "kl": 0.0047454833984375, | |
| "learning_rate": 7.234530513155761e-07, | |
| "loss": 0.0034, | |
| "reward": 0.013888888992369175, | |
| "reward_std": 0.03402068838477135, | |
| "rewards/accuracy_multibox_reward": 0.013888888992369175, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3459.5833740234375, | |
| "epoch": 0.4387317909168809, | |
| "grad_norm": 0.06247660145163536, | |
| "kl": 0.00519561767578125, | |
| "learning_rate": 7.209604903188073e-07, | |
| "loss": -0.0021, | |
| "reward": 0.13611110672354698, | |
| "reward_std": 0.16573532670736313, | |
| "rewards/accuracy_multibox_reward": 0.13611110672354698, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3457.9862060546875, | |
| "epoch": 0.44044558697514996, | |
| "grad_norm": 0.11025259643793106, | |
| "kl": 0.006378173828125, | |
| "learning_rate": 7.184617841842498e-07, | |
| "loss": 0.015, | |
| "reward": 0.24722222611308098, | |
| "reward_std": 0.23265774548053741, | |
| "rewards/accuracy_multibox_reward": 0.24722222611308098, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3773.4999389648438, | |
| "epoch": 0.442159383033419, | |
| "grad_norm": 0.06174551695585251, | |
| "kl": 0.00612640380859375, | |
| "learning_rate": 7.159570227273543e-07, | |
| "loss": 0.0017, | |
| "reward": 0.16111111640930176, | |
| "reward_std": 0.10420372057706118, | |
| "rewards/accuracy_multibox_reward": 0.16111111640930176, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3197.541778564453, | |
| "epoch": 0.4438731790916881, | |
| "grad_norm": 0.08307260274887085, | |
| "kl": 0.00504302978515625, | |
| "learning_rate": 7.134462959812286e-07, | |
| "loss": -0.0071, | |
| "reward": 0.28333335742354393, | |
| "reward_std": 0.10641204193234444, | |
| "rewards/accuracy_multibox_reward": 0.28333335742354393, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3729.4028930664062, | |
| "epoch": 0.44558697514995715, | |
| "grad_norm": 0.14313653111457825, | |
| "kl": 0.007175445556640625, | |
| "learning_rate": 7.10929694193402e-07, | |
| "loss": 0.0259, | |
| "reward": 0.25439815409481525, | |
| "reward_std": 0.21843025088310242, | |
| "rewards/accuracy_multibox_reward": 0.25439815409481525, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3181.263916015625, | |
| "epoch": 0.4473007712082262, | |
| "grad_norm": 0.11463939398527145, | |
| "kl": 0.0074615478515625, | |
| "learning_rate": 7.084073078225803e-07, | |
| "loss": -0.0039, | |
| "reward": 0.3500000089406967, | |
| "reward_std": 0.21163223311305046, | |
| "rewards/accuracy_multibox_reward": 0.3500000089406967, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3292.3334350585938, | |
| "epoch": 0.4490145672664953, | |
| "grad_norm": 0.02985597960650921, | |
| "kl": 0.004222869873046875, | |
| "learning_rate": 7.05879227535395e-07, | |
| "loss": 0.0054, | |
| "reward": 0.01944444514811039, | |
| "reward_std": 0.047628965228796005, | |
| "rewards/accuracy_multibox_reward": 0.01944444514811039, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3737.7083740234375, | |
| "epoch": 0.45072836332476435, | |
| "grad_norm": 0.1231893002986908, | |
| "kl": 0.0080413818359375, | |
| "learning_rate": 7.033455442031449e-07, | |
| "loss": -0.0532, | |
| "reward": 0.030555556528270245, | |
| "reward_std": 0.07484551891684532, | |
| "rewards/accuracy_multibox_reward": 0.030555556528270245, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3703.361083984375, | |
| "epoch": 0.4524421593830334, | |
| "grad_norm": 0.08054221421480179, | |
| "kl": 0.0058746337890625, | |
| "learning_rate": 7.008063488985282e-07, | |
| "loss": 0.0306, | |
| "reward": 0.10000000335276127, | |
| "reward_std": 0.13328943774104118, | |
| "rewards/accuracy_multibox_reward": 0.10000000335276127, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4014.888916015625, | |
| "epoch": 0.4541559554413025, | |
| "grad_norm": 0.04839732497930527, | |
| "kl": 0.00646209716796875, | |
| "learning_rate": 6.9826173289237e-07, | |
| "loss": 0.0117, | |
| "reward": 0.15000002086162567, | |
| "reward_std": 0.09246460348367691, | |
| "rewards/accuracy_multibox_reward": 0.15000002086162567, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4016.0416259765625, | |
| "epoch": 0.45586975149957154, | |
| "grad_norm": 0.08003103733062744, | |
| "kl": 0.005584716796875, | |
| "learning_rate": 6.957117876503413e-07, | |
| "loss": 0.0239, | |
| "reward": 0.1611111145466566, | |
| "reward_std": 0.18120911717414856, | |
| "rewards/accuracy_multibox_reward": 0.1611111145466566, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3641.986083984375, | |
| "epoch": 0.45758354755784064, | |
| "grad_norm": 0.10396629571914673, | |
| "kl": 0.007061004638671875, | |
| "learning_rate": 6.931566048296717e-07, | |
| "loss": 0.0165, | |
| "reward": 0.1750000137835741, | |
| "reward_std": 0.10460954159498215, | |
| "rewards/accuracy_multibox_reward": 0.1750000137835741, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3525.7916870117188, | |
| "epoch": 0.4592973436161097, | |
| "grad_norm": 0.06527090817689896, | |
| "kl": 0.00571441650390625, | |
| "learning_rate": 6.90596276275854e-07, | |
| "loss": 0.0071, | |
| "reward": 0.11944445222616196, | |
| "reward_std": 0.08879294991493225, | |
| "rewards/accuracy_multibox_reward": 0.11944445222616196, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3950.3889770507812, | |
| "epoch": 0.46101113967437873, | |
| "grad_norm": 0.011974954977631569, | |
| "kl": 0.0067138671875, | |
| "learning_rate": 6.880308940193435e-07, | |
| "loss": 0.0003, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.10000000149011612, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3298.5138549804688, | |
| "epoch": 0.46272493573264784, | |
| "grad_norm": 0.13799262046813965, | |
| "kl": 0.005023956298828125, | |
| "learning_rate": 6.854605502722496e-07, | |
| "loss": -0.0334, | |
| "reward": 0.3083333447575569, | |
| "reward_std": 0.16458682715892792, | |
| "rewards/accuracy_multibox_reward": 0.3083333447575569, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3470.4443969726562, | |
| "epoch": 0.4644387317909169, | |
| "grad_norm": 0.14329153299331665, | |
| "kl": 0.00666046142578125, | |
| "learning_rate": 6.828853374250211e-07, | |
| "loss": 0.0601, | |
| "reward": 0.19722224306315184, | |
| "reward_std": 0.12648530304431915, | |
| "rewards/accuracy_multibox_reward": 0.19722224306315184, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3206.5555419921875, | |
| "epoch": 0.4661525278491859, | |
| "grad_norm": 0.06516207754611969, | |
| "kl": 0.006229400634765625, | |
| "learning_rate": 6.803053480431267e-07, | |
| "loss": 0.0116, | |
| "reward": 0.2611111178994179, | |
| "reward_std": 0.08392801880836487, | |
| "rewards/accuracy_multibox_reward": 0.2611111178994179, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3501.90283203125, | |
| "epoch": 0.46786632390745503, | |
| "grad_norm": 0.06776957958936691, | |
| "kl": 0.005809783935546875, | |
| "learning_rate": 6.777206748637253e-07, | |
| "loss": -0.0022, | |
| "reward": 0.14722222462296486, | |
| "reward_std": 0.1556708700954914, | |
| "rewards/accuracy_multibox_reward": 0.14722222462296486, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4049.7501220703125, | |
| "epoch": 0.4695801199657241, | |
| "grad_norm": 0.06353005766868591, | |
| "kl": 0.00780487060546875, | |
| "learning_rate": 6.751314107923343e-07, | |
| "loss": 0.0095, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.14516924694180489, | |
| "rewards/accuracy_multibox_reward": 0.0833333358168602, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4024.2916870117188, | |
| "epoch": 0.4712939160239931, | |
| "grad_norm": 0.09348088502883911, | |
| "kl": 0.00728607177734375, | |
| "learning_rate": 6.725376488994902e-07, | |
| "loss": 0.0037, | |
| "reward": 0.21111111529171467, | |
| "reward_std": 0.18272089585661888, | |
| "rewards/accuracy_multibox_reward": 0.21111111529171467, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3852.75, | |
| "epoch": 0.4730077120822622, | |
| "grad_norm": 0.13266617059707642, | |
| "kl": 0.006317138671875, | |
| "learning_rate": 6.699394824174023e-07, | |
| "loss": 0.0811, | |
| "reward": 0.20833334513008595, | |
| "reward_std": 0.21034328266978264, | |
| "rewards/accuracy_multibox_reward": 0.20833334513008595, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4039.916748046875, | |
| "epoch": 0.47472150814053127, | |
| "grad_norm": 0.09649207442998886, | |
| "kl": 0.00707244873046875, | |
| "learning_rate": 6.673370047366016e-07, | |
| "loss": 0.0375, | |
| "reward": 0.20972222462296486, | |
| "reward_std": 0.14547907561063766, | |
| "rewards/accuracy_multibox_reward": 0.20972222462296486, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3533.1250610351562, | |
| "epoch": 0.47643530419880037, | |
| "grad_norm": 0.07691027969121933, | |
| "kl": 0.00530242919921875, | |
| "learning_rate": 6.647303094025848e-07, | |
| "loss": 0.0346, | |
| "reward": 0.1888888943940401, | |
| "reward_std": 0.2083984725177288, | |
| "rewards/accuracy_multibox_reward": 0.1888888943940401, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4049.7499389648438, | |
| "epoch": 0.4781491002570694, | |
| "grad_norm": 0.07552186399698257, | |
| "kl": 0.00714874267578125, | |
| "learning_rate": 6.621194901124511e-07, | |
| "loss": 0.0233, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.13328944519162178, | |
| "rewards/accuracy_multibox_reward": 0.1666666716337204, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3652.7084350585938, | |
| "epoch": 0.47986289631533846, | |
| "grad_norm": 0.16730470955371857, | |
| "kl": 0.007080078125, | |
| "learning_rate": 6.59504640711534e-07, | |
| "loss": 0.0546, | |
| "reward": 0.4277777820825577, | |
| "reward_std": 0.2172950990498066, | |
| "rewards/accuracy_multibox_reward": 0.4277777820825577, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3280.9166870117188, | |
| "epoch": 0.48157669237360756, | |
| "grad_norm": 0.12958385050296783, | |
| "kl": 0.006626129150390625, | |
| "learning_rate": 6.568858551900289e-07, | |
| "loss": -0.0125, | |
| "reward": 0.3611111156642437, | |
| "reward_std": 0.24132077395915985, | |
| "rewards/accuracy_multibox_reward": 0.3611111156642437, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3546.5416259765625, | |
| "epoch": 0.4832904884318766, | |
| "grad_norm": 0.07077536731958389, | |
| "kl": 0.006992340087890625, | |
| "learning_rate": 6.542632276796142e-07, | |
| "loss": 0.0189, | |
| "reward": 0.2111111218109727, | |
| "reward_std": 0.10886621475219727, | |
| "rewards/accuracy_multibox_reward": 0.2111111218109727, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3904.5, | |
| "epoch": 0.48500428449014565, | |
| "grad_norm": 0.029314473271369934, | |
| "kl": 0.0055694580078125, | |
| "learning_rate": 6.516368524500672e-07, | |
| "loss": 0.009, | |
| "reward": 0.05277778208255768, | |
| "reward_std": 0.05813458189368248, | |
| "rewards/accuracy_multibox_reward": 0.05277778208255768, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4136.513854980469, | |
| "epoch": 0.48671808054841476, | |
| "grad_norm": 0.08904825896024704, | |
| "kl": 0.00783538818359375, | |
| "learning_rate": 6.49006823905877e-07, | |
| "loss": 0.0269, | |
| "reward": 0.1777777848765254, | |
| "reward_std": 0.2760572284460068, | |
| "rewards/accuracy_multibox_reward": 0.1777777848765254, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3801.958251953125, | |
| "epoch": 0.4884318766066838, | |
| "grad_norm": 0.045517902821302414, | |
| "kl": 0.007007598876953125, | |
| "learning_rate": 6.463732365828497e-07, | |
| "loss": 0.0056, | |
| "reward": 0.1305555570870638, | |
| "reward_std": 0.07484552264213562, | |
| "rewards/accuracy_multibox_reward": 0.1305555570870638, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3589.138916015625, | |
| "epoch": 0.49014567266495285, | |
| "grad_norm": 0.10814561694860458, | |
| "kl": 0.0062713623046875, | |
| "learning_rate": 6.437361851447111e-07, | |
| "loss": 0.022, | |
| "reward": 0.1805555634200573, | |
| "reward_std": 0.17489975318312645, | |
| "rewards/accuracy_multibox_reward": 0.1805555634200573, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3951.013916015625, | |
| "epoch": 0.49185946872322195, | |
| "grad_norm": 0.11155269294977188, | |
| "kl": 0.00701141357421875, | |
| "learning_rate": 6.410957643797038e-07, | |
| "loss": 0.0047, | |
| "reward": 0.11388889327645302, | |
| "reward_std": 0.137300256639719, | |
| "rewards/accuracy_multibox_reward": 0.11388889327645302, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3735.5000610351562, | |
| "epoch": 0.493573264781491, | |
| "grad_norm": 0.08366963267326355, | |
| "kl": 0.00881195068359375, | |
| "learning_rate": 6.384520691971805e-07, | |
| "loss": 0.0251, | |
| "reward": 0.15000000223517418, | |
| "reward_std": 0.16499295085668564, | |
| "rewards/accuracy_multibox_reward": 0.15000000223517418, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3893.7083740234375, | |
| "epoch": 0.4952870608397601, | |
| "grad_norm": 0.12087099254131317, | |
| "kl": 0.00865936279296875, | |
| "learning_rate": 6.358051946241914e-07, | |
| "loss": -0.0129, | |
| "reward": 0.1305555570870638, | |
| "reward_std": 0.1156703531742096, | |
| "rewards/accuracy_multibox_reward": 0.1305555570870638, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3415.1388549804688, | |
| "epoch": 0.49700085689802914, | |
| "grad_norm": 0.039078667759895325, | |
| "kl": 0.008026123046875, | |
| "learning_rate": 6.331552358020698e-07, | |
| "loss": 0.0164, | |
| "reward": 0.21666668727993965, | |
| "reward_std": 0.08164966106414795, | |
| "rewards/accuracy_multibox_reward": 0.21666668727993965, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4068.263916015625, | |
| "epoch": 0.4987146529562982, | |
| "grad_norm": 0.09128863364458084, | |
| "kl": 0.008331298828125, | |
| "learning_rate": 6.305022879830115e-07, | |
| "loss": 0.0152, | |
| "reward": 0.11388889141380787, | |
| "reward_std": 0.18125756084918976, | |
| "rewards/accuracy_multibox_reward": 0.11388889141380787, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3434.8194580078125, | |
| "epoch": 0.5004284490145673, | |
| "grad_norm": 0.0994429886341095, | |
| "kl": 0.00992584228515625, | |
| "learning_rate": 6.278464465266511e-07, | |
| "loss": 0.0175, | |
| "reward": 0.24722222238779068, | |
| "reward_std": 0.17120948433876038, | |
| "rewards/accuracy_multibox_reward": 0.24722222238779068, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3136.8472290039062, | |
| "epoch": 0.5021422450728363, | |
| "grad_norm": 0.09907528758049011, | |
| "kl": 0.00666046142578125, | |
| "learning_rate": 6.251878068966345e-07, | |
| "loss": 0.0343, | |
| "reward": 0.20000000298023224, | |
| "reward_std": 0.1837409771978855, | |
| "rewards/accuracy_multibox_reward": 0.20000000298023224, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3546.52783203125, | |
| "epoch": 0.5038560411311054, | |
| "grad_norm": 0.1010170429944992, | |
| "kl": 0.00666046142578125, | |
| "learning_rate": 6.225264646571872e-07, | |
| "loss": 0.0222, | |
| "reward": 0.21666667610406876, | |
| "reward_std": 0.13630566373467445, | |
| "rewards/accuracy_multibox_reward": 0.21666667610406876, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3615.0277099609375, | |
| "epoch": 0.5055698371893744, | |
| "grad_norm": 0.08977912366390228, | |
| "kl": 0.0081329345703125, | |
| "learning_rate": 6.198625154696796e-07, | |
| "loss": 0.0457, | |
| "reward": 0.21111111715435982, | |
| "reward_std": 0.13456526026129723, | |
| "rewards/accuracy_multibox_reward": 0.21111111715435982, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3981.7222290039062, | |
| "epoch": 0.5072836332476436, | |
| "grad_norm": 0.09215124696493149, | |
| "kl": 0.00696563720703125, | |
| "learning_rate": 6.171960550891878e-07, | |
| "loss": 0.0303, | |
| "reward": 0.17777778394520283, | |
| "reward_std": 0.1392918899655342, | |
| "rewards/accuracy_multibox_reward": 0.17777778394520283, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3897.3611450195312, | |
| "epoch": 0.5089974293059126, | |
| "grad_norm": 0.05696404352784157, | |
| "kl": 0.00701141357421875, | |
| "learning_rate": 6.145271793610529e-07, | |
| "loss": 0.0006, | |
| "reward": 0.09444444812834263, | |
| "reward_std": 0.14053990691900253, | |
| "rewards/accuracy_multibox_reward": 0.09444444812834263, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3396.5, | |
| "epoch": 0.5107112253641817, | |
| "grad_norm": 0.07551192492246628, | |
| "kl": 0.00839996337890625, | |
| "learning_rate": 6.118559842174344e-07, | |
| "loss": 0.0345, | |
| "reward": 0.2138888956978917, | |
| "reward_std": 0.137300256639719, | |
| "rewards/accuracy_multibox_reward": 0.2138888956978917, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3695.7361450195312, | |
| "epoch": 0.5124250214224507, | |
| "grad_norm": 0.10014130920171738, | |
| "kl": 0.0122528076171875, | |
| "learning_rate": 6.091825656738635e-07, | |
| "loss": 0.0486, | |
| "reward": 0.31111113727092743, | |
| "reward_std": 0.14917607000097632, | |
| "rewards/accuracy_multibox_reward": 0.31111113727092743, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3904.27783203125, | |
| "epoch": 0.5141388174807198, | |
| "grad_norm": 0.1314992606639862, | |
| "kl": 0.0090484619140625, | |
| "learning_rate": 6.065070198257903e-07, | |
| "loss": 0.0655, | |
| "reward": 0.21111112087965012, | |
| "reward_std": 0.18830861151218414, | |
| "rewards/accuracy_multibox_reward": 0.21111112087965012, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4120.277893066406, | |
| "epoch": 0.5158526135389888, | |
| "grad_norm": 0.12695325911045074, | |
| "kl": 0.00933837890625, | |
| "learning_rate": 6.038294428451308e-07, | |
| "loss": 0.0421, | |
| "reward": 0.1888888906687498, | |
| "reward_std": 0.2878003902733326, | |
| "rewards/accuracy_multibox_reward": 0.1888888906687498, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4025.263916015625, | |
| "epoch": 0.517566409597258, | |
| "grad_norm": 0.07065106183290482, | |
| "kl": 0.00797271728515625, | |
| "learning_rate": 6.011499309768094e-07, | |
| "loss": 0.0268, | |
| "reward": 0.13333333656191826, | |
| "reward_std": 0.14723686128854752, | |
| "rewards/accuracy_multibox_reward": 0.13333333656191826, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3822.6666870117188, | |
| "epoch": 0.519280205655527, | |
| "grad_norm": 0.028606431558728218, | |
| "kl": 0.00812530517578125, | |
| "learning_rate": 5.984685805353001e-07, | |
| "loss": 0.0131, | |
| "reward": 0.03333333507180214, | |
| "reward_std": 0.05163978412747383, | |
| "rewards/accuracy_multibox_reward": 0.03333333507180214, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3596.3611450195312, | |
| "epoch": 0.5209940017137961, | |
| "grad_norm": 0.06768631190061569, | |
| "kl": 0.0095977783203125, | |
| "learning_rate": 5.95785487901163e-07, | |
| "loss": -0.0007, | |
| "reward": 0.12777778133749962, | |
| "reward_std": 0.09025629330426455, | |
| "rewards/accuracy_multibox_reward": 0.12777778133749962, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3732.5277709960938, | |
| "epoch": 0.5227077977720651, | |
| "grad_norm": 0.08157099783420563, | |
| "kl": 0.00991058349609375, | |
| "learning_rate": 5.931007495175823e-07, | |
| "loss": 0.0364, | |
| "reward": 0.0972222276031971, | |
| "reward_std": 0.10154710337519646, | |
| "rewards/accuracy_multibox_reward": 0.0972222276031971, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3670.4583740234375, | |
| "epoch": 0.5244215938303342, | |
| "grad_norm": 0.1322575956583023, | |
| "kl": 0.0094451904296875, | |
| "learning_rate": 5.904144618868971e-07, | |
| "loss": 0.043, | |
| "reward": 0.2000000113621354, | |
| "reward_std": 0.1632993184030056, | |
| "rewards/accuracy_multibox_reward": 0.2000000113621354, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3508.65283203125, | |
| "epoch": 0.5261353898886033, | |
| "grad_norm": 0.16425685584545135, | |
| "kl": 0.00931549072265625, | |
| "learning_rate": 5.877267215671345e-07, | |
| "loss": -0.0353, | |
| "reward": 0.16666667349636555, | |
| "reward_std": 0.20441708713769913, | |
| "rewards/accuracy_multibox_reward": 0.16666667349636555, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3782.2222900390625, | |
| "epoch": 0.5278491859468724, | |
| "grad_norm": 0.06635606288909912, | |
| "kl": 0.0097808837890625, | |
| "learning_rate": 5.85037625168538e-07, | |
| "loss": 0.0118, | |
| "reward": 0.213888892903924, | |
| "reward_std": 0.09445624379441142, | |
| "rewards/accuracy_multibox_reward": 0.213888892903924, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3412.2083740234375, | |
| "epoch": 0.5295629820051414, | |
| "grad_norm": 0.10666731745004654, | |
| "kl": 0.00982666015625, | |
| "learning_rate": 5.823472693500952e-07, | |
| "loss": -0.0308, | |
| "reward": 0.1944444440305233, | |
| "reward_std": 0.10408747335895896, | |
| "rewards/accuracy_multibox_reward": 0.1944444440305233, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3972.4445190429688, | |
| "epoch": 0.5312767780634104, | |
| "grad_norm": 0.06598849594593048, | |
| "kl": 0.00959014892578125, | |
| "learning_rate": 5.796557508160631e-07, | |
| "loss": 0.0116, | |
| "reward": 0.09444444626569748, | |
| "reward_std": 0.14689771458506584, | |
| "rewards/accuracy_multibox_reward": 0.09444444626569748, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4023.8472290039062, | |
| "epoch": 0.5329905741216795, | |
| "grad_norm": 0.06214049085974693, | |
| "kl": 0.0081787109375, | |
| "learning_rate": 5.769631663124922e-07, | |
| "loss": 0.0133, | |
| "reward": 0.0972222238779068, | |
| "reward_std": 0.14322605356574059, | |
| "rewards/accuracy_multibox_reward": 0.0972222238779068, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3895.0695190429688, | |
| "epoch": 0.5347043701799485, | |
| "grad_norm": 0.1492040604352951, | |
| "kl": 0.00836944580078125, | |
| "learning_rate": 5.742696126237495e-07, | |
| "loss": 0.0893, | |
| "reward": 0.4000000171363354, | |
| "reward_std": 0.3115817941725254, | |
| "rewards/accuracy_multibox_reward": 0.4000000171363354, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3045.4444580078125, | |
| "epoch": 0.5364181662382177, | |
| "grad_norm": 0.11095013469457626, | |
| "kl": 0.0103302001953125, | |
| "learning_rate": 5.715751865690387e-07, | |
| "loss": 0.0584, | |
| "reward": 0.3305555433034897, | |
| "reward_std": 0.18827469646930695, | |
| "rewards/accuracy_multibox_reward": 0.3305555433034897, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3677.9583740234375, | |
| "epoch": 0.5381319622964867, | |
| "grad_norm": 0.14290910959243774, | |
| "kl": 0.0107574462890625, | |
| "learning_rate": 5.688799849989206e-07, | |
| "loss": 0.0556, | |
| "reward": 0.33888889104127884, | |
| "reward_std": 0.18434159085154533, | |
| "rewards/accuracy_multibox_reward": 0.33888889104127884, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3848.8611450195312, | |
| "epoch": 0.5398457583547558, | |
| "grad_norm": 0.1208048015832901, | |
| "kl": 0.01318359375, | |
| "learning_rate": 5.661841047918316e-07, | |
| "loss": 0.0492, | |
| "reward": 0.1333333384245634, | |
| "reward_std": 0.19119417667388916, | |
| "rewards/accuracy_multibox_reward": 0.1333333384245634, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3350.3472900390625, | |
| "epoch": 0.5415595544130248, | |
| "grad_norm": 0.08760150521993637, | |
| "kl": 0.0077056884765625, | |
| "learning_rate": 5.634876428506019e-07, | |
| "loss": -0.0033, | |
| "reward": 0.17499998956918716, | |
| "reward_std": 0.1314869299530983, | |
| "rewards/accuracy_multibox_reward": 0.17499998956918716, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4048.875, | |
| "epoch": 0.5432733504712939, | |
| "grad_norm": 0.06247352063655853, | |
| "kl": 0.0103607177734375, | |
| "learning_rate": 5.607906960989718e-07, | |
| "loss": 0.0153, | |
| "reward": 0.16111111640930176, | |
| "reward_std": 0.12568361684679985, | |
| "rewards/accuracy_multibox_reward": 0.16111111640930176, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3126.6111450195312, | |
| "epoch": 0.5449871465295629, | |
| "grad_norm": 0.1342483013868332, | |
| "kl": 0.0086517333984375, | |
| "learning_rate": 5.58093361478108e-07, | |
| "loss": -0.0178, | |
| "reward": 0.3374745845794678, | |
| "reward_std": 0.1333315335214138, | |
| "rewards/accuracy_multibox_reward": 0.3374745845794678, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3299.5972290039062, | |
| "epoch": 0.5467009425878321, | |
| "grad_norm": 0.0902925431728363, | |
| "kl": 0.00801849365234375, | |
| "learning_rate": 5.553957359431194e-07, | |
| "loss": 0.0073, | |
| "reward": 0.1916666803881526, | |
| "reward_std": 0.1809184066951275, | |
| "rewards/accuracy_multibox_reward": 0.1916666803881526, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3396.2083129882812, | |
| "epoch": 0.5484147386461011, | |
| "grad_norm": 0.11534847319126129, | |
| "kl": 0.00858306884765625, | |
| "learning_rate": 5.526979164595709e-07, | |
| "loss": 0.0166, | |
| "reward": 0.20555554889142513, | |
| "reward_std": 0.21903013065457344, | |
| "rewards/accuracy_multibox_reward": 0.20555554889142513, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3899.25, | |
| "epoch": 0.5501285347043702, | |
| "grad_norm": 0.08833047747612, | |
| "kl": 0.0130767822265625, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0427, | |
| "reward": 0.0879629673436284, | |
| "reward_std": 0.10380484536290169, | |
| "rewards/accuracy_multibox_reward": 0.0879629673436284, | |
| "step": 321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3127.9166870117188, | |
| "epoch": 0.5518423307626392, | |
| "grad_norm": 0.1444021463394165, | |
| "kl": 0.00853729248046875, | |
| "learning_rate": 5.47302083540429e-07, | |
| "loss": 0.0037, | |
| "reward": 0.31666666828095913, | |
| "reward_std": 0.22254914790391922, | |
| "rewards/accuracy_multibox_reward": 0.31666666828095913, | |
| "step": 322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3870.0416259765625, | |
| "epoch": 0.5535561268209083, | |
| "grad_norm": 0.03946667164564133, | |
| "kl": 0.00936126708984375, | |
| "learning_rate": 5.446042640568808e-07, | |
| "loss": 0.008, | |
| "reward": 0.08333333767950535, | |
| "reward_std": 0.0924646146595478, | |
| "rewards/accuracy_multibox_reward": 0.08333333767950535, | |
| "step": 323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3470.5972290039062, | |
| "epoch": 0.5552699228791774, | |
| "grad_norm": 0.11320355534553528, | |
| "kl": 0.0094757080078125, | |
| "learning_rate": 5.419066385218919e-07, | |
| "loss": 0.0348, | |
| "reward": 0.30277777649462223, | |
| "reward_std": 0.24434533715248108, | |
| "rewards/accuracy_multibox_reward": 0.30277777649462223, | |
| "step": 324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3547.27783203125, | |
| "epoch": 0.5569837189374465, | |
| "grad_norm": 0.08855713158845901, | |
| "kl": 0.00734710693359375, | |
| "learning_rate": 5.392093039010283e-07, | |
| "loss": 0.0063, | |
| "reward": 0.23888889700174332, | |
| "reward_std": 0.18713753670454025, | |
| "rewards/accuracy_multibox_reward": 0.23888889700174332, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3295.2918090820312, | |
| "epoch": 0.5586975149957155, | |
| "grad_norm": 0.11228316277265549, | |
| "kl": 0.01061248779296875, | |
| "learning_rate": 5.365123571493981e-07, | |
| "loss": -0.0331, | |
| "reward": 0.25277779437601566, | |
| "reward_std": 0.11721683200448751, | |
| "rewards/accuracy_multibox_reward": 0.25277779437601566, | |
| "step": 326 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3727.6251220703125, | |
| "epoch": 0.5604113110539846, | |
| "grad_norm": 0.07023253291845322, | |
| "kl": 0.00946807861328125, | |
| "learning_rate": 5.338158952081685e-07, | |
| "loss": -0.0226, | |
| "reward": 0.14166667684912682, | |
| "reward_std": 0.06390097085386515, | |
| "rewards/accuracy_multibox_reward": 0.14166667684912682, | |
| "step": 327 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3405.9444274902344, | |
| "epoch": 0.5621251071122536, | |
| "grad_norm": 0.3554980456829071, | |
| "kl": 0.0128631591796875, | |
| "learning_rate": 5.311200150010795e-07, | |
| "loss": -0.0157, | |
| "reward": 0.1611111182719469, | |
| "reward_std": 0.1310363239608705, | |
| "rewards/accuracy_multibox_reward": 0.1611111182719469, | |
| "step": 328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3561.236083984375, | |
| "epoch": 0.5638389031705227, | |
| "grad_norm": 0.0955643355846405, | |
| "kl": 0.01035308837890625, | |
| "learning_rate": 5.284248134309614e-07, | |
| "loss": -0.0211, | |
| "reward": 0.33888890594244003, | |
| "reward_std": 0.18947673309594393, | |
| "rewards/accuracy_multibox_reward": 0.33888890594244003, | |
| "step": 329 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3539.02783203125, | |
| "epoch": 0.5655526992287918, | |
| "grad_norm": 0.05937019735574722, | |
| "kl": 0.01039886474609375, | |
| "learning_rate": 5.257303873762506e-07, | |
| "loss": 0.0092, | |
| "reward": 0.04722222313284874, | |
| "reward_std": 0.08566047251224518, | |
| "rewards/accuracy_multibox_reward": 0.04722222313284874, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4065.3194580078125, | |
| "epoch": 0.5672664952870609, | |
| "grad_norm": 0.1071372851729393, | |
| "kl": 0.0146331787109375, | |
| "learning_rate": 5.230368336875077e-07, | |
| "loss": 0.0448, | |
| "reward": 0.20555556379258633, | |
| "reward_std": 0.17982063069939613, | |
| "rewards/accuracy_multibox_reward": 0.20555556379258633, | |
| "step": 331 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3322.0833740234375, | |
| "epoch": 0.5689802913453299, | |
| "grad_norm": 0.10832535475492477, | |
| "kl": 0.01290130615234375, | |
| "learning_rate": 5.20344249183937e-07, | |
| "loss": 0.0474, | |
| "reward": 0.27499999199062586, | |
| "reward_std": 0.1215914860367775, | |
| "rewards/accuracy_multibox_reward": 0.27499999199062586, | |
| "step": 332 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3679.4861450195312, | |
| "epoch": 0.570694087403599, | |
| "grad_norm": 0.08198282122612, | |
| "kl": 0.00943756103515625, | |
| "learning_rate": 5.176527306499048e-07, | |
| "loss": 0.0164, | |
| "reward": 0.24166667833924294, | |
| "reward_std": 0.27152637019753456, | |
| "rewards/accuracy_multibox_reward": 0.24166667833924294, | |
| "step": 333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3983.5695190429688, | |
| "epoch": 0.572407883461868, | |
| "grad_norm": 0.11519487202167511, | |
| "kl": 0.01200103759765625, | |
| "learning_rate": 5.14962374831462e-07, | |
| "loss": 0.0196, | |
| "reward": 0.16388889402151108, | |
| "reward_std": 0.18091841042041779, | |
| "rewards/accuracy_multibox_reward": 0.16388889402151108, | |
| "step": 334 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3554.916748046875, | |
| "epoch": 0.5741216795201372, | |
| "grad_norm": 0.2749873995780945, | |
| "kl": 0.0094757080078125, | |
| "learning_rate": 5.122732784328656e-07, | |
| "loss": 0.0304, | |
| "reward": 0.34166666865348816, | |
| "reward_std": 0.2517244531773031, | |
| "rewards/accuracy_multibox_reward": 0.34166666865348816, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3377.666748046875, | |
| "epoch": 0.5758354755784062, | |
| "grad_norm": 0.10420504212379456, | |
| "kl": 0.01032257080078125, | |
| "learning_rate": 5.09585538113103e-07, | |
| "loss": -0.0197, | |
| "reward": 0.0381944440305233, | |
| "reward_std": 0.042525868862867355, | |
| "rewards/accuracy_multibox_reward": 0.0381944440305233, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3468.375, | |
| "epoch": 0.5775492716366752, | |
| "grad_norm": 0.11138604581356049, | |
| "kl": 0.01139068603515625, | |
| "learning_rate": 5.068992504824178e-07, | |
| "loss": 0.0118, | |
| "reward": 0.22777778841555119, | |
| "reward_std": 0.1753900907933712, | |
| "rewards/accuracy_multibox_reward": 0.22777778841555119, | |
| "step": 337 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3558.4583740234375, | |
| "epoch": 0.5792630676949443, | |
| "grad_norm": 0.13349705934524536, | |
| "kl": 0.00995635986328125, | |
| "learning_rate": 5.042145120988369e-07, | |
| "loss": 0.042, | |
| "reward": 0.3944444749504328, | |
| "reward_std": 0.2846442982554436, | |
| "rewards/accuracy_multibox_reward": 0.3944444749504328, | |
| "step": 338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3860.875, | |
| "epoch": 0.5809768637532133, | |
| "grad_norm": 0.040316056460142136, | |
| "kl": 0.0123291015625, | |
| "learning_rate": 5.015314194647e-07, | |
| "loss": 0.01, | |
| "reward": 0.01666666753590107, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.01666666753590107, | |
| "step": 339 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4038.40283203125, | |
| "epoch": 0.5826906598114824, | |
| "grad_norm": 0.09849846363067627, | |
| "kl": 0.01251220703125, | |
| "learning_rate": 4.988500690231905e-07, | |
| "loss": 0.0135, | |
| "reward": 0.20833333022892475, | |
| "reward_std": 0.30693812295794487, | |
| "rewards/accuracy_multibox_reward": 0.20833333022892475, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3395.1250610351562, | |
| "epoch": 0.5844044558697515, | |
| "grad_norm": 0.04051634296774864, | |
| "kl": 0.0121307373046875, | |
| "learning_rate": 4.961705571548692e-07, | |
| "loss": 0.0112, | |
| "reward": 0.013888888992369175, | |
| "reward_std": 0.034020692110061646, | |
| "rewards/accuracy_multibox_reward": 0.013888888992369175, | |
| "step": 341 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3765.2638549804688, | |
| "epoch": 0.5861182519280206, | |
| "grad_norm": 0.10633303225040436, | |
| "kl": 0.0087738037109375, | |
| "learning_rate": 4.934929801742095e-07, | |
| "loss": 0.0658, | |
| "reward": 0.2972222352400422, | |
| "reward_std": 0.19192392751574516, | |
| "rewards/accuracy_multibox_reward": 0.2972222352400422, | |
| "step": 342 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2874.8194580078125, | |
| "epoch": 0.5878320479862896, | |
| "grad_norm": 0.18583592772483826, | |
| "kl": 0.00754547119140625, | |
| "learning_rate": 4.908174343261365e-07, | |
| "loss": -0.0413, | |
| "reward": 0.4583333283662796, | |
| "reward_std": 0.20319286175072193, | |
| "rewards/accuracy_multibox_reward": 0.4583333283662796, | |
| "step": 343 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3640.041748046875, | |
| "epoch": 0.5895458440445587, | |
| "grad_norm": 0.10591695457696915, | |
| "kl": 0.01131439208984375, | |
| "learning_rate": 4.881440157825656e-07, | |
| "loss": 0.0284, | |
| "reward": 0.17222222592681646, | |
| "reward_std": 0.1632993146777153, | |
| "rewards/accuracy_multibox_reward": 0.17222222592681646, | |
| "step": 344 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3592.7083740234375, | |
| "epoch": 0.5912596401028277, | |
| "grad_norm": 0.055938608944416046, | |
| "kl": 0.01094818115234375, | |
| "learning_rate": 4.854728206389473e-07, | |
| "loss": 0.0184, | |
| "reward": 0.3750000074505806, | |
| "reward_std": 0.0907321460545063, | |
| "rewards/accuracy_multibox_reward": 0.3750000074505806, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3740.6527709960938, | |
| "epoch": 0.5929734361610969, | |
| "grad_norm": 0.08842834830284119, | |
| "kl": 0.012176513671875, | |
| "learning_rate": 4.828039449108121e-07, | |
| "loss": 0.0065, | |
| "reward": 0.18611112236976624, | |
| "reward_std": 0.1546795777976513, | |
| "rewards/accuracy_multibox_reward": 0.18611112236976624, | |
| "step": 346 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3148.8472900390625, | |
| "epoch": 0.5946872322193659, | |
| "grad_norm": 0.08482852578163147, | |
| "kl": 0.00974273681640625, | |
| "learning_rate": 4.801374845303204e-07, | |
| "loss": -0.012, | |
| "reward": 0.2047288417816162, | |
| "reward_std": 0.021652573719620705, | |
| "rewards/accuracy_multibox_reward": 0.2047288417816162, | |
| "step": 347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3254.4305725097656, | |
| "epoch": 0.596401028277635, | |
| "grad_norm": 0.06464767456054688, | |
| "kl": 0.00865936279296875, | |
| "learning_rate": 4.774735353428128e-07, | |
| "loss": -0.0033, | |
| "reward": 0.06944444589316845, | |
| "reward_std": 0.08867669850587845, | |
| "rewards/accuracy_multibox_reward": 0.06944444589316845, | |
| "step": 348 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3811.8472290039062, | |
| "epoch": 0.598114824335904, | |
| "grad_norm": 0.08608708530664444, | |
| "kl": 0.0101776123046875, | |
| "learning_rate": 4.748121931033655e-07, | |
| "loss": -0.0072, | |
| "reward": 0.30555556807667017, | |
| "reward_std": 0.14924753084778786, | |
| "rewards/accuracy_multibox_reward": 0.30555556807667017, | |
| "step": 349 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3856.8611450195312, | |
| "epoch": 0.5998286203941731, | |
| "grad_norm": 0.09154751896858215, | |
| "kl": 0.0105133056640625, | |
| "learning_rate": 4.7215355347334893e-07, | |
| "loss": 0.0288, | |
| "reward": 0.3166666813194752, | |
| "reward_std": 0.1980317458510399, | |
| "rewards/accuracy_multibox_reward": 0.3166666813194752, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3805.6251220703125, | |
| "epoch": 0.6015424164524421, | |
| "grad_norm": 0.09605634212493896, | |
| "kl": 0.01256561279296875, | |
| "learning_rate": 4.694977120169885e-07, | |
| "loss": -0.0121, | |
| "reward": 0.3000000026077032, | |
| "reward_std": 0.17860494181513786, | |
| "rewards/accuracy_multibox_reward": 0.3000000026077032, | |
| "step": 351 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4176.0972900390625, | |
| "epoch": 0.6032562125107113, | |
| "grad_norm": 0.053906649351119995, | |
| "kl": 0.01190185546875, | |
| "learning_rate": 4.668447641979303e-07, | |
| "loss": 0.0115, | |
| "reward": 0.08611111342906952, | |
| "reward_std": 0.14322605356574059, | |
| "rewards/accuracy_multibox_reward": 0.08611111342906952, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3887.7916259765625, | |
| "epoch": 0.6049700085689803, | |
| "grad_norm": 0.11869771033525467, | |
| "kl": 0.0137786865234375, | |
| "learning_rate": 4.641948053758088e-07, | |
| "loss": 0.0232, | |
| "reward": 0.1944444589316845, | |
| "reward_std": 0.14281827211380005, | |
| "rewards/accuracy_multibox_reward": 0.1944444589316845, | |
| "step": 353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2876.3472595214844, | |
| "epoch": 0.6066838046272494, | |
| "grad_norm": 0.12787601351737976, | |
| "kl": 0.00714874267578125, | |
| "learning_rate": 4.6154793080281974e-07, | |
| "loss": 0.0003, | |
| "reward": 0.338888899423182, | |
| "reward_std": 0.1057635392062366, | |
| "rewards/accuracy_multibox_reward": 0.338888899423182, | |
| "step": 354 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3474.9584350585938, | |
| "epoch": 0.6083976006855184, | |
| "grad_norm": 0.11639636754989624, | |
| "kl": 0.0111083984375, | |
| "learning_rate": 4.58904235620296e-07, | |
| "loss": 0.023, | |
| "reward": 0.23055556043982506, | |
| "reward_std": 0.13659637421369553, | |
| "rewards/accuracy_multibox_reward": 0.23055556043982506, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3345.2361450195312, | |
| "epoch": 0.6101113967437874, | |
| "grad_norm": 0.11142376065254211, | |
| "kl": 0.01397705078125, | |
| "learning_rate": 4.562638148552889e-07, | |
| "loss": -0.0061, | |
| "reward": 0.21944444254040718, | |
| "reward_std": 0.15649519115686417, | |
| "rewards/accuracy_multibox_reward": 0.21944444254040718, | |
| "step": 356 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3514.5416870117188, | |
| "epoch": 0.6118251928020566, | |
| "grad_norm": 0.06658034771680832, | |
| "kl": 0.01039886474609375, | |
| "learning_rate": 4.5362676341715036e-07, | |
| "loss": -0.0134, | |
| "reward": 0.2611111141741276, | |
| "reward_std": 0.09971508383750916, | |
| "rewards/accuracy_multibox_reward": 0.2611111141741276, | |
| "step": 357 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3867.3890380859375, | |
| "epoch": 0.6135389888603257, | |
| "grad_norm": 0.05978209525346756, | |
| "kl": 0.0114898681640625, | |
| "learning_rate": 4.50993176094123e-07, | |
| "loss": 0.0129, | |
| "reward": 0.21388889476656914, | |
| "reward_std": 0.14457029476761818, | |
| "rewards/accuracy_multibox_reward": 0.21388889476656914, | |
| "step": 358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3519.8195190429688, | |
| "epoch": 0.6152527849185947, | |
| "grad_norm": 0.1380893588066101, | |
| "kl": 0.0118560791015625, | |
| "learning_rate": 4.4836314754993287e-07, | |
| "loss": 0.0116, | |
| "reward": 0.3305555619299412, | |
| "reward_std": 0.27433473989367485, | |
| "rewards/accuracy_multibox_reward": 0.3305555619299412, | |
| "step": 359 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3374.083251953125, | |
| "epoch": 0.6169665809768637, | |
| "grad_norm": 0.10526759177446365, | |
| "kl": 0.011138916015625, | |
| "learning_rate": 4.457367723203859e-07, | |
| "loss": -0.0616, | |
| "reward": 0.33333335164934397, | |
| "reward_std": 0.13915619999170303, | |
| "rewards/accuracy_multibox_reward": 0.33333335164934397, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2938.125, | |
| "epoch": 0.6186803770351328, | |
| "grad_norm": 0.11278838664293289, | |
| "kl": 0.0094757080078125, | |
| "learning_rate": 4.431141448099711e-07, | |
| "loss": -0.0024, | |
| "reward": 0.31388890743255615, | |
| "reward_std": 0.2480742409825325, | |
| "rewards/accuracy_multibox_reward": 0.31388890743255615, | |
| "step": 361 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4091.0556030273438, | |
| "epoch": 0.6203941730934018, | |
| "grad_norm": 0.07235784083604813, | |
| "kl": 0.01161956787109375, | |
| "learning_rate": 4.40495359288466e-07, | |
| "loss": -0.0058, | |
| "reward": 0.06388889160007238, | |
| "reward_std": 0.1275501511991024, | |
| "rewards/accuracy_multibox_reward": 0.06388889160007238, | |
| "step": 362 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3934.3888549804688, | |
| "epoch": 0.622107969151671, | |
| "grad_norm": 0.074801005423069, | |
| "kl": 0.0130767822265625, | |
| "learning_rate": 4.3788050988754896e-07, | |
| "loss": -0.0073, | |
| "reward": 0.14841271564364433, | |
| "reward_std": 0.14048392698168755, | |
| "rewards/accuracy_multibox_reward": 0.14841271564364433, | |
| "step": 363 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3767.0000610351562, | |
| "epoch": 0.62382176520994, | |
| "grad_norm": 0.08381088823080063, | |
| "kl": 0.0135498046875, | |
| "learning_rate": 4.3526969059741503e-07, | |
| "loss": -0.0153, | |
| "reward": 0.1333333346992731, | |
| "reward_std": 0.15545956417918205, | |
| "rewards/accuracy_multibox_reward": 0.1333333346992731, | |
| "step": 364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3945.34716796875, | |
| "epoch": 0.6255355612682091, | |
| "grad_norm": 0.11830548197031021, | |
| "kl": 0.013153076171875, | |
| "learning_rate": 4.326629952633983e-07, | |
| "loss": 0.0288, | |
| "reward": 0.1555555583909154, | |
| "reward_std": 0.22664671763777733, | |
| "rewards/accuracy_multibox_reward": 0.1555555583909154, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3709.5416259765625, | |
| "epoch": 0.6272493573264781, | |
| "grad_norm": 0.07871238142251968, | |
| "kl": 0.01262664794921875, | |
| "learning_rate": 4.300605175825978e-07, | |
| "loss": 0.0004, | |
| "reward": 0.22777778282761574, | |
| "reward_std": 0.1472368687391281, | |
| "rewards/accuracy_multibox_reward": 0.22777778282761574, | |
| "step": 366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3582.9722290039062, | |
| "epoch": 0.6289631533847472, | |
| "grad_norm": 0.17362013459205627, | |
| "kl": 0.0122833251953125, | |
| "learning_rate": 4.2746235110050977e-07, | |
| "loss": 0.0775, | |
| "reward": 0.4277777932584286, | |
| "reward_std": 0.2401289134286344, | |
| "rewards/accuracy_multibox_reward": 0.4277777932584286, | |
| "step": 367 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3911.4861450195312, | |
| "epoch": 0.6306769494430163, | |
| "grad_norm": 0.12609483301639557, | |
| "kl": 0.0142822265625, | |
| "learning_rate": 4.248685892076657e-07, | |
| "loss": 0.0581, | |
| "reward": 0.19166667200624943, | |
| "reward_std": 0.1862592101097107, | |
| "rewards/accuracy_multibox_reward": 0.19166667200624943, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3353.6111450195312, | |
| "epoch": 0.6323907455012854, | |
| "grad_norm": 0.06986302882432938, | |
| "kl": 0.01137542724609375, | |
| "learning_rate": 4.222793251362746e-07, | |
| "loss": 0.0141, | |
| "reward": 0.14722222834825516, | |
| "reward_std": 0.14237192273139954, | |
| "rewards/accuracy_multibox_reward": 0.14722222834825516, | |
| "step": 369 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2971.888946533203, | |
| "epoch": 0.6341045415595544, | |
| "grad_norm": 0.06477347016334534, | |
| "kl": 0.00666046142578125, | |
| "learning_rate": 4.1969465195687327e-07, | |
| "loss": 0.0094, | |
| "reward": 0.3166666813194752, | |
| "reward_std": 0.05889025330543518, | |
| "rewards/accuracy_multibox_reward": 0.3166666813194752, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3211.0834350585938, | |
| "epoch": 0.6358183376178235, | |
| "grad_norm": 0.13010753691196442, | |
| "kl": 0.01032257080078125, | |
| "learning_rate": 4.1711466257497875e-07, | |
| "loss": -0.0342, | |
| "reward": 0.4416666775941849, | |
| "reward_std": 0.1435379944741726, | |
| "rewards/accuracy_multibox_reward": 0.4416666775941849, | |
| "step": 371 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4156.680603027344, | |
| "epoch": 0.6375321336760925, | |
| "grad_norm": 0.03561083599925041, | |
| "kl": 0.0122833251953125, | |
| "learning_rate": 4.1453944972775056e-07, | |
| "loss": 0.0068, | |
| "reward": 0.03333333507180214, | |
| "reward_std": 0.05163978412747383, | |
| "rewards/accuracy_multibox_reward": 0.03333333507180214, | |
| "step": 372 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3675.6527709960938, | |
| "epoch": 0.6392459297343616, | |
| "grad_norm": 0.02410358004271984, | |
| "kl": 0.012481689453125, | |
| "learning_rate": 4.119691059806565e-07, | |
| "loss": 0.0005, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 373 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3423.4861450195312, | |
| "epoch": 0.6409597257926307, | |
| "grad_norm": 0.06537967920303345, | |
| "kl": 0.00933074951171875, | |
| "learning_rate": 4.094037237241459e-07, | |
| "loss": -0.0152, | |
| "reward": 0.18247355858329684, | |
| "reward_std": 0.0937061351723969, | |
| "rewards/accuracy_multibox_reward": 0.18247355858329684, | |
| "step": 374 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3815.0972290039062, | |
| "epoch": 0.6426735218508998, | |
| "grad_norm": 0.09233283996582031, | |
| "kl": 0.0138397216796875, | |
| "learning_rate": 4.068433951703283e-07, | |
| "loss": 0.0255, | |
| "reward": 0.1750000026077032, | |
| "reward_std": 0.28978461399674416, | |
| "rewards/accuracy_multibox_reward": 0.1750000026077032, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3396.4305419921875, | |
| "epoch": 0.6443873179091688, | |
| "grad_norm": 0.06617026031017303, | |
| "kl": 0.0125274658203125, | |
| "learning_rate": 4.042882123496587e-07, | |
| "loss": 0.0051, | |
| "reward": 0.180555559694767, | |
| "reward_std": 0.10372589528560638, | |
| "rewards/accuracy_multibox_reward": 0.180555559694767, | |
| "step": 376 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3909.3333129882812, | |
| "epoch": 0.6461011139674379, | |
| "grad_norm": 0.11038164049386978, | |
| "kl": 0.01226806640625, | |
| "learning_rate": 4.0173826710763016e-07, | |
| "loss": -0.0034, | |
| "reward": 0.16111111640930176, | |
| "reward_std": 0.14374903962016106, | |
| "rewards/accuracy_multibox_reward": 0.16111111640930176, | |
| "step": 377 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3829.763916015625, | |
| "epoch": 0.6478149100257069, | |
| "grad_norm": 0.149893119931221, | |
| "kl": 0.0135955810546875, | |
| "learning_rate": 3.991936511014717e-07, | |
| "loss": 0.0396, | |
| "reward": 0.29722224175930023, | |
| "reward_std": 0.24497820809483528, | |
| "rewards/accuracy_multibox_reward": 0.29722224175930023, | |
| "step": 378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3086.0833740234375, | |
| "epoch": 0.6495287060839761, | |
| "grad_norm": 0.11539468914270401, | |
| "kl": 0.0160369873046875, | |
| "learning_rate": 3.96654455796855e-07, | |
| "loss": -0.0177, | |
| "reward": 0.27777778543531895, | |
| "reward_std": 0.11188619676977396, | |
| "rewards/accuracy_multibox_reward": 0.27777778543531895, | |
| "step": 379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3421.9166870117188, | |
| "epoch": 0.6512425021422451, | |
| "grad_norm": 0.2158275842666626, | |
| "kl": 0.01507568359375, | |
| "learning_rate": 3.9412077246460496e-07, | |
| "loss": 0.0924, | |
| "reward": 0.2361111268401146, | |
| "reward_std": 0.22807664051651955, | |
| "rewards/accuracy_multibox_reward": 0.2361111268401146, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4011.9305419921875, | |
| "epoch": 0.6529562982005142, | |
| "grad_norm": 0.04404829442501068, | |
| "kl": 0.01255035400390625, | |
| "learning_rate": 3.9159269217741984e-07, | |
| "loss": 0.0115, | |
| "reward": 0.08055555820465088, | |
| "reward_std": 0.040023140609264374, | |
| "rewards/accuracy_multibox_reward": 0.08055555820465088, | |
| "step": 381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3436.263916015625, | |
| "epoch": 0.6546700942587832, | |
| "grad_norm": 0.12178925424814224, | |
| "kl": 0.01165771484375, | |
| "learning_rate": 3.8907030580659795e-07, | |
| "loss": -0.0219, | |
| "reward": 0.298842616379261, | |
| "reward_std": 0.19462933018803596, | |
| "rewards/accuracy_multibox_reward": 0.298842616379261, | |
| "step": 382 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3302.1112060546875, | |
| "epoch": 0.6563838903170522, | |
| "grad_norm": 0.22753176093101501, | |
| "kl": 0.0123443603515625, | |
| "learning_rate": 3.865537040187713e-07, | |
| "loss": -0.0333, | |
| "reward": 0.2361111231148243, | |
| "reward_std": 0.33308491110801697, | |
| "rewards/accuracy_multibox_reward": 0.2361111231148243, | |
| "step": 383 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4145.263916015625, | |
| "epoch": 0.6580976863753213, | |
| "grad_norm": 0.05815766751766205, | |
| "kl": 0.009613037109375, | |
| "learning_rate": 3.8404297727264565e-07, | |
| "loss": 0.0027, | |
| "reward": 0.061111112125217915, | |
| "reward_std": 0.0861068069934845, | |
| "rewards/accuracy_multibox_reward": 0.061111112125217915, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3829.6806030273438, | |
| "epoch": 0.6598114824335904, | |
| "grad_norm": 0.024044044315814972, | |
| "kl": 0.014739990234375, | |
| "learning_rate": 3.8153821581575027e-07, | |
| "loss": 0.0006, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0833333358168602, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3857.8333740234375, | |
| "epoch": 0.6615252784918595, | |
| "grad_norm": 0.0724223330616951, | |
| "kl": 0.01181793212890625, | |
| "learning_rate": 3.790395096811928e-07, | |
| "loss": 0.0246, | |
| "reward": 0.23611111752688885, | |
| "reward_std": 0.1551058473996818, | |
| "rewards/accuracy_multibox_reward": 0.23611111752688885, | |
| "step": 386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3371.2222900390625, | |
| "epoch": 0.6632390745501285, | |
| "grad_norm": 0.11956768482923508, | |
| "kl": 0.0120849609375, | |
| "learning_rate": 3.765469486844238e-07, | |
| "loss": -0.0095, | |
| "reward": 0.2416666690260172, | |
| "reward_std": 0.10427038744091988, | |
| "rewards/accuracy_multibox_reward": 0.2416666690260172, | |
| "step": 387 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3232.138916015625, | |
| "epoch": 0.6649528706083976, | |
| "grad_norm": 0.10446308553218842, | |
| "kl": 0.0130615234375, | |
| "learning_rate": 3.740606224200089e-07, | |
| "loss": -0.0128, | |
| "reward": 0.29722223430871964, | |
| "reward_std": 0.21947000175714493, | |
| "rewards/accuracy_multibox_reward": 0.29722223430871964, | |
| "step": 388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3512.3334350585938, | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.07253856956958771, | |
| "kl": 0.0132598876953125, | |
| "learning_rate": 3.7158062025840685e-07, | |
| "loss": 0.0038, | |
| "reward": 0.1138888904824853, | |
| "reward_std": 0.034020692110061646, | |
| "rewards/accuracy_multibox_reward": 0.1138888904824853, | |
| "step": 389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3291.5695190429688, | |
| "epoch": 0.6683804627249358, | |
| "grad_norm": 0.09663305431604385, | |
| "kl": 0.014190673828125, | |
| "learning_rate": 3.691070313427578e-07, | |
| "loss": -0.0127, | |
| "reward": 0.1944444477558136, | |
| "reward_std": 0.0995594672858715, | |
| "rewards/accuracy_multibox_reward": 0.1944444477558136, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4006.47216796875, | |
| "epoch": 0.6700942587832048, | |
| "grad_norm": 0.06852348893880844, | |
| "kl": 0.0169525146484375, | |
| "learning_rate": 3.6663994458567973e-07, | |
| "loss": 0.001, | |
| "reward": 0.15833333879709244, | |
| "reward_std": 0.12462864443659782, | |
| "rewards/accuracy_multibox_reward": 0.15833333879709244, | |
| "step": 391 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3562.791748046875, | |
| "epoch": 0.6718080548414739, | |
| "grad_norm": 0.11607488989830017, | |
| "kl": 0.011993408203125, | |
| "learning_rate": 3.6417944866607143e-07, | |
| "loss": 0.0425, | |
| "reward": 0.3333333507180214, | |
| "reward_std": 0.22094154357910156, | |
| "rewards/accuracy_multibox_reward": 0.3333333507180214, | |
| "step": 392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4076.7778930664062, | |
| "epoch": 0.6735218508997429, | |
| "grad_norm": 0.06720370054244995, | |
| "kl": 0.014678955078125, | |
| "learning_rate": 3.6172563202592607e-07, | |
| "loss": -0.0013, | |
| "reward": 0.15555556491017342, | |
| "reward_std": 0.1110745258629322, | |
| "rewards/accuracy_multibox_reward": 0.15555556491017342, | |
| "step": 393 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3449.041748046875, | |
| "epoch": 0.675235646958012, | |
| "grad_norm": 0.11808700859546661, | |
| "kl": 0.0130462646484375, | |
| "learning_rate": 3.5927858286715084e-07, | |
| "loss": -0.0016, | |
| "reward": 0.4416666701436043, | |
| "reward_std": 0.33614620566368103, | |
| "rewards/accuracy_multibox_reward": 0.4416666701436043, | |
| "step": 394 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3647.2084350585938, | |
| "epoch": 0.676949443016281, | |
| "grad_norm": 0.14397211372852325, | |
| "kl": 0.01312255859375, | |
| "learning_rate": 3.5683838914839794e-07, | |
| "loss": 0.0745, | |
| "reward": 0.386111119762063, | |
| "reward_std": 0.2577893100678921, | |
| "rewards/accuracy_multibox_reward": 0.386111119762063, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4237.486267089844, | |
| "epoch": 0.6786632390745502, | |
| "grad_norm": 0.06049171835184097, | |
| "kl": 0.0101776123046875, | |
| "learning_rate": 3.544051385819017e-07, | |
| "loss": 0.0094, | |
| "reward": 0.12777777947485447, | |
| "reward_std": 0.09525793045759201, | |
| "rewards/accuracy_multibox_reward": 0.12777777947485447, | |
| "step": 396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3487.6805419921875, | |
| "epoch": 0.6803770351328192, | |
| "grad_norm": 0.06940115988254547, | |
| "kl": 0.010650634765625, | |
| "learning_rate": 3.519789186303268e-07, | |
| "loss": 0.0063, | |
| "reward": 0.12777778040617704, | |
| "reward_std": 0.17639262601733208, | |
| "rewards/accuracy_multibox_reward": 0.12777778040617704, | |
| "step": 397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4038.1251220703125, | |
| "epoch": 0.6820908311910883, | |
| "grad_norm": 0.15376919507980347, | |
| "kl": 0.01236724853515625, | |
| "learning_rate": 3.4955981650362375e-07, | |
| "loss": 0.0431, | |
| "reward": 0.3055555634200573, | |
| "reward_std": 0.30325423926115036, | |
| "rewards/accuracy_multibox_reward": 0.3055555634200573, | |
| "step": 398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2873.3333129882812, | |
| "epoch": 0.6838046272493573, | |
| "grad_norm": 0.03852584958076477, | |
| "kl": 0.0125732421875, | |
| "learning_rate": 3.4714791915589435e-07, | |
| "loss": 0.0011, | |
| "reward": 0.24444444850087166, | |
| "reward_std": 0.04906534031033516, | |
| "rewards/accuracy_multibox_reward": 0.24444444850087166, | |
| "step": 399 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4047.5694580078125, | |
| "epoch": 0.6855184233076264, | |
| "grad_norm": 0.09427085518836975, | |
| "kl": 0.012420654296875, | |
| "learning_rate": 3.447433132822664e-07, | |
| "loss": 0.0207, | |
| "reward": 0.19537037424743176, | |
| "reward_std": 0.16771478950977325, | |
| "rewards/accuracy_multibox_reward": 0.19537037424743176, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4099.305603027344, | |
| "epoch": 0.6872322193658955, | |
| "grad_norm": 0.09304556250572205, | |
| "kl": 0.0146026611328125, | |
| "learning_rate": 3.4234608531577704e-07, | |
| "loss": 0.0328, | |
| "reward": 0.11666667088866234, | |
| "reward_std": 0.12247449159622192, | |
| "rewards/accuracy_multibox_reward": 0.11666667088866234, | |
| "step": 401 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3800.8055419921875, | |
| "epoch": 0.6889460154241646, | |
| "grad_norm": 0.08906779438257217, | |
| "kl": 0.01458740234375, | |
| "learning_rate": 3.399563214242662e-07, | |
| "loss": -0.0192, | |
| "reward": 0.08055556006729603, | |
| "reward_std": 0.14009357243776321, | |
| "rewards/accuracy_multibox_reward": 0.08055556006729603, | |
| "step": 402 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3528.0556030273438, | |
| "epoch": 0.6906598114824336, | |
| "grad_norm": 0.10656818002462387, | |
| "kl": 0.013946533203125, | |
| "learning_rate": 3.375741075072793e-07, | |
| "loss": 0.0619, | |
| "reward": 0.13472222397103906, | |
| "reward_std": 0.23228861205279827, | |
| "rewards/accuracy_multibox_reward": 0.13472222397103906, | |
| "step": 403 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3632.0416870117188, | |
| "epoch": 0.6923736075407027, | |
| "grad_norm": 0.09021688997745514, | |
| "kl": 0.0118255615234375, | |
| "learning_rate": 3.351995291929794e-07, | |
| "loss": 0.0139, | |
| "reward": 0.13333334028720856, | |
| "reward_std": 0.12693162634968758, | |
| "rewards/accuracy_multibox_reward": 0.13333334028720856, | |
| "step": 404 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3906.1945190429688, | |
| "epoch": 0.6940874035989717, | |
| "grad_norm": 0.08097906410694122, | |
| "kl": 0.014801025390625, | |
| "learning_rate": 3.3283267183506956e-07, | |
| "loss": 0.0148, | |
| "reward": 0.18611111491918564, | |
| "reward_std": 0.19261273369193077, | |
| "rewards/accuracy_multibox_reward": 0.18611111491918564, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3044.8194580078125, | |
| "epoch": 0.6958011996572407, | |
| "grad_norm": 0.11380079388618469, | |
| "kl": 0.01404571533203125, | |
| "learning_rate": 3.304736205097251e-07, | |
| "loss": -0.0219, | |
| "reward": 0.2777777835726738, | |
| "reward_std": 0.19901228323578835, | |
| "rewards/accuracy_multibox_reward": 0.2777777835726738, | |
| "step": 406 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4063.7499389648438, | |
| "epoch": 0.6975149957155099, | |
| "grad_norm": 0.06325504183769226, | |
| "kl": 0.01568603515625, | |
| "learning_rate": 3.2812246001253453e-07, | |
| "loss": 0.0145, | |
| "reward": 0.11666667088866234, | |
| "reward_std": 0.10641204193234444, | |
| "rewards/accuracy_multibox_reward": 0.11666667088866234, | |
| "step": 407 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2976.9027709960938, | |
| "epoch": 0.699228791773779, | |
| "grad_norm": 0.13544146716594696, | |
| "kl": 0.01100921630859375, | |
| "learning_rate": 3.2577927485545283e-07, | |
| "loss": 0.0279, | |
| "reward": 0.4055555760860443, | |
| "reward_std": 0.15884897857904434, | |
| "rewards/accuracy_multibox_reward": 0.4055555760860443, | |
| "step": 408 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3615.0972290039062, | |
| "epoch": 0.700942587832048, | |
| "grad_norm": 0.08917216956615448, | |
| "kl": 0.0150299072265625, | |
| "learning_rate": 3.234441492637626e-07, | |
| "loss": 0.0246, | |
| "reward": 0.22619047947227955, | |
| "reward_std": 0.09357817750424147, | |
| "rewards/accuracy_multibox_reward": 0.22619047947227955, | |
| "step": 409 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3821.7916259765625, | |
| "epoch": 0.702656383890317, | |
| "grad_norm": 0.06382337212562561, | |
| "kl": 0.0162353515625, | |
| "learning_rate": 3.211171671730476e-07, | |
| "loss": 0.0059, | |
| "reward": 0.1669191960245371, | |
| "reward_std": 0.08313747122883797, | |
| "rewards/accuracy_multibox_reward": 0.1669191960245371, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3409.9166870117188, | |
| "epoch": 0.7043701799485861, | |
| "grad_norm": 0.04453384876251221, | |
| "kl": 0.0098724365234375, | |
| "learning_rate": 3.187984122261748e-07, | |
| "loss": 0.014, | |
| "reward": 0.06388889253139496, | |
| "reward_std": 0.049907319247722626, | |
| "rewards/accuracy_multibox_reward": 0.06388889253139496, | |
| "step": 411 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3151.27783203125, | |
| "epoch": 0.7060839760068551, | |
| "grad_norm": 0.07833683490753174, | |
| "kl": 0.01055145263671875, | |
| "learning_rate": 3.1648796777028857e-07, | |
| "loss": -0.0235, | |
| "reward": 0.07777778152376413, | |
| "reward_std": 0.08392800763249397, | |
| "rewards/accuracy_multibox_reward": 0.07777778152376413, | |
| "step": 412 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3565.4723510742188, | |
| "epoch": 0.7077977720651243, | |
| "grad_norm": 0.1216290220618248, | |
| "kl": 0.0118255615234375, | |
| "learning_rate": 3.1418591685381434e-07, | |
| "loss": 0.0504, | |
| "reward": 0.2611111234873533, | |
| "reward_std": 0.3209957964718342, | |
| "rewards/accuracy_multibox_reward": 0.2611111234873533, | |
| "step": 413 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3864.8056030273438, | |
| "epoch": 0.7095115681233933, | |
| "grad_norm": 0.14296910166740417, | |
| "kl": 0.013885498046875, | |
| "learning_rate": 3.1189234222347333e-07, | |
| "loss": 0.0589, | |
| "reward": 0.20833332277834415, | |
| "reward_std": 0.19262602552771568, | |
| "rewards/accuracy_multibox_reward": 0.20833332277834415, | |
| "step": 414 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3573.6806640625, | |
| "epoch": 0.7112253641816624, | |
| "grad_norm": 0.16137103736400604, | |
| "kl": 0.0140838623046875, | |
| "learning_rate": 3.096073263213092e-07, | |
| "loss": 0.0553, | |
| "reward": 0.2083333320915699, | |
| "reward_std": 0.08927841484546661, | |
| "rewards/accuracy_multibox_reward": 0.2083333320915699, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3827.1805419921875, | |
| "epoch": 0.7129391602399314, | |
| "grad_norm": 0.10257401317358017, | |
| "kl": 0.0181121826171875, | |
| "learning_rate": 3.0733095128172337e-07, | |
| "loss": -0.0042, | |
| "reward": 0.23611111380159855, | |
| "reward_std": 0.097060427069664, | |
| "rewards/accuracy_multibox_reward": 0.23611111380159855, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3571.3194580078125, | |
| "epoch": 0.7146529562982005, | |
| "grad_norm": 0.14013634622097015, | |
| "kl": 0.0152587890625, | |
| "learning_rate": 3.0506329892852335e-07, | |
| "loss": 0.0421, | |
| "reward": 0.09722222574055195, | |
| "reward_std": 0.08084797114133835, | |
| "rewards/accuracy_multibox_reward": 0.09722222574055195, | |
| "step": 417 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3303.638916015625, | |
| "epoch": 0.7163667523564696, | |
| "grad_norm": 0.10443224012851715, | |
| "kl": 0.0148162841796875, | |
| "learning_rate": 3.0280445077198194e-07, | |
| "loss": 0.023, | |
| "reward": 0.18333333916962147, | |
| "reward_std": 0.15013658720999956, | |
| "rewards/accuracy_multibox_reward": 0.18333333916962147, | |
| "step": 418 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3251.3472290039062, | |
| "epoch": 0.7180805484147387, | |
| "grad_norm": 0.060391925275325775, | |
| "kl": 0.0141143798828125, | |
| "learning_rate": 3.0055448800590673e-07, | |
| "loss": -0.0053, | |
| "reward": 0.1972222262993455, | |
| "reward_std": 0.034020692110061646, | |
| "rewards/accuracy_multibox_reward": 0.1972222262993455, | |
| "step": 419 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3815.138916015625, | |
| "epoch": 0.7197943444730077, | |
| "grad_norm": 0.08274088054895401, | |
| "kl": 0.011932373046875, | |
| "learning_rate": 2.983134915047218e-07, | |
| "loss": 0.0222, | |
| "reward": 0.23888888861984015, | |
| "reward_std": 0.16958848387002945, | |
| "rewards/accuracy_multibox_reward": 0.23888888861984015, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3645.3056030273438, | |
| "epoch": 0.7215081405312768, | |
| "grad_norm": 0.09790734946727753, | |
| "kl": 0.012451171875, | |
| "learning_rate": 2.9608154182056107e-07, | |
| "loss": 0.0239, | |
| "reward": 0.25000000931322575, | |
| "reward_std": 0.20268530026078224, | |
| "rewards/accuracy_multibox_reward": 0.25000000931322575, | |
| "step": 421 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3805.5972900390625, | |
| "epoch": 0.7232219365895458, | |
| "grad_norm": 0.13790515065193176, | |
| "kl": 0.0140380859375, | |
| "learning_rate": 2.938587191803723e-07, | |
| "loss": 0.0497, | |
| "reward": 0.3180555673316121, | |
| "reward_std": 0.2130419760942459, | |
| "rewards/accuracy_multibox_reward": 0.3180555673316121, | |
| "step": 422 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3722.638916015625, | |
| "epoch": 0.7249357326478149, | |
| "grad_norm": 0.09732615202665329, | |
| "kl": 0.0118865966796875, | |
| "learning_rate": 2.9164510348303363e-07, | |
| "loss": 0.0348, | |
| "reward": 0.2138888966292143, | |
| "reward_std": 0.24619626626372337, | |
| "rewards/accuracy_multibox_reward": 0.2138888966292143, | |
| "step": 423 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3857.0000610351562, | |
| "epoch": 0.726649528706084, | |
| "grad_norm": 0.06411174684762955, | |
| "kl": 0.0169830322265625, | |
| "learning_rate": 2.8944077429648216e-07, | |
| "loss": -0.0119, | |
| "reward": 0.16944445110857487, | |
| "reward_std": 0.13527286797761917, | |
| "rewards/accuracy_multibox_reward": 0.16944445110857487, | |
| "step": 424 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3324.2083740234375, | |
| "epoch": 0.7283633247643531, | |
| "grad_norm": 0.14491714537143707, | |
| "kl": 0.0173797607421875, | |
| "learning_rate": 2.8724581085485273e-07, | |
| "loss": 0.0274, | |
| "reward": 0.3222222365438938, | |
| "reward_std": 0.28378839790821075, | |
| "rewards/accuracy_multibox_reward": 0.3222222365438938, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3411.0972290039062, | |
| "epoch": 0.7300771208226221, | |
| "grad_norm": 0.09180285781621933, | |
| "kl": 0.018280029296875, | |
| "learning_rate": 2.8506029205563073e-07, | |
| "loss": -0.0075, | |
| "reward": 0.28611112385988235, | |
| "reward_std": 0.08566047623753548, | |
| "rewards/accuracy_multibox_reward": 0.28611112385988235, | |
| "step": 426 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3647.4861450195312, | |
| "epoch": 0.7317909168808912, | |
| "grad_norm": 0.1303664594888687, | |
| "kl": 0.014007568359375, | |
| "learning_rate": 2.82884296456816e-07, | |
| "loss": -0.0428, | |
| "reward": 0.21666666585952044, | |
| "reward_std": 0.1472368687391281, | |
| "rewards/accuracy_multibox_reward": 0.21666666585952044, | |
| "step": 427 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3465.90283203125, | |
| "epoch": 0.7335047129391602, | |
| "grad_norm": 0.12604549527168274, | |
| "kl": 0.0133209228515625, | |
| "learning_rate": 2.8071790227409886e-07, | |
| "loss": 0.007, | |
| "reward": 0.23611111752688885, | |
| "reward_std": 0.27048753947019577, | |
| "rewards/accuracy_multibox_reward": 0.23611111752688885, | |
| "step": 428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3993.7222290039062, | |
| "epoch": 0.7352185089974294, | |
| "grad_norm": 0.08605503290891647, | |
| "kl": 0.0172576904296875, | |
| "learning_rate": 2.78561187378049e-07, | |
| "loss": 0.0106, | |
| "reward": 0.1611111145466566, | |
| "reward_std": 0.1418960765004158, | |
| "rewards/accuracy_multibox_reward": 0.1611111145466566, | |
| "step": 429 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3556.9583129882812, | |
| "epoch": 0.7369323050556984, | |
| "grad_norm": 0.0960155725479126, | |
| "kl": 0.0117645263671875, | |
| "learning_rate": 2.764142292913161e-07, | |
| "loss": 0.0015, | |
| "reward": 0.13333333493210375, | |
| "reward_std": 0.056641421746462584, | |
| "rewards/accuracy_multibox_reward": 0.13333333493210375, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3242.4999389648438, | |
| "epoch": 0.7386461011139674, | |
| "grad_norm": 0.07688996195793152, | |
| "kl": 0.01409912109375, | |
| "learning_rate": 2.7427710518584345e-07, | |
| "loss": 0.0164, | |
| "reward": 0.24722222983837128, | |
| "reward_std": 0.10127307381480932, | |
| "rewards/accuracy_multibox_reward": 0.24722222983837128, | |
| "step": 431 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4096.4722900390625, | |
| "epoch": 0.7403598971722365, | |
| "grad_norm": 0.029960941523313522, | |
| "kl": 0.0154571533203125, | |
| "learning_rate": 2.721498918800939e-07, | |
| "loss": 0.0006, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.10000000149011612, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3638.8472900390625, | |
| "epoch": 0.7420736932305055, | |
| "grad_norm": 0.06569115817546844, | |
| "kl": 0.0146636962890625, | |
| "learning_rate": 2.7003266583628926e-07, | |
| "loss": 0.0031, | |
| "reward": 0.1388888843357563, | |
| "reward_std": 0.06493872031569481, | |
| "rewards/accuracy_multibox_reward": 0.1388888843357563, | |
| "step": 433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3120.541748046875, | |
| "epoch": 0.7437874892887746, | |
| "grad_norm": 0.10579577833414078, | |
| "kl": 0.0107269287109375, | |
| "learning_rate": 2.679255031576608e-07, | |
| "loss": -0.0012, | |
| "reward": 0.15138890407979488, | |
| "reward_std": 0.15257813408970833, | |
| "rewards/accuracy_multibox_reward": 0.15138890407979488, | |
| "step": 434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3574.3055419921875, | |
| "epoch": 0.7455012853470437, | |
| "grad_norm": 0.07471712678670883, | |
| "kl": 0.0107421875, | |
| "learning_rate": 2.6582847958571463e-07, | |
| "loss": 0.0042, | |
| "reward": 0.2250000163912773, | |
| "reward_std": 0.06705055013298988, | |
| "rewards/accuracy_multibox_reward": 0.2250000163912773, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2958.5555419921875, | |
| "epoch": 0.7472150814053128, | |
| "grad_norm": 0.08455600589513779, | |
| "kl": 0.00977325439453125, | |
| "learning_rate": 2.6374167049750864e-07, | |
| "loss": 0.0584, | |
| "reward": 0.24444444570690393, | |
| "reward_std": 0.08385797217488289, | |
| "rewards/accuracy_multibox_reward": 0.24444444570690393, | |
| "step": 436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3378.5555419921875, | |
| "epoch": 0.7489288774635818, | |
| "grad_norm": 0.11141511052846909, | |
| "kl": 0.0128021240234375, | |
| "learning_rate": 2.616651509029433e-07, | |
| "loss": -0.0011, | |
| "reward": 0.24722222238779068, | |
| "reward_std": 0.2629072293639183, | |
| "rewards/accuracy_multibox_reward": 0.24722222238779068, | |
| "step": 437 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3378.1388549804688, | |
| "epoch": 0.7506426735218509, | |
| "grad_norm": 0.0727572962641716, | |
| "kl": 0.0103912353515625, | |
| "learning_rate": 2.595989954420661e-07, | |
| "loss": -0.0186, | |
| "reward": 0.14999999850988388, | |
| "reward_std": 0.05671145906671882, | |
| "rewards/accuracy_multibox_reward": 0.14999999850988388, | |
| "step": 438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4125.5694580078125, | |
| "epoch": 0.7523564695801199, | |
| "grad_norm": 0.10067083686590195, | |
| "kl": 0.0134429931640625, | |
| "learning_rate": 2.575432783823869e-07, | |
| "loss": 0.0218, | |
| "reward": 0.2833333322778344, | |
| "reward_std": 0.11755470186471939, | |
| "rewards/accuracy_multibox_reward": 0.2833333322778344, | |
| "step": 439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3611.77783203125, | |
| "epoch": 0.7540702656383891, | |
| "grad_norm": 0.08463531732559204, | |
| "kl": 0.01381683349609375, | |
| "learning_rate": 2.554980736162104e-07, | |
| "loss": -0.0061, | |
| "reward": 0.14166668057441711, | |
| "reward_std": 0.06390096992254257, | |
| "rewards/accuracy_multibox_reward": 0.14166668057441711, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3730.2916259765625, | |
| "epoch": 0.7557840616966581, | |
| "grad_norm": 0.053779229521751404, | |
| "kl": 0.013946533203125, | |
| "learning_rate": 2.5346345465797856e-07, | |
| "loss": 0.0097, | |
| "reward": 0.12777778133749962, | |
| "reward_std": 0.09122904390096664, | |
| "rewards/accuracy_multibox_reward": 0.12777778133749962, | |
| "step": 441 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2802.8055419921875, | |
| "epoch": 0.7574978577549272, | |
| "grad_norm": 0.06235995143651962, | |
| "kl": 0.011749267578125, | |
| "learning_rate": 2.5143949464162957e-07, | |
| "loss": 0.0095, | |
| "reward": 0.43611112609505653, | |
| "reward_std": 0.05819405149668455, | |
| "rewards/accuracy_multibox_reward": 0.43611112609505653, | |
| "step": 442 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2796.055633544922, | |
| "epoch": 0.7592116538131962, | |
| "grad_norm": 0.12273381650447845, | |
| "kl": 0.01300048828125, | |
| "learning_rate": 2.4942626631796737e-07, | |
| "loss": 0.0026, | |
| "reward": 0.41388890892267227, | |
| "reward_std": 0.15404099971055984, | |
| "rewards/accuracy_multibox_reward": 0.41388890892267227, | |
| "step": 443 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3470.75, | |
| "epoch": 0.7609254498714653, | |
| "grad_norm": 0.12292377650737762, | |
| "kl": 0.0145263671875, | |
| "learning_rate": 2.47423842052048e-07, | |
| "loss": 0.0362, | |
| "reward": 0.28055556677281857, | |
| "reward_std": 0.2824106588959694, | |
| "rewards/accuracy_multibox_reward": 0.28055556677281857, | |
| "step": 444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3927.2361450195312, | |
| "epoch": 0.7626392459297343, | |
| "grad_norm": 0.08180582523345947, | |
| "kl": 0.012115478515625, | |
| "learning_rate": 2.4543229382057804e-07, | |
| "loss": 0.0253, | |
| "reward": 0.11666667275130749, | |
| "reward_std": 0.14410439878702164, | |
| "rewards/accuracy_multibox_reward": 0.11666667275130749, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3595.6111450195312, | |
| "epoch": 0.7643530419880035, | |
| "grad_norm": 0.07436072081327438, | |
| "kl": 0.0123138427734375, | |
| "learning_rate": 2.434516932093269e-07, | |
| "loss": 0.016, | |
| "reward": 0.3888888955116272, | |
| "reward_std": 0.17891193181276321, | |
| "rewards/accuracy_multibox_reward": 0.3888888955116272, | |
| "step": 446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3473.0972900390625, | |
| "epoch": 0.7660668380462725, | |
| "grad_norm": 0.14082324504852295, | |
| "kl": 0.0113525390625, | |
| "learning_rate": 2.414821114105549e-07, | |
| "loss": 0.0373, | |
| "reward": 0.21152998507022858, | |
| "reward_std": 0.13564686104655266, | |
| "rewards/accuracy_multibox_reward": 0.21152998507022858, | |
| "step": 447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3637.7916870117188, | |
| "epoch": 0.7677806341045416, | |
| "grad_norm": 0.059030212461948395, | |
| "kl": 0.01122283935546875, | |
| "learning_rate": 2.3952361922045305e-07, | |
| "loss": 0.0213, | |
| "reward": 0.23333333432674408, | |
| "reward_std": 0.06873702630400658, | |
| "rewards/accuracy_multibox_reward": 0.23333333432674408, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3866.7778930664062, | |
| "epoch": 0.7694944301628106, | |
| "grad_norm": 0.08079922199249268, | |
| "kl": 0.015350341796875, | |
| "learning_rate": 2.375762870365986e-07, | |
| "loss": -0.0108, | |
| "reward": 0.06805555906612426, | |
| "reward_std": 0.055041853338479996, | |
| "rewards/accuracy_multibox_reward": 0.06805555906612426, | |
| "step": 449 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3253.263916015625, | |
| "epoch": 0.7712082262210797, | |
| "grad_norm": 0.12304358929395676, | |
| "kl": 0.011077880859375, | |
| "learning_rate": 2.3564018485542502e-07, | |
| "loss": -0.0214, | |
| "reward": 0.3638888821005821, | |
| "reward_std": 0.22174322977662086, | |
| "rewards/accuracy_multibox_reward": 0.3638888821005821, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4127.083312988281, | |
| "epoch": 0.7729220222793488, | |
| "grad_norm": 0.08091532438993454, | |
| "kl": 0.0181121826171875, | |
| "learning_rate": 2.3371538226970607e-07, | |
| "loss": 0.0231, | |
| "reward": 0.07222222350537777, | |
| "reward_std": 0.09209848940372467, | |
| "rewards/accuracy_multibox_reward": 0.07222222350537777, | |
| "step": 451 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3409.90283203125, | |
| "epoch": 0.7746358183376179, | |
| "grad_norm": 0.06987495720386505, | |
| "kl": 0.0093536376953125, | |
| "learning_rate": 2.3180194846605364e-07, | |
| "loss": 0.038, | |
| "reward": 0.14722222927957773, | |
| "reward_std": 0.12961777672171593, | |
| "rewards/accuracy_multibox_reward": 0.14722222927957773, | |
| "step": 452 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3832.0555419921875, | |
| "epoch": 0.7763496143958869, | |
| "grad_norm": 0.3012501895427704, | |
| "kl": 0.0205078125, | |
| "learning_rate": 2.2989995222243122e-07, | |
| "loss": 0.0677, | |
| "reward": 0.24722222425043583, | |
| "reward_std": 0.22921660542488098, | |
| "rewards/accuracy_multibox_reward": 0.24722222425043583, | |
| "step": 453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2627.2362060546875, | |
| "epoch": 0.778063410454156, | |
| "grad_norm": 0.1187232956290245, | |
| "kl": 0.0079345703125, | |
| "learning_rate": 2.2800946190568174e-07, | |
| "loss": 0.0263, | |
| "reward": 0.3027777820825577, | |
| "reward_std": 0.15167447179555893, | |
| "rewards/accuracy_multibox_reward": 0.3027777820825577, | |
| "step": 454 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3479.888916015625, | |
| "epoch": 0.779777206512425, | |
| "grad_norm": 0.12871868908405304, | |
| "kl": 0.0140838623046875, | |
| "learning_rate": 2.2613054546907003e-07, | |
| "loss": 0.0075, | |
| "reward": 0.3533179173246026, | |
| "reward_std": 0.14214061573147774, | |
| "rewards/accuracy_multibox_reward": 0.3533179173246026, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3317.2083129882812, | |
| "epoch": 0.781491002570694, | |
| "grad_norm": 0.10878768563270569, | |
| "kl": 0.0131988525390625, | |
| "learning_rate": 2.2426327044984056e-07, | |
| "loss": 0.0489, | |
| "reward": 0.28055556677281857, | |
| "reward_std": 0.21750080585479736, | |
| "rewards/accuracy_multibox_reward": 0.28055556677281857, | |
| "step": 456 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3435.3889770507812, | |
| "epoch": 0.7832047986289632, | |
| "grad_norm": 0.11321526020765305, | |
| "kl": 0.015838623046875, | |
| "learning_rate": 2.2240770396678943e-07, | |
| "loss": -0.0305, | |
| "reward": 0.15833333879709244, | |
| "reward_std": 0.1301032342016697, | |
| "rewards/accuracy_multibox_reward": 0.15833333879709244, | |
| "step": 457 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3574.3472290039062, | |
| "epoch": 0.7849185946872322, | |
| "grad_norm": 0.13223789632320404, | |
| "kl": 0.016937255859375, | |
| "learning_rate": 2.2056391271785157e-07, | |
| "loss": 0.0486, | |
| "reward": 0.29722221940755844, | |
| "reward_std": 0.15404099971055984, | |
| "rewards/accuracy_multibox_reward": 0.29722221940755844, | |
| "step": 458 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3083.4861450195312, | |
| "epoch": 0.7866323907455013, | |
| "grad_norm": 0.12737201154232025, | |
| "kl": 0.0130615234375, | |
| "learning_rate": 2.1873196297770403e-07, | |
| "loss": -0.0149, | |
| "reward": 0.21666667610406876, | |
| "reward_std": 0.09971508383750916, | |
| "rewards/accuracy_multibox_reward": 0.21666667610406876, | |
| "step": 459 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3475.6389770507812, | |
| "epoch": 0.7883461868037703, | |
| "grad_norm": 0.10346706956624985, | |
| "kl": 0.01499176025390625, | |
| "learning_rate": 2.1691192059538372e-07, | |
| "loss": 0.0204, | |
| "reward": 0.25833334028720856, | |
| "reward_std": 0.2108636125922203, | |
| "rewards/accuracy_multibox_reward": 0.25833334028720856, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3347.375, | |
| "epoch": 0.7900599828620394, | |
| "grad_norm": 0.0963190570473671, | |
| "kl": 0.01583099365234375, | |
| "learning_rate": 2.1510385099191947e-07, | |
| "loss": 0.0004, | |
| "reward": 0.11666667088866234, | |
| "reward_std": 0.0861067958176136, | |
| "rewards/accuracy_multibox_reward": 0.11666667088866234, | |
| "step": 461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3662.125, | |
| "epoch": 0.7917737789203085, | |
| "grad_norm": 0.09130357205867767, | |
| "kl": 0.012237548828125, | |
| "learning_rate": 2.1330781915798162e-07, | |
| "loss": 0.0326, | |
| "reward": 0.1293176393955946, | |
| "reward_std": 0.23248007148504257, | |
| "rewards/accuracy_multibox_reward": 0.1293176393955946, | |
| "step": 462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3412.4166870117188, | |
| "epoch": 0.7934875749785776, | |
| "grad_norm": 0.13110047578811646, | |
| "kl": 0.0143280029296875, | |
| "learning_rate": 2.1152388965154532e-07, | |
| "loss": -0.0169, | |
| "reward": 0.3722222298383713, | |
| "reward_std": 0.1996498927474022, | |
| "rewards/accuracy_multibox_reward": 0.3722222298383713, | |
| "step": 463 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3407.5695190429688, | |
| "epoch": 0.7952013710368466, | |
| "grad_norm": 0.09380277246236801, | |
| "kl": 0.0126190185546875, | |
| "learning_rate": 2.097521265955701e-07, | |
| "loss": 0.0139, | |
| "reward": 0.3638889044523239, | |
| "reward_std": 0.19486583769321442, | |
| "rewards/accuracy_multibox_reward": 0.3638889044523239, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3919.888916015625, | |
| "epoch": 0.7969151670951157, | |
| "grad_norm": 0.09156035631895065, | |
| "kl": 0.01708984375, | |
| "learning_rate": 2.079925936756955e-07, | |
| "loss": 0.0406, | |
| "reward": 0.05128968367353082, | |
| "reward_std": 0.09475788939744234, | |
| "rewards/accuracy_multibox_reward": 0.05128968367353082, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3914.9861450195312, | |
| "epoch": 0.7986289631533847, | |
| "grad_norm": 0.08346665650606155, | |
| "kl": 0.0150299072265625, | |
| "learning_rate": 2.062453541379509e-07, | |
| "loss": -0.0033, | |
| "reward": 0.23333335295319557, | |
| "reward_std": 0.2343158908188343, | |
| "rewards/accuracy_multibox_reward": 0.23333335295319557, | |
| "step": 466 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3979.4583129882812, | |
| "epoch": 0.8003427592116538, | |
| "grad_norm": 0.04841732978820801, | |
| "kl": 0.014892578125, | |
| "learning_rate": 2.0451047078648315e-07, | |
| "loss": 0.009, | |
| "reward": 0.06666667014360428, | |
| "reward_std": 0.05163978412747383, | |
| "rewards/accuracy_multibox_reward": 0.06666667014360428, | |
| "step": 467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3215.166748046875, | |
| "epoch": 0.8020565552699229, | |
| "grad_norm": 0.14400030672550201, | |
| "kl": 0.0177154541015625, | |
| "learning_rate": 2.027880059812982e-07, | |
| "loss": -0.0255, | |
| "reward": 0.1833333345130086, | |
| "reward_std": 0.13189391046762466, | |
| "rewards/accuracy_multibox_reward": 0.1833333345130086, | |
| "step": 468 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3258.9445190429688, | |
| "epoch": 0.803770351328192, | |
| "grad_norm": 0.08897484093904495, | |
| "kl": 0.01273345947265625, | |
| "learning_rate": 2.0107802163602057e-07, | |
| "loss": 0.0236, | |
| "reward": 0.1972222262993455, | |
| "reward_std": 0.11945626232773066, | |
| "rewards/accuracy_multibox_reward": 0.1972222262993455, | |
| "step": 469 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3200.569549560547, | |
| "epoch": 0.805484147386461, | |
| "grad_norm": 0.051192618906497955, | |
| "kl": 0.01456451416015625, | |
| "learning_rate": 1.9938057921566701e-07, | |
| "loss": 0.015, | |
| "reward": 0.1472222227603197, | |
| "reward_std": 0.08845379576086998, | |
| "rewards/accuracy_multibox_reward": 0.1472222227603197, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3870.8750610351562, | |
| "epoch": 0.8071979434447301, | |
| "grad_norm": 0.05183480307459831, | |
| "kl": 0.01212310791015625, | |
| "learning_rate": 1.9769573973443766e-07, | |
| "loss": 0.0135, | |
| "reward": 0.030555556528270245, | |
| "reward_std": 0.07484552264213562, | |
| "rewards/accuracy_multibox_reward": 0.030555556528270245, | |
| "step": 471 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3590.9306030273438, | |
| "epoch": 0.8089117395029991, | |
| "grad_norm": 0.09707149118185043, | |
| "kl": 0.018524169921875, | |
| "learning_rate": 1.9602356375352264e-07, | |
| "loss": 0.0281, | |
| "reward": 0.28611110895872116, | |
| "reward_std": 0.1632351577281952, | |
| "rewards/accuracy_multibox_reward": 0.28611110895872116, | |
| "step": 472 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3920.5972900390625, | |
| "epoch": 0.8106255355612683, | |
| "grad_norm": 0.06979704648256302, | |
| "kl": 0.01702880859375, | |
| "learning_rate": 1.9436411137892523e-07, | |
| "loss": -0.0036, | |
| "reward": 0.15833333507180214, | |
| "reward_std": 0.09291094541549683, | |
| "rewards/accuracy_multibox_reward": 0.15833333507180214, | |
| "step": 473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3026.986114501953, | |
| "epoch": 0.8123393316195373, | |
| "grad_norm": 0.14104922115802765, | |
| "kl": 0.014801025390625, | |
| "learning_rate": 1.9271744225930173e-07, | |
| "loss": -0.0104, | |
| "reward": 0.32777778524905443, | |
| "reward_std": 0.1304345726966858, | |
| "rewards/accuracy_multibox_reward": 0.32777778524905443, | |
| "step": 474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3868.2084350585938, | |
| "epoch": 0.8140531276778064, | |
| "grad_norm": 0.09927435964345932, | |
| "kl": 0.0162506103515625, | |
| "learning_rate": 1.9108361558381693e-07, | |
| "loss": 0.0009, | |
| "reward": 0.1722222277894616, | |
| "reward_std": 0.1648220382630825, | |
| "rewards/accuracy_multibox_reward": 0.1722222277894616, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3891.0833740234375, | |
| "epoch": 0.8157669237360754, | |
| "grad_norm": 0.1258765608072281, | |
| "kl": 0.019683837890625, | |
| "learning_rate": 1.8946269008001652e-07, | |
| "loss": 0.005, | |
| "reward": 0.13055555894970894, | |
| "reward_std": 0.1885080263018608, | |
| "rewards/accuracy_multibox_reward": 0.13055555894970894, | |
| "step": 476 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3574.7083740234375, | |
| "epoch": 0.8174807197943444, | |
| "grad_norm": 0.08934295922517776, | |
| "kl": 0.0120849609375, | |
| "learning_rate": 1.8785472401171684e-07, | |
| "loss": 0.0308, | |
| "reward": 0.1666666753590107, | |
| "reward_std": 0.1632993221282959, | |
| "rewards/accuracy_multibox_reward": 0.1666666753590107, | |
| "step": 477 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3498.0, | |
| "epoch": 0.8191945158526135, | |
| "grad_norm": 0.08598225563764572, | |
| "kl": 0.01654052734375, | |
| "learning_rate": 1.8625977517690972e-07, | |
| "loss": 0.0118, | |
| "reward": 0.19722222909331322, | |
| "reward_std": 0.1822572834789753, | |
| "rewards/accuracy_multibox_reward": 0.19722222909331322, | |
| "step": 478 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2998.77783203125, | |
| "epoch": 0.8209083119108826, | |
| "grad_norm": 0.14342691004276276, | |
| "kl": 0.01546478271484375, | |
| "learning_rate": 1.8467790090568552e-07, | |
| "loss": 0.0773, | |
| "reward": 0.33888889849185944, | |
| "reward_std": 0.2460908107459545, | |
| "rewards/accuracy_multibox_reward": 0.33888889849185944, | |
| "step": 479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3229.1527709960938, | |
| "epoch": 0.8226221079691517, | |
| "grad_norm": 0.13320675492286682, | |
| "kl": 0.0144500732421875, | |
| "learning_rate": 1.8310915805817228e-07, | |
| "loss": 0.0133, | |
| "reward": 0.31944444589316845, | |
| "reward_std": 0.31064849346876144, | |
| "rewards/accuracy_multibox_reward": 0.31944444589316845, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3531.5694580078125, | |
| "epoch": 0.8243359040274207, | |
| "grad_norm": 0.08227194845676422, | |
| "kl": 0.019012451171875, | |
| "learning_rate": 1.815536030224918e-07, | |
| "loss": -0.0013, | |
| "reward": 0.10555555857717991, | |
| "reward_std": 0.04943146277219057, | |
| "rewards/accuracy_multibox_reward": 0.10555555857717991, | |
| "step": 481 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3597.0834045410156, | |
| "epoch": 0.8260497000856898, | |
| "grad_norm": 0.050318628549575806, | |
| "kl": 0.01190948486328125, | |
| "learning_rate": 1.800112917127327e-07, | |
| "loss": 0.0266, | |
| "reward": 0.20000002160668373, | |
| "reward_std": 0.10327956825494766, | |
| "rewards/accuracy_multibox_reward": 0.20000002160668373, | |
| "step": 482 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3467.416748046875, | |
| "epoch": 0.8277634961439588, | |
| "grad_norm": 0.0965297669172287, | |
| "kl": 0.01479339599609375, | |
| "learning_rate": 1.7848227956694118e-07, | |
| "loss": -0.0438, | |
| "reward": 0.1527777761220932, | |
| "reward_std": 0.07602725550532341, | |
| "rewards/accuracy_multibox_reward": 0.1527777761220932, | |
| "step": 483 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3493.3472290039062, | |
| "epoch": 0.829477292202228, | |
| "grad_norm": 0.10906803607940674, | |
| "kl": 0.01180267333984375, | |
| "learning_rate": 1.7696662154512738e-07, | |
| "loss": 0.0562, | |
| "reward": 0.247222232632339, | |
| "reward_std": 0.21607795730233192, | |
| "rewards/accuracy_multibox_reward": 0.247222232632339, | |
| "step": 484 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3265.7639770507812, | |
| "epoch": 0.831191088260497, | |
| "grad_norm": 0.0679367259144783, | |
| "kl": 0.0162811279296875, | |
| "learning_rate": 1.7546437212729064e-07, | |
| "loss": 0.0063, | |
| "reward": 0.1527777761220932, | |
| "reward_std": 0.07602725550532341, | |
| "rewards/accuracy_multibox_reward": 0.1527777761220932, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3566.90283203125, | |
| "epoch": 0.8329048843187661, | |
| "grad_norm": 0.06962968409061432, | |
| "kl": 0.0192718505859375, | |
| "learning_rate": 1.7397558531146082e-07, | |
| "loss": 0.0252, | |
| "reward": 0.1111111119389534, | |
| "reward_std": 0.10199342668056488, | |
| "rewards/accuracy_multibox_reward": 0.1111111119389534, | |
| "step": 486 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3874.0416259765625, | |
| "epoch": 0.8346186803770351, | |
| "grad_norm": 0.051922332495450974, | |
| "kl": 0.01611328125, | |
| "learning_rate": 1.725003146117575e-07, | |
| "loss": 0.007, | |
| "reward": 0.07777778059244156, | |
| "reward_std": 0.08392801135778427, | |
| "rewards/accuracy_multibox_reward": 0.07777778059244156, | |
| "step": 487 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3297.3333740234375, | |
| "epoch": 0.8363324764353042, | |
| "grad_norm": 0.13417819142341614, | |
| "kl": 0.01488494873046875, | |
| "learning_rate": 1.7103861305646634e-07, | |
| "loss": 0.0478, | |
| "reward": 0.3583333492279053, | |
| "reward_std": 0.25162352388724685, | |
| "rewards/accuracy_multibox_reward": 0.3583333492279053, | |
| "step": 488 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3756.916748046875, | |
| "epoch": 0.8380462724935732, | |
| "grad_norm": 0.07455100864171982, | |
| "kl": 0.017669677734375, | |
| "learning_rate": 1.6959053318613313e-07, | |
| "loss": -0.003, | |
| "reward": 0.2888889107853174, | |
| "reward_std": 0.09480177983641624, | |
| "rewards/accuracy_multibox_reward": 0.2888889107853174, | |
| "step": 489 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3362.1805419921875, | |
| "epoch": 0.8397600685518424, | |
| "grad_norm": 0.1156516969203949, | |
| "kl": 0.0150146484375, | |
| "learning_rate": 1.681561270516752e-07, | |
| "loss": -0.0273, | |
| "reward": 0.2666666731238365, | |
| "reward_std": 0.09106908738613129, | |
| "rewards/accuracy_multibox_reward": 0.2666666731238365, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3984.3195190429688, | |
| "epoch": 0.8414738646101114, | |
| "grad_norm": 0.06846940517425537, | |
| "kl": 0.018463134765625, | |
| "learning_rate": 1.6673544621251005e-07, | |
| "loss": -0.0005, | |
| "reward": 0.15555555745959282, | |
| "reward_std": 0.06493871612474322, | |
| "rewards/accuracy_multibox_reward": 0.15555555745959282, | |
| "step": 491 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4105.0694580078125, | |
| "epoch": 0.8431876606683805, | |
| "grad_norm": 0.10050587356090546, | |
| "kl": 0.0164337158203125, | |
| "learning_rate": 1.653285417347029e-07, | |
| "loss": 0.038, | |
| "reward": 0.18055555690079927, | |
| "reward_std": 0.22402158379554749, | |
| "rewards/accuracy_multibox_reward": 0.18055555690079927, | |
| "step": 492 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2979.9444580078125, | |
| "epoch": 0.8449014567266495, | |
| "grad_norm": 0.07768964767456055, | |
| "kl": 0.011962890625, | |
| "learning_rate": 1.639354641891304e-07, | |
| "loss": 0.0192, | |
| "reward": 0.17777779698371887, | |
| "reward_std": 0.049431461840867996, | |
| "rewards/accuracy_multibox_reward": 0.17777779698371887, | |
| "step": 493 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3900.7777709960938, | |
| "epoch": 0.8466152527849186, | |
| "grad_norm": 0.07338675856590271, | |
| "kl": 0.0167388916015625, | |
| "learning_rate": 1.6255626364966312e-07, | |
| "loss": 0.0424, | |
| "reward": 0.16388889774680138, | |
| "reward_std": 0.18412752449512482, | |
| "rewards/accuracy_multibox_reward": 0.16388889774680138, | |
| "step": 494 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3243.041717529297, | |
| "epoch": 0.8483290488431876, | |
| "grad_norm": 0.12602552771568298, | |
| "kl": 0.0151519775390625, | |
| "learning_rate": 1.611909896913657e-07, | |
| "loss": -0.0097, | |
| "reward": 0.3749999925494194, | |
| "reward_std": 0.18543857336044312, | |
| "rewards/accuracy_multibox_reward": 0.3749999925494194, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3540.1111450195312, | |
| "epoch": 0.8500428449014568, | |
| "grad_norm": 0.10541314631700516, | |
| "kl": 0.013336181640625, | |
| "learning_rate": 1.5983969138871472e-07, | |
| "loss": 0.044, | |
| "reward": 0.11111111380159855, | |
| "reward_std": 0.13328944519162178, | |
| "rewards/accuracy_multibox_reward": 0.11111111380159855, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3538.916748046875, | |
| "epoch": 0.8517566409597258, | |
| "grad_norm": 0.11556142568588257, | |
| "kl": 0.015625, | |
| "learning_rate": 1.5850241731383495e-07, | |
| "loss": 0.0632, | |
| "reward": 0.25000000186264515, | |
| "reward_std": 0.2169434241950512, | |
| "rewards/accuracy_multibox_reward": 0.25000000186264515, | |
| "step": 497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4099.916748046875, | |
| "epoch": 0.8534704370179949, | |
| "grad_norm": 0.10188069194555283, | |
| "kl": 0.01898193359375, | |
| "learning_rate": 1.5717921553475333e-07, | |
| "loss": 0.0043, | |
| "reward": 0.18287037499248981, | |
| "reward_std": 0.12651677383109927, | |
| "rewards/accuracy_multibox_reward": 0.18287037499248981, | |
| "step": 498 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3057.90283203125, | |
| "epoch": 0.8551842330762639, | |
| "grad_norm": 0.15526387095451355, | |
| "kl": 0.017425537109375, | |
| "learning_rate": 1.5587013361367125e-07, | |
| "loss": -0.0287, | |
| "reward": 0.25000000186264515, | |
| "reward_std": 0.12474862858653069, | |
| "rewards/accuracy_multibox_reward": 0.25000000186264515, | |
| "step": 499 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2889.930633544922, | |
| "epoch": 0.856898029134533, | |
| "grad_norm": 0.07878301292657852, | |
| "kl": 0.01546478271484375, | |
| "learning_rate": 1.5457521860525453e-07, | |
| "loss": -0.017, | |
| "reward": 0.4305555634200573, | |
| "reward_std": 0.099268754478544, | |
| "rewards/accuracy_multibox_reward": 0.4305555634200573, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3609.3056030273438, | |
| "epoch": 0.8586118251928021, | |
| "grad_norm": 0.11839068681001663, | |
| "kl": 0.01758575439453125, | |
| "learning_rate": 1.532945170549429e-07, | |
| "loss": -0.0053, | |
| "reward": 0.26944445818662643, | |
| "reward_std": 0.047628968954086304, | |
| "rewards/accuracy_multibox_reward": 0.26944445818662643, | |
| "step": 501 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3760.2083740234375, | |
| "epoch": 0.8603256212510711, | |
| "grad_norm": 0.086994968354702, | |
| "kl": 0.0183868408203125, | |
| "learning_rate": 1.5202807499727597e-07, | |
| "loss": -0.016, | |
| "reward": 0.24444445222616196, | |
| "reward_std": 0.14418624341487885, | |
| "rewards/accuracy_multibox_reward": 0.24444445222616196, | |
| "step": 502 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3679.1805419921875, | |
| "epoch": 0.8620394173093402, | |
| "grad_norm": 0.08694525808095932, | |
| "kl": 0.0143890380859375, | |
| "learning_rate": 1.507759379542393e-07, | |
| "loss": 0.0197, | |
| "reward": 0.1305555608123541, | |
| "reward_std": 0.1859900839626789, | |
| "rewards/accuracy_multibox_reward": 0.1305555608123541, | |
| "step": 503 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3577.5556030273438, | |
| "epoch": 0.8637532133676092, | |
| "grad_norm": 0.048640280961990356, | |
| "kl": 0.0140838623046875, | |
| "learning_rate": 1.495381509336275e-07, | |
| "loss": 0.0129, | |
| "reward": 0.16111110523343086, | |
| "reward_std": 0.08989017084240913, | |
| "rewards/accuracy_multibox_reward": 0.16111110523343086, | |
| "step": 504 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3618.6389770507812, | |
| "epoch": 0.8654670094258783, | |
| "grad_norm": 0.08933480083942413, | |
| "kl": 0.014007568359375, | |
| "learning_rate": 1.4831475842742694e-07, | |
| "loss": 0.0329, | |
| "reward": 0.32500001788139343, | |
| "reward_std": 0.09706043172627687, | |
| "rewards/accuracy_multibox_reward": 0.32500001788139343, | |
| "step": 505 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3435.2083129882812, | |
| "epoch": 0.8671808054841473, | |
| "grad_norm": 0.15019591152668, | |
| "kl": 0.0164031982421875, | |
| "learning_rate": 1.471058044102162e-07, | |
| "loss": -0.0489, | |
| "reward": 0.17777778953313828, | |
| "reward_std": 0.21019017696380615, | |
| "rewards/accuracy_multibox_reward": 0.17777778953313828, | |
| "step": 506 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3117.7222290039062, | |
| "epoch": 0.8688946015424165, | |
| "grad_norm": 0.08272594958543777, | |
| "kl": 0.0153656005859375, | |
| "learning_rate": 1.4591133233758557e-07, | |
| "loss": -0.0216, | |
| "reward": 0.40486113727092743, | |
| "reward_std": 0.09158426523208618, | |
| "rewards/accuracy_multibox_reward": 0.40486113727092743, | |
| "step": 507 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3584.8611450195312, | |
| "epoch": 0.8706083976006855, | |
| "grad_norm": 0.13427770137786865, | |
| "kl": 0.01458740234375, | |
| "learning_rate": 1.447313851445749e-07, | |
| "loss": -0.007, | |
| "reward": 0.5027777887880802, | |
| "reward_std": 0.34592461213469505, | |
| "rewards/accuracy_multibox_reward": 0.5027777887880802, | |
| "step": 508 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3204.3333129882812, | |
| "epoch": 0.8723221936589546, | |
| "grad_norm": 0.0818539708852768, | |
| "kl": 0.0132904052734375, | |
| "learning_rate": 1.4356600524413043e-07, | |
| "loss": -0.0044, | |
| "reward": 0.18611110746860504, | |
| "reward_std": 0.006804138422012329, | |
| "rewards/accuracy_multibox_reward": 0.18611110746860504, | |
| "step": 509 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3422.0139770507812, | |
| "epoch": 0.8740359897172236, | |
| "grad_norm": 0.15944251418113708, | |
| "kl": 0.01531982421875, | |
| "learning_rate": 1.4241523452558023e-07, | |
| "loss": 0.0506, | |
| "reward": 0.15833333879709244, | |
| "reward_std": 0.18174303323030472, | |
| "rewards/accuracy_multibox_reward": 0.15833333879709244, | |
| "step": 510 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3673.7361450195312, | |
| "epoch": 0.8757497857754927, | |
| "grad_norm": 0.0973527729511261, | |
| "kl": 0.0155029296875, | |
| "learning_rate": 1.4127911435312856e-07, | |
| "loss": 0.0296, | |
| "reward": 0.10277777258306742, | |
| "reward_std": 0.04262732062488794, | |
| "rewards/accuracy_multibox_reward": 0.10277777258306742, | |
| "step": 511 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3726.1806030273438, | |
| "epoch": 0.8774635818337618, | |
| "grad_norm": 0.10450690984725952, | |
| "kl": 0.01721954345703125, | |
| "learning_rate": 1.401576855643688e-07, | |
| "loss": -0.0111, | |
| "reward": 0.2833333406597376, | |
| "reward_std": 0.09025628585368395, | |
| "rewards/accuracy_multibox_reward": 0.2833333406597376, | |
| "step": 512 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3722.8472900390625, | |
| "epoch": 0.8791773778920309, | |
| "grad_norm": 0.12259256839752197, | |
| "kl": 0.0166015625, | |
| "learning_rate": 1.3905098846881582e-07, | |
| "loss": -0.0064, | |
| "reward": 0.5583333298563957, | |
| "reward_std": 0.3056107833981514, | |
| "rewards/accuracy_multibox_reward": 0.5583333298563957, | |
| "step": 513 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3854.8751220703125, | |
| "epoch": 0.8808911739502999, | |
| "grad_norm": 0.1060362458229065, | |
| "kl": 0.02001953125, | |
| "learning_rate": 1.3795906284645706e-07, | |
| "loss": 0.0323, | |
| "reward": 0.3694444615393877, | |
| "reward_std": 0.2299690768122673, | |
| "rewards/accuracy_multibox_reward": 0.3694444615393877, | |
| "step": 514 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4043.0277099609375, | |
| "epoch": 0.882604970008569, | |
| "grad_norm": 0.06920037418603897, | |
| "kl": 0.0165863037109375, | |
| "learning_rate": 1.3688194794632235e-07, | |
| "loss": 0.0176, | |
| "reward": 0.0694444477558136, | |
| "reward_std": 0.03402068838477135, | |
| "rewards/accuracy_multibox_reward": 0.0694444477558136, | |
| "step": 515 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2893.888916015625, | |
| "epoch": 0.884318766066838, | |
| "grad_norm": 0.07932537794113159, | |
| "kl": 0.0145721435546875, | |
| "learning_rate": 1.3581968248507355e-07, | |
| "loss": 0.0282, | |
| "reward": 0.3361111059784889, | |
| "reward_std": 0.06778562813997269, | |
| "rewards/accuracy_multibox_reward": 0.3361111059784889, | |
| "step": 516 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3270.2222595214844, | |
| "epoch": 0.8860325621251071, | |
| "grad_norm": 0.1276179999113083, | |
| "kl": 0.0160675048828125, | |
| "learning_rate": 1.3477230464561243e-07, | |
| "loss": 0.048, | |
| "reward": 0.0944444490596652, | |
| "reward_std": 0.17406947910785675, | |
| "rewards/accuracy_multibox_reward": 0.0944444490596652, | |
| "step": 517 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3597.2777709960938, | |
| "epoch": 0.8877463581833762, | |
| "grad_norm": 0.12547257542610168, | |
| "kl": 0.0164794921875, | |
| "learning_rate": 1.3373985207570854e-07, | |
| "loss": 0.0201, | |
| "reward": 0.29722222313284874, | |
| "reward_std": 0.1472584679722786, | |
| "rewards/accuracy_multibox_reward": 0.29722222313284874, | |
| "step": 518 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4103.861083984375, | |
| "epoch": 0.8894601542416453, | |
| "grad_norm": 0.12642744183540344, | |
| "kl": 0.015777587890625, | |
| "learning_rate": 1.3272236188664598e-07, | |
| "loss": 0.0443, | |
| "reward": 0.2833333322778344, | |
| "reward_std": 0.1184052862226963, | |
| "rewards/accuracy_multibox_reward": 0.2833333322778344, | |
| "step": 519 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3545.9722290039062, | |
| "epoch": 0.8911739502999143, | |
| "grad_norm": 0.130950465798378, | |
| "kl": 0.01513671875, | |
| "learning_rate": 1.3171987065188905e-07, | |
| "loss": -0.0139, | |
| "reward": 0.1805555671453476, | |
| "reward_std": 0.14230189099907875, | |
| "rewards/accuracy_multibox_reward": 0.1805555671453476, | |
| "step": 520 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3600.0556640625, | |
| "epoch": 0.8928877463581834, | |
| "grad_norm": 0.10778660327196121, | |
| "kl": 0.0164642333984375, | |
| "learning_rate": 1.307324144057681e-07, | |
| "loss": 0.0347, | |
| "reward": 0.2635416556149721, | |
| "reward_std": 0.21607131138443947, | |
| "rewards/accuracy_multibox_reward": 0.2635416556149721, | |
| "step": 521 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3558.6668090820312, | |
| "epoch": 0.8946015424164524, | |
| "grad_norm": 0.06329550594091415, | |
| "kl": 0.01319122314453125, | |
| "learning_rate": 1.297600286421839e-07, | |
| "loss": 0.0043, | |
| "reward": 0.2027777750045061, | |
| "reward_std": 0.04762896476313472, | |
| "rewards/accuracy_multibox_reward": 0.2027777750045061, | |
| "step": 522 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3636.291748046875, | |
| "epoch": 0.8963153384747216, | |
| "grad_norm": 0.10696972906589508, | |
| "kl": 0.0158538818359375, | |
| "learning_rate": 1.288027483133321e-07, | |
| "loss": 0.0283, | |
| "reward": 0.2638889104127884, | |
| "reward_std": 0.14455071836709976, | |
| "rewards/accuracy_multibox_reward": 0.2638889104127884, | |
| "step": 523 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3439.3054809570312, | |
| "epoch": 0.8980291345329906, | |
| "grad_norm": 0.1651819497346878, | |
| "kl": 0.0147552490234375, | |
| "learning_rate": 1.2786060782844668e-07, | |
| "loss": 0.0357, | |
| "reward": 0.1407407447695732, | |
| "reward_std": 0.14003384858369827, | |
| "rewards/accuracy_multibox_reward": 0.1407407447695732, | |
| "step": 524 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3949.861083984375, | |
| "epoch": 0.8997429305912596, | |
| "grad_norm": 0.10857588052749634, | |
| "kl": 0.0168609619140625, | |
| "learning_rate": 1.2693364105256338e-07, | |
| "loss": 0.0064, | |
| "reward": 0.1805555634200573, | |
| "reward_std": 0.23669574782252312, | |
| "rewards/accuracy_multibox_reward": 0.1805555634200573, | |
| "step": 525 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3319.0695190429688, | |
| "epoch": 0.9014567266495287, | |
| "grad_norm": 0.13888534903526306, | |
| "kl": 0.0150146484375, | |
| "learning_rate": 1.2602188130530207e-07, | |
| "loss": -0.0041, | |
| "reward": 0.41018519550561905, | |
| "reward_std": 0.13154930248856544, | |
| "rewards/accuracy_multibox_reward": 0.41018519550561905, | |
| "step": 526 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3401.4444580078125, | |
| "epoch": 0.9031705227077977, | |
| "grad_norm": 0.14649337530136108, | |
| "kl": 0.0140380859375, | |
| "learning_rate": 1.2512536135966937e-07, | |
| "loss": -0.022, | |
| "reward": 0.3666666904464364, | |
| "reward_std": 0.18492921441793442, | |
| "rewards/accuracy_multibox_reward": 0.3666666904464364, | |
| "step": 527 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3171.4861450195312, | |
| "epoch": 0.9048843187660668, | |
| "grad_norm": 0.12782660126686096, | |
| "kl": 0.0113525390625, | |
| "learning_rate": 1.242441134408805e-07, | |
| "loss": -0.0111, | |
| "reward": 0.3583333492279053, | |
| "reward_std": 0.26418305188417435, | |
| "rewards/accuracy_multibox_reward": 0.3583333492279053, | |
| "step": 528 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3593.3055419921875, | |
| "epoch": 0.9065981148243359, | |
| "grad_norm": 0.06860707700252533, | |
| "kl": 0.015625, | |
| "learning_rate": 1.2337816922520103e-07, | |
| "loss": 0.0183, | |
| "reward": 0.2611111178994179, | |
| "reward_std": 0.18447305262088776, | |
| "rewards/accuracy_multibox_reward": 0.2611111178994179, | |
| "step": 529 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2869.0833740234375, | |
| "epoch": 0.908311910882605, | |
| "grad_norm": 0.03843902796506882, | |
| "kl": 0.0118865966796875, | |
| "learning_rate": 1.2252755983880822e-07, | |
| "loss": 0.0052, | |
| "reward": 0.20277778059244156, | |
| "reward_std": 0.006804134231060743, | |
| "rewards/accuracy_multibox_reward": 0.20277778059244156, | |
| "step": 530 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4083.0, | |
| "epoch": 0.910025706940874, | |
| "grad_norm": 0.08080486208200455, | |
| "kl": 0.0223236083984375, | |
| "learning_rate": 1.2169231585667208e-07, | |
| "loss": -0.0005, | |
| "reward": 0.19166666641831398, | |
| "reward_std": 0.09728333353996277, | |
| "rewards/accuracy_multibox_reward": 0.19166666641831398, | |
| "step": 531 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3364.0140380859375, | |
| "epoch": 0.9117395029991431, | |
| "grad_norm": 0.11917509883642197, | |
| "kl": 0.01389312744140625, | |
| "learning_rate": 1.2087246730145672e-07, | |
| "loss": 0.0309, | |
| "reward": 0.06111111305654049, | |
| "reward_std": 0.12468281015753746, | |
| "rewards/accuracy_multibox_reward": 0.06111111305654049, | |
| "step": 532 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3240.263916015625, | |
| "epoch": 0.9134532990574121, | |
| "grad_norm": 0.10657712072134018, | |
| "kl": 0.015899658203125, | |
| "learning_rate": 1.2006804364244078e-07, | |
| "loss": 0.0281, | |
| "reward": 0.22777779772877693, | |
| "reward_std": 0.13422610238194466, | |
| "rewards/accuracy_multibox_reward": 0.22777779772877693, | |
| "step": 533 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3889.2083129882812, | |
| "epoch": 0.9151670951156813, | |
| "grad_norm": 0.028002509847283363, | |
| "kl": 0.0151824951171875, | |
| "learning_rate": 1.1927907379445845e-07, | |
| "loss": 0.0006, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.0, | |
| "step": 534 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3683.0277404785156, | |
| "epoch": 0.9168808911739503, | |
| "grad_norm": 0.06848502159118652, | |
| "kl": 0.0163726806640625, | |
| "learning_rate": 1.1850558611685997e-07, | |
| "loss": 0.0029, | |
| "reward": 0.31388889253139496, | |
| "reward_std": 0.06705055106431246, | |
| "rewards/accuracy_multibox_reward": 0.31388889253139496, | |
| "step": 535 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3338.75, | |
| "epoch": 0.9185946872322194, | |
| "grad_norm": 0.08541859686374664, | |
| "kl": 0.013671875, | |
| "learning_rate": 1.1774760841249235e-07, | |
| "loss": 0.0321, | |
| "reward": 0.21388890594244003, | |
| "reward_std": 0.14343678206205368, | |
| "rewards/accuracy_multibox_reward": 0.21388890594244003, | |
| "step": 536 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3635.7083740234375, | |
| "epoch": 0.9203084832904884, | |
| "grad_norm": 0.10903652012348175, | |
| "kl": 0.020538330078125, | |
| "learning_rate": 1.1700516792670004e-07, | |
| "loss": 0.0472, | |
| "reward": 0.23055557161569595, | |
| "reward_std": 0.10240122815594077, | |
| "rewards/accuracy_multibox_reward": 0.23055557161569595, | |
| "step": 537 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3838.0, | |
| "epoch": 0.9220222793487575, | |
| "grad_norm": 0.11763253808021545, | |
| "kl": 0.016265869140625, | |
| "learning_rate": 1.1627829134634537e-07, | |
| "loss": 0.0579, | |
| "reward": 0.23240742087364197, | |
| "reward_std": 0.2227391004562378, | |
| "rewards/accuracy_multibox_reward": 0.23240742087364197, | |
| "step": 538 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3454.2500610351562, | |
| "epoch": 0.9237360754070265, | |
| "grad_norm": 0.10026513785123825, | |
| "kl": 0.017608642578125, | |
| "learning_rate": 1.1556700479884968e-07, | |
| "loss": -0.0124, | |
| "reward": 0.22500001266598701, | |
| "reward_std": 0.13601411506533623, | |
| "rewards/accuracy_multibox_reward": 0.22500001266598701, | |
| "step": 539 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3817.1250610351562, | |
| "epoch": 0.9254498714652957, | |
| "grad_norm": 0.06587161123752594, | |
| "kl": 0.021148681640625, | |
| "learning_rate": 1.148713338512537e-07, | |
| "loss": 0.0034, | |
| "reward": 0.07222222536802292, | |
| "reward_std": 0.1418960690498352, | |
| "rewards/accuracy_multibox_reward": 0.07222222536802292, | |
| "step": 540 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3110.9305419921875, | |
| "epoch": 0.9271636675235647, | |
| "grad_norm": 0.09760706126689911, | |
| "kl": 0.0135345458984375, | |
| "learning_rate": 1.1419130350929897e-07, | |
| "loss": 0.0464, | |
| "reward": 0.30000000074505806, | |
| "reward_std": 0.1401212769560516, | |
| "rewards/accuracy_multibox_reward": 0.30000000074505806, | |
| "step": 541 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3544.013916015625, | |
| "epoch": 0.9288774635818338, | |
| "grad_norm": 0.2858937084674835, | |
| "kl": 0.0181121826171875, | |
| "learning_rate": 1.1352693821652885e-07, | |
| "loss": 0.0343, | |
| "reward": 0.3138889018446207, | |
| "reward_std": 0.14237193390727043, | |
| "rewards/accuracy_multibox_reward": 0.3138889018446207, | |
| "step": 542 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2996.736114501953, | |
| "epoch": 0.9305912596401028, | |
| "grad_norm": 0.12911923229694366, | |
| "kl": 0.014881134033203125, | |
| "learning_rate": 1.1287826185340984e-07, | |
| "loss": 0.0254, | |
| "reward": 0.23333334550261497, | |
| "reward_std": 0.1880616983398795, | |
| "rewards/accuracy_multibox_reward": 0.23333334550261497, | |
| "step": 543 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3957.6111450195312, | |
| "epoch": 0.9323050556983719, | |
| "grad_norm": 0.06952901929616928, | |
| "kl": 0.01727294921875, | |
| "learning_rate": 1.1224529773647331e-07, | |
| "loss": 0.0044, | |
| "reward": 0.125, | |
| "reward_std": 0.08646837621927261, | |
| "rewards/accuracy_multibox_reward": 0.125, | |
| "step": 544 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3152.513916015625, | |
| "epoch": 0.934018851756641, | |
| "grad_norm": 0.08738566190004349, | |
| "kl": 0.01324462890625, | |
| "learning_rate": 1.1162806861747725e-07, | |
| "loss": -0.016, | |
| "reward": 0.2361111119389534, | |
| "reward_std": 0.05425018072128296, | |
| "rewards/accuracy_multibox_reward": 0.2361111119389534, | |
| "step": 545 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3780.6528930664062, | |
| "epoch": 0.9357326478149101, | |
| "grad_norm": 0.12653063237667084, | |
| "kl": 0.0214996337890625, | |
| "learning_rate": 1.1102659668258868e-07, | |
| "loss": 0.0274, | |
| "reward": 0.061111112125217915, | |
| "reward_std": 0.12247448414564133, | |
| "rewards/accuracy_multibox_reward": 0.061111112125217915, | |
| "step": 546 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4136.055603027344, | |
| "epoch": 0.9374464438731791, | |
| "grad_norm": 0.08191170543432236, | |
| "kl": 0.0214996337890625, | |
| "learning_rate": 1.1044090355158605e-07, | |
| "loss": -0.0031, | |
| "reward": 0.11666667088866234, | |
| "reward_std": 0.18806170299649239, | |
| "rewards/accuracy_multibox_reward": 0.11666667088866234, | |
| "step": 547 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3821.3888549804688, | |
| "epoch": 0.9391602399314481, | |
| "grad_norm": 0.13058289885520935, | |
| "kl": 0.018890380859375, | |
| "learning_rate": 1.0987101027708205e-07, | |
| "loss": -0.014, | |
| "reward": 0.16944444552063942, | |
| "reward_std": 0.168707225471735, | |
| "rewards/accuracy_multibox_reward": 0.16944444552063942, | |
| "step": 548 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3175.6666870117188, | |
| "epoch": 0.9408740359897172, | |
| "grad_norm": 0.13926012814044952, | |
| "kl": 0.0150604248046875, | |
| "learning_rate": 1.0931693734376689e-07, | |
| "loss": -0.0141, | |
| "reward": 0.27777778171002865, | |
| "reward_std": 0.19574417360126972, | |
| "rewards/accuracy_multibox_reward": 0.27777778171002865, | |
| "step": 549 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4021.5833740234375, | |
| "epoch": 0.9425878320479862, | |
| "grad_norm": 0.05867859348654747, | |
| "kl": 0.0189361572265625, | |
| "learning_rate": 1.0877870466767222e-07, | |
| "loss": 0.0175, | |
| "reward": 0.14351852145045996, | |
| "reward_std": 0.07158663962036371, | |
| "rewards/accuracy_multibox_reward": 0.14351852145045996, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4069.0000610351562, | |
| "epoch": 0.9443016281062554, | |
| "grad_norm": 0.11697559058666229, | |
| "kl": 0.0196380615234375, | |
| "learning_rate": 1.0825633159545497e-07, | |
| "loss": 0.02, | |
| "reward": 0.27500002458691597, | |
| "reward_std": 0.2560563385486603, | |
| "rewards/accuracy_multibox_reward": 0.27500002458691597, | |
| "step": 551 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2937.5833740234375, | |
| "epoch": 0.9460154241645244, | |
| "grad_norm": 0.2190292328596115, | |
| "kl": 0.01483154296875, | |
| "learning_rate": 1.0774983690370204e-07, | |
| "loss": 0.0677, | |
| "reward": 0.4472222588956356, | |
| "reward_std": 0.18236100673675537, | |
| "rewards/accuracy_multibox_reward": 0.4472222588956356, | |
| "step": 552 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3114.8333740234375, | |
| "epoch": 0.9477292202227935, | |
| "grad_norm": 0.1625695377588272, | |
| "kl": 0.013824462890625, | |
| "learning_rate": 1.0725923879825536e-07, | |
| "loss": -0.0144, | |
| "reward": 0.31944446451961994, | |
| "reward_std": 0.3082212544977665, | |
| "rewards/accuracy_multibox_reward": 0.31944446451961994, | |
| "step": 553 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3861.791748046875, | |
| "epoch": 0.9494430162810625, | |
| "grad_norm": 0.08245892077684402, | |
| "kl": 0.0185089111328125, | |
| "learning_rate": 1.0678455491355759e-07, | |
| "loss": 0.0112, | |
| "reward": 0.14722222462296486, | |
| "reward_std": 0.1568959392607212, | |
| "rewards/accuracy_multibox_reward": 0.14722222462296486, | |
| "step": 554 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3455.4444580078125, | |
| "epoch": 0.9511568123393316, | |
| "grad_norm": 0.15519405901432037, | |
| "kl": 0.013397216796875, | |
| "learning_rate": 1.0632580231201814e-07, | |
| "loss": 0.0058, | |
| "reward": 0.305555559694767, | |
| "reward_std": 0.10192339075729251, | |
| "rewards/accuracy_multibox_reward": 0.305555559694767, | |
| "step": 555 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3928.77783203125, | |
| "epoch": 0.9528706083976007, | |
| "grad_norm": 0.04161922633647919, | |
| "kl": 0.01544189453125, | |
| "learning_rate": 1.0588299748339994e-07, | |
| "loss": 0.0031, | |
| "reward": 0.01666666753590107, | |
| "reward_std": 0.040824830532073975, | |
| "rewards/accuracy_multibox_reward": 0.01666666753590107, | |
| "step": 556 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3085.7638549804688, | |
| "epoch": 0.9545844044558698, | |
| "grad_norm": 0.12940551340579987, | |
| "kl": 0.0150909423828125, | |
| "learning_rate": 1.0545615634422654e-07, | |
| "loss": 0.0027, | |
| "reward": 0.16944444179534912, | |
| "reward_std": 0.047628968954086304, | |
| "rewards/accuracy_multibox_reward": 0.16944444179534912, | |
| "step": 557 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3814.2361450195312, | |
| "epoch": 0.9562982005141388, | |
| "grad_norm": 0.06866694986820221, | |
| "kl": 0.01824951171875, | |
| "learning_rate": 1.0504529423721023e-07, | |
| "loss": 0.0232, | |
| "reward": 0.1805555708706379, | |
| "reward_std": 0.15187500417232513, | |
| "rewards/accuracy_multibox_reward": 0.1805555708706379, | |
| "step": 558 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3433.7083740234375, | |
| "epoch": 0.9580119965724079, | |
| "grad_norm": 0.12345051765441895, | |
| "kl": 0.0155792236328125, | |
| "learning_rate": 1.046504259307005e-07, | |
| "loss": 0.0301, | |
| "reward": 0.23888889327645302, | |
| "reward_std": 0.16278834640979767, | |
| "rewards/accuracy_multibox_reward": 0.23888889327645302, | |
| "step": 559 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3231.6943969726562, | |
| "epoch": 0.9597257926306769, | |
| "grad_norm": 0.17866738140583038, | |
| "kl": 0.0186004638671875, | |
| "learning_rate": 1.0427156561815302e-07, | |
| "loss": 0.0391, | |
| "reward": 0.32777778152376413, | |
| "reward_std": 0.13177766650915146, | |
| "rewards/accuracy_multibox_reward": 0.32777778152376413, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3533.1527709960938, | |
| "epoch": 0.961439588688946, | |
| "grad_norm": 0.13673555850982666, | |
| "kl": 0.0173797607421875, | |
| "learning_rate": 1.0390872691761968e-07, | |
| "loss": -0.0042, | |
| "reward": 0.3722222372889519, | |
| "reward_std": 0.19654756505042315, | |
| "rewards/accuracy_multibox_reward": 0.3722222372889519, | |
| "step": 561 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3562.0555419921875, | |
| "epoch": 0.9631533847472151, | |
| "grad_norm": 0.08747408539056778, | |
| "kl": 0.017547607421875, | |
| "learning_rate": 1.0356192287125897e-07, | |
| "loss": -0.0062, | |
| "reward": 0.244444465264678, | |
| "reward_std": 0.1010712468996644, | |
| "rewards/accuracy_multibox_reward": 0.244444465264678, | |
| "step": 562 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3874.2916870117188, | |
| "epoch": 0.9648671808054842, | |
| "grad_norm": 0.10127369314432144, | |
| "kl": 0.014892578125, | |
| "learning_rate": 1.0323116594486718e-07, | |
| "loss": 0.0227, | |
| "reward": 0.305555553175509, | |
| "reward_std": 0.1988849826157093, | |
| "rewards/accuracy_multibox_reward": 0.305555553175509, | |
| "step": 563 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3216.4306030273438, | |
| "epoch": 0.9665809768637532, | |
| "grad_norm": 0.08921950310468674, | |
| "kl": 0.01513671875, | |
| "learning_rate": 1.0291646802743043e-07, | |
| "loss": 0.0441, | |
| "reward": 0.2637731432914734, | |
| "reward_std": 0.07733241841197014, | |
| "rewards/accuracy_multibox_reward": 0.2637731432914734, | |
| "step": 564 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3783.4861450195312, | |
| "epoch": 0.9682947729220223, | |
| "grad_norm": 0.05064484849572182, | |
| "kl": 0.01800537109375, | |
| "learning_rate": 1.026178404306972e-07, | |
| "loss": 0.0088, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.05671145347878337, | |
| "rewards/accuracy_multibox_reward": 0.1666666716337204, | |
| "step": 565 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3119.5972290039062, | |
| "epoch": 0.9700085689802913, | |
| "grad_norm": 0.07089328020811081, | |
| "kl": 0.0144805908203125, | |
| "learning_rate": 1.0233529388877169e-07, | |
| "loss": 0.0052, | |
| "reward": 0.14444444701075554, | |
| "reward_std": 0.08385797962546349, | |
| "rewards/accuracy_multibox_reward": 0.14444444701075554, | |
| "step": 566 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2819.8611450195312, | |
| "epoch": 0.9717223650385605, | |
| "grad_norm": 0.11893336474895477, | |
| "kl": 0.01641845703125, | |
| "learning_rate": 1.0206883855772812e-07, | |
| "loss": -0.0425, | |
| "reward": 0.31666666362434626, | |
| "reward_std": 0.13116297498345375, | |
| "rewards/accuracy_multibox_reward": 0.31666666362434626, | |
| "step": 567 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3760.5416870117188, | |
| "epoch": 0.9734361610968295, | |
| "grad_norm": 0.09803541004657745, | |
| "kl": 0.02154541015625, | |
| "learning_rate": 1.0181848401524571e-07, | |
| "loss": 0.0264, | |
| "reward": 0.2138888854533434, | |
| "reward_std": 0.08574232831597328, | |
| "rewards/accuracy_multibox_reward": 0.2138888854533434, | |
| "step": 568 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3375.388916015625, | |
| "epoch": 0.9751499571550986, | |
| "grad_norm": 0.08259835839271545, | |
| "kl": 0.018524169921875, | |
| "learning_rate": 1.0158423926026428e-07, | |
| "loss": -0.0241, | |
| "reward": 0.17777778208255768, | |
| "reward_std": 0.04943146277219057, | |
| "rewards/accuracy_multibox_reward": 0.17777778208255768, | |
| "step": 569 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3954.2361450195312, | |
| "epoch": 0.9768637532133676, | |
| "grad_norm": 0.12743334472179413, | |
| "kl": 0.021453857421875, | |
| "learning_rate": 1.0136611271266077e-07, | |
| "loss": 0.0451, | |
| "reward": 0.24337797798216343, | |
| "reward_std": 0.24430206045508385, | |
| "rewards/accuracy_multibox_reward": 0.24337797798216343, | |
| "step": 570 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3393.263916015625, | |
| "epoch": 0.9785775492716366, | |
| "grad_norm": 0.0804271548986435, | |
| "kl": 0.0181427001953125, | |
| "learning_rate": 1.0116411221294661e-07, | |
| "loss": -0.0015, | |
| "reward": 0.18888889253139496, | |
| "reward_std": 0.09467293322086334, | |
| "rewards/accuracy_multibox_reward": 0.18888889253139496, | |
| "step": 571 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3869.02783203125, | |
| "epoch": 0.9802913453299057, | |
| "grad_norm": 0.10332907736301422, | |
| "kl": 0.01812744140625, | |
| "learning_rate": 1.0097824502198607e-07, | |
| "loss": 0.0275, | |
| "reward": 0.19444443844258785, | |
| "reward_std": 0.23754342272877693, | |
| "rewards/accuracy_multibox_reward": 0.19444443844258785, | |
| "step": 572 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3636.7916870117188, | |
| "epoch": 0.9820051413881749, | |
| "grad_norm": 0.05298357084393501, | |
| "kl": 0.0149688720703125, | |
| "learning_rate": 1.0080851782073508e-07, | |
| "loss": 0.0115, | |
| "reward": 0.3638889007270336, | |
| "reward_std": 0.08879294618964195, | |
| "rewards/accuracy_multibox_reward": 0.3638889007270336, | |
| "step": 573 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3509.9584350585938, | |
| "epoch": 0.9837189374464439, | |
| "grad_norm": 0.10786385834217072, | |
| "kl": 0.0169677734375, | |
| "learning_rate": 1.0065493671000111e-07, | |
| "loss": 0.0355, | |
| "reward": 0.33472226234152913, | |
| "reward_std": 0.11634365748614073, | |
| "rewards/accuracy_multibox_reward": 0.33472226234152913, | |
| "step": 574 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3351.65283203125, | |
| "epoch": 0.9854327335047129, | |
| "grad_norm": 0.15691952407360077, | |
| "kl": 0.017059326171875, | |
| "learning_rate": 1.0051750721022386e-07, | |
| "loss": -0.0515, | |
| "reward": 0.20277779176831245, | |
| "reward_std": 0.15857390314340591, | |
| "rewards/accuracy_multibox_reward": 0.20277779176831245, | |
| "step": 575 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3570.8612060546875, | |
| "epoch": 0.987146529562982, | |
| "grad_norm": 0.11826075613498688, | |
| "kl": 0.020355224609375, | |
| "learning_rate": 1.0039623426127697e-07, | |
| "loss": 0.0568, | |
| "reward": 0.31944444961845875, | |
| "reward_std": 0.17608047276735306, | |
| "rewards/accuracy_multibox_reward": 0.31944444961845875, | |
| "step": 576 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3153.6250610351562, | |
| "epoch": 0.988860325621251, | |
| "grad_norm": 0.11033926904201508, | |
| "kl": 0.0153350830078125, | |
| "learning_rate": 1.0029112222229035e-07, | |
| "loss": 0.0395, | |
| "reward": 0.33888888731598854, | |
| "reward_std": 0.1950649581849575, | |
| "rewards/accuracy_multibox_reward": 0.33888888731598854, | |
| "step": 577 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3260.9027709960938, | |
| "epoch": 0.9905741216795202, | |
| "grad_norm": 0.08028316497802734, | |
| "kl": 0.0139007568359375, | |
| "learning_rate": 1.002021748714934e-07, | |
| "loss": 0.0174, | |
| "reward": 0.1388888880610466, | |
| "reward_std": 0.14936944842338562, | |
| "rewards/accuracy_multibox_reward": 0.1388888880610466, | |
| "step": 578 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3380.1527709960938, | |
| "epoch": 0.9922879177377892, | |
| "grad_norm": 0.09433434903621674, | |
| "kl": 0.0166473388671875, | |
| "learning_rate": 1.0012939540607943e-07, | |
| "loss": -0.0232, | |
| "reward": 0.19166665896773338, | |
| "reward_std": 0.12752408999949694, | |
| "rewards/accuracy_multibox_reward": 0.19166665896773338, | |
| "step": 579 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3381.5277709960938, | |
| "epoch": 0.9940017137960583, | |
| "grad_norm": 0.17496156692504883, | |
| "kl": 0.0193634033203125, | |
| "learning_rate": 1.0007278644209052e-07, | |
| "loss": 0.0319, | |
| "reward": 0.3222222216427326, | |
| "reward_std": 0.2876081317663193, | |
| "rewards/accuracy_multibox_reward": 0.3222222216427326, | |
| "step": 580 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3598.5001220703125, | |
| "epoch": 0.9957155098543273, | |
| "grad_norm": 0.03561937436461449, | |
| "kl": 0.0193939208984375, | |
| "learning_rate": 1.0003235001432369e-07, | |
| "loss": 0.0008, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_multibox_reward": 0.10000000149011612, | |
| "step": 581 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3512.3889770507812, | |
| "epoch": 0.9974293059125964, | |
| "grad_norm": 0.053072210401296616, | |
| "kl": 0.017181396484375, | |
| "learning_rate": 1.0000808757625745e-07, | |
| "loss": 0.0224, | |
| "reward": 0.03061568085104227, | |
| "reward_std": 0.07481637224555016, | |
| "rewards/accuracy_multibox_reward": 0.03061568085104227, | |
| "step": 582 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3852.40625, | |
| "epoch": 0.9991431019708654, | |
| "grad_norm": 0.18002861738204956, | |
| "kl": 0.0156707763671875, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0626, | |
| "reward": 0.2583333496004343, | |
| "reward_std": 0.1492285132408142, | |
| "rewards/accuracy_multibox_reward": 0.2583333496004343, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.9991431019708654, | |
| "step": 583, | |
| "total_flos": 0.0, | |
| "train_loss": 0.013134205615018704, | |
| "train_runtime": 84780.1933, | |
| "train_samples_per_second": 0.495, | |
| "train_steps_per_second": 0.007 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 583, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |