{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1631986944104447,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016319869441044472,
      "grad_norm": 0.058299943804740906,
      "learning_rate": 0.00019983686786296902,
      "loss": 0.023,
      "on_policy_loss": 0.023,
      "step": 2
    },
    {
      "epoch": 0.0032639738882088943,
      "grad_norm": 0.04990854486823082,
      "learning_rate": 0.00019951060358890704,
      "loss": 0.0236,
      "on_policy_loss": 0.0236,
      "step": 4
    },
    {
      "epoch": 0.004895960832313341,
      "grad_norm": 0.03872146084904671,
      "learning_rate": 0.00019918433931484503,
      "loss": 0.025,
      "on_policy_loss": 0.025,
      "step": 6
    },
    {
      "epoch": 0.006527947776417789,
      "grad_norm": 0.0333283431828022,
      "learning_rate": 0.00019885807504078305,
      "loss": 0.0245,
      "on_policy_loss": 0.0245,
      "step": 8
    },
    {
      "epoch": 0.008159934720522236,
      "grad_norm": 0.0277633685618639,
      "learning_rate": 0.00019853181076672104,
      "loss": 0.0229,
      "on_policy_loss": 0.0229,
      "step": 10
    },
    {
      "epoch": 0.009791921664626682,
      "grad_norm": 0.02402353286743164,
      "learning_rate": 0.00019820554649265906,
      "loss": 0.0219,
      "on_policy_loss": 0.0219,
      "step": 12
    },
    {
      "epoch": 0.01142390860873113,
      "grad_norm": 0.026979146525263786,
      "learning_rate": 0.00019787928221859708,
      "loss": 0.0228,
      "on_policy_loss": 0.0228,
      "step": 14
    },
    {
      "epoch": 0.013055895552835577,
      "grad_norm": 0.01897270232439041,
      "learning_rate": 0.0001975530179445351,
      "loss": 0.0224,
      "on_policy_loss": 0.0224,
      "step": 16
    },
    {
      "epoch": 0.014687882496940025,
      "grad_norm": 0.020191779360175133,
      "learning_rate": 0.0001972267536704731,
      "loss": 0.0223,
      "on_policy_loss": 0.0223,
      "step": 18
    },
    {
      "epoch": 0.016319869441044473,
      "grad_norm": 0.019388101994991302,
      "learning_rate": 0.0001969004893964111,
      "loss": 0.0207,
      "on_policy_loss": 0.0207,
      "step": 20
    },
    {
      "epoch": 0.01795185638514892,
      "grad_norm": 0.01775970496237278,
      "learning_rate": 0.0001965742251223491,
      "loss": 0.0225,
      "on_policy_loss": 0.0225,
      "step": 22
    },
    {
      "epoch": 0.019583843329253364,
      "grad_norm": 0.0191648006439209,
      "learning_rate": 0.00019624796084828712,
      "loss": 0.0227,
      "on_policy_loss": 0.0227,
      "step": 24
    },
    {
      "epoch": 0.021215830273357814,
      "grad_norm": 0.017626851797103882,
      "learning_rate": 0.0001959216965742251,
      "loss": 0.0212,
      "on_policy_loss": 0.0212,
      "step": 26
    },
    {
      "epoch": 0.02284781721746226,
      "grad_norm": 0.013785107992589474,
      "learning_rate": 0.00019559543230016316,
      "loss": 0.0196,
      "on_policy_loss": 0.0196,
      "step": 28
    },
    {
      "epoch": 0.02447980416156671,
      "grad_norm": 0.016143064945936203,
      "learning_rate": 0.00019526916802610115,
      "loss": 0.0227,
      "on_policy_loss": 0.0227,
      "step": 30
    },
    {
      "epoch": 0.026111791105671155,
      "grad_norm": 0.012568962760269642,
      "learning_rate": 0.00019494290375203917,
      "loss": 0.022,
      "on_policy_loss": 0.022,
      "step": 32
    },
    {
      "epoch": 0.0277437780497756,
      "grad_norm": 0.018003186210989952,
      "learning_rate": 0.00019461663947797716,
      "loss": 0.0231,
      "on_policy_loss": 0.0231,
      "step": 34
    },
    {
      "epoch": 0.02937576499388005,
      "grad_norm": 0.024327414110302925,
      "learning_rate": 0.00019429037520391518,
      "loss": 0.0274,
      "on_policy_loss": 0.0274,
      "step": 36
    },
    {
      "epoch": 0.031007751937984496,
      "grad_norm": 0.021259307861328125,
      "learning_rate": 0.0001939641109298532,
      "loss": 0.0268,
      "on_policy_loss": 0.0268,
      "step": 38
    },
    {
      "epoch": 0.032639738882088945,
      "grad_norm": 0.02104460634291172,
      "learning_rate": 0.00019363784665579121,
      "loss": 0.0265,
      "on_policy_loss": 0.0265,
      "step": 40
    },
    {
      "epoch": 0.03427172582619339,
      "grad_norm": 0.024325763806700706,
      "learning_rate": 0.0001933115823817292,
      "loss": 0.0312,
      "on_policy_loss": 0.0312,
      "step": 42
    },
    {
      "epoch": 0.03590371277029784,
      "grad_norm": 0.04464346170425415,
      "learning_rate": 0.00019298531810766722,
      "loss": 0.051,
      "on_policy_loss": 0.051,
      "step": 44
    },
    {
      "epoch": 0.037535699714402286,
      "grad_norm": 0.06272145360708237,
      "learning_rate": 0.00019265905383360522,
      "loss": 0.0672,
      "on_policy_loss": NaN,
      "step": 46
    },
    {
      "epoch": 0.03916768665850673,
      "grad_norm": 0.08038327842950821,
      "learning_rate": 0.00019233278955954324,
      "loss": 0.0802,
      "on_policy_loss": NaN,
      "step": 48
    },
    {
      "epoch": 0.04079967360261118,
      "grad_norm": 0.04722515493631363,
      "learning_rate": 0.00019200652528548125,
      "loss": 0.0521,
      "on_policy_loss": NaN,
      "step": 50
    },
    {
      "epoch": 0.04243166054671563,
      "grad_norm": 0.04557369276881218,
      "learning_rate": 0.00019168026101141925,
      "loss": 0.0507,
      "on_policy_loss": 0.0507,
      "step": 52
    },
    {
      "epoch": 0.044063647490820076,
      "grad_norm": 0.042579613626003265,
      "learning_rate": 0.00019135399673735726,
      "loss": 0.0501,
      "on_policy_loss": 0.0501,
      "step": 54
    },
    {
      "epoch": 0.04569563443492452,
      "grad_norm": 0.04415519908070564,
      "learning_rate": 0.00019102773246329528,
      "loss": 0.0531,
      "on_policy_loss": 0.0531,
      "step": 56
    },
    {
      "epoch": 0.04732762137902897,
      "grad_norm": 0.04244513437151909,
      "learning_rate": 0.0001907014681892333,
      "loss": 0.0537,
      "on_policy_loss": 0.0537,
      "step": 58
    },
    {
      "epoch": 0.04895960832313342,
      "grad_norm": 0.04231644049286842,
      "learning_rate": 0.0001903752039151713,
      "loss": 0.046,
      "on_policy_loss": 0.046,
      "step": 60
    },
    {
      "epoch": 0.05059159526723786,
      "grad_norm": 0.03901742026209831,
      "learning_rate": 0.0001900489396411093,
      "loss": 0.0481,
      "on_policy_loss": 0.0481,
      "step": 62
    },
    {
      "epoch": 0.05222358221134231,
      "grad_norm": 0.048259295523166656,
      "learning_rate": 0.0001897226753670473,
      "loss": 0.0578,
      "on_policy_loss": 0.0578,
      "step": 64
    },
    {
      "epoch": 0.05385556915544676,
      "grad_norm": 0.04231755807995796,
      "learning_rate": 0.00018939641109298535,
      "loss": 0.0531,
      "on_policy_loss": 0.0531,
      "step": 66
    },
    {
      "epoch": 0.0554875560995512,
      "grad_norm": 0.0582573339343071,
      "learning_rate": 0.00018907014681892334,
      "loss": 0.0723,
      "on_policy_loss": 0.0723,
      "step": 68
    },
    {
      "epoch": 0.05711954304365565,
      "grad_norm": 0.03717726469039917,
      "learning_rate": 0.00018874388254486136,
      "loss": 0.0515,
      "on_policy_loss": 0.0515,
      "step": 70
    },
    {
      "epoch": 0.0587515299877601,
      "grad_norm": 0.039125215262174606,
      "learning_rate": 0.00018841761827079935,
      "loss": 0.0444,
      "on_policy_loss": 0.0444,
      "step": 72
    },
    {
      "epoch": 0.06038351693186454,
      "grad_norm": 0.04124997928738594,
      "learning_rate": 0.00018809135399673737,
      "loss": 0.051,
      "on_policy_loss": 0.051,
      "step": 74
    },
    {
      "epoch": 0.06201550387596899,
      "grad_norm": 0.045977186411619186,
      "learning_rate": 0.00018776508972267536,
      "loss": 0.0545,
      "on_policy_loss": 0.0545,
      "step": 76
    },
    {
      "epoch": 0.06364749082007344,
      "grad_norm": 0.04227733239531517,
      "learning_rate": 0.00018743882544861338,
      "loss": 0.0513,
      "on_policy_loss": 0.0513,
      "step": 78
    },
    {
      "epoch": 0.06527947776417789,
      "grad_norm": 0.03969841077923775,
      "learning_rate": 0.0001871125611745514,
      "loss": 0.0487,
      "on_policy_loss": 0.0487,
      "step": 80
    },
    {
      "epoch": 0.06691146470828234,
      "grad_norm": 0.03437201678752899,
      "learning_rate": 0.00018678629690048942,
      "loss": 0.0519,
      "on_policy_loss": 0.0519,
      "step": 82
    },
    {
      "epoch": 0.06854345165238677,
      "grad_norm": 0.05064399167895317,
      "learning_rate": 0.0001864600326264274,
      "loss": 0.0532,
      "on_policy_loss": 0.0532,
      "step": 84
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 0.05297597497701645,
      "learning_rate": 0.00018613376835236543,
      "loss": 0.055,
      "on_policy_loss": 0.055,
      "step": 86
    },
    {
      "epoch": 0.07180742554059567,
      "grad_norm": 0.05343659967184067,
      "learning_rate": 0.00018580750407830342,
      "loss": 0.0482,
      "on_policy_loss": 0.0482,
      "step": 88
    },
    {
      "epoch": 0.07343941248470012,
      "grad_norm": 0.03496076166629791,
      "learning_rate": 0.00018548123980424144,
      "loss": 0.05,
      "on_policy_loss": 0.05,
      "step": 90
    },
    {
      "epoch": 0.07507139942880457,
      "grad_norm": 0.04398609325289726,
      "learning_rate": 0.00018515497553017946,
      "loss": 0.0484,
      "on_policy_loss": 0.0484,
      "step": 92
    },
    {
      "epoch": 0.07670338637290902,
      "grad_norm": 0.0509929358959198,
      "learning_rate": 0.00018482871125611748,
      "loss": 0.0532,
      "on_policy_loss": 0.0532,
      "step": 94
    },
    {
      "epoch": 0.07833537331701346,
      "grad_norm": 0.04439166933298111,
      "learning_rate": 0.00018450244698205547,
      "loss": 0.0517,
      "on_policy_loss": 0.0517,
      "step": 96
    },
    {
      "epoch": 0.0799673602611179,
      "grad_norm": 0.03571309149265289,
      "learning_rate": 0.0001841761827079935,
      "loss": 0.0509,
      "on_policy_loss": 0.0509,
      "step": 98
    },
    {
      "epoch": 0.08159934720522236,
      "grad_norm": 0.03795301914215088,
      "learning_rate": 0.00018384991843393148,
      "loss": 0.0513,
      "on_policy_loss": 0.0513,
      "step": 100
    },
    {
      "epoch": 0.0832313341493268,
      "grad_norm": 0.04838141053915024,
      "learning_rate": 0.0001835236541598695,
      "loss": 0.0508,
      "on_policy_loss": 0.0508,
      "step": 102
    },
    {
      "epoch": 0.08486332109343125,
      "grad_norm": 0.041026629507541656,
      "learning_rate": 0.0001831973898858075,
      "loss": 0.0481,
      "on_policy_loss": 0.0481,
      "step": 104
    },
    {
      "epoch": 0.0864953080375357,
      "grad_norm": 0.03260858356952667,
      "learning_rate": 0.00018287112561174554,
      "loss": 0.0436,
      "on_policy_loss": 0.0436,
      "step": 106
    },
    {
      "epoch": 0.08812729498164015,
      "grad_norm": 0.03473200276494026,
      "learning_rate": 0.00018254486133768353,
      "loss": 0.0436,
      "on_policy_loss": 0.0436,
      "step": 108
    },
    {
      "epoch": 0.08975928192574459,
      "grad_norm": 0.03827611356973648,
      "learning_rate": 0.00018221859706362155,
      "loss": 0.0472,
      "on_policy_loss": 0.0472,
      "step": 110
    },
    {
      "epoch": 0.09139126886984904,
      "grad_norm": 0.05292026698589325,
      "learning_rate": 0.00018189233278955954,
      "loss": 0.0525,
      "on_policy_loss": 0.0525,
      "step": 112
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 0.050034940242767334,
      "learning_rate": 0.00018156606851549756,
      "loss": 0.0508,
      "on_policy_loss": 0.0508,
      "step": 114
    },
    {
      "epoch": 0.09465524275805794,
      "grad_norm": 0.04287738725543022,
      "learning_rate": 0.00018123980424143555,
      "loss": 0.0496,
      "on_policy_loss": 0.0496,
      "step": 116
    },
    {
      "epoch": 0.09628722970216239,
      "grad_norm": 0.0442175529897213,
      "learning_rate": 0.0001809135399673736,
      "loss": 0.0484,
      "on_policy_loss": 0.0484,
      "step": 118
    },
    {
      "epoch": 0.09791921664626684,
      "grad_norm": 0.04303177818655968,
      "learning_rate": 0.0001805872756933116,
      "loss": 0.053,
      "on_policy_loss": 0.053,
      "step": 120
    },
    {
      "epoch": 0.09955120359037127,
      "grad_norm": 0.046395499259233475,
      "learning_rate": 0.0001802610114192496,
      "loss": 0.057,
      "on_policy_loss": 0.057,
      "step": 122
    },
    {
      "epoch": 0.10118319053447572,
      "grad_norm": 0.04695666581392288,
      "learning_rate": 0.0001799347471451876,
      "loss": 0.0546,
      "on_policy_loss": 0.0546,
      "step": 124
    },
    {
      "epoch": 0.10281517747858017,
      "grad_norm": 0.04866120219230652,
      "learning_rate": 0.00017960848287112562,
      "loss": 0.0548,
      "on_policy_loss": 0.0548,
      "step": 126
    },
    {
      "epoch": 0.10444716442268462,
      "grad_norm": 0.039415277540683746,
      "learning_rate": 0.0001792822185970636,
      "loss": 0.0557,
      "on_policy_loss": NaN,
      "step": 128
    },
    {
      "epoch": 0.10607915136678907,
      "grad_norm": 0.04781395196914673,
      "learning_rate": 0.00017895595432300163,
      "loss": 0.0517,
      "on_policy_loss": 0.0517,
      "step": 130
    },
    {
      "epoch": 0.10771113831089352,
      "grad_norm": 0.0389864556491375,
      "learning_rate": 0.00017862969004893965,
      "loss": 0.0515,
      "on_policy_loss": 0.0515,
      "step": 132
    },
    {
      "epoch": 0.10934312525499797,
      "grad_norm": 0.04027765989303589,
      "learning_rate": 0.00017830342577487767,
      "loss": 0.0492,
      "on_policy_loss": 0.0492,
      "step": 134
    },
    {
      "epoch": 0.1109751121991024,
      "grad_norm": 0.04564642161130905,
      "learning_rate": 0.00017797716150081566,
      "loss": 0.055,
      "on_policy_loss": 0.055,
      "step": 136
    },
    {
      "epoch": 0.11260709914320685,
      "grad_norm": 0.04376218840479851,
      "learning_rate": 0.00017765089722675368,
      "loss": 0.0495,
      "on_policy_loss": 0.0495,
      "step": 138
    },
    {
      "epoch": 0.1142390860873113,
      "grad_norm": 0.0441172830760479,
      "learning_rate": 0.0001773246329526917,
      "loss": 0.0515,
      "on_policy_loss": 0.0515,
      "step": 140
    },
    {
      "epoch": 0.11587107303141575,
      "grad_norm": 0.03999102860689163,
      "learning_rate": 0.0001769983686786297,
      "loss": 0.0508,
      "on_policy_loss": 0.0508,
      "step": 142
    },
    {
      "epoch": 0.1175030599755202,
      "grad_norm": 0.040877122431993484,
      "learning_rate": 0.0001766721044045677,
      "loss": 0.0486,
      "on_policy_loss": 0.0486,
      "step": 144
    },
    {
      "epoch": 0.11913504691962465,
      "grad_norm": 0.034374285489320755,
      "learning_rate": 0.00017634584013050572,
      "loss": 0.0486,
      "on_policy_loss": 0.0486,
      "step": 146
    },
    {
      "epoch": 0.12076703386372908,
      "grad_norm": 0.045045480132102966,
      "learning_rate": 0.00017601957585644374,
      "loss": 0.0482,
      "on_policy_loss": 0.0482,
      "step": 148
    },
    {
      "epoch": 0.12239902080783353,
      "grad_norm": 0.04038718342781067,
      "learning_rate": 0.00017569331158238174,
      "loss": 0.0673,
      "on_policy_loss": 0.0673,
      "step": 150
    },
    {
      "epoch": 0.12403100775193798,
      "grad_norm": 0.04703664407134056,
      "learning_rate": 0.00017536704730831975,
      "loss": 0.0505,
      "on_policy_loss": 0.0505,
      "step": 152
    },
    {
      "epoch": 0.12566299469604242,
      "grad_norm": 0.08692470192909241,
      "learning_rate": 0.00017504078303425775,
      "loss": 0.0646,
      "on_policy_loss": 0.0646,
      "step": 154
    },
    {
      "epoch": 0.12729498164014688,
      "grad_norm": 0.034957848489284515,
      "learning_rate": 0.00017471451876019576,
      "loss": 0.048,
      "on_policy_loss": 0.048,
      "step": 156
    },
    {
      "epoch": 0.12892696858425132,
      "grad_norm": 0.032591577619314194,
      "learning_rate": 0.00017438825448613378,
      "loss": 0.0469,
      "on_policy_loss": 0.0469,
      "step": 158
    },
    {
      "epoch": 0.13055895552835578,
      "grad_norm": 0.03767777606844902,
      "learning_rate": 0.0001740619902120718,
      "loss": 0.043,
      "on_policy_loss": 0.043,
      "step": 160
    },
    {
      "epoch": 0.13219094247246022,
      "grad_norm": 0.03873617574572563,
      "learning_rate": 0.0001737357259380098,
      "loss": 0.0474,
      "on_policy_loss": 0.0474,
      "step": 162
    },
    {
      "epoch": 0.13382292941656468,
      "grad_norm": 0.03672102466225624,
      "learning_rate": 0.0001734094616639478,
      "loss": 0.0514,
      "on_policy_loss": 0.0514,
      "step": 164
    },
    {
      "epoch": 0.13545491636066911,
      "grad_norm": 0.04233694076538086,
      "learning_rate": 0.0001730831973898858,
      "loss": 0.0477,
      "on_policy_loss": 0.0477,
      "step": 166
    },
    {
      "epoch": 0.13708690330477355,
      "grad_norm": 0.0341520719230175,
      "learning_rate": 0.00017275693311582382,
      "loss": 0.0468,
      "on_policy_loss": 0.0468,
      "step": 168
    },
    {
      "epoch": 0.138718890248878,
      "grad_norm": 0.031185219064354897,
      "learning_rate": 0.00017243066884176184,
      "loss": 0.0439,
      "on_policy_loss": 0.0439,
      "step": 170
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 0.041588034480810165,
      "learning_rate": 0.00017210440456769986,
      "loss": 0.044,
      "on_policy_loss": 0.044,
      "step": 172
    },
    {
      "epoch": 0.1419828641370869,
      "grad_norm": 0.03446938097476959,
      "learning_rate": 0.00017177814029363785,
      "loss": 0.0491,
      "on_policy_loss": 0.0491,
      "step": 174
    },
    {
      "epoch": 0.14361485108119135,
      "grad_norm": 0.03288084641098976,
      "learning_rate": 0.00017145187601957587,
      "loss": 0.0457,
      "on_policy_loss": 0.0457,
      "step": 176
    },
    {
      "epoch": 0.1452468380252958,
      "grad_norm": 0.03164874389767647,
      "learning_rate": 0.00017112561174551386,
      "loss": 0.047,
      "on_policy_loss": 0.047,
      "step": 178
    },
    {
      "epoch": 0.14687882496940025,
      "grad_norm": 0.03469070792198181,
      "learning_rate": 0.00017079934747145188,
      "loss": 0.0481,
      "on_policy_loss": 0.0481,
      "step": 180
    },
    {
      "epoch": 0.14851081191350468,
      "grad_norm": 0.03517889231443405,
      "learning_rate": 0.0001704730831973899,
      "loss": 0.0506,
      "on_policy_loss": 0.0506,
      "step": 182
    },
    {
      "epoch": 0.15014279885760914,
      "grad_norm": 0.03757636621594429,
      "learning_rate": 0.00017014681892332792,
      "loss": 0.0506,
      "on_policy_loss": 0.0506,
      "step": 184
    },
    {
      "epoch": 0.15177478580171358,
      "grad_norm": 0.034249041229486465,
      "learning_rate": 0.0001698205546492659,
      "loss": 0.0431,
      "on_policy_loss": 0.0431,
      "step": 186
    },
    {
      "epoch": 0.15340677274581804,
      "grad_norm": 0.03459709882736206,
      "learning_rate": 0.00016949429037520393,
      "loss": 0.0462,
      "on_policy_loss": 0.0462,
      "step": 188
    },
    {
      "epoch": 0.15503875968992248,
      "grad_norm": 0.031404901295900345,
      "learning_rate": 0.00016916802610114192,
      "loss": 0.0447,
      "on_policy_loss": 0.0447,
      "step": 190
    },
    {
      "epoch": 0.15667074663402691,
      "grad_norm": 0.056188613176345825,
      "learning_rate": 0.00016884176182707994,
      "loss": 0.0499,
      "on_policy_loss": 0.0499,
      "step": 192
    },
    {
      "epoch": 0.15830273357813138,
      "grad_norm": 0.03952009230852127,
      "learning_rate": 0.00016851549755301793,
      "loss": 0.0449,
      "on_policy_loss": 0.0449,
      "step": 194
    },
    {
      "epoch": 0.1599347205222358,
      "grad_norm": 0.034737277776002884,
      "learning_rate": 0.00016818923327895598,
      "loss": 0.0519,
      "on_policy_loss": 0.0519,
      "step": 196
    },
    {
      "epoch": 0.16156670746634028,
      "grad_norm": 0.03392937034368515,
      "learning_rate": 0.00016786296900489397,
      "loss": 0.0485,
      "on_policy_loss": 0.0485,
      "step": 198
    },
    {
      "epoch": 0.1631986944104447,
      "grad_norm": 0.03504011780023575,
      "learning_rate": 0.000167536704730832,
      "loss": 0.0453,
      "on_policy_loss": 0.0453,
      "step": 200
    }
  ],
  "logging_steps": 2,
  "max_steps": 1226,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
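
Note: the `on_policy_loss` entries at steps 46, 48, 50, and 128 above are `NaN` literals, which Python's `json` module writes and reads by default but which strict JSON parsers (such as browser-based file viewers) reject. A minimal sketch of loading this state and filtering out those entries, assuming the file is saved as `trainer_state.json` (that path is an assumption):

```python
import json
import math

# Python's json module parses NaN/Infinity tokens by default,
# which is why this file loads here even though strict JSON
# parsers reject it with "Unexpected token 'N'".
with open("trainer_state.json") as f:  # assumed filename/path
    state = json.load(f)

# Keep only log entries whose on_policy_loss is finite
# (steps 46, 48, 50, and 128 logged NaN in this run).
logs = state["log_history"]
clean = [e for e in logs if not math.isnan(e["on_policy_loss"])]
print(f"kept {len(clean)} of {len(logs)} log entries")
```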