| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.5626695604991863, | |
| "eval_steps": 500, | |
| "global_step": 4500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0017362995116657625, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5607, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.003472599023331525, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8611, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005208898534997287, | |
| "grad_norm": 0.13671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.746, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.00694519804666305, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7252, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008681497558328812, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7153, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010417797069994574, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.666, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012154096581660336, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6443, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0138903960933261, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6404, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01562669560499186, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6227, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.017362995116657624, | |
| "grad_norm": 0.12353515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6077, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019099294628323386, | |
| "grad_norm": 0.1279296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6814, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.020835594139989148, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6831, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02257189365165491, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6659, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02430819316332067, | |
| "grad_norm": 0.1357421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6521, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.026044492674986434, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6309, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0277807921866522, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6263, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02951709169831796, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6268, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03125339120998372, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6309, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.032989690721649485, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6103, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03472599023331525, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03646228974498101, | |
| "grad_norm": 0.12255859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6828, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03819858925664677, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6517, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03993488876831253, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6702, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.041671188279978295, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6158, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04340748779164406, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6157, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.04514378730330982, | |
| "grad_norm": 0.1357421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6444, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04688008681497558, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6053, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04861638632664134, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5974, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.050352685838307105, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5874, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.05208898534997287, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5656, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05382528486163863, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7098, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0555615843733044, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6968, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05729788388497016, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.652, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.05903418339663592, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6084, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.060770482908301685, | |
| "grad_norm": 0.12060546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6243, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.06250678241996745, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6087, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06424308193163321, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6189, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.06597938144329897, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5926, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06771568095496473, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5846, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.0694519804666305, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5668, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07118827997829626, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6842, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.07292457948996202, | |
| "grad_norm": 0.1162109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6476, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07466087900162778, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6269, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.07639717851329354, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6189, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0781334780249593, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.617, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.07986977753662507, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.649, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08160607704829083, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5985, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.08334237655995659, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5909, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08507867607162235, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6015, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.08681497558328811, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5706, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08855127509495388, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6721, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.09028757460661964, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6523, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.0920238741182854, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6232, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.09376017362995116, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.642, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.09549647314161692, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6214, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.09723277265328269, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5981, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09896907216494845, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6205, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.10070537167661421, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6008, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.10244167118827997, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5768, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.10417797069994574, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5863, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1059142702116115, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6621, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.10765056972327726, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6285, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.10938686923494302, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6254, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1111231687466088, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6164, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11285946825827456, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.623, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.11459576776994032, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5782, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.11633206728160608, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5962, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.11806836679327185, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5987, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.11980466630493761, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5885, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.12154096581660337, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5678, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12327726532826913, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.674, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.1250135648399349, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.631, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.12674986435160066, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6447, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.12848616386326642, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6277, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.13022246337493218, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5902, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.13195876288659794, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6211, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1336950623982637, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6151, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.13543136190992947, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6097, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.13716766142159523, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5842, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.138903960933261, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5522, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14064026044492675, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6664, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1423765599565925, | |
| "grad_norm": 0.111328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.647, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.14411285946825828, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6205, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.14584915897992404, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.615, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1475854584915898, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.605, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.14932175800325556, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5884, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.15105805751492132, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6043, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.15279435702658709, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5842, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.15453065653825285, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5745, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.1562669560499186, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5975, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.15800325556158437, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6604, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.15973955507325013, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6376, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1614758545849159, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6241, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.16321215409658166, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6238, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5941, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.16668475311991318, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5897, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6106, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1701573521432447, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5948, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.17189365165491047, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5811, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.17362995116657623, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5604, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.175366250678242, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6414, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.17710255018990775, | |
| "grad_norm": 0.1279296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6296, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.17883884970157352, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6161, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.18057514921323928, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5981, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.18231144872490504, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6258, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.1840477482365708, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.591, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.18578404774823656, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5829, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.18752034725990233, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5749, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1892566467715681, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5908, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.19099294628323385, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5533, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1927292457948996, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6469, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.19446554530656537, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.61, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.19620184481823114, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6342, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.1979381443298969, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6017, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.19967444384156266, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5873, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.20141074335322842, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5898, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.20314704286489418, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5927, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.20488334237655995, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5641, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2066196418882257, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5742, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.20835594139989147, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.566, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21009224091155723, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6651, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.211828540423223, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6181, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.21356483993488876, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6136, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.21530113944655452, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5951, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.21703743895822028, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5696, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.21877373846988604, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5912, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2205100379815518, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.587, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2222463374932176, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5574, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.22398263700488336, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5815, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.22571893651654912, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5718, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.22745523602821488, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6307, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.22919153553988064, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6099, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2309278350515464, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6032, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.23266413456321217, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5917, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.23440043407487793, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5869, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.2361367335865437, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5737, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.23787303309820945, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.586, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.23960933260987521, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5844, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.24134563212154098, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5819, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.24308193163320674, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5453, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2448182311448725, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6409, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.24655453065653826, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6099, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.24829083016820402, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6267, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.2500271296798698, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6319, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.25176342919153555, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5841, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.2534997287032013, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5665, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2552360282148671, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5881, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.25697232772653283, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5599, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2587086272381986, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5614, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.26044492674986436, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5683, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2621812262615301, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6652, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.2639175257731959, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6104, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.26565382528486164, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5999, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.2673901247965274, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5882, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.26912642430819317, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5826, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.27086272381985893, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5648, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2725990233315247, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5883, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.27433532284319045, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5872, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2760716223548562, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5411, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.277807921866522, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5518, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.27954422137818774, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6338, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2812805208898535, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6061, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.28301682040151926, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6213, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.284753119913185, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5815, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2864894194248508, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6088, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.28822571893651655, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5935, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2899620184481823, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5708, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.2916983179598481, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5919, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.29343461747151384, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5402, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.2951709169831796, | |
| "grad_norm": 0.11962890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5421, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.29690721649484536, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6586, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.2986435160065111, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6076, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3003798155181769, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6283, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.30211611502984265, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6065, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3038524145415084, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5651, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.30558871405317417, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6081, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.30732501356483993, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5631, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.3090613130765057, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5562, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.31079761258817146, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5865, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.3125339120998372, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.546, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.314270211611503, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6489, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.31600651112316874, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6021, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3177428106348345, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6069, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.31947911014650027, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5969, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.32121540965816603, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5781, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.3229517091698318, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5975, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.32468800868149755, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5856, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.3264243081931633, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5709, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3281606077048291, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5523, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5505, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3316332067281606, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6636, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.33336950623982636, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6165, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3351058057514921, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6182, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5859, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.33857840477482365, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6026, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.3403147042864894, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5777, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3420510037981552, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5612, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.34378730330982094, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5838, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3455236028214867, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5567, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.34725990233315246, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5563, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3489962018448182, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6221, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.350732501356484, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.613, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.35246880086814975, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.603, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.3542051003798155, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6122, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.35594139989148127, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6032, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.35767769940314703, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6027, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3594139989148128, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5592, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.36115029842647856, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5676, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3628865979381443, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5578, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.3646228974498101, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5349, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.36635919696147584, | |
| "grad_norm": 0.11083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6616, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.3680954964731416, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6331, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.36983179598480737, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6057, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.3715680954964731, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5951, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3733043950081389, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5747, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.37504069451980465, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5827, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3767769940314704, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5855, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.3785132935431362, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5862, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.38024959305480194, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5525, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.3819858925664677, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5491, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.38372219207813346, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6284, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.3854584915897992, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5915, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.387194791101465, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6242, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.38893109061313075, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5679, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3906673901247965, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5795, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.39240368963646227, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5973, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.39413998914812803, | |
| "grad_norm": 0.12109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5729, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.3958762886597938, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5757, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.39761258817145956, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5745, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.3993488876831253, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5563, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4010851871947911, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.637, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.40282148670645684, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.597, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4045577862181226, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.598, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.40629408572978837, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5984, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.40803038524145413, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5883, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.4097666847531199, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5725, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.41150298426478565, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5848, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.4132392837764514, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5757, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4149755832881172, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5538, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.41671188279978294, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.531, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4184481823114487, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6415, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.42018448182311446, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6296, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4219207813347802, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6157, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.423657080846446, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5636, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.42539338035811175, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5655, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.4271296798697775, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5727, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4288659793814433, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5615, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.43060227889310904, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5715, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.4323385784047748, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5566, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.43407487791644056, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5504, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4358111774281063, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6203, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.4375474769397721, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6021, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.43928377645143785, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6065, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.4410200759631036, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5844, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.44275637547476937, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5717, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.4444926749864352, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5893, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.44622897449810095, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5707, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.4479652740097667, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5494, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.4497015735214325, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5564, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.45143787303309824, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5536, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.453174172544764, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6417, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.45491047205642976, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6055, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4566467715680955, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5962, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.4583830710797613, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6115, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.46011937059142705, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5898, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.4618556701030928, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5834, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.46359196961475857, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5721, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.46532826912642433, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.561, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4670645686380901, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5637, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.46880086814975586, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5528, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4705371676614216, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6466, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.4722734671730874, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6073, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.47400976668475314, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6299, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.4757460661964189, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5787, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.47748236570808467, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5815, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.47921866521975043, | |
| "grad_norm": 0.2451171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5751, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.4809549647314162, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5652, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.48269126424308195, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5488, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4844275637547477, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5472, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.4861638632664135, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5394, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.48790016277807924, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6331, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.489636462289745, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6019, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.49137276180141076, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6056, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.4931090613130765, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6167, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5708, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.49658166033640805, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.591, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4983179598480738, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5364, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.5000542593597396, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5634, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5017905588714053, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5374, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.5035268583830711, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5533, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6605, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.5069994574064026, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6167, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5087357569180684, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6013, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.5104720564297341, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5774, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5122083559413999, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5837, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.5139446554530657, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5885, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5156809549647314, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5565, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.5174172544763972, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5793, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.519153553988063, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5552, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.5208898534997287, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5376, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5226261530113945, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6368, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.5243624525230602, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6176, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.526098752034726, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5909, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.5278350515463918, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5816, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5295713510580575, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6136, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.5313076505697233, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5724, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.533043950081389, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5657, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.5347802495930548, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5642, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5365165491047206, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5711, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.5382528486163863, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.514, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5399891481280521, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6508, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.5417254476397179, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6109, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5434617471513836, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6041, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.5451980466630494, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5893, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5469343461747151, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5753, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.5486706456863809, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5626, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5504069451980467, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5912, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.5521432447097124, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5629, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5538795442213782, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.581, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.555615843733044, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5413, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5573521432447097, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6606, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.5590884427563755, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5923, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.5608247422680412, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5958, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.562561041779707, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5899, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.5642973412913728, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5817, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.5660336408030385, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5579, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5677699403147043, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5677, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.56950623982637, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.566, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5712425393380358, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5668, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.5729788388497016, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5334, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5747151383613673, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6322, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.5764514378730331, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.588, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5781877373846989, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5929, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.5799240368963646, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6142, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5816603364080304, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5919, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.5833966359196961, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5811, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5851329354313619, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5564, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.5868692349430277, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5658, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5886055344546934, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5677, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.5903418339663592, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5537, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.592078133478025, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6258, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.5938144329896907, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5946, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5955507325013565, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5991, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.5972870320130222, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.602, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.599023331524688, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5778, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.6007596310363538, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5531, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6024959305480195, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.56, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.6042322300596853, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5679, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6059685295713511, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.557, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.6077048290830168, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5345, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6094411285946826, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6289, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.6111774281063483, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6079, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6129137276180141, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5773, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.6146500271296799, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5802, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6163863266413456, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5654, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.6181226261530114, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5679, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6198589256646772, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5849, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.6215952251763429, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5674, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6233315246880087, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.56, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.6250678241996744, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.527, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6268041237113402, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6374, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.628540423223006, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5962, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6302767227346717, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6013, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.6320130222463375, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6017, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6337493217580032, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5826, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.635485621269669, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5977, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6372219207813348, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5981, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.6389582202930005, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5768, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6406945198046663, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5415, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.6424308193163321, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5317, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6441671188279978, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6237, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.6459034183396636, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5886, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6476397178513293, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5769, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.6493760173629951, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5793, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6511123168746609, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5704, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.6528486163863266, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5597, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6545849158979924, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5358, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.6563212154096582, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5479, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.6580575149213239, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5472, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.6597938144329897, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5398, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6615301139446554, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.651, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.6632664134563212, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5915, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.665002712967987, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6007, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.6667390124796527, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5788, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6684753119913185, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.568, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.6702116115029843, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5711, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.67194791101465, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5682, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5764, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.6754205100379815, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5491, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.6771568095496473, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5629, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6788931090613131, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6379, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.6806294085729788, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5983, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.6823657080846446, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5929, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.6841020075963103, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.588, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6858383071079761, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5619, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.6875746066196419, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5683, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6893109061313076, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5808, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.6910472056429734, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5536, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6927835051546392, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5608, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.6945198046663049, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.534, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6962561041779707, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6431, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.6979924036896364, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5889, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6997287032013022, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5804, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.701465002712968, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5916, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7032013022246337, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5759, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.7049376017362995, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5853, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7066739012479653, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5733, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.708410200759631, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5776, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7101465002712968, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5489, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.7118827997829625, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5241, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7136190992946283, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6735, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.7153553988062941, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6125, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7170916983179598, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5805, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.7188279978296256, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6037, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7205642973412913, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5873, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.7223005968529571, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5727, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7240368963646229, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5709, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.7257731958762886, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.519, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7275094953879544, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5576, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.7292457948996202, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5285, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7309820944112859, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6482, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.7327183939229517, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.595, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7344546934346174, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5953, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.7361909929462832, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5665, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.737927292457949, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5612, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.7396635919696147, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5645, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.7413998914812805, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5865, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.7431361909929463, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5658, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.744872490504612, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5523, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.7466087900162778, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5358, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7483450895279435, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6148, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.7500813890396093, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6092, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.7518176885512751, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5939, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.7535539880629408, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5823, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7552902875746066, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5779, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.7570265870862724, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5697, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7587628865979381, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5716, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.7604991861096039, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5569, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7622354856212696, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5544, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.7639717851329354, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5332, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7657080846446012, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6517, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.7674443841562669, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5991, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.7691806836679327, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5823, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.7709169831795984, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6052, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.7726532826912642, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5711, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.77438958220293, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5369, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.7761258817145957, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5406, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.7778621812262615, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.604, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.7795984807379273, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5587, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.781334780249593, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5347, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.7830710797612588, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6413, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.7848073792729245, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5683, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.7865436787845903, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6132, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.7882799782962561, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5839, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.7900162778079218, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5857, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.7917525773195876, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5711, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.7934888768312534, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5748, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.7952251763429191, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5431, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.7969614758545849, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.549, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.7986977753662506, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5471, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8004340748779164, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6297, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.8021703743895822, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5896, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8039066739012479, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5866, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.8056429734129137, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5779, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8073792729245794, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.58, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.8091155724362452, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5887, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.810851871947911, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5622, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.8125881714595767, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5402, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8143244709712425, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5459, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.8160607704829083, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5197, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.817797069994574, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6183, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.8195333695062398, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5875, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8212696690179055, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.605, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.8230059685295713, | |
| "grad_norm": 0.07421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.583, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5785, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.8264785675529028, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5611, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8282148670645686, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5796, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.8299511665762344, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.548, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8316874660879001, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5653, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.8334237655995659, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5371, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8351600651112316, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6198, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.8368963646228974, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5979, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.8386326641345632, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5892, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.8403689636462289, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5635, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5798, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.8438415626695605, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5589, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.8455778621812262, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5365, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.847314161692892, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5417, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.8490504612045577, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5503, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.8507867607162235, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5355, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8525230602278893, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6341, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.854259359739555, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5729, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.8559956592512208, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.584, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.8577319587628865, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5905, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.8594682582745523, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5782, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.8612045577862181, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5476, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8629408572978838, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5952, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.8646771568095496, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5347, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8664134563212154, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.554, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.8681497558328811, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.54, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8698860553445469, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6187, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.8716223548562126, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5818, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.8733586543678784, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.585, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.8750949538795442, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5996, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.8768312533912099, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5727, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.8785675529028757, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5711, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.8803038524145415, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5614, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.8820401519262072, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5451, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.883776451437873, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5539, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.8855127509495387, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5379, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.8872490504612045, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6218, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.8889853499728704, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6054, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.8907216494845361, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5887, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.8924579489962019, | |
| "grad_norm": 0.07666015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5683, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.8941942485078677, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5753, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.8959305480195334, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5803, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.8976668475311992, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5634, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.899403147042865, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5674, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9011394465545307, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5618, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.9028757460661965, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5553, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9046120455778622, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6392, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.906348345089528, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5888, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9080846446011938, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5674, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.9098209441128595, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5688, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9115572436245253, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5894, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.913293543136191, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5691, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9150298426478568, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5588, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.9167661421595226, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5453, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9185024416711883, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5394, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.9202387411828541, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5313, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9219750406945199, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6438, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.9237113402061856, | |
| "grad_norm": 0.0771484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5677, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.9254476397178514, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5836, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.9271839392295171, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5679, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.9289202387411829, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5684, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.9306565382528487, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.54, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.9323928377645144, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5564, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.9341291372761802, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5526, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.935865436787846, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5372, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.9376017362995117, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5369, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.9393380358111775, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6357, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.9410743353228432, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5584, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.942810634834509, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6022, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.9445469343461748, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.566, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.9462832338578405, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5742, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.9480195333695063, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5775, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.949755832881172, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5666, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.9514921323928378, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5505, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.9532284319045036, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5276, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.9549647314161693, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5265, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9567010309278351, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6327, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.9584373304395009, | |
| "grad_norm": 0.0732421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5947, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.9601736299511666, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5922, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.9619099294628324, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.585, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.9636462289744981, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5712, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.9653825284861639, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5843, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.9671188279978297, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.551, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.9688551275094954, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5638, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.9705914270211612, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5495, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.972327726532827, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5238, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9740640260444927, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6252, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.9758003255561585, | |
| "grad_norm": 0.0712890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6002, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.9775366250678242, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5987, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.97927292457949, | |
| "grad_norm": 0.0751953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5911, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.9810092240911558, | |
| "grad_norm": 0.1533203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5721, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.9827455236028215, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5606, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.9844818231144873, | |
| "grad_norm": 0.2177734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5618, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.986218122626153, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5074, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.9879544221378188, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5442, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.9896907216494846, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.556, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.9914270211611503, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6428, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.9931633206728161, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6115, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.9948996201844819, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5626, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.9966359196961476, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5743, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.9983722192078134, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5582, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.0001085187194791, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5435, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.0018448182311448, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5697, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.0035811177428107, | |
| "grad_norm": 0.07763671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5622, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.0053174172544763, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5732, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.0070537167661422, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5266, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.0087900162778078, | |
| "grad_norm": 0.07861328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5615, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.0105263157894737, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5519, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.0122626153011394, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5355, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.0139989148128052, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5235, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.015735214324471, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4988, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.0174715138361368, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5261, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.0192078133478024, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6225, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.0209441128594683, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.555, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.022680412371134, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5522, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.0244167118827998, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5617, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.0261530113944655, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5503, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.0278893109061313, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5318, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.029625610417797, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5166, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.0313619099294629, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5373, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.0330982094411285, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.535, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.0348345089527944, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5341, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.03657080846446, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5795, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.038307107976126, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5764, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.0400434074877916, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6015, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.0417797069994574, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5517, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.043516006511123, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5322, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.045252306022789, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5317, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.0469886055344546, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5288, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.0487249050461205, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5166, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.0504612045577861, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5185, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.052197504069452, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5088, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.0539338035811177, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6082, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.0556701030927835, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5536, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.0574064026044492, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5774, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.059142702116115, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5647, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.0608790016277807, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5461, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.0626153011394466, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5357, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.0643516006511122, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5344, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.066087900162778, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5371, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.0678241996744438, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5229, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.0695604991861096, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5132, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.0712967986977753, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5932, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.0730330982094411, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5733, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.0747693977211068, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5709, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.0765056972327727, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.557, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.0782419967444383, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5577, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.0799782962561042, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.521, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.0817145957677698, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5096, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.0834508952794357, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5221, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.0851871947911014, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5144, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.0869234943027672, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5188, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.088659793814433, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6153, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.0903960933260988, | |
| "grad_norm": 0.0810546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5514, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.0921323928377644, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5448, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.0938686923494303, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5636, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.095604991861096, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5605, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.0973412913727618, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5305, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.0990775908844275, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5507, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.1008138903960933, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4921, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.102550189907759, | |
| "grad_norm": 0.12109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5324, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.1042864894194249, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5052, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.1060227889310905, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5938, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.1077590884427564, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5594, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.109495387954422, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5513, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.111231687466088, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5478, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.1129679869777536, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5314, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.1147042864894194, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5351, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.116440586001085, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5434, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.118176885512751, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5408, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.1199131850244166, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5003, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.1216494845360825, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5179, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.1233857840477481, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5892, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.125122083559414, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5529, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.1268583830710797, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5669, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.1285946825827455, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.565, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.1303309820944114, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5467, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.132067281606077, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5411, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.1338035811177427, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5108, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.1355398806294086, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5241, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.1372761801410745, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5386, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.13901247965274, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5035, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.1407487791644058, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5958, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.1424850786760716, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5763, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.1442213781877375, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5484, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.1459576776994032, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5687, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.1476939772110688, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.55, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.1494302767227347, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5231, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.1511665762344006, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5541, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.1529028757460662, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.54, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.1546391752577319, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5213, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.1563754747693977, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5192, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.1581117742810636, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5946, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.1598480737927293, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5822, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.161584373304395, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5655, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.1633206728160608, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5687, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.1650569723277266, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5448, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.1667932718393923, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5355, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.168529571351058, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5321, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.1702658708627238, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5252, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.1720021703743897, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5082, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.1737384698860553, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4836, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.175474769397721, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6026, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.1772110689093869, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5656, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.1789473684210527, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.563, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.1806836679327184, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.553, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.182419967444384, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5587, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.18415626695605, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5344, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.1858925664677158, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5537, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.1876288659793814, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5234, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.189365165491047, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5108, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.191101465002713, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5186, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.1928377645143788, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.619, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.1945740640260445, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.568, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.1963103635377101, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5474, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.198046663049376, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5505, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.1997829625610419, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5279, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.2015192620727075, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5384, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.2032555615843732, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5143, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.204991861096039, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5401, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.206728160607705, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.526, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.2084644601193706, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5094, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.2102007596310362, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6154, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.2119370591427021, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.566, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.213673358654368, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5844, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.2154096581660336, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5497, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.2171459576776993, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5543, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.2188822571893652, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.543, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.220618556701031, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5354, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.2223548562126967, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4978, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.2240911557243623, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5197, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.2258274552360282, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5303, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.227563754747694, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.599, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.2293000542593597, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5683, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.2310363537710254, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5718, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.2327726532826913, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5621, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.2345089527943571, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5643, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.2362452523060228, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.545, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.2379815518176884, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5316, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.2397178513293543, | |
| "grad_norm": 0.11962890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5377, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.2414541508410202, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5073, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.2431904503526858, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5154, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.2449267498643515, | |
| "grad_norm": 0.111328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.605, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.2466630493760174, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5731, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.2483993488876832, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5519, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.2501356483993489, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5507, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.2518719479110145, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5362, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.2536082474226804, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5723, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.2553445469343463, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.538, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.257080846446012, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5143, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.2588171459576776, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5278, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.2605534454693434, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5244, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.2622897449810093, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5943, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.264026044492675, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5651, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.2657623440043406, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5607, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.2674986435160065, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5403, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.2692349430276724, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5385, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.270971242539338, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.528, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.2727075420510037, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5318, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.2744438415626695, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5335, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.2761801410743354, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5265, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.277916440586001, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4965, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.2796527400976667, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.583, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.2813890396093326, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5798, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.2831253391209985, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5799, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.2848616386326641, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5753, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.2865979381443298, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.555, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.2883342376559956, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5462, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.2900705371676615, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5345, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.2918068366793272, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5211, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.2935431361909928, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5173, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.2952794357026587, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.508, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.2970157352143246, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6064, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.2987520347259902, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5432, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.3004883342376559, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5654, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.3022246337493217, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5506, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.3039609332609876, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5412, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.3056972327726533, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5289, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.307433532284319, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5344, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.3091698317959848, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5394, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.3109061313076507, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.516, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.3126424308193163, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5243, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.314378730330982, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6128, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.3161150298426478, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5888, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.3178513293543137, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5755, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.3195876288659794, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5551, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.321323928377645, | |
| "grad_norm": 0.11083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5296, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.3230602278893109, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5562, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.3247965274009768, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5339, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.3265328269126424, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.526, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.328269126424308, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5213, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.330005425935974, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4972, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.3317417254476398, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5998, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.3334780249593055, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5902, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.3352143244709713, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.585, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.336950623982637, | |
| "grad_norm": 0.1162109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5746, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.3386869234943028, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5543, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.3404232230059685, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5311, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.3421595225176344, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5246, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.3438958220293, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5152, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.345632121540966, | |
| "grad_norm": 0.11376953125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5267, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.3473684210526315, | |
| "grad_norm": 0.1162109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5042, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.3491047205642974, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6091, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.350841020075963, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5629, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.352577319587629, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5481, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.3543136190992946, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5349, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.3560499186109605, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5352, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.3577862181226261, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5529, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.359522517634292, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.55, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.3612588171459576, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5221, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.3629951166576235, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5193, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.3647314161692892, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5119, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.366467715680955, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5839, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.3682040151926207, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5602, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.3699403147042866, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5468, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.3716766142159522, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.548, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.373412913727618, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5601, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.3751492132392837, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5371, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.3768855127509496, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5431, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.3786218122626153, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5277, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.3803581117742811, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5221, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.3820944112859468, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5064, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.3838307107976127, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6115, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.3855670103092783, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.573, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.3873033098209442, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.585, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.3890396093326098, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5566, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.3907759088442757, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5365, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.3925122083559414, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5477, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.3942485078676072, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5232, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.3959848073792729, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5557, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.3977211068909388, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5138, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.3994574064026044, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5254, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.4011937059142703, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.604, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.402930005425936, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5583, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.4046663049376018, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5735, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.4064026044492675, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5454, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.4081389039609333, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5569, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.409875203472599, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5459, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.4116115029842649, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5285, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.4133478024959305, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5373, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.4150841020075964, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5274, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.416820401519262, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4974, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.418556701030928, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5757, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.4202930005425936, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5569, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.4220293000542594, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5585, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.423765599565925, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5621, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.425501899077591, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5391, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.4272381985892566, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5445, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.4289744981009225, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5327, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.4307107976125881, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5027, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.432447097124254, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5147, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.4341833966359196, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5243, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.4359196961475855, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5844, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.4376559956592512, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5563, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.439392295170917, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5539, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.4411285946825827, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.539, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.4428648941942486, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.539, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.4446011937059142, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5496, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.44633749321758, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5393, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.4480737927292457, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5233, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.4498100922409116, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5267, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.4515463917525773, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.529, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.4532826912642431, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6023, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.4550189907759088, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5811, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.4567552902875747, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5586, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.4584915897992403, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5704, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.4602278893109062, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5394, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.4619641888225718, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5105, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.4637004883342377, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5327, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.4654367878459034, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5387, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.4671730873575692, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.518, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.468909386869235, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5103, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.4706456863809008, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.588, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.4723819858925664, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5676, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.4741182854042323, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5664, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.475854584915898, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5609, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.4775908844275638, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5587, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.4793271839392295, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5505, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.4810634834508953, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5338, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.482799782962561, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5416, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.4845360824742269, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5411, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.4862723819858925, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5019, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.4880086814975584, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5715, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.489744981009224, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5747, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.49148128052089, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5858, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.4932175800325556, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5528, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.4949538795442214, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5545, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.496690179055887, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5409, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.498426478567553, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5582, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.5001627780792188, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5204, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.5018990775908845, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5273, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.5036353771025501, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5047, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.505371676614216, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5932, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.5071079761258819, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5626, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.5088442756375475, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5594, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.5105805751492132, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5529, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.512316874660879, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5572, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.514053174172545, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5454, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.5157894736842106, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5368, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.5175257731958762, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5221, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.519262072707542, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5348, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.520998372219208, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5132, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.5227346717308736, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5832, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.5244709712425393, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5648, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.5262072707542051, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.568, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.527943570265871, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.539, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.5296798697775367, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5463, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.5314161692892023, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5238, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.5331524688008682, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5254, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.534888768312534, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.535, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.5366250678241997, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5234, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.5383613673358654, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4957, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.5400976668475312, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5816, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.5418339663591971, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5775, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.5435702658708628, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5528, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 1.5453065653825284, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5616, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.5470428648941943, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5281, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 1.5487791644058602, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5411, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.5505154639175258, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4934, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 1.5522517634291915, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5168, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.5539880629408573, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5091, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.5557243624525232, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5026, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.5574606619641889, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6009, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 1.5591969614758545, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5593, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.5609332609875204, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5694, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 1.5626695604991863, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5424, | |
| "step": 4500 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 90, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.510419270260736e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |