Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,"... is not valid JSON

The parse fails because the file contains NaN (and, further down, Infinity) as "grad_norm" values. The JSON specification defines no NaN or Infinity literals, so strict parsers such as JavaScript's JSON.parse reject the file, even though Python's json module, which writes trainer state files like this one, emits and accepts these tokens by default.
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9994739610731194,
  "eval_steps": 500,
  "global_step": 1425,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007013852358407856,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 0.9295,
      "step": 1
    },
    {
      "epoch": 0.0014027704716815712,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 1.432,
      "step": 2
    },
    {
      "epoch": 0.0021041557075223566,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 0.3822,
      "step": 3
    },
    {
      "epoch": 0.0028055409433631424,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 0.6542,
      "step": 4
    },
    {
      "epoch": 0.003506926179203928,
      "grad_norm": 29.30962562561035,
      "learning_rate": 1.7543859649122808e-07,
      "loss": 0.3371,
      "step": 5
    },
    {
      "epoch": 0.004208311415044713,
      "grad_norm": 34.5477294921875,
      "learning_rate": 3.5087719298245616e-07,
      "loss": 0.1946,
      "step": 6
    },
    {
      "epoch": 0.004909696650885499,
      "grad_norm": 55.77326965332031,
      "learning_rate": 5.263157894736843e-07,
      "loss": 0.5682,
      "step": 7
    },
    {
      "epoch": 0.005611081886726285,
      "grad_norm": 52.84772491455078,
      "learning_rate": 7.017543859649123e-07,
      "loss": 0.3959,
      "step": 8
    },
    {
      "epoch": 0.00631246712256707,
      "grad_norm": 44.6428337097168,
      "learning_rate": 8.771929824561404e-07,
      "loss": 0.2899,
      "step": 9
    },
    {
      "epoch": 0.007013852358407856,
      "grad_norm": 69.89168548583984,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 0.6255,
      "step": 10
    },
    {
      "epoch": 0.007715237594248641,
      "grad_norm": NaN,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 0.4706,
      "step": 11
    },
    {
      "epoch": 0.008416622830089426,
      "grad_norm": 65.1708755493164,
      "learning_rate": 1.2280701754385965e-06,
      "loss": 0.6875,
      "step": 12
    },
    {
      "epoch": 0.009118008065930211,
      "grad_norm": 62.44850158691406,
      "learning_rate": 1.4035087719298246e-06,
      "loss": 0.8691,
      "step": 13
    },
    {
      "epoch": 0.009819393301770998,
      "grad_norm": 50.52750778198242,
      "learning_rate": 1.5789473684210528e-06,
      "loss": 0.5682,
      "step": 14
    },
    {
      "epoch": 0.010520778537611783,
      "grad_norm": 38.50554656982422,
      "learning_rate": 1.7543859649122807e-06,
      "loss": 0.4118,
      "step": 15
    },
    {
      "epoch": 0.01122216377345257,
      "grad_norm": Infinity,
      "learning_rate": 1.7543859649122807e-06,
      "loss": 0.4622,
      "step": 16
    },
    {
      "epoch": 0.011923549009293355,
      "grad_norm": 41.367347717285156,
      "learning_rate": 1.929824561403509e-06,
      "loss": 0.2331,
      "step": 17
    },
    {
      "epoch": 0.01262493424513414,
      "grad_norm": 47.666603088378906,
      "learning_rate": 2.105263157894737e-06,
      "loss": 0.489,
      "step": 18
    },
    {
      "epoch": 0.013326319480974926,
      "grad_norm": NaN,
      "learning_rate": 2.105263157894737e-06,
      "loss": 6.2172,
      "step": 19
    },
    {
      "epoch": 0.014027704716815711,
      "grad_norm": 161.6815643310547,
      "learning_rate": 2.2807017543859652e-06,
      "loss": 1.9947,
      "step": 20
    },
    {
      "epoch": 0.014729089952656496,
      "grad_norm": 14.031229019165039,
      "learning_rate": 2.456140350877193e-06,
      "loss": 0.0529,
      "step": 21
    },
    {
      "epoch": 0.015430475188497283,
      "grad_norm": 51.389862060546875,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.3272,
      "step": 22
    },
    {
      "epoch": 0.016131860424338066,
      "grad_norm": 16.31728744506836,
      "learning_rate": 2.8070175438596493e-06,
      "loss": 0.0668,
      "step": 23
    },
    {
      "epoch": 0.016833245660178853,
      "grad_norm": 32.19120407104492,
      "learning_rate": 2.9824561403508774e-06,
      "loss": 0.1298,
      "step": 24
    },
    {
      "epoch": 0.01753463089601964,
      "grad_norm": 8.127516746520996,
      "learning_rate": 3.1578947368421056e-06,
      "loss": 0.0245,
      "step": 25
    },
    {
      "epoch": 0.018236016131860423,
      "grad_norm": 31.48802375793457,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.1808,
      "step": 26
    },
    {
      "epoch": 0.01893740136770121,
      "grad_norm": 174.7755584716797,
      "learning_rate": 3.5087719298245615e-06,
      "loss": 1.5976,
      "step": 27
    },
    {
      "epoch": 0.019638786603541996,
      "grad_norm": 173.22021484375,
      "learning_rate": 3.6842105263157892e-06,
      "loss": 2.0731,
      "step": 28
    },
    {
      "epoch": 0.020340171839382783,
      "grad_norm": 16.176433563232422,
      "learning_rate": 3.859649122807018e-06,
      "loss": 0.0533,
      "step": 29
    },
    {
      "epoch": 0.021041557075223566,
      "grad_norm": 25.456438064575195,
      "learning_rate": 4.035087719298246e-06,
      "loss": 0.2239,
      "step": 30
    },
    {
      "epoch": 0.021742942311064353,
      "grad_norm": 2.50178861618042,
      "learning_rate": 4.210526315789474e-06,
      "loss": 0.007,
      "step": 31
    },
    {
      "epoch": 0.02244432754690514,
      "grad_norm": 8.369728088378906,
      "learning_rate": 4.3859649122807014e-06,
      "loss": 0.0312,
      "step": 32
    },
    {
      "epoch": 0.023145712782745922,
      "grad_norm": 1.419299602508545,
      "learning_rate": 4.5614035087719304e-06,
      "loss": 0.0039,
      "step": 33
    },
    {
      "epoch": 0.02384709801858671,
      "grad_norm": 21.602149963378906,
      "learning_rate": 4.736842105263159e-06,
      "loss": 0.1907,
      "step": 34
    },
    {
      "epoch": 0.024548483254427496,
      "grad_norm": 96.2072525024414,
      "learning_rate": 4.912280701754386e-06,
      "loss": 0.4621,
      "step": 35
    },
    {
      "epoch": 0.02524986849026828,
      "grad_norm": 186.85726928710938,
      "learning_rate": 5.087719298245614e-06,
      "loss": 3.6381,
      "step": 36
    },
    {
      "epoch": 0.025951253726109066,
      "grad_norm": 222.0509033203125,
      "learning_rate": 5.263157894736842e-06,
      "loss": 2.1687,
      "step": 37
    },
    {
      "epoch": 0.026652638961949852,
      "grad_norm": 395.810791015625,
      "learning_rate": 5.43859649122807e-06,
      "loss": 4.1469,
      "step": 38
    },
    {
      "epoch": 0.027354024197790636,
      "grad_norm": 24.080060958862305,
      "learning_rate": 5.6140350877192985e-06,
      "loss": 0.0944,
      "step": 39
    },
    {
      "epoch": 0.028055409433631422,
      "grad_norm": 23.396860122680664,
      "learning_rate": 5.789473684210527e-06,
      "loss": 0.2287,
      "step": 40
    },
    {
      "epoch": 0.02875679466947221,
      "grad_norm": 26.664474487304688,
      "learning_rate": 5.964912280701755e-06,
      "loss": 0.2467,
      "step": 41
    },
    {
      "epoch": 0.029458179905312992,
      "grad_norm": 0.3562469482421875,
      "learning_rate": 6.140350877192982e-06,
      "loss": 0.0005,
      "step": 42
    },
    {
      "epoch": 0.03015956514115378,
      "grad_norm": Infinity,
      "learning_rate": 6.140350877192982e-06,
      "loss": 1.6967,
      "step": 43
    },
    {
      "epoch": 0.030860950376994566,
      "grad_norm": 0.1253829002380371,
      "learning_rate": 6.315789473684211e-06,
      "loss": 0.0003,
      "step": 44
    },
    {
      "epoch": 0.03156233561283535,
      "grad_norm": 18.994827270507812,
      "learning_rate": 6.4912280701754385e-06,
      "loss": 0.233,
      "step": 45
    },
    {
      "epoch": 0.03226372084867613,
      "grad_norm": 34.921852111816406,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.3528,
      "step": 46
    },
    {
      "epoch": 0.03296510608451692,
      "grad_norm": 202.69903564453125,
      "learning_rate": 6.842105263157896e-06,
      "loss": 2.7552,
      "step": 47
    },
    {
      "epoch": 0.033666491320357705,
      "grad_norm": 359.0797424316406,
      "learning_rate": 7.017543859649123e-06,
      "loss": 2.112,
      "step": 48
    },
    {
      "epoch": 0.03436787655619849,
      "grad_norm": 3.0262975692749023,
      "learning_rate": 7.192982456140351e-06,
      "loss": 0.0094,
      "step": 49
    },
    {
      "epoch": 0.03506926179203928,
      "grad_norm": 504.8307189941406,
      "learning_rate": 7.3684210526315784e-06,
      "loss": 6.4525,
      "step": 50
    },
    {
      "epoch": 0.03577064702788006,
      "grad_norm": 7.283820152282715,
      "learning_rate": 7.5438596491228074e-06,
      "loss": 0.0419,
      "step": 51
    },
    {
      "epoch": 0.036472032263720845,
      "grad_norm": 10.193113327026367,
      "learning_rate": 7.719298245614036e-06,
      "loss": 0.0805,
      "step": 52
    },
    {
      "epoch": 0.037173417499561635,
      "grad_norm": 21.247915267944336,
      "learning_rate": 7.894736842105263e-06,
      "loss": 0.1385,
      "step": 53
    },
    {
      "epoch": 0.03787480273540242,
      "grad_norm": 12.750287055969238,
      "learning_rate": 8.070175438596492e-06,
      "loss": 0.1786,
      "step": 54
    },
    {
      "epoch": 0.03857618797124321,
      "grad_norm": 186.3012237548828,
      "learning_rate": 8.245614035087721e-06,
      "loss": 2.5431,
      "step": 55
    },
    {
      "epoch": 0.03927757320708399,
      "grad_norm": 0.6861621141433716,
      "learning_rate": 8.421052631578948e-06,
      "loss": 0.0019,
      "step": 56
    },
    {
      "epoch": 0.039978958442924775,
      "grad_norm": 0.36288923025131226,
      "learning_rate": 8.596491228070176e-06,
      "loss": 0.0008,
      "step": 57
    },
    {
      "epoch": 0.040680343678765565,
      "grad_norm": 0.08618122339248657,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.0001,
      "step": 58
    },
    {
      "epoch": 0.04138172891460635,
      "grad_norm": 341.2626037597656,
      "learning_rate": 8.947368421052632e-06,
      "loss": 3.6742,
      "step": 59
    },
    {
      "epoch": 0.04208311415044713,
      "grad_norm": 5.118091583251953,
      "learning_rate": 9.122807017543861e-06,
      "loss": 0.0128,
      "step": 60
    },
    {
      "epoch": 0.04278449938628792,
      "grad_norm": 3.345242977142334,
      "learning_rate": 9.298245614035088e-06,
      "loss": 0.0095,
      "step": 61
    },
    {
      "epoch": 0.043485884622128705,
      "grad_norm": 319.7175598144531,
      "learning_rate": 9.473684210526317e-06,
      "loss": 4.3047,
      "step": 62
    },
    {
      "epoch": 0.04418726985796949,
      "grad_norm": 279.8736267089844,
      "learning_rate": 9.649122807017545e-06,
      "loss": 1.8524,
      "step": 63
    },
    {
      "epoch": 0.04488865509381028,
      "grad_norm": 187.50588989257812,
      "learning_rate": 9.824561403508772e-06,
      "loss": 2.2166,
      "step": 64
    },
    {
      "epoch": 0.04559004032965106,
      "grad_norm": 3.1667685508728027,
      "learning_rate": 1e-05,
      "loss": 0.0051,
      "step": 65
    },
    {
      "epoch": 0.046291425565491845,
      "grad_norm": 347.2233581542969,
      "learning_rate": 1.0175438596491228e-05,
      "loss": 6.7317,
      "step": 66
    },
    {
      "epoch": 0.046992810801332635,
      "grad_norm": 0.21984274685382843,
      "learning_rate": 1.0350877192982457e-05,
      "loss": 0.0006,
      "step": 67
    },
    {
      "epoch": 0.04769419603717342,
      "grad_norm": 657.2495727539062,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 12.4963,
      "step": 68
    },
    {
      "epoch": 0.0483955812730142,
      "grad_norm": 180.615234375,
      "learning_rate": 1.0701754385964913e-05,
      "loss": 3.2259,
      "step": 69
    },
    {
      "epoch": 0.04909696650885499,
      "grad_norm": 0.4523675739765167,
      "learning_rate": 1.087719298245614e-05,
      "loss": 0.0011,
      "step": 70
    },
    {
      "epoch": 0.049798351744695775,
      "grad_norm": 241.3996124267578,
      "learning_rate": 1.1052631578947368e-05,
      "loss": 2.9636,
      "step": 71
    },
    {
      "epoch": 0.05049973698053656,
      "grad_norm": 0.21977363526821136,
      "learning_rate": 1.1228070175438597e-05,
      "loss": 0.0005,
      "step": 72
    },
    {
      "epoch": 0.05120112221637735,
      "grad_norm": 102.25936889648438,
      "learning_rate": 1.1403508771929824e-05,
      "loss": 0.4978,
      "step": 73
    },
    {
      "epoch": 0.05190250745221813,
      "grad_norm": 24.031539916992188,
      "learning_rate": 1.1578947368421053e-05,
      "loss": 0.119,
      "step": 74
    },
    {
      "epoch": 0.052603892688058915,
      "grad_norm": 187.41812133789062,
      "learning_rate": 1.1754385964912282e-05,
      "loss": 1.0223,
      "step": 75
    },
    {
      "epoch": 0.053305277923899705,
      "grad_norm": 45.128421783447266,
      "learning_rate": 1.192982456140351e-05,
      "loss": 0.4026,
      "step": 76
    },
    {
      "epoch": 0.05400666315974049,
      "grad_norm": 12.320211410522461,
      "learning_rate": 1.2105263157894737e-05,
      "loss": 0.1124,
      "step": 77
    },
    {
      "epoch": 0.05470804839558127,
      "grad_norm": 26.743837356567383,
      "learning_rate": 1.2280701754385964e-05,
      "loss": 0.1488,
      "step": 78
    },
    {
      "epoch": 0.05540943363142206,
      "grad_norm": 30.013471603393555,
      "learning_rate": 1.2456140350877193e-05,
      "loss": 0.3796,
      "step": 79
    },
    {
      "epoch": 0.056110818867262845,
      "grad_norm": 38.910221099853516,
      "learning_rate": 1.2631578947368422e-05,
      "loss": 0.7097,
      "step": 80
    },
    {
      "epoch": 0.05681220410310363,
      "grad_norm": 26.001049041748047,
      "learning_rate": 1.2807017543859651e-05,
      "loss": 0.1451,
      "step": 81
    },
    {
      "epoch": 0.05751358933894442,
      "grad_norm": 12.577742576599121,
      "learning_rate": 1.2982456140350877e-05,
      "loss": 0.0548,
      "step": 82
    },
    {
      "epoch": 0.0582149745747852,
      "grad_norm": 334.09478759765625,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 8.8326,
      "step": 83
    },
    {
      "epoch": 0.058916359810625984,
      "grad_norm": 33.031837463378906,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.2555,
      "step": 84
    },
    {
      "epoch": 0.059617745046466775,
      "grad_norm": 47.51133346557617,
      "learning_rate": 1.3508771929824562e-05,
      "loss": 0.4288,
      "step": 85
    },
    {
      "epoch": 0.06031913028230756,
      "grad_norm": 41.160430908203125,
      "learning_rate": 1.3684210526315791e-05,
      "loss": 0.3075,
      "step": 86
    },
    {
      "epoch": 0.06102051551814834,
      "grad_norm": 9.073790550231934,
      "learning_rate": 1.3859649122807017e-05,
      "loss": 0.045,
      "step": 87
    },
    {
      "epoch": 0.06172190075398913,
      "grad_norm": 352.0498352050781,
      "learning_rate": 1.4035087719298246e-05,
      "loss": 1.3396,
      "step": 88
    },
    {
      "epoch": 0.062423285989829914,
      "grad_norm": 36.5557746887207,
      "learning_rate": 1.4210526315789475e-05,
      "loss": 0.1926,
      "step": 89
    },
    {
      "epoch": 0.0631246712256707,
      "grad_norm": 6.991324424743652,
      "learning_rate": 1.4385964912280702e-05,
      "loss": 0.0341,
      "step": 90
    },
    {
      "epoch": 0.06382605646151149,
      "grad_norm": 6.0483078956604,
      "learning_rate": 1.4561403508771931e-05,
      "loss": 0.0219,
      "step": 91
    },
    {
      "epoch": 0.06452744169735226,
      "grad_norm": 4.510119438171387,
      "learning_rate": 1.4736842105263157e-05,
      "loss": 0.0161,
      "step": 92
    },
    {
      "epoch": 0.06522882693319305,
      "grad_norm": 0.8209487199783325,
      "learning_rate": 1.4912280701754386e-05,
      "loss": 0.0022,
      "step": 93
    },
    {
      "epoch": 0.06593021216903384,
      "grad_norm": 0.0308829378336668,
      "learning_rate": 1.5087719298245615e-05,
      "loss": 0.0001,
      "step": 94
    },
    {
      "epoch": 0.06663159740487462,
      "grad_norm": 171.68765258789062,
      "learning_rate": 1.5263157894736842e-05,
      "loss": 4.8668,
      "step": 95
    },
    {
      "epoch": 0.06733298264071541,
      "grad_norm": 0.0008907412411645055,
      "learning_rate": 1.543859649122807e-05,
      "loss": 0.0,
      "step": 96
    },
    {
      "epoch": 0.0680343678765562,
      "grad_norm": 0.0062671443447470665,
      "learning_rate": 1.56140350877193e-05,
      "loss": 0.0,
      "step": 97
    },
    {
      "epoch": 0.06873575311239698,
      "grad_norm": 0.02987469919025898,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.0001,
      "step": 98
    },
    {
      "epoch": 0.06943713834823777,
      "grad_norm": 0.00033108692150563,
      "learning_rate": 1.5964912280701755e-05,
      "loss": 0.0,
      "step": 99
    },
    {
      "epoch": 0.07013852358407856,
      "grad_norm": 0.21173974871635437,
      "learning_rate": 1.6140350877192984e-05,
      "loss": 0.0003,
      "step": 100
    },
    {
      "epoch": 0.07083990881991933,
      "grad_norm": 0.0003154289734084159,
      "learning_rate": 1.6315789473684213e-05,
      "loss": 0.0,
      "step": 101
    },
    {
      "epoch": 0.07154129405576012,
      "grad_norm": 0.0001488685520598665,
      "learning_rate": 1.6491228070175442e-05,
      "loss": 0.0,
      "step": 102
    },
    {
      "epoch": 0.07224267929160091,
      "grad_norm": 1.3430032595351804e-05,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0,
      "step": 103
    },
    {
      "epoch": 0.07294406452744169,
      "grad_norm": 0.011110284365713596,
      "learning_rate": 1.6842105263157896e-05,
      "loss": 0.0,
      "step": 104
    },
    {
      "epoch": 0.07364544976328248,
      "grad_norm": 1.7930891772266477e-05,
      "learning_rate": 1.7017543859649125e-05,
      "loss": 0.0,
      "step": 105
    },
    {
      "epoch": 0.07434683499912327,
      "grad_norm": 5.653702146446449e-07,
      "learning_rate": 1.719298245614035e-05,
      "loss": 0.0,
      "step": 106
    },
    {
      "epoch": 0.07504822023496406,
      "grad_norm": 135.4245147705078,
      "learning_rate": 1.736842105263158e-05,
      "loss": 6.4792,
      "step": 107
    },
    {
      "epoch": 0.07574960547080484,
      "grad_norm": 153.99264526367188,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 6.485,
      "step": 108
    },
    {
      "epoch": 0.07645099070664563,
      "grad_norm": 159.35337829589844,
      "learning_rate": 1.7719298245614035e-05,
      "loss": 2.4936,
      "step": 109
    },
    {
      "epoch": 0.07715237594248642,
      "grad_norm": 0.00017455461784265935,
      "learning_rate": 1.7894736842105264e-05,
      "loss": 0.0,
      "step": 110
    },
    {
      "epoch": 0.0778537611783272,
      "grad_norm": 146.1802978515625,
      "learning_rate": 1.8070175438596493e-05,
      "loss": 4.3088,
      "step": 111
    },
    {
      "epoch": 0.07855514641416798,
      "grad_norm": 0.022132212296128273,
      "learning_rate": 1.8245614035087722e-05,
      "loss": 0.0,
      "step": 112
    },
    {
      "epoch": 0.07925653165000877,
      "grad_norm": 160.43975830078125,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 4.5829,
      "step": 113
    },
    {
      "epoch": 0.07995791688584955,
      "grad_norm": 164.45138549804688,
      "learning_rate": 1.8596491228070176e-05,
      "loss": 4.1681,
      "step": 114
    },
    {
      "epoch": 0.08065930212169034,
      "grad_norm": 0.02766432799398899,
      "learning_rate": 1.8771929824561405e-05,
      "loss": 0.0001,
      "step": 115
    },
    {
      "epoch": 0.08136068735753113,
      "grad_norm": 163.59400939941406,
      "learning_rate": 1.8947368421052634e-05,
      "loss": 3.0252,
      "step": 116
    },
    {
      "epoch": 0.08206207259337191,
      "grad_norm": 7.11077356338501,
      "learning_rate": 1.9122807017543863e-05,
      "loss": 0.0309,
      "step": 117
    },
    {
      "epoch": 0.0827634578292127,
      "grad_norm": 15.318146705627441,
      "learning_rate": 1.929824561403509e-05,
      "loss": 0.0856,
      "step": 118
    },
    {
      "epoch": 0.08346484306505349,
      "grad_norm": 9.1824951171875,
      "learning_rate": 1.9473684210526315e-05,
      "loss": 0.0416,
      "step": 119
    },
    {
      "epoch": 0.08416622830089426,
      "grad_norm": 22.720653533935547,
      "learning_rate": 1.9649122807017544e-05,
      "loss": 0.1365,
      "step": 120
    },
    {
      "epoch": 0.08486761353673505,
      "grad_norm": 26.200096130371094,
      "learning_rate": 1.9824561403508773e-05,
      "loss": 0.2579,
      "step": 121
    },
    {
      "epoch": 0.08556899877257584,
      "grad_norm": 15.874689102172852,
      "learning_rate": 2e-05,
      "loss": 0.0832,
      "step": 122
    },
    {
      "epoch": 0.08627038400841662,
      "grad_norm": 11.818103790283203,
      "learning_rate": 2.0175438596491227e-05,
      "loss": 0.0541,
      "step": 123
    },
    {
      "epoch": 0.08697176924425741,
      "grad_norm": 116.5608139038086,
      "learning_rate": 2.0350877192982456e-05,
      "loss": 0.9623,
      "step": 124
    },
    {
      "epoch": 0.0876731544800982,
      "grad_norm": 10.346467018127441,
      "learning_rate": 2.0526315789473685e-05,
      "loss": 0.0562,
      "step": 125
    },
    {
      "epoch": 0.08837453971593898,
      "grad_norm": 137.41136169433594,
      "learning_rate": 2.0701754385964914e-05,
      "loss": 1.1566,
      "step": 126
    },
    {
      "epoch": 0.08907592495177977,
      "grad_norm": 2.9712822437286377,
      "learning_rate": 2.0877192982456143e-05,
      "loss": 0.0104,
      "step": 127
    },
    {
      "epoch": 0.08977731018762056,
      "grad_norm": 18.723491668701172,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.0667,
      "step": 128
    },
    {
      "epoch": 0.09047869542346133,
      "grad_norm": 156.25631713867188,
      "learning_rate": 2.1228070175438598e-05,
      "loss": 2.4023,
      "step": 129
    },
    {
      "epoch": 0.09118008065930212,
      "grad_norm": 298.97552490234375,
      "learning_rate": 2.1403508771929827e-05,
      "loss": 6.4928,
      "step": 130
    },
    {
      "epoch": 0.09188146589514291,
      "grad_norm": 87.54296112060547,
      "learning_rate": 2.1578947368421053e-05,
      "loss": 0.5526,
      "step": 131
    },
    {
      "epoch": 0.09258285113098369,
      "grad_norm": 0.25955039262771606,
      "learning_rate": 2.175438596491228e-05,
      "loss": 0.0007,
      "step": 132
    },
    {
      "epoch": 0.09328423636682448,
      "grad_norm": 123.57398986816406,
      "learning_rate": 2.1929824561403507e-05,
      "loss": 1.0402,
      "step": 133
    },
    {
      "epoch": 0.09398562160266527,
      "grad_norm": 238.33229064941406,
      "learning_rate": 2.2105263157894736e-05,
      "loss": 2.4997,
      "step": 134
    },
    {
      "epoch": 0.09468700683850605,
      "grad_norm": 13.356133460998535,
      "learning_rate": 2.2280701754385965e-05,
      "loss": 0.0549,
      "step": 135
    },
    {
      "epoch": 0.09538839207434684,
      "grad_norm": 44.56560134887695,
      "learning_rate": 2.2456140350877194e-05,
      "loss": 0.4078,
      "step": 136
    },
    {
      "epoch": 0.09608977731018763,
      "grad_norm": 25.394866943359375,
      "learning_rate": 2.2631578947368423e-05,
      "loss": 0.1681,
      "step": 137
    },
    {
      "epoch": 0.0967911625460284,
      "grad_norm": 28.068824768066406,
      "learning_rate": 2.280701754385965e-05,
      "loss": 0.174,
      "step": 138
    },
    {
      "epoch": 0.0974925477818692,
      "grad_norm": 27.76630401611328,
      "learning_rate": 2.2982456140350878e-05,
      "loss": 0.1515,
      "step": 139
    },
    {
      "epoch": 0.09819393301770998,
      "grad_norm": 21.082311630249023,
      "learning_rate": 2.3157894736842107e-05,
      "loss": 0.1379,
      "step": 140
    },
    {
      "epoch": 0.09889531825355076,
      "grad_norm": 16.697647094726562,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 0.1066,
      "step": 141
    },
    {
      "epoch": 0.09959670348939155,
      "grad_norm": 127.46953582763672,
      "learning_rate": 2.3508771929824565e-05,
      "loss": 1.3053,
      "step": 142
    },
    {
      "epoch": 0.10029808872523234,
      "grad_norm": 6.2219648361206055,
      "learning_rate": 2.368421052631579e-05,
      "loss": 0.0323,
      "step": 143
    },
    {
      "epoch": 0.10099947396107312,
      "grad_norm": 0.1309046596288681,
      "learning_rate": 2.385964912280702e-05,
      "loss": 0.0004,
      "step": 144
    },
    {
      "epoch": 0.1017008591969139,
      "grad_norm": 149.50645446777344,
      "learning_rate": 2.4035087719298245e-05,
      "loss": 1.8353,
      "step": 145
    },
    {
      "epoch": 0.1024022444327547,
      "grad_norm": 159.4170684814453,
      "learning_rate": 2.4210526315789474e-05,
      "loss": 3.2714,
      "step": 146
    },
    {
      "epoch": 0.10310362966859547,
      "grad_norm": 0.038805797696113586,
      "learning_rate": 2.4385964912280703e-05,
      "loss": 0.0001,
      "step": 147
    },
    {
      "epoch": 0.10380501490443626,
      "grad_norm": 301.7401123046875,
      "learning_rate": 2.456140350877193e-05,
      "loss": 5.5114,
      "step": 148
    },
    {
      "epoch": 0.10450640014027705,
      "grad_norm": 160.01580810546875,
      "learning_rate": 2.4736842105263158e-05,
      "loss": 3.0398,
      "step": 149
    },
    {
      "epoch": 0.10520778537611783,
      "grad_norm": 1.0593466758728027,
      "learning_rate": 2.4912280701754387e-05,
      "loss": 0.0026,
      "step": 150
    },
    {
      "epoch": 0.10590917061195862,
      "grad_norm": 0.1588684320449829,
      "learning_rate": 2.5087719298245616e-05,
      "loss": 0.0004,
      "step": 151
    },
    {
      "epoch": 0.10661055584779941,
      "grad_norm": 0.12831349670886993,
      "learning_rate": 2.5263157894736845e-05,
      "loss": 0.0004,
      "step": 152
    },
    {
      "epoch": 0.10731194108364019,
      "grad_norm": 1.908389925956726,
      "learning_rate": 2.5438596491228074e-05,
      "loss": 0.0062,
      "step": 153
    },
    {
      "epoch": 0.10801332631948098,
      "grad_norm": 0.9305810928344727,
      "learning_rate": 2.5614035087719303e-05,
      "loss": 0.0029,
      "step": 154
    },
    {
      "epoch": 0.10871471155532177,
      "grad_norm": 149.55746459960938,
      "learning_rate": 2.578947368421053e-05,
      "loss": 2.1251,
      "step": 155
    },
    {
      "epoch": 0.10941609679116254,
      "grad_norm": 83.3541030883789,
      "learning_rate": 2.5964912280701754e-05,
      "loss": 0.4841,
      "step": 156
    },
    {
      "epoch": 0.11011748202700333,
      "grad_norm": 0.37118402123451233,
      "learning_rate": 2.6140350877192983e-05,
      "loss": 0.0009,
      "step": 157
    },
    {
      "epoch": 0.11081886726284412,
      "grad_norm": 212.9443359375,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 1.6639,
      "step": 158
    },
    {
      "epoch": 0.1115202524986849,
      "grad_norm": 1.386982798576355,
      "learning_rate": 2.6491228070175438e-05,
      "loss": 0.0049,
      "step": 159
    },
    {
      "epoch": 0.11222163773452569,
      "grad_norm": 151.23684692382812,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.913,
      "step": 160
    },
    {
      "epoch": 0.11292302297036648,
      "grad_norm": 276.9901123046875,
      "learning_rate": 2.6842105263157896e-05,
      "loss": 5.0838,
      "step": 161
    },
    {
      "epoch": 0.11362440820620726,
      "grad_norm": 7.047855377197266,
      "learning_rate": 2.7017543859649125e-05,
      "loss": 0.0422,
      "step": 162
    },
    {
      "epoch": 0.11432579344204805,
      "grad_norm": 5.565479755401611,
      "learning_rate": 2.7192982456140354e-05,
      "loss": 0.0252,
      "step": 163
    },
    {
      "epoch": 0.11502717867788884,
      "grad_norm": 11.038246154785156,
      "learning_rate": 2.7368421052631583e-05,
      "loss": 0.064,
      "step": 164
    },
    {
      "epoch": 0.11572856391372961,
      "grad_norm": 1.4180843830108643,
      "learning_rate": 2.754385964912281e-05,
      "loss": 0.0048,
      "step": 165
    },
    {
      "epoch": 0.1164299491495704,
      "grad_norm": 131.0841064453125,
      "learning_rate": 2.7719298245614034e-05,
      "loss": 1.1797,
      "step": 166
    },
    {
      "epoch": 0.11713133438541119,
      "grad_norm": 286.4391784667969,
      "learning_rate": 2.7894736842105263e-05,
      "loss": 4.2848,
      "step": 167
    },
    {
      "epoch": 0.11783271962125197,
      "grad_norm": 0.354282945394516,
      "learning_rate": 2.8070175438596492e-05,
      "loss": 0.001,
      "step": 168
    },
    {
      "epoch": 0.11853410485709276,
      "grad_norm": 148.43812561035156,
      "learning_rate": 2.824561403508772e-05,
      "loss": 2.3637,
      "step": 169
    },
    {
      "epoch": 0.11923549009293355,
      "grad_norm": 3.4153621196746826,
      "learning_rate": 2.842105263157895e-05,
      "loss": 0.0121,
      "step": 170
    },
    {
      "epoch": 0.11993687532877433,
      "grad_norm": 6.812062740325928,
      "learning_rate": 2.8596491228070175e-05,
      "loss": 0.038,
      "step": 171
    },
    {
      "epoch": 0.12063826056461512,
      "grad_norm": 0.6381294131278992,
      "learning_rate": 2.8771929824561404e-05,
      "loss": 0.002,
      "step": 172
    },
    {
      "epoch": 0.1213396458004559,
      "grad_norm": 0.05846899002790451,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 0.0002,
      "step": 173
    },
    {
      "epoch": 0.12204103103629668,
      "grad_norm": 140.1736297607422,
      "learning_rate": 2.9122807017543863e-05,
      "loss": 4.0868,
      "step": 174
    },
    {
      "epoch": 0.12274241627213747,
      "grad_norm": 1.7416075468063354,
      "learning_rate": 2.929824561403509e-05,
      "loss": 0.0057,
      "step": 175
    },
    {
      "epoch": 0.12344380150797826,
      "grad_norm": 0.35068169236183167,
      "learning_rate": 2.9473684210526314e-05,
      "loss": 0.0009,
      "step": 176
    },
    {
      "epoch": 0.12414518674381904,
      "grad_norm": 135.71665954589844,
      "learning_rate": 2.9649122807017543e-05,
      "loss": 2.1025,
      "step": 177
    },
    {
      "epoch": 0.12484657197965983,
      "grad_norm": 134.73190307617188,
      "learning_rate": 2.9824561403508772e-05,
      "loss": 1.8685,
      "step": 178
    },
    {
      "epoch": 0.1255479572155006,
      "grad_norm": 266.5934143066406,
      "learning_rate": 3e-05,
      "loss": 5.1122,
      "step": 179
    },
    {
      "epoch": 0.1262493424513414,
      "grad_norm": 383.29931640625,
      "learning_rate": 3.017543859649123e-05,
      "loss": 10.0944,
      "step": 180
    },
    {
      "epoch": 0.12695072768718219,
      "grad_norm": 3.7992992401123047,
      "learning_rate": 3.035087719298246e-05,
      "loss": 0.0161,
      "step": 181
    },
    {
      "epoch": 0.12765211292302298,
      "grad_norm": 206.0655975341797,
      "learning_rate": 3.0526315789473684e-05,
      "loss": 1.5303,
      "step": 182
    },
    {
      "epoch": 0.12835349815886377,
      "grad_norm": 13.431243896484375,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 0.0912,
      "step": 183
    },
    {
      "epoch": 0.12905488339470453,
      "grad_norm": 16.69830894470215,
      "learning_rate": 3.087719298245614e-05,
      "loss": 0.0824,
      "step": 184
    },
    {
      "epoch": 0.12975626863054532,
      "grad_norm": 13.488637924194336,
      "learning_rate": 3.105263157894737e-05,
      "loss": 0.0746,
      "step": 185
    },
    {
      "epoch": 0.1304576538663861,
      "grad_norm": 10.065983772277832,
      "learning_rate": 3.12280701754386e-05,
      "loss": 0.0503,
      "step": 186
    },
    {
      "epoch": 0.1311590391022269,
      "grad_norm": 123.50023651123047,
      "learning_rate": 3.140350877192982e-05,
      "loss": 1.2874,
      "step": 187
    },
    {
      "epoch": 0.1318604243380677,
      "grad_norm": 1.7797540426254272,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.0067,
      "step": 188
    },
    {
      "epoch": 0.13256180957390848,
      "grad_norm": 0.33675551414489746,
      "learning_rate": 3.175438596491228e-05,
      "loss": 0.0011,
      "step": 189
    },
    {
      "epoch": 0.13326319480974924,
      "grad_norm": 118.4156723022461,
      "learning_rate": 3.192982456140351e-05,
      "loss": 1.2059,
      "step": 190
    },
    {
      "epoch": 0.13396458004559003,
      "grad_norm": 136.65228271484375,
      "learning_rate": 3.210526315789474e-05,
      "loss": 1.7856,
      "step": 191
    },
    {
      "epoch": 0.13466596528143082,
      "grad_norm": 145.7415313720703,
      "learning_rate": 3.228070175438597e-05,
      "loss": 2.8991,
      "step": 192
    },
    {
      "epoch": 0.1353673505172716,
      "grad_norm": 262.7060241699219,
      "learning_rate": 3.24561403508772e-05,
      "loss": 3.4817,
      "step": 193
    },
    {
      "epoch": 0.1360687357531124,
      "grad_norm": 0.25727707147598267,
      "learning_rate": 3.2631578947368426e-05,
      "loss": 0.0008,
      "step": 194
    },
    {
      "epoch": 0.1367701209889532,
      "grad_norm": 1.0702918767929077,
      "learning_rate": 3.2807017543859655e-05,
      "loss": 0.0037,
      "step": 195
    },
    {
      "epoch": 0.13747150622479395,
      "grad_norm": 1.5917607545852661,
      "learning_rate": 3.2982456140350884e-05,
      "loss": 0.0055,
      "step": 196
    },
    {
      "epoch": 0.13817289146063474,
      "grad_norm": 355.18511962890625,
      "learning_rate": 3.3157894736842106e-05,
      "loss": 11.4677,
      "step": 197
    },
    {
      "epoch": 0.13887427669647553,
      "grad_norm": 1.7947548627853394,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0074,
      "step": 198
    },
    {
      "epoch": 0.13957566193231633,
      "grad_norm": 1.5046889781951904,
      "learning_rate": 3.3508771929824564e-05,
      "loss": 0.0058,
      "step": 199
    },
    {
      "epoch": 0.14027704716815712,
      "grad_norm": 0.2553917467594147,
      "learning_rate": 3.368421052631579e-05,
      "loss": 0.0008,
      "step": 200
    },
    {
      "epoch": 0.1409784324039979,
      "grad_norm": 0.773563027381897,
      "learning_rate": 3.385964912280702e-05,
      "loss": 0.0027,
      "step": 201
    },
    {
      "epoch": 0.14167981763983867,
      "grad_norm": 166.71820068359375,
      "learning_rate": 3.403508771929825e-05,
      "loss": 6.1496,
      "step": 202
    },
    {
      "epoch": 0.14238120287567946,
      "grad_norm": 0.017217637971043587,
      "learning_rate": 3.421052631578947e-05,
      "loss": 0.0001,
      "step": 203
    },
    {
      "epoch": 0.14308258811152025,
      "grad_norm": 0.0036761912051588297,
      "learning_rate": 3.43859649122807e-05,
      "loss": 0.0,
      "step": 204
    },
    {
      "epoch": 0.14378397334736104,
      "grad_norm": 0.000679672637488693,
      "learning_rate": 3.456140350877193e-05,
      "loss": 0.0,
      "step": 205
    },
    {
      "epoch": 0.14448535858320183,
      "grad_norm": 0.00011312704009469599,
      "learning_rate": 3.473684210526316e-05,
      "loss": 0.0,
      "step": 206
    },
    {
      "epoch": 0.14518674381904262,
      "grad_norm": 0.00012077714200131595,
      "learning_rate": 3.491228070175438e-05,
      "loss": 0.0,
      "step": 207
    },
    {
      "epoch": 0.14588812905488338,
      "grad_norm": 0.00012628763215616345,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.0,
      "step": 208
    },
    {
      "epoch": 0.14658951429072417,
      "grad_norm": 1.7496255168225616e-05,
      "learning_rate": 3.526315789473684e-05,
      "loss": 0.0,
      "step": 209
    },
    {
      "epoch": 0.14729089952656496,
      "grad_norm": 1.5739196896902286e-05,
      "learning_rate": 3.543859649122807e-05,
      "loss": 0.0,
      "step": 210
    },
    {
      "epoch": 0.14799228476240575,
      "grad_norm": 0.08704076707363129,
      "learning_rate": 3.56140350877193e-05,
      "loss": 0.0001,
      "step": 211
    },
    {
      "epoch": 0.14869366999824654,
      "grad_norm": 0.0016435593133792281,
      "learning_rate": 3.578947368421053e-05,
      "loss": 0.0,
      "step": 212
    },
    {
      "epoch": 0.14939505523408733,
      "grad_norm": 1.5830031633377075,
      "learning_rate": 3.5964912280701756e-05,
      "loss": 0.005,
      "step": 213
    },
    {
      "epoch": 0.15009644046992812,
      "grad_norm": 0.00016859408060554415,
      "learning_rate": 3.6140350877192985e-05,
      "loss": 0.0,
      "step": 214
    },
    {
      "epoch": 0.15079782570576888,
      "grad_norm": 1.2645250535570085e-06,
      "learning_rate": 3.6315789473684214e-05,
      "loss": 0.0,
      "step": 215
    },
    {
      "epoch": 0.15149921094160967,
      "grad_norm": 336.5249328613281,
      "learning_rate": 3.6491228070175443e-05,
      "loss": 4.1939,
      "step": 216
    },
    {
      "epoch": 0.15220059617745046,
      "grad_norm": 9.010884127746976e-07,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 0.0,
      "step": 217
    },
    {
      "epoch": 0.15290198141329125,
      "grad_norm": 5.530741304937692e-07,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 0.0,
      "step": 218
    },
    {
      "epoch": 0.15360336664913204,
      "grad_norm": 222.7361297607422,
      "learning_rate": 3.7017543859649124e-05,
      "loss": 0.7712,
      "step": 219
    },
    {
      "epoch": 0.15430475188497284,
      "grad_norm": 123.51704406738281,
      "learning_rate": 3.719298245614035e-05,
      "loss": 7.8703,
      "step": 220
    },
    {
      "epoch": 0.1550061371208136,
      "grad_norm": 119.08999633789062,
      "learning_rate": 3.736842105263158e-05,
      "loss": 6.6427,
      "step": 221
    },
    {
      "epoch": 0.1557075223566544,
      "grad_norm": 0.005513960495591164,
      "learning_rate": 3.754385964912281e-05,
      "loss": 0.0,
      "step": 222
    },
    {
      "epoch": 0.15640890759249518,
      "grad_norm": 0.021047895774245262,
      "learning_rate": 3.771929824561404e-05,
      "loss": 0.0,
      "step": 223
    },
    {
      "epoch": 0.15711029282833597,
      "grad_norm": 0.001549599110148847,
      "learning_rate": 3.789473684210527e-05,
      "loss": 0.0,
      "step": 224
    },
    {
      "epoch": 0.15781167806417676,
      "grad_norm": 0.008736948482692242,
      "learning_rate": 3.80701754385965e-05,
      "loss": 0.0,
      "step": 225
    },
    {
      "epoch": 0.15851306330001755,
      "grad_norm": 0.03641926124691963,
      "learning_rate": 3.824561403508773e-05,
      "loss": 0.0001,
      "step": 226
    },
    {
      "epoch": 0.1592144485358583,
      "grad_norm": 0.026789208874106407,
      "learning_rate": 3.842105263157895e-05,
      "loss": 0.0001,
      "step": 227
    },
    {
      "epoch": 0.1599158337716991,
      "grad_norm": 130.79087829589844,
      "learning_rate": 3.859649122807018e-05,
      "loss": 3.4204,
      "step": 228
    },
    {
      "epoch": 0.1606172190075399,
      "grad_norm": 404.3518981933594,
      "learning_rate": 3.877192982456141e-05,
      "loss": 5.6741,
      "step": 229
    },
    {
      "epoch": 0.16131860424338068,
      "grad_norm": 5.39788818359375,
      "learning_rate": 3.894736842105263e-05,
      "loss": 0.0251,
      "step": 230
    },
    {
      "epoch": 0.16201998947922147,
      "grad_norm": 18.25702667236328,
      "learning_rate": 3.912280701754386e-05,
      "loss": 0.1059,
      "step": 231
    },
    {
      "epoch": 0.16272137471506226,
      "grad_norm": 2.5649735927581787,
      "learning_rate": 3.929824561403509e-05,
      "loss": 0.0108,
      "step": 232
    },
    {
      "epoch": 0.16342275995090302,
      "grad_norm": 389.410400390625,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 4.1361,
      "step": 233
    },
    {
      "epoch": 0.16412414518674381,
      "grad_norm": 97.98786926269531,
      "learning_rate": 3.9649122807017545e-05,
      "loss": 1.0207,
      "step": 234
    },
    {
      "epoch": 0.1648255304225846,
      "grad_norm": 446.8147277832031,
      "learning_rate": 3.9824561403508774e-05,
      "loss": 2.3678,
      "step": 235
    },
    {
      "epoch": 0.1655269156584254,
      "grad_norm": 88.88761901855469,
      "learning_rate": 4e-05,
      "loss": 0.814,
      "step": 236
    },
    {
      "epoch": 0.16622830089426618,
      "grad_norm": 217.3586883544922,
      "learning_rate": 4.017543859649123e-05,
      "loss": 9.7291,
      "step": 237
    },
    {
      "epoch": 0.16692968613010697,
      "grad_norm": 106.2865219116211,
      "learning_rate": 4.0350877192982455e-05,
      "loss": 1.1716,
      "step": 238
    },
    {
      "epoch": 0.16763107136594774,
      "grad_norm": 154.2283477783203,
      "learning_rate": 4.0526315789473684e-05,
      "loss": 0.6351,
      "step": 239
    },
    {
      "epoch": 0.16833245660178853,
      "grad_norm": 37.964447021484375,
      "learning_rate": 4.070175438596491e-05,
      "loss": 0.3484,
      "step": 240
    },
    {
      "epoch": 0.16903384183762932,
      "grad_norm": 24.696714401245117,
      "learning_rate": 4.087719298245614e-05,
      "loss": 0.2762,
      "step": 241
    },
    {
      "epoch": 0.1697352270734701,
      "grad_norm": 82.12918853759766,
      "learning_rate": 4.105263157894737e-05,
      "loss": 0.9978,
      "step": 242
    },
    {
      "epoch": 0.1704366123093109,
      "grad_norm": 41.84835433959961,
      "learning_rate": 4.12280701754386e-05,
      "loss": 0.4799,
      "step": 243
    },
    {
      "epoch": 0.1711379975451517,
      "grad_norm": 45.34370803833008,
      "learning_rate": 4.140350877192983e-05,
      "loss": 0.4255,
      "step": 244
    },
    {
      "epoch": 0.17183938278099245,
      "grad_norm": 41.85076904296875,
      "learning_rate": 4.157894736842106e-05,
      "loss": 0.616,
      "step": 245
    },
    {
      "epoch": 0.17254076801683324,
      "grad_norm": 31.718475341796875,
      "learning_rate": 4.1754385964912287e-05,
      "loss": 0.2034,
      "step": 246
    },
    {
      "epoch": 0.17324215325267403,
      "grad_norm": 149.61940002441406,
      "learning_rate": 4.1929824561403516e-05,
      "loss": 1.5976,
      "step": 247
    },
    {
      "epoch": 0.17394353848851482,
      "grad_norm": 127.43466186523438,
      "learning_rate": 4.210526315789474e-05,
      "loss": 1.4465,
      "step": 248
    },
    {
      "epoch": 0.1746449237243556,
      "grad_norm": 36.885379791259766,
      "learning_rate": 4.228070175438597e-05,
      "loss": 0.5706,
      "step": 249
    },
    {
      "epoch": 0.1753463089601964,
      "grad_norm": 343.46649169921875,
      "learning_rate": 4.2456140350877196e-05,
      "loss": 5.0778,
      "step": 250
    },
    {
      "epoch": 0.17604769419603716,
      "grad_norm": 23.604751586914062,
      "learning_rate": 4.2631578947368425e-05,
      "loss": 0.1635,
      "step": 251
    },
    {
      "epoch": 0.17674907943187795,
      "grad_norm": 37.630043029785156,
      "learning_rate": 4.2807017543859654e-05,
      "loss": 0.5644,
      "step": 252
    },
    {
      "epoch": 0.17745046466771874,
      "grad_norm": 41.07538986206055,
      "learning_rate": 4.298245614035088e-05,
      "loss": 0.5337,
      "step": 253
    },
    {
      "epoch": 0.17815184990355953,
      "grad_norm": 66.12769317626953,
      "learning_rate": 4.3157894736842105e-05,
      "loss": 0.8491,
      "step": 254
    },
    {
      "epoch": 0.17885323513940032,
      "grad_norm": 38.53345489501953,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.4,
      "step": 255
    },
    {
      "epoch": 0.17955462037524111,
      "grad_norm": 37.57467269897461,
      "learning_rate": 4.350877192982456e-05,
      "loss": 0.6272,
      "step": 256
    },
    {
      "epoch": 0.18025600561108188,
      "grad_norm": 27.241796493530273,
      "learning_rate": 4.368421052631579e-05,
      "loss": 0.2307,
      "step": 257
    },
    {
      "epoch": 0.18095739084692267,
      "grad_norm": 24.353778839111328,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 0.1238,
      "step": 258
    },
    {
      "epoch": 0.18165877608276346,
      "grad_norm": 5.489215850830078,
      "learning_rate": 4.403508771929824e-05,
      "loss": 0.031,
      "step": 259
    },
    {
      "epoch": 0.18236016131860425,
      "grad_norm": 16.305925369262695,
      "learning_rate": 4.421052631578947e-05,
      "loss": 0.0398,
      "step": 260
    },
    {
      "epoch": 0.18306154655444504,
      "grad_norm": 6.403785228729248,
      "learning_rate": 4.43859649122807e-05,
      "loss": 0.0304,
      "step": 261
    },
    {
      "epoch": 0.18376293179028583,
      "grad_norm": 4.471805572509766,
      "learning_rate": 4.456140350877193e-05,
      "loss": 0.0077,
      "step": 262
    },
    {
      "epoch": 0.1844643170261266,
      "grad_norm": 0.9698334336280823,
      "learning_rate": 4.473684210526316e-05,
      "loss": 0.0031,
      "step": 263
    },
    {
      "epoch": 0.18516570226196738,
      "grad_norm": 0.014244407415390015,
      "learning_rate": 4.491228070175439e-05,
      "loss": 0.0,
      "step": 264
    },
    {
      "epoch": 0.18586708749780817,
      "grad_norm": 121.71415710449219,
      "learning_rate": 4.508771929824562e-05,
      "loss": 4.2289,
      "step": 265
    },
    {
      "epoch": 0.18656847273364896,
      "grad_norm": 0.02348833531141281,
      "learning_rate": 4.5263157894736846e-05,
      "loss": 0.0001,
      "step": 266
    },
    {
      "epoch": 0.18726985796948975,
      "grad_norm": 0.0016702099237591028,
      "learning_rate": 4.5438596491228075e-05,
      "loss": 0.0,
      "step": 267
    },
    {
      "epoch": 0.18797124320533054,
      "grad_norm": 0.0027486609760671854,
      "learning_rate": 4.56140350877193e-05,
      "loss": 0.0,
      "step": 268
    },
    {
      "epoch": 0.1886726284411713,
      "grad_norm": 0.0012574723223224282,
      "learning_rate": 4.5789473684210527e-05,
      "loss": 0.0,
      "step": 269
    },
    {
      "epoch": 0.1893740136770121,
      "grad_norm": 0.002944325562566519,
      "learning_rate": 4.5964912280701756e-05,
      "loss": 0.0,
      "step": 270
    },
    {
      "epoch": 0.19007539891285288,
      "grad_norm": 0.0020754581782966852,
      "learning_rate": 4.6140350877192985e-05,
      "loss": 0.0,
      "step": 271
    },
    {
      "epoch": 0.19077678414869367,
      "grad_norm": 0.008111892268061638,
      "learning_rate": 4.6315789473684214e-05,
      "loss": 0.0,
      "step": 272
    },
    {
      "epoch": 0.19147816938453446,
      "grad_norm": 118.25870513916016,
      "learning_rate": 4.649122807017544e-05,
      "loss": 3.5798,
      "step": 273
    },
    {
      "epoch": 0.19217955462037525,
      "grad_norm": 118.91422271728516,
      "learning_rate": 4.666666666666667e-05,
      "loss": 2.0933,
      "step": 274
    },
    {
      "epoch": 0.19288093985621602,
      "grad_norm": 0.1281914860010147,
      "learning_rate": 4.68421052631579e-05,
      "loss": 0.0004,
      "step": 275
    },
    {
      "epoch": 0.1935823250920568,
      "grad_norm": 0.9166204929351807,
      "learning_rate": 4.701754385964913e-05,
      "loss": 0.0034,
      "step": 276
    },
    {
      "epoch": 0.1942837103278976,
      "grad_norm": 3.7037837505340576,
      "learning_rate": 4.719298245614036e-05,
      "loss": 0.0179,
      "step": 277
    },
    {
      "epoch": 0.1949850955637384,
      "grad_norm": 0.716585099697113,
      "learning_rate": 4.736842105263158e-05,
      "loss": 0.0028,
      "step": 278
    },
    {
      "epoch": 0.19568648079957918,
      "grad_norm": 119.46430969238281,
      "learning_rate": 4.754385964912281e-05,
      "loss": 1.7075,
      "step": 279
    },
    {
      "epoch": 0.19638786603541997,
      "grad_norm": 0.07794260233640671,
      "learning_rate": 4.771929824561404e-05,
      "loss": 0.0003,
      "step": 280
    },
    {
      "epoch": 0.19708925127126073,
      "grad_norm": 119.65841674804688,
      "learning_rate": 4.789473684210526e-05,
      "loss": 3.3725,
      "step": 281
    },
    {
      "epoch": 0.19779063650710152,
      "grad_norm": 0.06007947400212288,
      "learning_rate": 4.807017543859649e-05,
      "loss": 0.0002,
      "step": 282
    },
    {
      "epoch": 0.1984920217429423,
      "grad_norm": 0.33029523491859436,
      "learning_rate": 4.824561403508772e-05,
      "loss": 0.0013,
      "step": 283
    },
    {
      "epoch": 0.1991934069787831,
      "grad_norm": 0.19337859749794006,
      "learning_rate": 4.842105263157895e-05,
      "loss": 0.0007,
      "step": 284
    },
    {
      "epoch": 0.1998947922146239,
      "grad_norm": 0.4252503514289856,
      "learning_rate": 4.859649122807018e-05,
      "loss": 0.0017,
      "step": 285
    },
    {
      "epoch": 0.20059617745046468,
      "grad_norm": 0.07317744940519333,
      "learning_rate": 4.8771929824561406e-05,
      "loss": 0.0003,
      "step": 286
    },
    {
      "epoch": 0.20129756268630544,
      "grad_norm": 0.01806594245135784,
      "learning_rate": 4.8947368421052635e-05,
      "loss": 0.0001,
      "step": 287
    },
    {
      "epoch": 0.20199894792214623,
      "grad_norm": 106.39412689208984,
      "learning_rate": 4.912280701754386e-05,
      "loss": 3.9844,
      "step": 288
    },
    {
      "epoch": 0.20270033315798702,
      "grad_norm": 0.008989217691123486,
      "learning_rate": 4.9298245614035086e-05,
      "loss": 0.0,
      "step": 289
    },
    {
      "epoch": 0.2034017183938278,
      "grad_norm": 0.05597059056162834,
      "learning_rate": 4.9473684210526315e-05,
      "loss": 0.0002,
      "step": 290
    },
    {
      "epoch": 0.2041031036296686,
      "grad_norm": 113.05389404296875,
      "learning_rate": 4.9649122807017544e-05,
      "loss": 2.5036,
      "step": 291
    },
    {
      "epoch": 0.2048044888655094,
      "grad_norm": 0.39913010597229004,
      "learning_rate": 4.9824561403508773e-05,
      "loss": 0.0015,
      "step": 292
    },
    {
      "epoch": 0.20550587410135016,
      "grad_norm": 0.944106936454773,
      "learning_rate": 5e-05,
      "loss": 0.0038,
      "step": 293
    },
    {
      "epoch": 0.20620725933719095,
      "grad_norm": 108.76079559326172,
      "learning_rate": 4.9999995795715716e-05,
      "loss": 1.4773,
      "step": 294
    },
    {
      "epoch": 0.20690864457303174,
      "grad_norm": 1.2869099378585815,
      "learning_rate": 4.999998318286425e-05,
      "loss": 0.0054,
      "step": 295
    },
    {
      "epoch": 0.20761002980887253,
      "grad_norm": 0.7701173424720764,
      "learning_rate": 4.999996216144987e-05,
      "loss": 0.0029,
      "step": 296
    },
    {
      "epoch": 0.20831141504471332,
      "grad_norm": 104.06690216064453,
      "learning_rate": 4.9999932731479625e-05,
      "loss": 1.2931,
      "step": 297
    },
    {
      "epoch": 0.2090128002805541,
      "grad_norm": 234.58531188964844,
      "learning_rate": 4.999989489296344e-05,
      "loss": 4.5688,
      "step": 298
    },
    {
      "epoch": 0.20971418551639487,
      "grad_norm": 108.04949188232422,
      "learning_rate": 4.999984864591401e-05,
      "loss": 1.5544,
      "step": 299
    },
    {
      "epoch": 0.21041557075223566,
      "grad_norm": 1.8640260696411133,
      "learning_rate": 4.999979399034691e-05,
      "loss": 0.0078,
      "step": 300
    },
    {
      "epoch": 0.21111695598807645,
      "grad_norm": 3.089526414871216,
      "learning_rate": 4.999973092628052e-05,
      "loss": 0.0138,
      "step": 301
    },
    {
      "epoch": 0.21181834122391724,
      "grad_norm": 1.4183728694915771,
      "learning_rate": 4.999965945373605e-05,
      "loss": 0.006,
      "step": 302
    },
    {
      "epoch": 0.21251972645975803,
      "grad_norm": 0.9733534455299377,
      "learning_rate": 4.9999579572737533e-05,
      "loss": 0.0038,
      "step": 303
    },
    {
      "epoch": 0.21322111169559882,
      "grad_norm": 121.11236572265625,
      "learning_rate": 4.9999491283311836e-05,
      "loss": 2.2543,
      "step": 304
    },
    {
      "epoch": 0.21392249693143958,
      "grad_norm": 117.718017578125,
      "learning_rate": 4.999939458548868e-05,
      "loss": 2.5007,
      "step": 305
    },
    {
      "epoch": 0.21462388216728037,
      "grad_norm": 0.013796065002679825,
      "learning_rate": 4.9999289479300557e-05,
      "loss": 0.0,
      "step": 306
    },
    {
      "epoch": 0.21532526740312116,
      "grad_norm": 116.46581268310547,
      "learning_rate": 4.999917596478283e-05,
      "loss": 2.6005,
      "step": 307
    },
    {
      "epoch": 0.21602665263896195,
      "grad_norm": 0.3713426887989044,
      "learning_rate": 4.999905404197368e-05,
      "loss": 0.0014,
      "step": 308
    },
    {
      "epoch": 0.21672803787480274,
      "grad_norm": 0.44403278827667236,
      "learning_rate": 4.999892371091411e-05,
      "loss": 0.0017,
      "step": 309
    },
    {
      "epoch": 0.21742942311064353,
      "grad_norm": 139.35816955566406,
      "learning_rate": 4.999878497164797e-05,
      "loss": 11.1347,
      "step": 310
    },
    {
      "epoch": 0.2181308083464843,
      "grad_norm": 0.20512554049491882,
      "learning_rate": 4.999863782422191e-05,
      "loss": 0.0007,
      "step": 311
    },
    {
      "epoch": 0.21883219358232509,
      "grad_norm": 88.1954345703125,
      "learning_rate": 4.9998482268685434e-05,
      "loss": 0.843,
      "step": 312
    },
    {
      "epoch": 0.21953357881816588,
      "grad_norm": 93.83118438720703,
      "learning_rate": 4.999831830509084e-05,
      "loss": 1.0804,
      "step": 313
    },
    {
      "epoch": 0.22023496405400667,
      "grad_norm": 5.042333602905273,
      "learning_rate": 4.99981459334933e-05,
      "loss": 0.0256,
      "step": 314
    },
    {
      "epoch": 0.22093634928984746,
      "grad_norm": 7.477559566497803,
      "learning_rate": 4.9997965153950775e-05,
      "loss": 0.0402,
      "step": 315
    },
    {
      "epoch": 0.22163773452568825,
      "grad_norm": 156.78961181640625,
      "learning_rate": 4.999777596652408e-05,
      "loss": 6.0824,
      "step": 316
    },
    {
      "epoch": 0.222339119761529,
      "grad_norm": 9.518619537353516,
      "learning_rate": 4.999757837127683e-05,
      "loss": 0.0577,
      "step": 317
    },
    {
      "epoch": 0.2230405049973698,
      "grad_norm": 2.860729455947876,
      "learning_rate": 4.9997372368275506e-05,
      "loss": 0.0135,
      "step": 318
    },
    {
      "epoch": 0.2237418902332106,
      "grad_norm": 1.1225600242614746,
      "learning_rate": 4.999715795758938e-05,
      "loss": 0.0049,
      "step": 319
    },
    {
      "epoch": 0.22444327546905138,
      "grad_norm": 0.12374955415725708,
      "learning_rate": 4.9996935139290575e-05,
      "loss": 0.0004,
      "step": 320
    },
| { | |
| "epoch": 0.22514466070489217, | |
| "grad_norm": 112.18521881103516, | |
| "learning_rate": 4.999670391345403e-05, | |
| "loss": 3.4372, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.22584604594073296, | |
| "grad_norm": 0.0004930093418806791, | |
| "learning_rate": 4.999646428015752e-05, | |
| "loss": 0.0, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.22654743117657372, | |
| "grad_norm": 0.00027333476464264095, | |
| "learning_rate": 4.9996216239481643e-05, | |
| "loss": 0.0, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.2272488164124145, | |
| "grad_norm": 0.0001735862751957029, | |
| "learning_rate": 4.999595979150982e-05, | |
| "loss": 0.0, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.2279502016482553, | |
| "grad_norm": 2.878743180190213e-05, | |
| "learning_rate": 4.9995694936328316e-05, | |
| "loss": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2286515868840961, | |
| "grad_norm": 109.58158111572266, | |
| "learning_rate": 4.99954216740262e-05, | |
| "loss": 4.991, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.22935297211993688, | |
| "grad_norm": 113.2159194946289, | |
| "learning_rate": 4.9995140004695396e-05, | |
| "loss": 4.7235, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.23005435735577767, | |
| "grad_norm": 0.00015405479643959552, | |
| "learning_rate": 4.999484992843063e-05, | |
| "loss": 0.0, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.23075574259161843, | |
| "grad_norm": 0.011504331603646278, | |
| "learning_rate": 4.999455144532947e-05, | |
| "loss": 0.0, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.23145712782745922, | |
| "grad_norm": 0.001587590086273849, | |
| "learning_rate": 4.9994244555492315e-05, | |
| "loss": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.23215851306330001, | |
| "grad_norm": 0.0038678112905472517, | |
| "learning_rate": 4.999392925902238e-05, | |
| "loss": 0.0, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.2328598982991408, | |
| "grad_norm": 0.044445931911468506, | |
| "learning_rate": 4.999360555602571e-05, | |
| "loss": 0.0001, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.2335612835349816, | |
| "grad_norm": 108.20228576660156, | |
| "learning_rate": 4.999327344661118e-05, | |
| "loss": 2.0004, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.23426266877082239, | |
| "grad_norm": 0.17047792673110962, | |
| "learning_rate": 4.9992932930890505e-05, | |
| "loss": 0.0005, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.23496405400666315, | |
| "grad_norm": 0.39230290055274963, | |
| "learning_rate": 4.999258400897819e-05, | |
| "loss": 0.0016, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.23566543924250394, | |
| "grad_norm": 191.8765106201172, | |
| "learning_rate": 4.999222668099163e-05, | |
| "loss": 2.4042, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.23636682447834473, | |
| "grad_norm": 87.04983520507812, | |
| "learning_rate": 4.999186094705097e-05, | |
| "loss": 0.9342, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.23706820971418552, | |
| "grad_norm": 14.748551368713379, | |
| "learning_rate": 4.999148680727925e-05, | |
| "loss": 0.1165, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2377695949500263, | |
| "grad_norm": 16.103792190551758, | |
| "learning_rate": 4.999110426180229e-05, | |
| "loss": 0.11, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.2384709801858671, | |
| "grad_norm": 4.507053375244141, | |
| "learning_rate": 4.999071331074877e-05, | |
| "loss": 0.1421, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.23917236542170786, | |
| "grad_norm": 8.822351455688477, | |
| "learning_rate": 4.999031395425018e-05, | |
| "loss": 0.2372, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.23987375065754865, | |
| "grad_norm": 19.92313575744629, | |
| "learning_rate": 4.998990619244084e-05, | |
| "loss": 0.1537, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.24057513589338944, | |
| "grad_norm": 12.00966739654541, | |
| "learning_rate": 4.998949002545789e-05, | |
| "loss": 0.0872, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.24127652112923023, | |
| "grad_norm": 4.180269718170166, | |
| "learning_rate": 4.998906545344132e-05, | |
| "loss": 0.0218, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.24197790636507102, | |
| "grad_norm": 0.596821129322052, | |
| "learning_rate": 4.998863247653392e-05, | |
| "loss": 0.0025, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2426792916009118, | |
| "grad_norm": 0.4522903263568878, | |
| "learning_rate": 4.998819109488132e-05, | |
| "loss": 0.0013, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.24338067683675257, | |
| "grad_norm": 0.0007598726078867912, | |
| "learning_rate": 4.998774130863199e-05, | |
| "loss": 0.0, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.24408206207259336, | |
| "grad_norm": 8.103512664092705e-05, | |
| "learning_rate": 4.998728311793719e-05, | |
| "loss": 0.0, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.24478344730843415, | |
| "grad_norm": 105.80138397216797, | |
| "learning_rate": 4.998681652295104e-05, | |
| "loss": 4.8223, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.24548483254427494, | |
| "grad_norm": 127.92369079589844, | |
| "learning_rate": 4.998634152383047e-05, | |
| "loss": 7.0698, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.24618621778011573, | |
| "grad_norm": 2.636051704030251e-06, | |
| "learning_rate": 4.998585812073525e-05, | |
| "loss": 0.0, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.24688760301595652, | |
| "grad_norm": 2.710710077735712e-06, | |
| "learning_rate": 4.9985366313827975e-05, | |
| "loss": 0.0, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2475889882517973, | |
| "grad_norm": 103.96717834472656, | |
| "learning_rate": 4.998486610327405e-05, | |
| "loss": 6.485, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.24829037348763808, | |
| "grad_norm": 103.35551452636719, | |
| "learning_rate": 4.9984357489241715e-05, | |
| "loss": 5.7037, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.24899175872347887, | |
| "grad_norm": 100.56452178955078, | |
| "learning_rate": 4.998384047190204e-05, | |
| "loss": 4.1908, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.24969314395931966, | |
| "grad_norm": 200.88751220703125, | |
| "learning_rate": 4.998331505142893e-05, | |
| "loss": 4.8984, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.2503945291951604, | |
| "grad_norm": 77.69247436523438, | |
| "learning_rate": 4.99827812279991e-05, | |
| "loss": 0.9449, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.2510959144310012, | |
| "grad_norm": 26.622453689575195, | |
| "learning_rate": 4.9982239001792095e-05, | |
| "loss": 0.3613, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.251797299666842, | |
| "grad_norm": 26.442935943603516, | |
| "learning_rate": 4.9981688372990284e-05, | |
| "loss": 0.5424, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.2524986849026828, | |
| "grad_norm": 39.277618408203125, | |
| "learning_rate": 4.9981129341778886e-05, | |
| "loss": 0.9456, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2532000701385236, | |
| "grad_norm": 43.96021270751953, | |
| "learning_rate": 4.9980561908345916e-05, | |
| "loss": 0.6765, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.25390145537436437, | |
| "grad_norm": 14.254291534423828, | |
| "learning_rate": 4.997998607288222e-05, | |
| "loss": 0.1276, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.25460284061020516, | |
| "grad_norm": 0.6544491052627563, | |
| "learning_rate": 4.9979401835581476e-05, | |
| "loss": 0.0032, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.25530422584604595, | |
| "grad_norm": 0.011080354452133179, | |
| "learning_rate": 4.99788091966402e-05, | |
| "loss": 0.0, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.25600561108188674, | |
| "grad_norm": 284.8055114746094, | |
| "learning_rate": 4.997820815625771e-05, | |
| "loss": 7.6058, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.25670699631772753, | |
| "grad_norm": 89.02140045166016, | |
| "learning_rate": 4.997759871463618e-05, | |
| "loss": 5.079, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2574083815535683, | |
| "grad_norm": 4.7222201828844845e-05, | |
| "learning_rate": 4.997698087198056e-05, | |
| "loss": 0.0, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.25810976678940906, | |
| "grad_norm": 85.66144561767578, | |
| "learning_rate": 4.997635462849869e-05, | |
| "loss": 4.8183, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.25881115202524985, | |
| "grad_norm": 0.0015366720035672188, | |
| "learning_rate": 4.997571998440118e-05, | |
| "loss": 0.0, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.25951253726109064, | |
| "grad_norm": 100.14698028564453, | |
| "learning_rate": 4.99750769399015e-05, | |
| "loss": 2.2663, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2602139224969314, | |
| "grad_norm": 0.4829590618610382, | |
| "learning_rate": 4.997442549521592e-05, | |
| "loss": 0.0021, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2609153077327722, | |
| "grad_norm": 70.44209289550781, | |
| "learning_rate": 4.9973765650563564e-05, | |
| "loss": 0.7532, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.261616692968613, | |
| "grad_norm": 272.1163024902344, | |
| "learning_rate": 4.9973097406166355e-05, | |
| "loss": 3.9935, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2623180782044538, | |
| "grad_norm": 25.12706184387207, | |
| "learning_rate": 4.997242076224906e-05, | |
| "loss": 0.3738, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2630194634402946, | |
| "grad_norm": 35.15785598754883, | |
| "learning_rate": 4.997173571903926e-05, | |
| "loss": 0.6808, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2637208486761354, | |
| "grad_norm": 27.46406364440918, | |
| "learning_rate": 4.9971042276767366e-05, | |
| "loss": 0.2568, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.26442223391197617, | |
| "grad_norm": 6.682714462280273, | |
| "learning_rate": 4.997034043566661e-05, | |
| "loss": 0.0374, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.26512361914781696, | |
| "grad_norm": 0.057287223637104034, | |
| "learning_rate": 4.9969630195973045e-05, | |
| "loss": 0.0001, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.26582500438365775, | |
| "grad_norm": 0.006871811114251614, | |
| "learning_rate": 4.9968911557925564e-05, | |
| "loss": 0.0, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.2665263896194985, | |
| "grad_norm": 0.0037054684944450855, | |
| "learning_rate": 4.996818452176587e-05, | |
| "loss": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2672277748553393, | |
| "grad_norm": 123.15022277832031, | |
| "learning_rate": 4.9967449087738505e-05, | |
| "loss": 5.8346, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.26792916009118006, | |
| "grad_norm": 121.81306457519531, | |
| "learning_rate": 4.996670525609082e-05, | |
| "loss": 4.3778, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.26863054532702085, | |
| "grad_norm": 0.00026572938077151775, | |
| "learning_rate": 4.9965953027072996e-05, | |
| "loss": 0.0, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.26933193056286164, | |
| "grad_norm": 198.09776306152344, | |
| "learning_rate": 4.996519240093806e-05, | |
| "loss": 5.2574, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.27003331579870243, | |
| "grad_norm": 130.21214294433594, | |
| "learning_rate": 4.996442337794181e-05, | |
| "loss": 3.217, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2707347010345432, | |
| "grad_norm": 133.94178771972656, | |
| "learning_rate": 4.996364595834291e-05, | |
| "loss": 1.3412, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.271436086270384, | |
| "grad_norm": 12.223856925964355, | |
| "learning_rate": 4.9962860142402853e-05, | |
| "loss": 0.0924, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2721374715062248, | |
| "grad_norm": 21.94683074951172, | |
| "learning_rate": 4.9962065930385934e-05, | |
| "loss": 0.1824, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.2728388567420656, | |
| "grad_norm": 46.802406311035156, | |
| "learning_rate": 4.996126332255929e-05, | |
| "loss": 0.3585, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.2735402419779064, | |
| "grad_norm": 7.21342134475708, | |
| "learning_rate": 4.9960452319192866e-05, | |
| "loss": 0.1045, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2742416272137472, | |
| "grad_norm": 33.5169677734375, | |
| "learning_rate": 4.995963292055943e-05, | |
| "loss": 0.248, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.2749430124495879, | |
| "grad_norm": 16.911136627197266, | |
| "learning_rate": 4.995880512693458e-05, | |
| "loss": 0.0876, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.2756443976854287, | |
| "grad_norm": 206.7276153564453, | |
| "learning_rate": 4.995796893859675e-05, | |
| "loss": 1.6484, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.2763457829212695, | |
| "grad_norm": 94.29922485351562, | |
| "learning_rate": 4.9957124355827186e-05, | |
| "loss": 0.8745, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.2770471681571103, | |
| "grad_norm": 156.6600799560547, | |
| "learning_rate": 4.9956271378909945e-05, | |
| "loss": 0.9049, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.27774855339295107, | |
| "grad_norm": 6.1268744468688965, | |
| "learning_rate": 4.995541000813193e-05, | |
| "loss": 0.0343, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.27844993862879186, | |
| "grad_norm": 1.731136679649353, | |
| "learning_rate": 4.995454024378285e-05, | |
| "loss": 0.0075, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.27915132386463265, | |
| "grad_norm": 144.5174560546875, | |
| "learning_rate": 4.995366208615526e-05, | |
| "loss": 2.6072, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.27985270910047344, | |
| "grad_norm": 0.19711287319660187, | |
| "learning_rate": 4.9952775535544494e-05, | |
| "loss": 0.0007, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.28055409433631423, | |
| "grad_norm": 0.13065628707408905, | |
| "learning_rate": 4.9951880592248754e-05, | |
| "loss": 0.0004, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.281255479572155, | |
| "grad_norm": 125.28437042236328, | |
| "learning_rate": 4.995097725656905e-05, | |
| "loss": 1.8792, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2819568648079958, | |
| "grad_norm": 111.38099670410156, | |
| "learning_rate": 4.9950065528809214e-05, | |
| "loss": 2.548, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.2826582500438366, | |
| "grad_norm": 1.2473199367523193, | |
| "learning_rate": 4.994914540927589e-05, | |
| "loss": 0.0042, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.28335963527967734, | |
| "grad_norm": 0.7474699020385742, | |
| "learning_rate": 4.994821689827856e-05, | |
| "loss": 0.0026, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.2840610205155181, | |
| "grad_norm": 0.14196322858333588, | |
| "learning_rate": 4.99472799961295e-05, | |
| "loss": 0.0005, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2847624057513589, | |
| "grad_norm": 0.6420115828514099, | |
| "learning_rate": 4.994633470314387e-05, | |
| "loss": 0.0026, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2854637909871997, | |
| "grad_norm": 213.8779296875, | |
| "learning_rate": 4.994538101963958e-05, | |
| "loss": 2.1989, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2861651762230405, | |
| "grad_norm": 108.08120727539062, | |
| "learning_rate": 4.994441894593741e-05, | |
| "loss": 3.9965, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2868665614588813, | |
| "grad_norm": 0.40015965700149536, | |
| "learning_rate": 4.994344848236094e-05, | |
| "loss": 0.0016, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2875679466947221, | |
| "grad_norm": 125.80431365966797, | |
| "learning_rate": 4.9942469629236585e-05, | |
| "loss": 2.3139, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.28826933193056287, | |
| "grad_norm": 101.27831268310547, | |
| "learning_rate": 4.994148238689357e-05, | |
| "loss": 2.1065, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.28897071716640366, | |
| "grad_norm": 3.526338577270508, | |
| "learning_rate": 4.9940486755663946e-05, | |
| "loss": 0.015, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.28967210240224445, | |
| "grad_norm": 2.501554489135742, | |
| "learning_rate": 4.993948273588259e-05, | |
| "loss": 0.0123, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.29037348763808524, | |
| "grad_norm": 75.39434814453125, | |
| "learning_rate": 4.993847032788719e-05, | |
| "loss": 0.7107, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.291074872873926, | |
| "grad_norm": 3.751978635787964, | |
| "learning_rate": 4.993744953201827e-05, | |
| "loss": 0.0183, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.29177625810976676, | |
| "grad_norm": 5.31666374206543, | |
| "learning_rate": 4.9936420348619164e-05, | |
| "loss": 0.0385, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.29247764334560755, | |
| "grad_norm": 104.77239227294922, | |
| "learning_rate": 4.9935382778036036e-05, | |
| "loss": 1.7353, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.29317902858144834, | |
| "grad_norm": 92.44829559326172, | |
| "learning_rate": 4.993433682061786e-05, | |
| "loss": 1.1895, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.29388041381728913, | |
| "grad_norm": 89.00714874267578, | |
| "learning_rate": 4.993328247671643e-05, | |
| "loss": 1.2323, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.2945817990531299, | |
| "grad_norm": 172.867431640625, | |
| "learning_rate": 4.993221974668637e-05, | |
| "loss": 1.7303, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2952831842889707, | |
| "grad_norm": 19.69775390625, | |
| "learning_rate": 4.9931148630885127e-05, | |
| "loss": 0.1678, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2959845695248115, | |
| "grad_norm": 22.828763961791992, | |
| "learning_rate": 4.9930069129672965e-05, | |
| "loss": 0.3184, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2966859547606523, | |
| "grad_norm": 19.460153579711914, | |
| "learning_rate": 4.992898124341296e-05, | |
| "loss": 0.2748, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.2973873399964931, | |
| "grad_norm": 2.9695911407470703, | |
| "learning_rate": 4.9927884972471015e-05, | |
| "loss": 0.1925, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2980887252323339, | |
| "grad_norm": 12.33647346496582, | |
| "learning_rate": 4.9926780317215854e-05, | |
| "loss": 0.1908, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.29879011046817466, | |
| "grad_norm": 2.8421826362609863, | |
| "learning_rate": 4.992566727801902e-05, | |
| "loss": 0.101, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.29949149570401545, | |
| "grad_norm": 171.8658447265625, | |
| "learning_rate": 4.9924545855254875e-05, | |
| "loss": 3.9311, | |
| "step": 427 | |
| }, | |
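The raw per-step losses are too noisy to read a trend from directly (0.0 at step 322, 4.99 at step 326, 0.101 at step 426, 3.93 at step 427). A short sketch of a moving average that makes the trend visible; the window size is an arbitrary assumption:

```python
from collections import deque

# Sketch: running mean of the logged training loss; window=25 is arbitrary.
def smooth(entries, window=25):
    buf, out = deque(maxlen=window), []
    for e in entries:
        if "loss" in e:  # skip entries that carry no loss
            buf.append(e["loss"])
            out.append((e["step"], sum(buf) / len(buf)))
    return out

# e.g. smooth(state["log_history"]), with `state` loaded as in the sketch above
```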
| { | |
| "epoch": 0.30019288093985624, | |
| "grad_norm": 83.66468048095703, | |
| "learning_rate": 4.992341604930061e-05, | |
| "loss": 1.6806, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.300894266175697, | |
| "grad_norm": 0.7609736323356628, | |
| "learning_rate": 4.9922277860536216e-05, | |
| "loss": 0.0041, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.30159565141153777, | |
| "grad_norm": 4.077641487121582, | |
| "learning_rate": 4.9921131289344514e-05, | |
| "loss": 0.0418, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.30229703664737856, | |
| "grad_norm": 3.2341866493225098, | |
| "learning_rate": 4.9919976336111155e-05, | |
| "loss": 0.0293, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.30299842188321935, | |
| "grad_norm": 148.74095153808594, | |
| "learning_rate": 4.991881300122458e-05, | |
| "loss": 3.3951, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.30369980711906014, | |
| "grad_norm": 77.130126953125, | |
| "learning_rate": 4.99176412850761e-05, | |
| "loss": 2.9407, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.30440119235490093, | |
| "grad_norm": 162.98777770996094, | |
| "learning_rate": 4.991646118805978e-05, | |
| "loss": 4.3504, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.3051025775907417, | |
| "grad_norm": 84.4205551147461, | |
| "learning_rate": 4.991527271057256e-05, | |
| "loss": 1.7142, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.3058039628265825, | |
| "grad_norm": 0.8636782765388489, | |
| "learning_rate": 4.991407585301416e-05, | |
| "loss": 0.0053, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3065053480624233, | |
| "grad_norm": 1.0418014526367188, | |
| "learning_rate": 4.9912870615787144e-05, | |
| "loss": 0.0059, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.3072067332982641, | |
| "grad_norm": 0.7050487399101257, | |
| "learning_rate": 4.991165699929688e-05, | |
| "loss": 0.0041, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.3079081185341049, | |
| "grad_norm": 0.3087022304534912, | |
| "learning_rate": 4.991043500395157e-05, | |
| "loss": 0.0017, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.30860950376994567, | |
| "grad_norm": 0.09886661171913147, | |
| "learning_rate": 4.990920463016221e-05, | |
| "loss": 0.0005, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3093108890057864, | |
| "grad_norm": 0.03563763573765755, | |
| "learning_rate": 4.9907965878342634e-05, | |
| "loss": 0.0002, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.3100122742416272, | |
| "grad_norm": 79.55757141113281, | |
| "learning_rate": 4.990671874890948e-05, | |
| "loss": 2.966, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.310713659477468, | |
| "grad_norm": 0.6562897562980652, | |
| "learning_rate": 4.990546324228222e-05, | |
| "loss": 0.0022, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.3114150447133088, | |
| "grad_norm": 74.84564208984375, | |
| "learning_rate": 4.990419935888313e-05, | |
| "loss": 3.0154, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.31211642994914957, | |
| "grad_norm": 0.03038967400789261, | |
| "learning_rate": 4.9902927099137305e-05, | |
| "loss": 0.0002, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.31281781518499036, | |
| "grad_norm": 0.023526914417743683, | |
| "learning_rate": 4.9901646463472674e-05, | |
| "loss": 0.0001, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.31351920042083115, | |
| "grad_norm": 0.08529358357191086, | |
| "learning_rate": 4.990035745231995e-05, | |
| "loss": 0.0005, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.31422058565667194, | |
| "grad_norm": 0.11940905451774597, | |
| "learning_rate": 4.989906006611269e-05, | |
| "loss": 0.0007, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3149219708925127, | |
| "grad_norm": 99.68978118896484, | |
| "learning_rate": 4.989775430528727e-05, | |
| "loss": 1.6958, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3156233561283535, | |
| "grad_norm": 0.3344658315181732, | |
| "learning_rate": 4.9896440170282864e-05, | |
| "loss": 0.002, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3163247413641943, | |
| "grad_norm": 0.7184603810310364, | |
| "learning_rate": 4.9895117661541476e-05, | |
| "loss": 0.0043, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.3170261266000351, | |
| "grad_norm": 0.5113491415977478, | |
| "learning_rate": 4.989378677950791e-05, | |
| "loss": 0.0031, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.31772751183587583, | |
| "grad_norm": 0.49223193526268005, | |
| "learning_rate": 4.989244752462982e-05, | |
| "loss": 0.0029, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3184288970717166, | |
| "grad_norm": 64.00535583496094, | |
| "learning_rate": 4.989109989735763e-05, | |
| "loss": 1.2629, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3191302823075574, | |
| "grad_norm": 0.18930155038833618, | |
| "learning_rate": 4.988974389814463e-05, | |
| "loss": 0.0011, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3198316675433982, | |
| "grad_norm": 0.3088482916355133, | |
| "learning_rate": 4.9888379527446885e-05, | |
| "loss": 0.0018, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.320533052779239, | |
| "grad_norm": 71.42636108398438, | |
| "learning_rate": 4.988700678572329e-05, | |
| "loss": 1.9691, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.3212344380150798, | |
| "grad_norm": 0.20487411320209503, | |
| "learning_rate": 4.988562567343557e-05, | |
| "loss": 0.0012, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.32193582325092057, | |
| "grad_norm": 73.37615203857422, | |
| "learning_rate": 4.988423619104823e-05, | |
| "loss": 1.8636, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.32263720848676136, | |
| "grad_norm": 66.4114761352539, | |
| "learning_rate": 4.988283833902863e-05, | |
| "loss": 1.3073, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.32333859372260215, | |
| "grad_norm": 4.2518744468688965, | |
| "learning_rate": 4.988143211784693e-05, | |
| "loss": 0.0318, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.32403997895844294, | |
| "grad_norm": 4.782449245452881, | |
| "learning_rate": 4.9880017527976083e-05, | |
| "loss": 0.0384, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.32474136419428373, | |
| "grad_norm": 22.517131805419922, | |
| "learning_rate": 4.9878594569891896e-05, | |
| "loss": 0.2973, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.3254427494301245, | |
| "grad_norm": 5.63627290725708, | |
| "learning_rate": 4.987716324407296e-05, | |
| "loss": 0.0496, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.32614413466596526, | |
| "grad_norm": 6.364257335662842, | |
| "learning_rate": 4.9875723551000696e-05, | |
| "loss": 0.0578, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.32684551990180605, | |
| "grad_norm": 147.82330322265625, | |
| "learning_rate": 4.987427549115933e-05, | |
| "loss": 1.8188, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.32754690513764684, | |
| "grad_norm": 3.7355575561523438, | |
| "learning_rate": 4.987281906503591e-05, | |
| "loss": 0.0274, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.32824829037348763, | |
| "grad_norm": 0.6107962131500244, | |
| "learning_rate": 4.987135427312029e-05, | |
| "loss": 0.0038, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.3289496756093284, | |
| "grad_norm": 61.26199722290039, | |
| "learning_rate": 4.9869881115905146e-05, | |
| "loss": 0.8915, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.3296510608451692, | |
| "grad_norm": 0.3030995726585388, | |
| "learning_rate": 4.986839959388596e-05, | |
| "loss": 0.0018, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.33035244608101, | |
| "grad_norm": 0.12271957844495773, | |
| "learning_rate": 4.986690970756104e-05, | |
| "loss": 0.0007, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3310538313168508, | |
| "grad_norm": 71.7486801147461, | |
| "learning_rate": 4.9865411457431485e-05, | |
| "loss": 1.8673, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3317552165526916, | |
| "grad_norm": 69.17851257324219, | |
| "learning_rate": 4.986390484400124e-05, | |
| "loss": 1.6587, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.33245660178853237, | |
| "grad_norm": 0.18208004534244537, | |
| "learning_rate": 4.9862389867777023e-05, | |
| "loss": 0.001, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.33315798702437316, | |
| "grad_norm": 122.65370178222656, | |
| "learning_rate": 4.986086652926839e-05, | |
| "loss": 0.9068, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.33385937226021395, | |
| "grad_norm": 0.6119470596313477, | |
| "learning_rate": 4.9859334828987715e-05, | |
| "loss": 0.0039, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.3345607574960547, | |
| "grad_norm": 4.483019828796387, | |
| "learning_rate": 4.985779476745017e-05, | |
| "loss": 0.0343, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3352621427318955, | |
| "grad_norm": 0.7592105269432068, | |
| "learning_rate": 4.985624634517374e-05, | |
| "loss": 0.005, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.33596352796773626, | |
| "grad_norm": 0.6566614508628845, | |
| "learning_rate": 4.985468956267922e-05, | |
| "loss": 0.0037, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.33666491320357705, | |
| "grad_norm": 67.39823150634766, | |
| "learning_rate": 4.985312442049023e-05, | |
| "loss": 1.7049, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.33736629843941784, | |
| "grad_norm": 0.13803933560848236, | |
| "learning_rate": 4.98515509191332e-05, | |
| "loss": 0.0008, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.33806768367525863, | |
| "grad_norm": 72.15523529052734, | |
| "learning_rate": 4.984996905913736e-05, | |
| "loss": 1.6634, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3387690689110994, | |
| "grad_norm": 0.10033612698316574, | |
| "learning_rate": 4.984837884103475e-05, | |
| "loss": 0.0005, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.3394704541469402, | |
| "grad_norm": 62.519317626953125, | |
| "learning_rate": 4.984678026536024e-05, | |
| "loss": 1.2577, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.340171839382781, | |
| "grad_norm": 0.36229056119918823, | |
| "learning_rate": 4.984517333265148e-05, | |
| "loss": 0.0023, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3408732246186218, | |
| "grad_norm": 0.8776547312736511, | |
| "learning_rate": 4.9843558043448985e-05, | |
| "loss": 0.0056, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.3415746098544626, | |
| "grad_norm": 108.73171997070312, | |
| "learning_rate": 4.984193439829602e-05, | |
| "loss": 2.7267, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.3422759950903034, | |
| "grad_norm": 0.6344736814498901, | |
| "learning_rate": 4.984030239773868e-05, | |
| "loss": 0.0041, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3429773803261441, | |
| "grad_norm": 113.62140655517578, | |
| "learning_rate": 4.98386620423259e-05, | |
| "loss": 1.7271, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3436787655619849, | |
| "grad_norm": 1.618384599685669, | |
| "learning_rate": 4.983701333260938e-05, | |
| "loss": 0.011, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3443801507978257, | |
| "grad_norm": 138.78854370117188, | |
| "learning_rate": 4.9835356269143676e-05, | |
| "loss": 1.9143, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.3450815360336665, | |
| "grad_norm": 49.62142562866211, | |
| "learning_rate": 4.98336908524861e-05, | |
| "loss": 0.753, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.34578292126950727, | |
| "grad_norm": 3.6228299140930176, | |
| "learning_rate": 4.9832017083196816e-05, | |
| "loss": 0.0277, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.34648430650534806, | |
| "grad_norm": 24.915790557861328, | |
| "learning_rate": 4.98303349618388e-05, | |
| "loss": 0.356, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.34718569174118885, | |
| "grad_norm": 7.880053997039795, | |
| "learning_rate": 4.9828644488977795e-05, | |
| "loss": 0.0727, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.34788707697702964, | |
| "grad_norm": 12.95199203491211, | |
| "learning_rate": 4.982694566518239e-05, | |
| "loss": 0.25, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.34858846221287043, | |
| "grad_norm": 2.059384822845459, | |
| "learning_rate": 4.982523849102397e-05, | |
| "loss": 0.2069, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3492898474487112, | |
| "grad_norm": 17.132720947265625, | |
| "learning_rate": 4.982352296707675e-05, | |
| "loss": 0.2093, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.349991232684552, | |
| "grad_norm": 12.591998100280762, | |
| "learning_rate": 4.98217990939177e-05, | |
| "loss": 0.1889, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.3506926179203928, | |
| "grad_norm": 12.747396469116211, | |
| "learning_rate": 4.982006687212666e-05, | |
| "loss": 0.1755, | |
| "step": 500 | |
| }, | |
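From step 320 onward the log alternates between exact 0.0 losses with vanishing grad_norm (steps 322-325, 347-348, 351-352) and triple-digit spikes (200.9 at step 356, 284.8 at step 365). In the Hugging Face Trainer the logged grad_norm is typically the norm measured before clipping, so spikes here do not prove clipping is disabled; tightening max_grad_norm only bounds the applied update. A hedged configuration sketch with illustrative values, not this run's actual settings:

```python
from transformers import TrainingArguments

# Illustrative values only, not this run's configuration.
args = TrainingArguments(
    output_dir="out",     # hypothetical output path
    learning_rate=5e-5,   # roughly the peak LR this log decays from
    max_grad_norm=1.0,    # clip threshold (the Trainer default is also 1.0)
    logging_steps=1,      # matches the per-step logging seen in this dump
)
```

When clipping alone does not stop loss spikes of this size, a lower peak learning rate or a longer warmup is often the more effective fix.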
| { | |
| "epoch": 0.35139400315623354, | |
| "grad_norm": 77.32592010498047, | |
| "learning_rate": 4.981832630228624e-05, | |
| "loss": 0.9517, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3520953883920743, | |
| "grad_norm": 8.452153205871582, | |
| "learning_rate": 4.9816577384981866e-05, | |
| "loss": 0.0913, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.3527967736279151, | |
| "grad_norm": 2.1267127990722656, | |
| "learning_rate": 4.9814820120801776e-05, | |
| "loss": 0.0178, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.3534981588637559, | |
| "grad_norm": 0.8161488771438599, | |
| "learning_rate": 4.981305451033701e-05, | |
| "loss": 0.0058, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3541995440995967, | |
| "grad_norm": 0.10489612072706223, | |
| "learning_rate": 4.981128055418143e-05, | |
| "loss": 0.0006, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3549009293354375, | |
| "grad_norm": 69.51275634765625, | |
| "learning_rate": 4.9809498252931674e-05, | |
| "loss": 2.62, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.3556023145712783, | |
| "grad_norm": 0.004451240878552198, | |
| "learning_rate": 4.980770760718721e-05, | |
| "loss": 0.0, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.35630369980711907, | |
| "grad_norm": 67.6517105102539, | |
| "learning_rate": 4.9805908617550335e-05, | |
| "loss": 3.25, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.35700508504295986, | |
| "grad_norm": 0.016789492219686508, | |
| "learning_rate": 4.980410128462609e-05, | |
| "loss": 0.0001, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.35770647027880065, | |
| "grad_norm": 0.025487884879112244, | |
| "learning_rate": 4.980228560902239e-05, | |
| "loss": 0.0001, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.35840785551464144, | |
| "grad_norm": 0.042408283799886703, | |
| "learning_rate": 4.98004615913499e-05, | |
| "loss": 0.0002, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.35910924075048223, | |
| "grad_norm": 0.038653161376714706, | |
| "learning_rate": 4.979862923222212e-05, | |
| "loss": 0.0002, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.35981062598632296, | |
| "grad_norm": 0.03510050103068352, | |
| "learning_rate": 4.9796788532255356e-05, | |
| "loss": 0.0002, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.36051201122216375, | |
| "grad_norm": 0.008441315963864326, | |
| "learning_rate": 4.979493949206872e-05, | |
| "loss": 0.0, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.36121339645800454, | |
| "grad_norm": 0.006373463664203882, | |
| "learning_rate": 4.979308211228411e-05, | |
| "loss": 0.0, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.36191478169384533, | |
| "grad_norm": 79.59523010253906, | |
| "learning_rate": 4.979121639352625e-05, | |
| "loss": 2.9487, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.3626161669296861, | |
| "grad_norm": 153.0602264404297, | |
| "learning_rate": 4.9789342336422665e-05, | |
| "loss": 3.6533, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.3633175521655269, | |
| "grad_norm": 0.7085576057434082, | |
| "learning_rate": 4.9787459941603666e-05, | |
| "loss": 0.0042, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3640189374013677, | |
| "grad_norm": 155.89154052734375, | |
| "learning_rate": 4.9785569209702395e-05, | |
| "loss": 7.9018, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.3647203226372085, | |
| "grad_norm": 5.253132343292236, | |
| "learning_rate": 4.9783670141354776e-05, | |
| "loss": 0.042, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3654217078730493, | |
| "grad_norm": 26.310007095336914, | |
| "learning_rate": 4.978176273719957e-05, | |
| "loss": 0.2762, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.3661230931088901, | |
| "grad_norm": 9.570679664611816, | |
| "learning_rate": 4.977984699787829e-05, | |
| "loss": 0.0898, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.36682447834473086, | |
| "grad_norm": 214.85060119628906, | |
| "learning_rate": 4.977792292403529e-05, | |
| "loss": 6.7239, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.36752586358057165, | |
| "grad_norm": 2.7513771057128906, | |
| "learning_rate": 4.977599051631773e-05, | |
| "loss": 0.0228, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.3682272488164124, | |
| "grad_norm": 33.629356384277344, | |
| "learning_rate": 4.977404977537554e-05, | |
| "loss": 0.4374, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3689286340522532, | |
| "grad_norm": 3.8864808082580566, | |
| "learning_rate": 4.97721007018615e-05, | |
| "loss": 0.0308, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.36963001928809397, | |
| "grad_norm": 1.0654228925704956, | |
| "learning_rate": 4.9770143296431144e-05, | |
| "loss": 0.0077, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.37033140452393476, | |
| "grad_norm": 131.74990844726562, | |
| "learning_rate": 4.9768177559742844e-05, | |
| "loss": 4.0998, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.37103278975977555, | |
| "grad_norm": 0.10420957952737808, | |
| "learning_rate": 4.976620349245776e-05, | |
| "loss": 0.0006, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.37173417499561634, | |
| "grad_norm": 72.78388977050781, | |
| "learning_rate": 4.976422109523985e-05, | |
| "loss": 3.8242, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.37243556023145713, | |
| "grad_norm": 69.0342025756836, | |
| "learning_rate": 4.976223036875588e-05, | |
| "loss": 1.9872, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.3731369454672979, | |
| "grad_norm": 0.43323075771331787, | |
| "learning_rate": 4.9760231313675426e-05, | |
| "loss": 0.0028, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.3738383307031387, | |
| "grad_norm": 1.0747731924057007, | |
| "learning_rate": 4.975822393067084e-05, | |
| "loss": 0.0061, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.3745397159389795, | |
| "grad_norm": 69.71436309814453, | |
| "learning_rate": 4.9756208220417304e-05, | |
| "loss": 1.8796, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.3752411011748203, | |
| "grad_norm": 127.05543518066406, | |
| "learning_rate": 4.975418418359278e-05, | |
| "loss": 2.8441, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3759424864106611, | |
| "grad_norm": 0.9959923624992371, | |
| "learning_rate": 4.975215182087804e-05, | |
| "loss": 0.0065, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.3766438716465018, | |
| "grad_norm": 2.1746952533721924, | |
| "learning_rate": 4.9750111132956654e-05, | |
| "loss": 0.0156, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.3773452568823426, | |
| "grad_norm": 92.57381439208984, | |
| "learning_rate": 4.9748062120514996e-05, | |
| "loss": 1.4085, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3780466421181834, | |
| "grad_norm": 75.45511627197266, | |
| "learning_rate": 4.974600478424223e-05, | |
| "loss": 0.9909, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.3787480273540242, | |
| "grad_norm": 25.877460479736328, | |
| "learning_rate": 4.974393912483034e-05, | |
| "loss": 0.3578, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.379449412589865, | |
| "grad_norm": 12.262901306152344, | |
| "learning_rate": 4.9741865142974076e-05, | |
| "loss": 0.1338, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.38015079782570577, | |
| "grad_norm": 10.54548168182373, | |
| "learning_rate": 4.973978283937102e-05, | |
| "loss": 0.2111, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.38085218306154656, | |
| "grad_norm": 35.909976959228516, | |
| "learning_rate": 4.973769221472153e-05, | |
| "loss": 0.5549, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.38155356829738735, | |
| "grad_norm": 13.012696266174316, | |
| "learning_rate": 4.973559326972878e-05, | |
| "loss": 0.1784, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.38225495353322814, | |
| "grad_norm": 16.022974014282227, | |
| "learning_rate": 4.973348600509874e-05, | |
| "loss": 0.2235, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.3829563387690689, | |
| "grad_norm": 22.86570167541504, | |
| "learning_rate": 4.973137042154016e-05, | |
| "loss": 0.3561, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.3836577240049097, | |
| "grad_norm": 13.543822288513184, | |
| "learning_rate": 4.972924651976461e-05, | |
| "loss": 0.3077, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.3843591092407505, | |
| "grad_norm": 17.42881965637207, | |
| "learning_rate": 4.972711430048644e-05, | |
| "loss": 0.2779, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.38506049447659124, | |
| "grad_norm": 19.7313175201416, | |
| "learning_rate": 4.972497376442282e-05, | |
| "loss": 0.2582, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.38576187971243203, | |
| "grad_norm": 12.354894638061523, | |
| "learning_rate": 4.9722824912293695e-05, | |
| "loss": 0.2374, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3864632649482728, | |
| "grad_norm": 17.16146469116211, | |
| "learning_rate": 4.972066774482182e-05, | |
| "loss": 0.2085, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.3871646501841136, | |
| "grad_norm": 49.288734436035156, | |
| "learning_rate": 4.971850226273274e-05, | |
| "loss": 0.6211, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3878660354199544, | |
| "grad_norm": 4.870689392089844, | |
| "learning_rate": 4.97163284667548e-05, | |
| "loss": 0.0413, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.3885674206557952, | |
| "grad_norm": 4.268405437469482, | |
| "learning_rate": 4.971414635761914e-05, | |
| "loss": 0.0354, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.389268805891636, | |
| "grad_norm": 2.800025701522827, | |
| "learning_rate": 4.971195593605969e-05, | |
| "loss": 0.0215, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3899701911274768, | |
| "grad_norm": 136.0956573486328, | |
| "learning_rate": 4.9709757202813195e-05, | |
| "loss": 3.3931, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.39067157636331756, | |
| "grad_norm": 68.56623840332031, | |
| "learning_rate": 4.970755015861918e-05, | |
| "loss": 1.8678, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.39137296159915835, | |
| "grad_norm": 68.25215911865234, | |
| "learning_rate": 4.970533480421996e-05, | |
| "loss": 1.9984, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.39207434683499914, | |
| "grad_norm": 69.02233123779297, | |
| "learning_rate": 4.9703111140360656e-05, | |
| "loss": 2.0662, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.39277573207083993, | |
| "grad_norm": 61.60824203491211, | |
| "learning_rate": 4.970087916778918e-05, | |
| "loss": 1.5281, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.39347711730668067, | |
| "grad_norm": 48.50178146362305, | |
| "learning_rate": 4.9698638887256254e-05, | |
| "loss": 0.8115, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.39417850254252146, | |
| "grad_norm": 41.32649612426758, | |
| "learning_rate": 4.969639029951537e-05, | |
| "loss": 0.6374, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.39487988777836225, | |
| "grad_norm": 6.946910858154297, | |
| "learning_rate": 4.9694133405322815e-05, | |
| "loss": 0.2402, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.39558127301420304, | |
| "grad_norm": 22.8358097076416, | |
| "learning_rate": 4.9691868205437694e-05, | |
| "loss": 0.3883, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.39628265825004383, | |
| "grad_norm": 15.402759552001953, | |
| "learning_rate": 4.968959470062189e-05, | |
| "loss": 0.7708, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3969840434858846, | |
| "grad_norm": 28.08781623840332, | |
| "learning_rate": 4.968731289164006e-05, | |
| "loss": 0.7703, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.3976854287217254, | |
| "grad_norm": 26.910886764526367, | |
| "learning_rate": 4.9685022779259696e-05, | |
| "loss": 0.6299, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.3983868139575662, | |
| "grad_norm": 24.341957092285156, | |
| "learning_rate": 4.968272436425105e-05, | |
| "loss": 0.5751, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.399088199193407, | |
| "grad_norm": 23.469263076782227, | |
| "learning_rate": 4.9680417647387186e-05, | |
| "loss": 0.4664, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3997895844292478, | |
| "grad_norm": 15.347972869873047, | |
| "learning_rate": 4.967810262944394e-05, | |
| "loss": 0.2252, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.40049096966508857, | |
| "grad_norm": 5.6460280418396, | |
| "learning_rate": 4.967577931119996e-05, | |
| "loss": 0.2232, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.40119235490092936, | |
| "grad_norm": 4.2383856773376465, | |
| "learning_rate": 4.967344769343667e-05, | |
| "loss": 0.0396, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4018937401367701, | |
| "grad_norm": 1.6932095289230347, | |
| "learning_rate": 4.96711077769383e-05, | |
| "loss": 0.0137, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.4025951253726109, | |
| "grad_norm": 0.41152113676071167, | |
| "learning_rate": 4.966875956249186e-05, | |
| "loss": 0.0029, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.4032965106084517, | |
| "grad_norm": 0.06895897537469864, | |
| "learning_rate": 4.9666403050887155e-05, | |
| "loss": 0.0004, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.40399789584429247, | |
| "grad_norm": 0.004564318805932999, | |
| "learning_rate": 4.9664038242916786e-05, | |
| "loss": 0.0, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.40469928108013326, | |
| "grad_norm": 0.0019707169849425554, | |
| "learning_rate": 4.966166513937613e-05, | |
| "loss": 0.0, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.40540066631597405, | |
| "grad_norm": 0.00037522497586905956, | |
| "learning_rate": 4.965928374106337e-05, | |
| "loss": 0.0, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.40610205155181484, | |
| "grad_norm": 5.6252967624459416e-05, | |
| "learning_rate": 4.965689404877946e-05, | |
| "loss": 0.0, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.4068034367876556, | |
| "grad_norm": 1.1128309779451229e-05, | |
| "learning_rate": 4.9654496063328174e-05, | |
| "loss": 0.0, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4075048220234964, | |
| "grad_norm": 119.89021301269531, | |
| "learning_rate": 4.965208978551605e-05, | |
| "loss": 10.8344, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.4082062072593372, | |
| "grad_norm": 1.3851165931555443e-05, | |
| "learning_rate": 4.964967521615241e-05, | |
| "loss": 0.0, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.408907592495178, | |
| "grad_norm": 3.7249556044116616e-06, | |
| "learning_rate": 4.96472523560494e-05, | |
| "loss": 0.0, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.4096089777310188, | |
| "grad_norm": 2.917176971095614e-06, | |
| "learning_rate": 4.9644821206021906e-05, | |
| "loss": 0.0, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.4103103629668595, | |
| "grad_norm": 3.3871406230900902e-06, | |
| "learning_rate": 4.964238176688765e-05, | |
| "loss": 0.0, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4110117482027003, | |
| "grad_norm": 2.260056135128252e-06, | |
| "learning_rate": 4.9639934039467106e-05, | |
| "loss": 0.0, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4117131334385411, | |
| "grad_norm": 1.5038928040667088e-06, | |
| "learning_rate": 4.963747802458355e-05, | |
| "loss": 0.0, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.4124145186743819, | |
| "grad_norm": 59.293521881103516, | |
| "learning_rate": 4.963501372306306e-05, | |
| "loss": 5.3994, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4131159039102227, | |
| "grad_norm": 59.763816833496094, | |
| "learning_rate": 4.963254113573446e-05, | |
| "loss": 5.6505, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.41381728914606347, | |
| "grad_norm": 132.7192840576172, | |
| "learning_rate": 4.963006026342941e-05, | |
| "loss": 9.1499, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.41451867438190426, | |
| "grad_norm": 0.00024285895051434636, | |
| "learning_rate": 4.962757110698232e-05, | |
| "loss": 0.0, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.41522005961774505, | |
| "grad_norm": 0.0009618580807000399, | |
| "learning_rate": 4.9625073667230404e-05, | |
| "loss": 0.0, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.41592144485358584, | |
| "grad_norm": 0.004245178773999214, | |
| "learning_rate": 4.962256794501365e-05, | |
| "loss": 0.0, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.41662283008942663, | |
| "grad_norm": 0.01505737192928791, | |
| "learning_rate": 4.962005394117486e-05, | |
| "loss": 0.0001, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.4173242153252674, | |
| "grad_norm": 0.01870640181005001, | |
| "learning_rate": 4.961753165655958e-05, | |
| "loss": 0.0001, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.4180256005611082, | |
| "grad_norm": 0.047609515488147736, | |
| "learning_rate": 4.961500109201618e-05, | |
| "loss": 0.0003, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.41872698579694895, | |
| "grad_norm": 0.15332278609275818, | |
| "learning_rate": 4.9612462248395775e-05, | |
| "loss": 0.001, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.41942837103278974, | |
| "grad_norm": 113.53760528564453, | |
| "learning_rate": 4.96099151265523e-05, | |
| "loss": 2.3303, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.4201297562686305, | |
| "grad_norm": 0.6282661557197571, | |
| "learning_rate": 4.9607359727342465e-05, | |
| "loss": 0.0044, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.4208311415044713, | |
| "grad_norm": 43.630615234375, | |
| "learning_rate": 4.9604796051625756e-05, | |
| "loss": 0.6733, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4215325267403121, | |
| "grad_norm": 72.0724868774414, | |
| "learning_rate": 4.960222410026444e-05, | |
| "loss": 1.4145, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.4222339119761529, | |
| "grad_norm": 6.166507244110107, | |
| "learning_rate": 4.959964387412358e-05, | |
| "loss": 0.0608, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.4229352972119937, | |
| "grad_norm": 80.03913879394531, | |
| "learning_rate": 4.9597055374071024e-05, | |
| "loss": 1.2555, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4236366824478345, | |
| "grad_norm": 17.456140518188477, | |
| "learning_rate": 4.959445860097738e-05, | |
| "loss": 0.238, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.42433806768367527, | |
| "grad_norm": 36.63374328613281, | |
| "learning_rate": 4.9591853555716064e-05, | |
| "loss": 0.8324, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.42503945291951606, | |
| "grad_norm": 16.27304458618164, | |
| "learning_rate": 4.9589240239163255e-05, | |
| "loss": 0.2125, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.42574083815535685, | |
| "grad_norm": 19.427776336669922, | |
| "learning_rate": 4.9586618652197934e-05, | |
| "loss": 0.2701, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.42644222339119764, | |
| "grad_norm": 8.516806602478027, | |
| "learning_rate": 4.958398879570185e-05, | |
| "loss": 0.3408, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.4271436086270384, | |
| "grad_norm": 1.857074499130249, | |
| "learning_rate": 4.9581350670559524e-05, | |
| "loss": 0.2039, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.42784499386287916, | |
| "grad_norm": 8.925201416015625, | |
| "learning_rate": 4.9578704277658286e-05, | |
| "loss": 0.09, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.42854637909871995, | |
| "grad_norm": 4.490631580352783, | |
| "learning_rate": 4.957604961788823e-05, | |
| "loss": 0.0384, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.42924776433456074, | |
| "grad_norm": 0.866288959980011, | |
| "learning_rate": 4.957338669214222e-05, | |
| "loss": 0.0063, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.42994914957040153, | |
| "grad_norm": 0.6116040349006653, | |
| "learning_rate": 4.9570715501315925e-05, | |
| "loss": 0.0035, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.4306505348062423, | |
| "grad_norm": 0.014350208453834057, | |
| "learning_rate": 4.956803604630777e-05, | |
| "loss": 0.0001, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.4313519200420831, | |
| "grad_norm": 0.004978867247700691, | |
| "learning_rate": 4.956534832801897e-05, | |
| "loss": 0.0, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4320533052779239, | |
| "grad_norm": 0.0008859955123625696, | |
| "learning_rate": 4.9562652347353535e-05, | |
| "loss": 0.0, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.4327546905137647, | |
| "grad_norm": 0.0007402606424875557, | |
| "learning_rate": 4.955994810521822e-05, | |
| "loss": 0.0, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.4334560757496055, | |
| "grad_norm": 60.98609161376953, | |
| "learning_rate": 4.955723560252259e-05, | |
| "loss": 4.6234, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.4341574609854463, | |
| "grad_norm": 60.63393783569336, | |
| "learning_rate": 4.955451484017896e-05, | |
| "loss": 4.669, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.43485884622128707, | |
| "grad_norm": 0.00016736971156205982, | |
| "learning_rate": 4.955178581910246e-05, | |
| "loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4355602314571278, | |
| "grad_norm": 0.00026081904070451856, | |
| "learning_rate": 4.9549048540210963e-05, | |
| "loss": 0.0, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.4362616166929686, | |
| "grad_norm": 0.0002771130239125341, | |
| "learning_rate": 4.954630300442513e-05, | |
| "loss": 0.0, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4369630019288094, | |
| "grad_norm": 0.00024890206987038255, | |
| "learning_rate": 4.954354921266841e-05, | |
| "loss": 0.0, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.43766438716465017, | |
| "grad_norm": 0.0003540183824952692, | |
| "learning_rate": 4.954078716586702e-05, | |
| "loss": 0.0, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.43836577240049096, | |
| "grad_norm": 61.1258659362793, | |
| "learning_rate": 4.9538016864949953e-05, | |
| "loss": 4.1063, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.43906715763633175, | |
| "grad_norm": 61.82120895385742, | |
| "learning_rate": 4.953523831084897e-05, | |
| "loss": 3.2575, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.43976854287217254, | |
| "grad_norm": 0.009341489523649216, | |
| "learning_rate": 4.9532451504498644e-05, | |
| "loss": 0.0001, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.44046992810801333, | |
| "grad_norm": 126.25260925292969, | |
| "learning_rate": 4.952965644683627e-05, | |
| "loss": 4.8894, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4411713133438541, | |
| "grad_norm": 64.0586166381836, | |
| "learning_rate": 4.9526853138801954e-05, | |
| "loss": 2.0939, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.4418726985796949, | |
| "grad_norm": 66.46090698242188, | |
| "learning_rate": 4.952404158133857e-05, | |
| "loss": 2.1672, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4425740838155357, | |
| "grad_norm": 8.158260345458984, | |
| "learning_rate": 4.952122177539177e-05, | |
| "loss": 0.089, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.4432754690513765, | |
| "grad_norm": 8.28303337097168, | |
| "learning_rate": 4.951839372190996e-05, | |
| "loss": 0.1792, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4439768542872172, | |
| "grad_norm": 6.670780658721924, | |
| "learning_rate": 4.951555742184435e-05, | |
| "loss": 0.3495, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.444678239523058, | |
| "grad_norm": 18.7882022857666, | |
| "learning_rate": 4.95127128761489e-05, | |
| "loss": 0.2761, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.4453796247588988, | |
| "grad_norm": 3.4713053703308105, | |
| "learning_rate": 4.950986008578037e-05, | |
| "loss": 0.2429, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.4460810099947396, | |
| "grad_norm": 8.415278434753418, | |
| "learning_rate": 4.950699905169825e-05, | |
| "loss": 0.0883, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.4467823952305804, | |
| "grad_norm": 5.512305736541748, | |
| "learning_rate": 4.950412977486484e-05, | |
| "loss": 0.0515, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.4474837804664212, | |
| "grad_norm": 1.983485460281372, | |
| "learning_rate": 4.95012522562452e-05, | |
| "loss": 0.0162, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.44818516570226197, | |
| "grad_norm": 0.5145743489265442, | |
| "learning_rate": 4.949836649680717e-05, | |
| "loss": 0.0038, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.44888655093810276, | |
| "grad_norm": 0.0464518703520298, | |
| "learning_rate": 4.949547249752134e-05, | |
| "loss": 0.0003, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.44958793617394355, | |
| "grad_norm": 63.47880935668945, | |
| "learning_rate": 4.9492570259361094e-05, | |
| "loss": 2.606, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.45028932140978434, | |
| "grad_norm": 63.044219970703125, | |
| "learning_rate": 4.948965978330258e-05, | |
| "loss": 3.3714, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.45099070664562513, | |
| "grad_norm": 61.190128326416016, | |
| "learning_rate": 4.948674107032471e-05, | |
| "loss": 3.3129, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.4516920918814659, | |
| "grad_norm": 59.03745651245117, | |
| "learning_rate": 4.948381412140917e-05, | |
| "loss": 2.3587, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.4523934771173067, | |
| "grad_norm": 61.125858306884766, | |
| "learning_rate": 4.948087893754043e-05, | |
| "loss": 2.7351, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.45309486235314744, | |
| "grad_norm": 60.69954299926758, | |
| "learning_rate": 4.947793551970571e-05, | |
| "loss": 2.2391, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.45379624758898823, | |
| "grad_norm": 56.91267395019531, | |
| "learning_rate": 4.9474983868895e-05, | |
| "loss": 1.5786, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.454497632824829, | |
| "grad_norm": 0.8762818574905396, | |
| "learning_rate": 4.947202398610108e-05, | |
| "loss": 0.0067, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.4551990180606698, | |
| "grad_norm": 2.4974522590637207, | |
| "learning_rate": 4.9469055872319483e-05, | |
| "loss": 0.0224, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.4559004032965106, | |
| "grad_norm": 22.98019790649414, | |
| "learning_rate": 4.9466079528548505e-05, | |
| "loss": 0.3665, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4566017885323514, | |
| "grad_norm": 16.03438949584961, | |
| "learning_rate": 4.9463094955789214e-05, | |
| "loss": 0.315, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.4573031737681922, | |
| "grad_norm": 8.860062599182129, | |
| "learning_rate": 4.946010215504546e-05, | |
| "loss": 0.1067, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.458004559004033, | |
| "grad_norm": 5.521644115447998, | |
| "learning_rate": 4.945710112732386e-05, | |
| "loss": 0.2473, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.45870594423987376, | |
| "grad_norm": 15.16481876373291, | |
| "learning_rate": 4.945409187363376e-05, | |
| "loss": 0.2257, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.45940732947571455, | |
| "grad_norm": 15.714698791503906, | |
| "learning_rate": 4.945107439498732e-05, | |
| "loss": 0.2359, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.46010871471155534, | |
| "grad_norm": 6.4135589599609375, | |
| "learning_rate": 4.944804869239944e-05, | |
| "loss": 0.2336, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.46081009994739613, | |
| "grad_norm": 9.600846290588379, | |
| "learning_rate": 4.94450147668878e-05, | |
| "loss": 0.1174, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.46151148518323687, | |
| "grad_norm": 7.477170467376709, | |
| "learning_rate": 4.944197261947283e-05, | |
| "loss": 0.0857, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.46221287041907766, | |
| "grad_norm": 2.0777835845947266, | |
| "learning_rate": 4.943892225117774e-05, | |
| "loss": 0.0196, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.46291425565491845, | |
| "grad_norm": 41.072837829589844, | |
| "learning_rate": 4.9435863663028495e-05, | |
| "loss": 0.7781, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.46361564089075924, | |
| "grad_norm": 0.5752171277999878, | |
| "learning_rate": 4.943279685605383e-05, | |
| "loss": 0.0045, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.46431702612660003, | |
| "grad_norm": 51.6488151550293, | |
| "learning_rate": 4.9429721831285246e-05, | |
| "loss": 1.1945, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.4650184113624408, | |
| "grad_norm": 0.11998272687196732, | |
| "learning_rate": 4.942663858975701e-05, | |
| "loss": 0.0009, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.4657197965982816, | |
| "grad_norm": 57.40029525756836, | |
| "learning_rate": 4.9423547132506135e-05, | |
| "loss": 1.7823, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.4664211818341224, | |
| "grad_norm": 0.07954677194356918, | |
| "learning_rate": 4.942044746057242e-05, | |
| "loss": 0.0006, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.4671225670699632, | |
| "grad_norm": 77.24893951416016, | |
| "learning_rate": 4.941733957499841e-05, | |
| "loss": 3.3749, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.467823952305804, | |
| "grad_norm": 57.04423522949219, | |
| "learning_rate": 4.9414223476829434e-05, | |
| "loss": 1.6771, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.46852533754164477, | |
| "grad_norm": 0.14917835593223572, | |
| "learning_rate": 4.941109916711355e-05, | |
| "loss": 0.0011, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.46922672277748556, | |
| "grad_norm": 47.98805236816406, | |
| "learning_rate": 4.940796664690161e-05, | |
| "loss": 1.0673, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.4699281080133263, | |
| "grad_norm": 63.10405349731445, | |
| "learning_rate": 4.940482591724721e-05, | |
| "loss": 1.7249, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4706294932491671, | |
| "grad_norm": 47.01327133178711, | |
| "learning_rate": 4.940167697920671e-05, | |
| "loss": 0.9642, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.4713308784850079, | |
| "grad_norm": 4.804792881011963, | |
| "learning_rate": 4.9398519833839234e-05, | |
| "loss": 0.0514, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.47203226372084867, | |
| "grad_norm": 46.21573257446289, | |
| "learning_rate": 4.939535448220667e-05, | |
| "loss": 0.7498, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.47273364895668946, | |
| "grad_norm": 12.326519966125488, | |
| "learning_rate": 4.939218092537365e-05, | |
| "loss": 0.1734, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.47343503419253025, | |
| "grad_norm": 2.402872085571289, | |
| "learning_rate": 4.938899916440759e-05, | |
| "loss": 0.2545, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.47413641942837104, | |
| "grad_norm": 4.058804512023926, | |
| "learning_rate": 4.938580920037864e-05, | |
| "loss": 0.2573, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.4748378046642118, | |
| "grad_norm": 11.151541709899902, | |
| "learning_rate": 4.938261103435973e-05, | |
| "loss": 0.4285, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.4755391899000526, | |
| "grad_norm": 6.957059860229492, | |
| "learning_rate": 4.937940466742653e-05, | |
| "loss": 0.2347, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.4762405751358934, | |
| "grad_norm": 5.085255146026611, | |
| "learning_rate": 4.9376190100657495e-05, | |
| "loss": 0.2671, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.4769419603717342, | |
| "grad_norm": 9.267155647277832, | |
| "learning_rate": 4.93729673351338e-05, | |
| "loss": 0.1125, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.477643345607575, | |
| "grad_norm": 6.162266731262207, | |
| "learning_rate": 4.9369736371939426e-05, | |
| "loss": 0.0683, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.4783447308434157, | |
| "grad_norm": 3.6411261558532715, | |
| "learning_rate": 4.936649721216105e-05, | |
| "loss": 0.0356, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.4790461160792565, | |
| "grad_norm": 48.720794677734375, | |
| "learning_rate": 4.9363249856888165e-05, | |
| "loss": 1.0375, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.4797475013150973, | |
| "grad_norm": 0.3436586856842041, | |
| "learning_rate": 4.9359994307212994e-05, | |
| "loss": 0.0027, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.4804488865509381, | |
| "grad_norm": 52.97607421875, | |
| "learning_rate": 4.93567305642305e-05, | |
| "loss": 1.6276, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.4811502717867789, | |
| "grad_norm": 0.09510497748851776, | |
| "learning_rate": 4.935345862903844e-05, | |
| "loss": 0.0007, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.4818516570226197, | |
| "grad_norm": 56.231292724609375, | |
| "learning_rate": 4.93501785027373e-05, | |
| "loss": 2.0342, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.48255304225846046, | |
| "grad_norm": 0.21096964180469513, | |
| "learning_rate": 4.934689018643032e-05, | |
| "loss": 0.0015, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.48325442749430125, | |
| "grad_norm": 0.19893679022789001, | |
| "learning_rate": 4.93435936812235e-05, | |
| "loss": 0.0014, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.48395581273014204, | |
| "grad_norm": 51.99702072143555, | |
| "learning_rate": 4.9340288988225594e-05, | |
| "loss": 1.0089, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.48465719796598283, | |
| "grad_norm": 53.519405364990234, | |
| "learning_rate": 4.9336976108548126e-05, | |
| "loss": 1.7908, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.4853585832018236, | |
| "grad_norm": 106.84630584716797, | |
| "learning_rate": 4.9333655043305354e-05, | |
| "loss": 3.6845, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4860599684376644, | |
| "grad_norm": 0.24259038269519806, | |
| "learning_rate": 4.9330325793614285e-05, | |
| "loss": 0.0019, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.48676135367350515, | |
| "grad_norm": 0.8419340252876282, | |
| "learning_rate": 4.93269883605947e-05, | |
| "loss": 0.0072, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.48746273890934594, | |
| "grad_norm": 1.204994559288025, | |
| "learning_rate": 4.932364274536912e-05, | |
| "loss": 0.0087, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.48816412414518673, | |
| "grad_norm": 1.1827445030212402, | |
| "learning_rate": 4.932028894906279e-05, | |
| "loss": 0.0104, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.4888655093810275, | |
| "grad_norm": 0.5520339608192444, | |
| "learning_rate": 4.931692697280378e-05, | |
| "loss": 0.0045, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4895668946168683, | |
| "grad_norm": 0.9636574983596802, | |
| "learning_rate": 4.931355681772283e-05, | |
| "loss": 0.0079, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.4902682798527091, | |
| "grad_norm": 0.5019407868385315, | |
| "learning_rate": 4.931017848495348e-05, | |
| "loss": 0.004, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4909696650885499, | |
| "grad_norm": 0.0757029727101326, | |
| "learning_rate": 4.9306791975632026e-05, | |
| "loss": 0.0006, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4916710503243907, | |
| "grad_norm": 110.13203430175781, | |
| "learning_rate": 4.9303397290897465e-05, | |
| "loss": 4.3731, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.49237243556023147, | |
| "grad_norm": 0.03152499720454216, | |
| "learning_rate": 4.929999443189159e-05, | |
| "loss": 0.0002, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.49307382079607226, | |
| "grad_norm": 0.029290443286299706, | |
| "learning_rate": 4.929658339975893e-05, | |
| "loss": 0.0002, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.49377520603191305, | |
| "grad_norm": 0.015484875068068504, | |
| "learning_rate": 4.9293164195646754e-05, | |
| "loss": 0.0001, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.49447659126775384, | |
| "grad_norm": 53.61540222167969, | |
| "learning_rate": 4.928973682070509e-05, | |
| "loss": 2.7716, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.4951779765035946, | |
| "grad_norm": 55.3106575012207, | |
| "learning_rate": 4.9286301276086715e-05, | |
| "loss": 2.3966, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.49587936173943536, | |
| "grad_norm": 0.014001145958900452, | |
| "learning_rate": 4.928285756294714e-05, | |
| "loss": 0.0001, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.49658074697527615, | |
| "grad_norm": 0.026434065774083138, | |
| "learning_rate": 4.927940568244465e-05, | |
| "loss": 0.0002, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.49728213221111695, | |
| "grad_norm": 0.04528193548321724, | |
| "learning_rate": 4.927594563574023e-05, | |
| "loss": 0.0003, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.49798351744695774, | |
| "grad_norm": 0.0950818806886673, | |
| "learning_rate": 4.9272477423997666e-05, | |
| "loss": 0.0007, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4986849026827985, | |
| "grad_norm": 0.1095154732465744, | |
| "learning_rate": 4.926900104838346e-05, | |
| "loss": 0.0008, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.4993862879186393, | |
| "grad_norm": 0.13107261061668396, | |
| "learning_rate": 4.9265516510066864e-05, | |
| "loss": 0.001, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.5000876731544801, | |
| "grad_norm": 53.58122634887695, | |
| "learning_rate": 4.926202381021988e-05, | |
| "loss": 1.8605, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.5007890583903208, | |
| "grad_norm": 48.86884689331055, | |
| "learning_rate": 4.925852295001725e-05, | |
| "loss": 1.3723, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.5014904436261617, | |
| "grad_norm": 0.40202492475509644, | |
| "learning_rate": 4.9255013930636465e-05, | |
| "loss": 0.0033, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5021918288620024, | |
| "grad_norm": 43.420196533203125, | |
| "learning_rate": 4.925149675325775e-05, | |
| "loss": 0.9394, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.5028932140978433, | |
| "grad_norm": 56.66259002685547, | |
| "learning_rate": 4.9247971419064097e-05, | |
| "loss": 1.2755, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.503594599333684, | |
| "grad_norm": 3.452730894088745, | |
| "learning_rate": 4.9244437929241215e-05, | |
| "loss": 0.0353, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.5042959845695248, | |
| "grad_norm": 4.454288005828857, | |
| "learning_rate": 4.924089628497757e-05, | |
| "loss": 0.0486, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.5049973698053656, | |
| "grad_norm": 18.87127685546875, | |
| "learning_rate": 4.923734648746437e-05, | |
| "loss": 0.3667, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5056987550412064, | |
| "grad_norm": 11.85842514038086, | |
| "learning_rate": 4.923378853789556e-05, | |
| "loss": 0.3075, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.5064001402770472, | |
| "grad_norm": 9.63955307006836, | |
| "learning_rate": 4.923022243746783e-05, | |
| "loss": 0.1304, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.507101525512888, | |
| "grad_norm": 9.134459495544434, | |
| "learning_rate": 4.922664818738063e-05, | |
| "loss": 0.1346, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.5078029107487287, | |
| "grad_norm": 2.0840797424316406, | |
| "learning_rate": 4.92230657888361e-05, | |
| "loss": 0.2203, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5085042959845695, | |
| "grad_norm": 8.857693672180176, | |
| "learning_rate": 4.921947524303919e-05, | |
| "loss": 0.1232, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5092056812204103, | |
| "grad_norm": 8.82702350616455, | |
| "learning_rate": 4.921587655119753e-05, | |
| "loss": 0.1143, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.5099070664562511, | |
| "grad_norm": 4.017044544219971, | |
| "learning_rate": 4.9212269714521505e-05, | |
| "loss": 0.0433, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.5106084516920919, | |
| "grad_norm": 1.7636412382125854, | |
| "learning_rate": 4.920865473422428e-05, | |
| "loss": 0.0171, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5113098369279326, | |
| "grad_norm": 107.01025390625, | |
| "learning_rate": 4.920503161152171e-05, | |
| "loss": 2.4773, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.5120112221637735, | |
| "grad_norm": 0.35363465547561646, | |
| "learning_rate": 4.9201400347632404e-05, | |
| "loss": 0.0028, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5127126073996142, | |
| "grad_norm": 0.19405458867549896, | |
| "learning_rate": 4.919776094377771e-05, | |
| "loss": 0.0015, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.5134139926354551, | |
| "grad_norm": 55.00040054321289, | |
| "learning_rate": 4.919411340118173e-05, | |
| "loss": 1.9537, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5141153778712958, | |
| "grad_norm": 55.760963439941406, | |
| "learning_rate": 4.919045772107127e-05, | |
| "loss": 2.1316, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.5148167631071366, | |
| "grad_norm": 55.24003601074219, | |
| "learning_rate": 4.91867939046759e-05, | |
| "loss": 2.1231, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5155181483429774, | |
| "grad_norm": 0.03399452567100525, | |
| "learning_rate": 4.918312195322792e-05, | |
| "loss": 0.0002, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5162195335788181, | |
| "grad_norm": 204.8887176513672, | |
| "learning_rate": 4.917944186796235e-05, | |
| "loss": 8.6129, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.516920918814659, | |
| "grad_norm": 124.1363754272461, | |
| "learning_rate": 4.917575365011699e-05, | |
| "loss": 3.9954, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5176223040504997, | |
| "grad_norm": 0.14609888195991516, | |
| "learning_rate": 4.9172057300932306e-05, | |
| "loss": 0.0011, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5183236892863405, | |
| "grad_norm": 0.33696234226226807, | |
| "learning_rate": 4.9168352821651575e-05, | |
| "loss": 0.0027, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5190250745221813, | |
| "grad_norm": 105.28889465332031, | |
| "learning_rate": 4.916464021352075e-05, | |
| "loss": 3.0367, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5197264597580221, | |
| "grad_norm": 1.017066240310669, | |
| "learning_rate": 4.916091947778855e-05, | |
| "loss": 0.0088, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5204278449938629, | |
| "grad_norm": 1.143968939781189, | |
| "learning_rate": 4.91571906157064e-05, | |
| "loss": 0.0102, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5211292302297037, | |
| "grad_norm": 0.7457045912742615, | |
| "learning_rate": 4.91534536285285e-05, | |
| "loss": 0.0063, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.5218306154655444, | |
| "grad_norm": 0.700978696346283, | |
| "learning_rate": 4.9149708517511743e-05, | |
| "loss": 0.0058, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5225320007013853, | |
| "grad_norm": 0.38463106751441956, | |
| "learning_rate": 4.914595528391578e-05, | |
| "loss": 0.0031, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.523233385937226, | |
| "grad_norm": 0.14777745306491852, | |
| "learning_rate": 4.914219392900297e-05, | |
| "loss": 0.0011, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.5239347711730669, | |
| "grad_norm": 0.07884679734706879, | |
| "learning_rate": 4.913842445403843e-05, | |
| "loss": 0.0006, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.5246361564089076, | |
| "grad_norm": 61.55155563354492, | |
| "learning_rate": 4.913464686028999e-05, | |
| "loss": 1.992, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5253375416447483, | |
| "grad_norm": 0.027440281584858894, | |
| "learning_rate": 4.913086114902822e-05, | |
| "loss": 0.0002, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5260389268805892, | |
| "grad_norm": 55.66230010986328, | |
| "learning_rate": 4.912706732152641e-05, | |
| "loss": 2.231, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5267403121164299, | |
| "grad_norm": 54.453102111816406, | |
| "learning_rate": 4.912326537906059e-05, | |
| "loss": 2.0983, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.5274416973522708, | |
| "grad_norm": 0.0637647733092308, | |
| "learning_rate": 4.911945532290951e-05, | |
| "loss": 0.0005, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5281430825881115, | |
| "grad_norm": 0.13056063652038574, | |
| "learning_rate": 4.9115637154354674e-05, | |
| "loss": 0.001, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.5288444678239523, | |
| "grad_norm": 52.72209167480469, | |
| "learning_rate": 4.911181087468027e-05, | |
| "loss": 1.5666, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5295458530597931, | |
| "grad_norm": 48.385467529296875, | |
| "learning_rate": 4.9107976485173255e-05, | |
| "loss": 1.2941, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5302472382956339, | |
| "grad_norm": 0.5080452561378479, | |
| "learning_rate": 4.91041339871233e-05, | |
| "loss": 0.0041, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5309486235314747, | |
| "grad_norm": 80.44468688964844, | |
| "learning_rate": 4.910028338182279e-05, | |
| "loss": 1.6558, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5316500087673155, | |
| "grad_norm": 1.59328031539917, | |
| "learning_rate": 4.909642467056685e-05, | |
| "loss": 0.0141, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5323513940031562, | |
| "grad_norm": 2.678886651992798, | |
| "learning_rate": 4.9092557854653344e-05, | |
| "loss": 0.0255, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.533052779238997, | |
| "grad_norm": 2.742159366607666, | |
| "learning_rate": 4.908868293538282e-05, | |
| "loss": 0.0271, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5337541644748378, | |
| "grad_norm": 2.275885820388794, | |
| "learning_rate": 4.908479991405861e-05, | |
| "loss": 0.0215, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.5344555497106785, | |
| "grad_norm": 1.4059484004974365, | |
| "learning_rate": 4.9080908791986724e-05, | |
| "loss": 0.0126, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.5351569349465194, | |
| "grad_norm": 0.7012129426002502, | |
| "learning_rate": 4.907700957047592e-05, | |
| "loss": 0.0058, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.5358583201823601, | |
| "grad_norm": 0.2713419497013092, | |
| "learning_rate": 4.9073102250837655e-05, | |
| "loss": 0.0021, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.536559705418201, | |
| "grad_norm": 0.13228341937065125, | |
| "learning_rate": 4.9069186834386145e-05, | |
| "loss": 0.001, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.5372610906540417, | |
| "grad_norm": 0.06449166685342789, | |
| "learning_rate": 4.90652633224383e-05, | |
| "loss": 0.0005, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.5379624758898826, | |
| "grad_norm": 0.017581632360816002, | |
| "learning_rate": 4.9061331716313783e-05, | |
| "loss": 0.0001, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.5386638611257233, | |
| "grad_norm": 0.016885504126548767, | |
| "learning_rate": 4.905739201733495e-05, | |
| "loss": 0.0001, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.5393652463615641, | |
| "grad_norm": 0.005497175268828869, | |
| "learning_rate": 4.9053444226826886e-05, | |
| "loss": 0.0, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.5400666315974049, | |
| "grad_norm": 58.73657989501953, | |
| "learning_rate": 4.90494883461174e-05, | |
| "loss": 3.1923, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5407680168332457, | |
| "grad_norm": 0.0022041036281734705, | |
| "learning_rate": 4.9045524376537034e-05, | |
| "loss": 0.0, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.5414694020690864, | |
| "grad_norm": 52.654136657714844, | |
| "learning_rate": 4.9041552319419035e-05, | |
| "loss": 3.097, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5421707873049272, | |
| "grad_norm": 0.0026611709035933018, | |
| "learning_rate": 4.903757217609938e-05, | |
| "loss": 0.0, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.542872172540768, | |
| "grad_norm": 0.0028983913362026215, | |
| "learning_rate": 4.9033583947916765e-05, | |
| "loss": 0.0, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5435735577766088, | |
| "grad_norm": 105.84739685058594, | |
| "learning_rate": 4.902958763621258e-05, | |
| "loss": 5.9716, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5442749430124496, | |
| "grad_norm": 54.44437789916992, | |
| "learning_rate": 4.902558324233098e-05, | |
| "loss": 2.7511, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.5449763282482903, | |
| "grad_norm": 0.009829580783843994, | |
| "learning_rate": 4.9021570767618794e-05, | |
| "loss": 0.0001, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.5456777134841312, | |
| "grad_norm": 53.416908264160156, | |
| "learning_rate": 4.90175502134256e-05, | |
| "loss": 2.337, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.5463790987199719, | |
| "grad_norm": 0.050978660583496094, | |
| "learning_rate": 4.9013521581103686e-05, | |
| "loss": 0.0004, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.5470804839558128, | |
| "grad_norm": 0.11917990446090698, | |
| "learning_rate": 4.900948487200804e-05, | |
| "loss": 0.0009, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5477818691916535, | |
| "grad_norm": 48.541015625, | |
| "learning_rate": 4.900544008749639e-05, | |
| "loss": 1.2937, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.5484832544274943, | |
| "grad_norm": 0.3613051474094391, | |
| "learning_rate": 4.900138722892917e-05, | |
| "loss": 0.003, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5491846396633351, | |
| "grad_norm": 35.976261138916016, | |
| "learning_rate": 4.899732629766953e-05, | |
| "loss": 0.7424, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.5498860248991758, | |
| "grad_norm": 1.3365657329559326, | |
| "learning_rate": 4.899325729508333e-05, | |
| "loss": 0.012, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.5505874101350167, | |
| "grad_norm": 2.2534306049346924, | |
| "learning_rate": 4.898918022253916e-05, | |
| "loss": 0.0228, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5512887953708574, | |
| "grad_norm": 24.77885627746582, | |
| "learning_rate": 4.898509508140829e-05, | |
| "loss": 0.4601, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.5519901806066982, | |
| "grad_norm": 4.28820276260376, | |
| "learning_rate": 4.898100187306475e-05, | |
| "loss": 0.0482, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.552691565842539, | |
| "grad_norm": 4.493249416351318, | |
| "learning_rate": 4.897690059888524e-05, | |
| "loss": 0.0503, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5533929510783798, | |
| "grad_norm": 3.269726037979126, | |
| "learning_rate": 4.897279126024923e-05, | |
| "loss": 0.0349, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5540943363142206, | |
| "grad_norm": 3.0403542518615723, | |
| "learning_rate": 4.896867385853883e-05, | |
| "loss": 0.0316, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5547957215500614, | |
| "grad_norm": 25.840961456298828, | |
| "learning_rate": 4.896454839513892e-05, | |
| "loss": 0.4405, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5554971067859021, | |
| "grad_norm": 72.00930786132812, | |
| "learning_rate": 4.8960414871437055e-05, | |
| "loss": 1.4578, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.556198492021743, | |
| "grad_norm": 0.7836804389953613, | |
| "learning_rate": 4.895627328882352e-05, | |
| "loss": 0.0068, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.5568998772575837, | |
| "grad_norm": 0.7037481069564819, | |
| "learning_rate": 4.895212364869132e-05, | |
| "loss": 0.0061, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5576012624934246, | |
| "grad_norm": 0.4775547981262207, | |
| "learning_rate": 4.894796595243614e-05, | |
| "loss": 0.004, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5583026477292653, | |
| "grad_norm": 0.4537871778011322, | |
| "learning_rate": 4.894380020145639e-05, | |
| "loss": 0.0038, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.559004032965106, | |
| "grad_norm": 0.2530267536640167, | |
| "learning_rate": 4.8939626397153196e-05, | |
| "loss": 0.0021, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5597054182009469, | |
| "grad_norm": 0.08755356073379517, | |
| "learning_rate": 4.8935444540930385e-05, | |
| "loss": 0.0007, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.5604068034367876, | |
| "grad_norm": 0.058386508375406265, | |
| "learning_rate": 4.8931254634194504e-05, | |
| "loss": 0.0004, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5611081886726285, | |
| "grad_norm": 0.020555956289172173, | |
| "learning_rate": 4.892705667835479e-05, | |
| "loss": 0.0002, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5618095739084692, | |
| "grad_norm": 0.023624928668141365, | |
| "learning_rate": 4.892285067482319e-05, | |
| "loss": 0.0002, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.56251095914431, | |
| "grad_norm": 54.471675872802734, | |
| "learning_rate": 4.891863662501437e-05, | |
| "loss": 2.8657, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.5632123443801508, | |
| "grad_norm": 0.005444019101560116, | |
| "learning_rate": 4.8914414530345684e-05, | |
| "loss": 0.0, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.5639137296159916, | |
| "grad_norm": 0.008992165327072144, | |
| "learning_rate": 4.891018439223722e-05, | |
| "loss": 0.0001, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5646151148518324, | |
| "grad_norm": 0.0062455967999994755, | |
| "learning_rate": 4.890594621211174e-05, | |
| "loss": 0.0, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5653165000876732, | |
| "grad_norm": 0.006425914354622364, | |
| "learning_rate": 4.8901699991394735e-05, | |
| "loss": 0.0, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5660178853235139, | |
| "grad_norm": 0.008212440647184849, | |
| "learning_rate": 4.8897445731514384e-05, | |
| "loss": 0.0001, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5667192705593547, | |
| "grad_norm": 0.004950184375047684, | |
| "learning_rate": 4.889318343390158e-05, | |
| "loss": 0.0, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.5674206557951955, | |
| "grad_norm": 0.0056209079921245575, | |
| "learning_rate": 4.8888913099989916e-05, | |
| "loss": 0.0, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5681220410310363, | |
| "grad_norm": 0.0028246166184544563, | |
| "learning_rate": 4.888463473121568e-05, | |
| "loss": 0.0, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5688234262668771, | |
| "grad_norm": 53.988868713378906, | |
| "learning_rate": 4.888034832901788e-05, | |
| "loss": 2.7359, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.5695248115027178, | |
| "grad_norm": 0.004621135536581278, | |
| "learning_rate": 4.8876053894838215e-05, | |
| "loss": 0.0, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.5702261967385587, | |
| "grad_norm": 0.011877781711518764, | |
| "learning_rate": 4.887175143012108e-05, | |
| "loss": 0.0001, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.5709275819743994, | |
| "grad_norm": 65.13773345947266, | |
| "learning_rate": 4.886744093631358e-05, | |
| "loss": 3.5609, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5716289672102403, | |
| "grad_norm": 161.86300659179688, | |
| "learning_rate": 4.886312241486554e-05, | |
| "loss": 7.2583, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.572330352446081, | |
| "grad_norm": 0.032078277319669724, | |
| "learning_rate": 4.8858795867229425e-05, | |
| "loss": 0.0002, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.5730317376819218, | |
| "grad_norm": 113.32162475585938, | |
| "learning_rate": 4.8854461294860454e-05, | |
| "loss": 4.1658, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5737331229177626, | |
| "grad_norm": 0.17656919360160828, | |
| "learning_rate": 4.885011869921654e-05, | |
| "loss": 0.0014, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.5744345081536034, | |
| "grad_norm": 0.2188340574502945, | |
| "learning_rate": 4.884576808175828e-05, | |
| "loss": 0.0018, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5751358933894442, | |
| "grad_norm": 0.38712555170059204, | |
| "learning_rate": 4.884140944394896e-05, | |
| "loss": 0.0033, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5758372786252849, | |
| "grad_norm": 0.7210093140602112, | |
| "learning_rate": 4.883704278725458e-05, | |
| "loss": 0.0062, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.5765386638611257, | |
| "grad_norm": 0.7694672346115112, | |
| "learning_rate": 4.883266811314385e-05, | |
| "loss": 0.0069, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5772400490969665, | |
| "grad_norm": 0.6353862881660461, | |
| "learning_rate": 4.8828285423088146e-05, | |
| "loss": 0.0056, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.5779414343328073, | |
| "grad_norm": 87.86915588378906, | |
| "learning_rate": 4.8823894718561545e-05, | |
| "loss": 2.1381, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.578642819568648, | |
| "grad_norm": 0.42429307103157043, | |
| "learning_rate": 4.8819496001040846e-05, | |
| "loss": 0.0036, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5793442048044889, | |
| "grad_norm": 0.40686339139938354, | |
| "learning_rate": 4.881508927200552e-05, | |
| "loss": 0.0034, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5800455900403296, | |
| "grad_norm": 0.27934542298316956, | |
| "learning_rate": 4.881067453293774e-05, | |
| "loss": 0.0022, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.5807469752761705, | |
| "grad_norm": 44.31092834472656, | |
| "learning_rate": 4.8806251785322355e-05, | |
| "loss": 1.1003, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5814483605120112, | |
| "grad_norm": 0.17587609589099884, | |
| "learning_rate": 4.880182103064695e-05, | |
| "loss": 0.0014, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.582149745747852, | |
| "grad_norm": 0.2651273012161255, | |
| "learning_rate": 4.879738227040176e-05, | |
| "loss": 0.0021, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5828511309836928, | |
| "grad_norm": 149.13035583496094, | |
| "learning_rate": 4.879293550607974e-05, | |
| "loss": 2.9684, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.5835525162195335, | |
| "grad_norm": 0.17069180309772491, | |
| "learning_rate": 4.878848073917651e-05, | |
| "loss": 0.0014, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5842539014553744, | |
| "grad_norm": 0.24799254536628723, | |
| "learning_rate": 4.878401797119042e-05, | |
| "loss": 0.002, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.5849552866912151, | |
| "grad_norm": 0.15171018242835999, | |
| "learning_rate": 4.8779547203622477e-05, | |
| "loss": 0.0012, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.585656671927056, | |
| "grad_norm": 45.8453254699707, | |
| "learning_rate": 4.877506843797639e-05, | |
| "loss": 1.2116, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5863580571628967, | |
| "grad_norm": 70.00684356689453, | |
| "learning_rate": 4.877058167575857e-05, | |
| "loss": 2.1496, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5870594423987375, | |
| "grad_norm": 43.86625671386719, | |
| "learning_rate": 4.876608691847809e-05, | |
| "loss": 1.077, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5877608276345783, | |
| "grad_norm": 39.75180435180664, | |
| "learning_rate": 4.876158416764675e-05, | |
| "loss": 0.8959, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.5884622128704191, | |
| "grad_norm": 78.85111999511719, | |
| "learning_rate": 4.8757073424779e-05, | |
| "loss": 1.609, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.5891635981062598, | |
| "grad_norm": 2.562579393386841, | |
| "learning_rate": 4.8752554691392004e-05, | |
| "loss": 0.0262, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5898649833421007, | |
| "grad_norm": 3.174607276916504, | |
| "learning_rate": 4.8748027969005606e-05, | |
| "loss": 0.0331, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5905663685779414, | |
| "grad_norm": 24.42951202392578, | |
| "learning_rate": 4.874349325914232e-05, | |
| "loss": 0.494, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5912677538137823, | |
| "grad_norm": 5.76008415222168, | |
| "learning_rate": 4.87389505633274e-05, | |
| "loss": 0.069, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.591969139049623, | |
| "grad_norm": 16.484464645385742, | |
| "learning_rate": 4.873439988308871e-05, | |
| "loss": 0.3677, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.5926705242854637, | |
| "grad_norm": 8.441133499145508, | |
| "learning_rate": 4.872984121995686e-05, | |
| "loss": 0.1122, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5933719095213046, | |
| "grad_norm": 7.171937942504883, | |
| "learning_rate": 4.8725274575465106e-05, | |
| "loss": 0.093, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5940732947571453, | |
| "grad_norm": 7.630092620849609, | |
| "learning_rate": 4.872069995114943e-05, | |
| "loss": 0.0997, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.5947746799929862, | |
| "grad_norm": 5.521340370178223, | |
| "learning_rate": 4.871611734854845e-05, | |
| "loss": 0.065, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.5954760652288269, | |
| "grad_norm": 40.95110321044922, | |
| "learning_rate": 4.871152676920351e-05, | |
| "loss": 0.6631, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5961774504646677, | |
| "grad_norm": 2.602055311203003, | |
| "learning_rate": 4.8706928214658596e-05, | |
| "loss": 0.0269, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5968788357005085, | |
| "grad_norm": 30.752164840698242, | |
| "learning_rate": 4.8702321686460424e-05, | |
| "loss": 0.5944, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.5975802209363493, | |
| "grad_norm": 34.97576904296875, | |
| "learning_rate": 4.8697707186158345e-05, | |
| "loss": 0.7216, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5982816061721901, | |
| "grad_norm": 1.3377373218536377, | |
| "learning_rate": 4.869308471530443e-05, | |
| "loss": 0.0126, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.5989829914080309, | |
| "grad_norm": 33.73777389526367, | |
| "learning_rate": 4.868845427545341e-05, | |
| "loss": 0.6655, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5996843766438716, | |
| "grad_norm": 27.722604751586914, | |
| "learning_rate": 4.868381586816268e-05, | |
| "loss": 0.5284, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6003857618797125, | |
| "grad_norm": 39.0617561340332, | |
| "learning_rate": 4.867916949499236e-05, | |
| "loss": 0.9141, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.6010871471155532, | |
| "grad_norm": 25.998207092285156, | |
| "learning_rate": 4.8674515157505205e-05, | |
| "loss": 0.4843, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.601788532351394, | |
| "grad_norm": 70.33052062988281, | |
| "learning_rate": 4.8669852857266686e-05, | |
| "loss": 1.1847, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.6024899175872348, | |
| "grad_norm": 2.859663724899292, | |
| "learning_rate": 4.866518259584492e-05, | |
| "loss": 0.1724, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.6031913028230755, | |
| "grad_norm": 7.6815185546875, | |
| "learning_rate": 4.866050437481072e-05, | |
| "loss": 0.2053, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6038926880589164, | |
| "grad_norm": 15.574623107910156, | |
| "learning_rate": 4.8655818195737576e-05, | |
| "loss": 0.2786, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.6045940732947571, | |
| "grad_norm": 17.501440048217773, | |
| "learning_rate": 4.865112406020164e-05, | |
| "loss": 0.3404, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.605295458530598, | |
| "grad_norm": 18.48247718811035, | |
| "learning_rate": 4.8646421969781757e-05, | |
| "loss": 0.3676, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.6059968437664387, | |
| "grad_norm": 15.630892753601074, | |
| "learning_rate": 4.864171192605944e-05, | |
| "loss": 0.2856, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.6066982290022795, | |
| "grad_norm": 15.12441349029541, | |
| "learning_rate": 4.863699393061888e-05, | |
| "loss": 0.2728, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6073996142381203, | |
| "grad_norm": 10.427019119262695, | |
| "learning_rate": 4.863226798504694e-05, | |
| "loss": 0.308, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.6081009994739611, | |
| "grad_norm": 10.065474510192871, | |
| "learning_rate": 4.8627534090933156e-05, | |
| "loss": 0.1617, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.6088023847098019, | |
| "grad_norm": 11.481346130371094, | |
| "learning_rate": 4.862279224986973e-05, | |
| "loss": 0.2136, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.6095037699456426, | |
| "grad_norm": 5.78438663482666, | |
| "learning_rate": 4.8618042463451566e-05, | |
| "loss": 0.0726, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.6102051551814834, | |
| "grad_norm": 39.987937927246094, | |
| "learning_rate": 4.8613284733276206e-05, | |
| "loss": 0.8901, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6109065404173242, | |
| "grad_norm": 3.24749755859375, | |
| "learning_rate": 4.860851906094388e-05, | |
| "loss": 0.0369, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.611607925653165, | |
| "grad_norm": 2.4093921184539795, | |
| "learning_rate": 4.860374544805749e-05, | |
| "loss": 0.0254, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.6123093108890058, | |
| "grad_norm": 0.970287024974823, | |
| "learning_rate": 4.85989638962226e-05, | |
| "loss": 0.0092, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.6130106961248466, | |
| "grad_norm": 0.4123772084712982, | |
| "learning_rate": 4.859417440704746e-05, | |
| "loss": 0.0036, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.6137120813606873, | |
| "grad_norm": 0.14014503359794617, | |
| "learning_rate": 4.8589376982142977e-05, | |
| "loss": 0.0011, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6144134665965282, | |
| "grad_norm": 0.05853430926799774, | |
| "learning_rate": 4.858457162312272e-05, | |
| "loss": 0.0005, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.6151148518323689, | |
| "grad_norm": 104.4763412475586, | |
| "learning_rate": 4.857975833160295e-05, | |
| "loss": 4.5228, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.6158162370682098, | |
| "grad_norm": 60.22207260131836, | |
| "learning_rate": 4.857493710920258e-05, | |
| "loss": 3.2127, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.6165176223040505, | |
| "grad_norm": 0.014606939628720284, | |
| "learning_rate": 4.8570107957543185e-05, | |
| "loss": 0.0001, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.6172190075398913, | |
| "grad_norm": 52.49748229980469, | |
| "learning_rate": 4.856527087824902e-05, | |
| "loss": 2.5181, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6179203927757321, | |
| "grad_norm": 60.33863067626953, | |
| "learning_rate": 4.8560425872947004e-05, | |
| "loss": 3.4192, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.6186217780115728, | |
| "grad_norm": 0.017534319311380386, | |
| "learning_rate": 4.8555572943266715e-05, | |
| "loss": 0.0001, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.6193231632474137, | |
| "grad_norm": 0.03253171965479851, | |
| "learning_rate": 4.85507120908404e-05, | |
| "loss": 0.0002, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.6200245484832544, | |
| "grad_norm": 100.64522552490234, | |
| "learning_rate": 4.8545843317302983e-05, | |
| "loss": 3.6464, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6207259337190952, | |
| "grad_norm": 52.276100158691406, | |
| "learning_rate": 4.854096662429203e-05, | |
| "loss": 1.5974, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.621427318954936, | |
| "grad_norm": 0.26515480875968933, | |
| "learning_rate": 4.853608201344778e-05, | |
| "loss": 0.0022, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6221287041907768, | |
| "grad_norm": 0.4160686731338501, | |
| "learning_rate": 4.853118948641314e-05, | |
| "loss": 0.0036, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6228300894266176, | |
| "grad_norm": 40.08952331542969, | |
| "learning_rate": 4.852628904483367e-05, | |
| "loss": 0.9215, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6235314746624584, | |
| "grad_norm": 35.90172576904297, | |
| "learning_rate": 4.852138069035761e-05, | |
| "loss": 0.7604, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.6242328598982991, | |
| "grad_norm": 2.5494091510772705, | |
| "learning_rate": 4.851646442463585e-05, | |
| "loss": 0.0269, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.62493424513414, | |
| "grad_norm": 4.3015456199646, | |
| "learning_rate": 4.851154024932193e-05, | |
| "loss": 0.0495, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.6256356303699807, | |
| "grad_norm": 3.570831537246704, | |
| "learning_rate": 4.8506608166072054e-05, | |
| "loss": 0.0385, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.6263370156058214, | |
| "grad_norm": 3.575366735458374, | |
| "learning_rate": 4.8501668176545115e-05, | |
| "loss": 0.0397, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.6270384008416623, | |
| "grad_norm": 2.7419331073760986, | |
| "learning_rate": 4.849672028240262e-05, | |
| "loss": 0.0288, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.627739786077503, | |
| "grad_norm": 41.37546157836914, | |
| "learning_rate": 4.849176448530878e-05, | |
| "loss": 0.8202, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.6284411713133439, | |
| "grad_norm": 34.27451705932617, | |
| "learning_rate": 4.8486800786930415e-05, | |
| "loss": 0.7397, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.6291425565491846, | |
| "grad_norm": 1.0924946069717407, | |
| "learning_rate": 4.848182918893705e-05, | |
| "loss": 0.0104, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.6298439417850255, | |
| "grad_norm": 0.9149582386016846, | |
| "learning_rate": 4.8476849693000836e-05, | |
| "loss": 0.0085, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.6305453270208662, | |
| "grad_norm": 0.4877033829689026, | |
| "learning_rate": 4.8471862300796594e-05, | |
| "loss": 0.0043, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.631246712256707, | |
| "grad_norm": 50.69717788696289, | |
| "learning_rate": 4.846686701400179e-05, | |
| "loss": 1.3647, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6319480974925478, | |
| "grad_norm": 45.04077911376953, | |
| "learning_rate": 4.846186383429657e-05, | |
| "loss": 1.366, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.6326494827283886, | |
| "grad_norm": 47.341819763183594, | |
| "learning_rate": 4.8456852763363706e-05, | |
| "loss": 1.2603, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.6333508679642293, | |
| "grad_norm": 44.485530853271484, | |
| "learning_rate": 4.8451833802888625e-05, | |
| "loss": 1.3248, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.6340522532000702, | |
| "grad_norm": 0.46667349338531494, | |
| "learning_rate": 4.844680695455943e-05, | |
| "loss": 0.0041, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.6347536384359109, | |
| "grad_norm": 0.7628422379493713, | |
| "learning_rate": 4.844177222006687e-05, | |
| "loss": 0.0067, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.6354550236717517, | |
| "grad_norm": 0.7451947927474976, | |
| "learning_rate": 4.8436729601104327e-05, | |
| "loss": 0.0069, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.6361564089075925, | |
| "grad_norm": 0.6644980907440186, | |
| "learning_rate": 4.843167909936786e-05, | |
| "loss": 0.006, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.6368577941434332, | |
| "grad_norm": 41.53312683105469, | |
| "learning_rate": 4.8426620716556164e-05, | |
| "loss": 1.1098, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.6375591793792741, | |
| "grad_norm": 0.5270825624465942, | |
| "learning_rate": 4.8421554454370596e-05, | |
| "loss": 0.0046, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.6382605646151148, | |
| "grad_norm": 90.92555236816406, | |
| "learning_rate": 4.841648031451514e-05, | |
| "loss": 2.5683, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6389619498509557, | |
| "grad_norm": 46.597808837890625, | |
| "learning_rate": 4.841139829869646e-05, | |
| "loss": 1.4381, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.6396633350867964, | |
| "grad_norm": 0.9347837567329407, | |
| "learning_rate": 4.8406308408623865e-05, | |
| "loss": 0.0083, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.6403647203226372, | |
| "grad_norm": 0.8483829498291016, | |
| "learning_rate": 4.840121064600928e-05, | |
| "loss": 0.0077, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.641066105558478, | |
| "grad_norm": 37.393951416015625, | |
| "learning_rate": 4.8396105012567315e-05, | |
| "loss": 0.8573, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.6417674907943188, | |
| "grad_norm": 69.15557098388672, | |
| "learning_rate": 4.839099151001521e-05, | |
| "loss": 1.491, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.6424688760301596, | |
| "grad_norm": 1.3047503232955933, | |
| "learning_rate": 4.838587014007284e-05, | |
| "loss": 0.0126, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.6431702612660003, | |
| "grad_norm": 1.462365746498108, | |
| "learning_rate": 4.838074090446276e-05, | |
| "loss": 0.0144, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.6438716465018411, | |
| "grad_norm": 1.4955204725265503, | |
| "learning_rate": 4.8375603804910154e-05, | |
| "loss": 0.0146, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.6445730317376819, | |
| "grad_norm": 1.0593093633651733, | |
| "learning_rate": 4.837045884314283e-05, | |
| "loss": 0.0101, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.6452744169735227, | |
| "grad_norm": 0.5433834791183472, | |
| "learning_rate": 4.836530602089127e-05, | |
| "loss": 0.0049, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6459758022093635, | |
| "grad_norm": 44.653175354003906, | |
| "learning_rate": 4.836014533988857e-05, | |
| "loss": 1.2462, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.6466771874452043, | |
| "grad_norm": 0.15624181926250458, | |
| "learning_rate": 4.835497680187052e-05, | |
| "loss": 0.0013, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.647378572681045, | |
| "grad_norm": 0.13333559036254883, | |
| "learning_rate": 4.8349800408575506e-05, | |
| "loss": 0.0011, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.6480799579168859, | |
| "grad_norm": 0.08697400987148285, | |
| "learning_rate": 4.834461616174456e-05, | |
| "loss": 0.0007, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.6487813431527266, | |
| "grad_norm": 49.62416076660156, | |
| "learning_rate": 4.8339424063121375e-05, | |
| "loss": 2.1264, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.6494827283885675, | |
| "grad_norm": 47.912906646728516, | |
| "learning_rate": 4.833422411445228e-05, | |
| "loss": 1.8048, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.6501841136244082, | |
| "grad_norm": 0.0816919356584549, | |
| "learning_rate": 4.832901631748622e-05, | |
| "loss": 0.0007, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.650885498860249, | |
| "grad_norm": 49.09192657470703, | |
| "learning_rate": 4.8323800673974826e-05, | |
| "loss": 1.7493, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.6515868840960898, | |
| "grad_norm": 98.90199279785156, | |
| "learning_rate": 4.8318577185672325e-05, | |
| "loss": 3.7461, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.6522882693319305, | |
| "grad_norm": 0.22995713353157043, | |
| "learning_rate": 4.831334585433561e-05, | |
| "loss": 0.002, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6529896545677714, | |
| "grad_norm": 0.4566737413406372, | |
| "learning_rate": 4.830810668172419e-05, | |
| "loss": 0.0041, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.6536910398036121, | |
| "grad_norm": 0.5328819155693054, | |
| "learning_rate": 4.830285966960023e-05, | |
| "loss": 0.0048, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.6543924250394529, | |
| "grad_norm": 0.6611910462379456, | |
| "learning_rate": 4.829760481972852e-05, | |
| "loss": 0.0061, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.6550938102752937, | |
| "grad_norm": 76.24475860595703, | |
| "learning_rate": 4.82923421338765e-05, | |
| "loss": 1.7814, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.6557951955111345, | |
| "grad_norm": 0.7961150407791138, | |
| "learning_rate": 4.828707161381423e-05, | |
| "loss": 0.0075, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.6564965807469753, | |
| "grad_norm": 33.920326232910156, | |
| "learning_rate": 4.8281793261314404e-05, | |
| "loss": 0.7362, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.6571979659828161, | |
| "grad_norm": 0.9916706681251526, | |
| "learning_rate": 4.827650707815236e-05, | |
| "loss": 0.0096, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.6578993512186568, | |
| "grad_norm": 0.7823458909988403, | |
| "learning_rate": 4.827121306610608e-05, | |
| "loss": 0.0074, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.6586007364544977, | |
| "grad_norm": 0.8576016426086426, | |
| "learning_rate": 4.8265911226956154e-05, | |
| "loss": 0.008, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.6593021216903384, | |
| "grad_norm": 0.5242276787757874, | |
| "learning_rate": 4.826060156248582e-05, | |
| "loss": 0.0047, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6600035069261792, | |
| "grad_norm": 0.31952106952667236, | |
| "learning_rate": 4.825528407448096e-05, | |
| "loss": 0.0028, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.66070489216202, | |
| "grad_norm": 0.17970554530620575, | |
| "learning_rate": 4.824995876473004e-05, | |
| "loss": 0.0015, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.6614062773978607, | |
| "grad_norm": 0.10021814703941345, | |
| "learning_rate": 4.824462563502422e-05, | |
| "loss": 0.0008, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.6621076626337016, | |
| "grad_norm": 146.59963989257812, | |
| "learning_rate": 4.823928468715724e-05, | |
| "loss": 5.6442, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.6628090478695423, | |
| "grad_norm": 0.060028087347745895, | |
| "learning_rate": 4.82339359229255e-05, | |
| "loss": 0.0005, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6635104331053832, | |
| "grad_norm": 0.05350363254547119, | |
| "learning_rate": 4.822857934412802e-05, | |
| "loss": 0.0004, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.6642118183412239, | |
| "grad_norm": 197.06178283691406, | |
| "learning_rate": 4.822321495256644e-05, | |
| "loss": 8.2989, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.6649132035770647, | |
| "grad_norm": 0.047274697571992874, | |
| "learning_rate": 4.821784275004503e-05, | |
| "loss": 0.0004, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.6656145888129055, | |
| "grad_norm": 0.037716954946517944, | |
| "learning_rate": 4.82124627383707e-05, | |
| "loss": 0.0003, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.6663159740487463, | |
| "grad_norm": 48.574623107910156, | |
| "learning_rate": 4.8207074919352976e-05, | |
| "loss": 1.7823, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.667017359284587, | |
| "grad_norm": 0.06911084055900574, | |
| "learning_rate": 4.8201679294804e-05, | |
| "loss": 0.0006, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.6677187445204279, | |
| "grad_norm": 0.07155507057905197, | |
| "learning_rate": 4.8196275866538574e-05, | |
| "loss": 0.0006, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.6684201297562686, | |
| "grad_norm": 0.10516396909952164, | |
| "learning_rate": 4.8190864636374086e-05, | |
| "loss": 0.0009, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.6691215149921094, | |
| "grad_norm": 46.50364685058594, | |
| "learning_rate": 4.818544560613056e-05, | |
| "loss": 1.4989, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.6698229002279502, | |
| "grad_norm": 0.2198321670293808, | |
| "learning_rate": 4.8180018777630664e-05, | |
| "loss": 0.0019, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.670524285463791, | |
| "grad_norm": 87.00233459472656, | |
| "learning_rate": 4.8174584152699665e-05, | |
| "loss": 2.3952, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.6712256706996318, | |
| "grad_norm": 39.932960510253906, | |
| "learning_rate": 4.816914173316545e-05, | |
| "loss": 1.0167, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.6719270559354725, | |
| "grad_norm": 0.8517984747886658, | |
| "learning_rate": 4.816369152085855e-05, | |
| "loss": 0.0082, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.6726284411713134, | |
| "grad_norm": 1.0399830341339111, | |
| "learning_rate": 4.8158233517612096e-05, | |
| "loss": 0.0102, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.6733298264071541, | |
| "grad_norm": 1.2987735271453857, | |
| "learning_rate": 4.815276772526186e-05, | |
| "loss": 0.013, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.674031211642995, | |
| "grad_norm": 1.1722874641418457, | |
| "learning_rate": 4.8147294145646207e-05, | |
| "loss": 0.0119, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.6747325968788357, | |
| "grad_norm": 30.018028259277344, | |
| "learning_rate": 4.8141812780606137e-05, | |
| "loss": 0.606, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.6754339821146765, | |
| "grad_norm": 1.0533334016799927, | |
| "learning_rate": 4.813632363198527e-05, | |
| "loss": 0.0105, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.6761353673505173, | |
| "grad_norm": 52.103050231933594, | |
| "learning_rate": 4.813082670162986e-05, | |
| "loss": 1.5312, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.676836752586358, | |
| "grad_norm": 36.39682388305664, | |
| "learning_rate": 4.812532199138873e-05, | |
| "loss": 0.8569, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6775381378221988, | |
| "grad_norm": 84.3869400024414, | |
| "learning_rate": 4.8119809503113363e-05, | |
| "loss": 2.1346, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.6782395230580396, | |
| "grad_norm": 0.8813936710357666, | |
| "learning_rate": 4.811428923865785e-05, | |
| "loss": 0.0085, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.6789409082938804, | |
| "grad_norm": 1.1427128314971924, | |
| "learning_rate": 4.810876119987888e-05, | |
| "loss": 0.0113, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.6796422935297212, | |
| "grad_norm": 30.400836944580078, | |
| "learning_rate": 4.810322538863578e-05, | |
| "loss": 0.6341, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.680343678765562, | |
| "grad_norm": 77.0494155883789, | |
| "learning_rate": 4.8097681806790466e-05, | |
| "loss": 1.7974, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6810450640014027, | |
| "grad_norm": 29.669004440307617, | |
| "learning_rate": 4.8092130456207495e-05, | |
| "loss": 0.5951, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.6817464492372436, | |
| "grad_norm": 2.0304183959960938, | |
| "learning_rate": 4.808657133875402e-05, | |
| "loss": 0.0214, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.6824478344730843, | |
| "grad_norm": 22.323291778564453, | |
| "learning_rate": 4.808100445629981e-05, | |
| "loss": 0.4383, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.6831492197089252, | |
| "grad_norm": 11.758098602294922, | |
| "learning_rate": 4.807542981071724e-05, | |
| "loss": 0.245, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.6838506049447659, | |
| "grad_norm": 9.657792091369629, | |
| "learning_rate": 4.80698474038813e-05, | |
| "loss": 0.2276, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6845519901806068, | |
| "grad_norm": 0.9010874629020691, | |
| "learning_rate": 4.806425723766961e-05, | |
| "loss": 0.1934, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.6852533754164475, | |
| "grad_norm": 13.536153793334961, | |
| "learning_rate": 4.805865931396236e-05, | |
| "loss": 0.2465, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.6859547606522882, | |
| "grad_norm": 12.555607795715332, | |
| "learning_rate": 4.8053053634642384e-05, | |
| "loss": 0.2135, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.6866561458881291, | |
| "grad_norm": 15.943707466125488, | |
| "learning_rate": 4.804744020159511e-05, | |
| "loss": 0.3215, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.6873575311239698, | |
| "grad_norm": 9.25063419342041, | |
| "learning_rate": 4.804181901670856e-05, | |
| "loss": 0.2798, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6880589163598106, | |
| "grad_norm": 8.65345573425293, | |
| "learning_rate": 4.80361900818734e-05, | |
| "loss": 0.1354, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.6887603015956514, | |
| "grad_norm": 10.924114227294922, | |
| "learning_rate": 4.803055339898288e-05, | |
| "loss": 0.1829, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.6894616868314922, | |
| "grad_norm": 7.537076950073242, | |
| "learning_rate": 4.8024908969932844e-05, | |
| "loss": 0.1064, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.690163072067333, | |
| "grad_norm": 43.258338928222656, | |
| "learning_rate": 4.8019256796621764e-05, | |
| "loss": 0.957, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.6908644573031738, | |
| "grad_norm": 26.308055877685547, | |
| "learning_rate": 4.801359688095071e-05, | |
| "loss": 0.541, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6915658425390145, | |
| "grad_norm": 1.8512808084487915, | |
| "learning_rate": 4.800792922482333e-05, | |
| "loss": 0.0192, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.6922672277748554, | |
| "grad_norm": 1.0505925416946411, | |
| "learning_rate": 4.800225383014594e-05, | |
| "loss": 0.0104, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.6929686130106961, | |
| "grad_norm": 40.21651840209961, | |
| "learning_rate": 4.799657069882738e-05, | |
| "loss": 1.0056, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.6936699982465369, | |
| "grad_norm": 0.17625601589679718, | |
| "learning_rate": 4.7990879832779156e-05, | |
| "loss": 0.0015, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.6943713834823777, | |
| "grad_norm": 0.15104326605796814, | |
| "learning_rate": 4.7985181233915335e-05, | |
| "loss": 0.0013, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6950727687182184, | |
| "grad_norm": 0.09187794476747513, | |
| "learning_rate": 4.79794749041526e-05, | |
| "loss": 0.0008, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.6957741539540593, | |
| "grad_norm": 48.85366439819336, | |
| "learning_rate": 4.797376084541024e-05, | |
| "loss": 1.9766, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.6964755391899, | |
| "grad_norm": 0.04326195642352104, | |
| "learning_rate": 4.796803905961013e-05, | |
| "loss": 0.0004, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.6971769244257409, | |
| "grad_norm": 0.04548042640089989, | |
| "learning_rate": 4.796230954867676e-05, | |
| "loss": 0.0004, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.6978783096615816, | |
| "grad_norm": 48.174827575683594, | |
| "learning_rate": 4.79565723145372e-05, | |
| "loss": 1.9204, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6985796948974224, | |
| "grad_norm": 0.04808010160923004, | |
| "learning_rate": 4.7950827359121135e-05, | |
| "loss": 0.0004, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.6992810801332632, | |
| "grad_norm": 48.49928665161133, | |
| "learning_rate": 4.794507468436084e-05, | |
| "loss": 1.8132, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.699982465369104, | |
| "grad_norm": 47.405731201171875, | |
| "learning_rate": 4.793931429219117e-05, | |
| "loss": 1.6124, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.7006838506049448, | |
| "grad_norm": 0.2506006062030792, | |
| "learning_rate": 4.793354618454961e-05, | |
| "loss": 0.0022, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.7013852358407856, | |
| "grad_norm": 90.9857177734375, | |
| "learning_rate": 4.792777036337621e-05, | |
| "loss": 2.7266, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7020866210766263, | |
| "grad_norm": 0.9927763938903809, | |
| "learning_rate": 4.792198683061363e-05, | |
| "loss": 0.0099, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.7027880063124671, | |
| "grad_norm": 1.5031591653823853, | |
| "learning_rate": 4.791619558820712e-05, | |
| "loss": 0.0152, | |
| "step": 1002 | |
| }, | |
    {
      "epoch": 0.7034893915483079,
      "grad_norm": 29.284543991088867,
      "learning_rate": 4.791039663810452e-05,
      "loss": 0.5838,
      "step": 1003
    },
    {
      "epoch": 0.7041907767841487,
      "grad_norm": 1.6648647785186768,
      "learning_rate": 4.790458998225627e-05,
      "loss": 0.0173,
      "step": 1004
    },
    {
      "epoch": 0.7048921620199895,
      "grad_norm": 2.1368696689605713,
      "learning_rate": 4.7898775622615384e-05,
      "loss": 0.023,
      "step": 1005
    },
    {
      "epoch": 0.7055935472558302,
      "grad_norm": 1.5531522035598755,
      "learning_rate": 4.789295356113749e-05,
      "loss": 0.0161,
      "step": 1006
    },
    {
      "epoch": 0.7062949324916711,
      "grad_norm": 1.0346095561981201,
      "learning_rate": 4.7887123799780795e-05,
      "loss": 0.0096,
      "step": 1007
    },
    {
      "epoch": 0.7069963177275118,
      "grad_norm": 0.5431362986564636,
      "learning_rate": 4.788128634050609e-05,
      "loss": 0.005,
      "step": 1008
    },
    {
      "epoch": 0.7076977029633527,
      "grad_norm": 0.19359983503818512,
      "learning_rate": 4.787544118527677e-05,
      "loss": 0.0017,
      "step": 1009
    },
    {
      "epoch": 0.7083990881991934,
      "grad_norm": 49.78437042236328,
      "learning_rate": 4.786958833605882e-05,
      "loss": 1.7876,
      "step": 1010
    },
    {
      "epoch": 0.7091004734350342,
      "grad_norm": 57.95826721191406,
      "learning_rate": 4.786372779482078e-05,
      "loss": 2.9286,
      "step": 1011
    },
    {
      "epoch": 0.709801858670875,
      "grad_norm": 48.60955047607422,
      "learning_rate": 4.7857859563533814e-05,
      "loss": 1.9456,
      "step": 1012
    },
    {
      "epoch": 0.7105032439067157,
      "grad_norm": 48.62706756591797,
      "learning_rate": 4.785198364417166e-05,
      "loss": 1.8589,
      "step": 1013
    },
    {
      "epoch": 0.7112046291425566,
      "grad_norm": 94.03495788574219,
      "learning_rate": 4.784610003871063e-05,
      "loss": 3.2697,
      "step": 1014
    },
    {
      "epoch": 0.7119060143783973,
      "grad_norm": 44.21165084838867,
      "learning_rate": 4.784020874912965e-05,
      "loss": 1.3436,
      "step": 1015
    },
    {
      "epoch": 0.7126073996142381,
      "grad_norm": 0.36148276925086975,
      "learning_rate": 4.7834309777410195e-05,
      "loss": 0.0033,
      "step": 1016
    },
    {
      "epoch": 0.7133087848500789,
      "grad_norm": 74.5863265991211,
      "learning_rate": 4.7828403125536354e-05,
      "loss": 1.8025,
      "step": 1017
    },
    {
      "epoch": 0.7140101700859197,
      "grad_norm": 43.957252502441406,
      "learning_rate": 4.782248879549478e-05,
      "loss": 1.1635,
      "step": 1018
    },
    {
      "epoch": 0.7147115553217604,
      "grad_norm": 69.43833923339844,
      "learning_rate": 4.781656678927471e-05,
      "loss": 1.2,
      "step": 1019
    },
    {
      "epoch": 0.7154129405576013,
      "grad_norm": 4.946931838989258,
      "learning_rate": 4.7810637108867986e-05,
      "loss": 0.0617,
      "step": 1020
    },
    {
      "epoch": 0.716114325793442,
      "grad_norm": 2.806675434112549,
      "learning_rate": 4.7804699756268987e-05,
      "loss": 0.1936,
      "step": 1021
    },
    {
      "epoch": 0.7168157110292829,
      "grad_norm": 10.467972755432129,
      "learning_rate": 4.7798754733474724e-05,
      "loss": 0.17,
      "step": 1022
    },
    {
      "epoch": 0.7175170962651236,
      "grad_norm": 13.816707611083984,
      "learning_rate": 4.7792802042484746e-05,
      "loss": 0.2481,
      "step": 1023
    },
    {
      "epoch": 0.7182184815009645,
      "grad_norm": 14.67568588256836,
      "learning_rate": 4.77868416853012e-05,
      "loss": 0.2728,
      "step": 1024
    },
    {
      "epoch": 0.7189198667368052,
      "grad_norm": 3.8597590923309326,
      "learning_rate": 4.778087366392881e-05,
      "loss": 0.3473,
      "step": 1025
    },
    {
      "epoch": 0.7196212519726459,
      "grad_norm": 6.207676410675049,
      "learning_rate": 4.777489798037489e-05,
      "loss": 0.276,
      "step": 1026
    },
    {
      "epoch": 0.7203226372084868,
      "grad_norm": 1.4748679399490356,
      "learning_rate": 4.77689146366493e-05,
      "loss": 0.2487,
      "step": 1027
    },
    {
      "epoch": 0.7210240224443275,
      "grad_norm": 3.2220802307128906,
      "learning_rate": 4.77629236347645e-05,
      "loss": 0.1941,
      "step": 1028
    },
    {
      "epoch": 0.7217254076801684,
      "grad_norm": 8.897248268127441,
      "learning_rate": 4.775692497673552e-05,
      "loss": 0.135,
      "step": 1029
    },
    {
      "epoch": 0.7224267929160091,
      "grad_norm": 6.12791633605957,
      "learning_rate": 4.7750918664579956e-05,
      "loss": 0.0784,
      "step": 1030
    },
    {
      "epoch": 0.7231281781518499,
      "grad_norm": 20.013917922973633,
      "learning_rate": 4.7744904700318006e-05,
      "loss": 0.3768,
      "step": 1031
    },
    {
      "epoch": 0.7238295633876907,
      "grad_norm": 2.475820541381836,
      "learning_rate": 4.7738883085972406e-05,
      "loss": 0.0274,
      "step": 1032
    },
    {
      "epoch": 0.7245309486235315,
      "grad_norm": 1.4534077644348145,
      "learning_rate": 4.773285382356849e-05,
      "loss": 0.015,
      "step": 1033
    },
    {
      "epoch": 0.7252323338593722,
      "grad_norm": 36.22199630737305,
      "learning_rate": 4.772681691513417e-05,
      "loss": 0.9046,
      "step": 1034
    },
    {
      "epoch": 0.7259337190952131,
      "grad_norm": 0.2810744643211365,
      "learning_rate": 4.772077236269989e-05,
      "loss": 0.0025,
      "step": 1035
    },
    {
      "epoch": 0.7266351043310538,
      "grad_norm": 45.44533920288086,
      "learning_rate": 4.771472016829871e-05,
      "loss": 1.3375,
      "step": 1036
    },
    {
      "epoch": 0.7273364895668946,
      "grad_norm": 0.12535035610198975,
      "learning_rate": 4.770866033396623e-05,
      "loss": 0.0011,
      "step": 1037
    },
    {
      "epoch": 0.7280378748027354,
      "grad_norm": 45.64175033569336,
      "learning_rate": 4.770259286174065e-05,
      "loss": 1.6729,
      "step": 1038
    },
    {
      "epoch": 0.7287392600385761,
      "grad_norm": 46.727394104003906,
      "learning_rate": 4.7696517753662696e-05,
      "loss": 1.7577,
      "step": 1039
    },
    {
      "epoch": 0.729440645274417,
      "grad_norm": 0.17975449562072754,
      "learning_rate": 4.76904350117757e-05,
      "loss": 0.0016,
      "step": 1040
    },
    {
      "epoch": 0.7301420305102577,
      "grad_norm": 85.70226287841797,
      "learning_rate": 4.7684344638125546e-05,
      "loss": 2.662,
      "step": 1041
    },
    {
      "epoch": 0.7308434157460986,
      "grad_norm": 39.62570571899414,
      "learning_rate": 4.767824663476069e-05,
      "loss": 1.1086,
      "step": 1042
    },
    {
      "epoch": 0.7315448009819393,
      "grad_norm": 33.90923309326172,
      "learning_rate": 4.767214100373214e-05,
      "loss": 0.8021,
      "step": 1043
    },
    {
      "epoch": 0.7322461862177801,
      "grad_norm": 23.501230239868164,
      "learning_rate": 4.76660277470935e-05,
      "loss": 0.4899,
      "step": 1044
    },
    {
      "epoch": 0.7329475714536209,
      "grad_norm": 10.125417709350586,
      "learning_rate": 4.76599068669009e-05,
      "loss": 0.2671,
      "step": 1045
    },
    {
      "epoch": 0.7336489566894617,
      "grad_norm": 3.0318775177001953,
      "learning_rate": 4.7653778365213076e-05,
      "loss": 0.2053,
      "step": 1046
    },
    {
      "epoch": 0.7343503419253025,
      "grad_norm": 7.598801612854004,
      "learning_rate": 4.7647642244091276e-05,
      "loss": 0.2937,
      "step": 1047
    },
    {
      "epoch": 0.7350517271611433,
      "grad_norm": 11.337316513061523,
      "learning_rate": 4.7641498505599356e-05,
      "loss": 0.283,
      "step": 1048
    },
    {
      "epoch": 0.735753112396984,
      "grad_norm": 2.909923553466797,
      "learning_rate": 4.763534715180373e-05,
      "loss": 0.2887,
      "step": 1049
    },
    {
      "epoch": 0.7364544976328248,
      "grad_norm": 12.51762866973877,
      "learning_rate": 4.762918818477334e-05,
      "loss": 0.2306,
      "step": 1050
    },
    {
      "epoch": 0.7371558828686656,
      "grad_norm": 10.550107955932617,
      "learning_rate": 4.762302160657971e-05,
      "loss": 0.1879,
      "step": 1051
    },
    {
      "epoch": 0.7378572681045064,
      "grad_norm": 10.876016616821289,
      "learning_rate": 4.7616847419296945e-05,
      "loss": 0.1877,
      "step": 1052
    },
    {
      "epoch": 0.7385586533403472,
      "grad_norm": 3.0670106410980225,
      "learning_rate": 4.761066562500167e-05,
      "loss": 0.2327,
      "step": 1053
    },
    {
      "epoch": 0.7392600385761879,
      "grad_norm": 5.0195770263671875,
      "learning_rate": 4.7604476225773084e-05,
      "loss": 0.0651,
      "step": 1054
    },
    {
      "epoch": 0.7399614238120288,
      "grad_norm": 2.679926872253418,
      "learning_rate": 4.759827922369296e-05,
      "loss": 0.0317,
      "step": 1055
    },
    {
      "epoch": 0.7406628090478695,
      "grad_norm": 1.752316951751709,
      "learning_rate": 4.7592074620845616e-05,
      "loss": 0.0184,
      "step": 1056
    },
    {
      "epoch": 0.7413641942837104,
      "grad_norm": 0.6544418334960938,
      "learning_rate": 4.758586241931791e-05,
      "loss": 0.0062,
      "step": 1057
    },
    {
      "epoch": 0.7420655795195511,
      "grad_norm": 0.19106708467006683,
      "learning_rate": 4.757964262119928e-05,
      "loss": 0.0017,
      "step": 1058
    },
    {
      "epoch": 0.742766964755392,
      "grad_norm": 0.0741598978638649,
      "learning_rate": 4.757341522858171e-05,
      "loss": 0.0006,
      "step": 1059
    },
    {
      "epoch": 0.7434683499912327,
      "grad_norm": 0.02510019950568676,
      "learning_rate": 4.7567180243559736e-05,
      "loss": 0.0002,
      "step": 1060
    },
    {
      "epoch": 0.7441697352270735,
      "grad_norm": 0.00887396652251482,
      "learning_rate": 4.7560937668230454e-05,
      "loss": 0.0001,
      "step": 1061
    },
    {
      "epoch": 0.7448711204629143,
      "grad_norm": 0.0027078278362751007,
      "learning_rate": 4.75546875046935e-05,
      "loss": 0.0,
      "step": 1062
    },
    {
      "epoch": 0.745572505698755,
      "grad_norm": 0.001226524356752634,
      "learning_rate": 4.7548429755051085e-05,
      "loss": 0.0,
      "step": 1063
    },
    {
      "epoch": 0.7462738909345958,
      "grad_norm": 0.0005389907164499164,
      "learning_rate": 4.754216442140794e-05,
      "loss": 0.0,
      "step": 1064
    },
    {
      "epoch": 0.7469752761704366,
      "grad_norm": 55.77132797241211,
      "learning_rate": 4.7535891505871375e-05,
      "loss": 3.9293,
      "step": 1065
    },
    {
      "epoch": 0.7476766614062774,
      "grad_norm": 103.8635025024414,
      "learning_rate": 4.752961101055124e-05,
      "loss": 7.9354,
      "step": 1066
    },
    {
      "epoch": 0.7483780466421182,
      "grad_norm": 103.83565521240234,
      "learning_rate": 4.7523322937559946e-05,
      "loss": 7.4002,
      "step": 1067
    },
    {
      "epoch": 0.749079431877959,
      "grad_norm": 0.0008341918000951409,
      "learning_rate": 4.751702728901242e-05,
      "loss": 0.0,
      "step": 1068
    },
    {
      "epoch": 0.7497808171137997,
      "grad_norm": 48.793819427490234,
      "learning_rate": 4.7510724067026156e-05,
      "loss": 3.1709,
      "step": 1069
    },
    {
      "epoch": 0.7504822023496406,
      "grad_norm": 97.3034439086914,
      "learning_rate": 4.750441327372122e-05,
      "loss": 5.7,
      "step": 1070
    },
    {
      "epoch": 0.7511835875854813,
      "grad_norm": 0.0208453219383955,
      "learning_rate": 4.7498094911220185e-05,
      "loss": 0.0002,
      "step": 1071
    },
    {
      "epoch": 0.7518849728213222,
      "grad_norm": 0.05400345101952553,
      "learning_rate": 4.749176898164818e-05,
      "loss": 0.0004,
      "step": 1072
    },
    {
      "epoch": 0.7525863580571629,
      "grad_norm": 0.0927615538239479,
      "learning_rate": 4.7485435487132904e-05,
      "loss": 0.0008,
      "step": 1073
    },
    {
      "epoch": 0.7532877432930036,
      "grad_norm": 0.19018051028251648,
      "learning_rate": 4.747909442980456e-05,
      "loss": 0.0016,
      "step": 1074
    },
    {
      "epoch": 0.7539891285288445,
      "grad_norm": 0.3820917010307312,
      "learning_rate": 4.7472745811795936e-05,
      "loss": 0.0035,
      "step": 1075
    },
    {
      "epoch": 0.7546905137646852,
      "grad_norm": 0.6652111411094666,
      "learning_rate": 4.746638963524235e-05,
      "loss": 0.0063,
      "step": 1076
    },
    {
      "epoch": 0.7553918990005261,
      "grad_norm": 91.62269592285156,
      "learning_rate": 4.7460025902281625e-05,
      "loss": 2.2848,
      "step": 1077
    },
    {
      "epoch": 0.7560932842363668,
      "grad_norm": 0.6401788592338562,
      "learning_rate": 4.7453654615054174e-05,
      "loss": 0.006,
      "step": 1078
    },
    {
      "epoch": 0.7567946694722076,
      "grad_norm": 0.9835159182548523,
      "learning_rate": 4.744727577570295e-05,
      "loss": 0.0096,
      "step": 1079
    },
    {
      "epoch": 0.7574960547080484,
      "grad_norm": 0.665366530418396,
      "learning_rate": 4.74408893863734e-05,
      "loss": 0.0064,
      "step": 1080
    },
    {
      "epoch": 0.7581974399438892,
      "grad_norm": 0.3792603313922882,
      "learning_rate": 4.743449544921356e-05,
      "loss": 0.0034,
      "step": 1081
    },
    {
      "epoch": 0.75889882517973,
      "grad_norm": 0.18616673350334167,
      "learning_rate": 4.742809396637397e-05,
      "loss": 0.0016,
      "step": 1082
    },
    {
      "epoch": 0.7596002104155708,
      "grad_norm": 0.12753531336784363,
      "learning_rate": 4.7421684940007725e-05,
      "loss": 0.0011,
      "step": 1083
    },
    {
      "epoch": 0.7603015956514115,
      "grad_norm": 92.24988555908203,
      "learning_rate": 4.741526837227047e-05,
      "loss": 3.521,
      "step": 1084
    },
    {
      "epoch": 0.7610029808872524,
      "grad_norm": 48.01845169067383,
      "learning_rate": 4.740884426532035e-05,
      "loss": 1.9461,
      "step": 1085
    },
    {
      "epoch": 0.7617043661230931,
      "grad_norm": 0.07205016911029816,
      "learning_rate": 4.740241262131808e-05,
      "loss": 0.0006,
      "step": 1086
    },
    {
      "epoch": 0.7624057513589338,
      "grad_norm": 0.07788128405809402,
      "learning_rate": 4.7395973442426895e-05,
      "loss": 0.0007,
      "step": 1087
    },
    {
      "epoch": 0.7631071365947747,
      "grad_norm": 94.46582794189453,
      "learning_rate": 4.738952673081256e-05,
      "loss": 3.4385,
      "step": 1088
    },
    {
      "epoch": 0.7638085218306154,
      "grad_norm": 0.17950232326984406,
      "learning_rate": 4.738307248864339e-05,
      "loss": 0.0016,
      "step": 1089
    },
    {
      "epoch": 0.7645099070664563,
      "grad_norm": 93.51290130615234,
      "learning_rate": 4.737661071809021e-05,
      "loss": 3.3519,
      "step": 1090
    },
    {
      "epoch": 0.765211292302297,
      "grad_norm": 92.91374206542969,
      "learning_rate": 4.73701414213264e-05,
      "loss": 2.7943,
      "step": 1091
    },
    {
      "epoch": 0.7659126775381379,
      "grad_norm": 0.5978343486785889,
      "learning_rate": 4.736366460052786e-05,
      "loss": 0.0056,
      "step": 1092
    },
    {
      "epoch": 0.7666140627739786,
      "grad_norm": 30.616474151611328,
      "learning_rate": 4.7357180257873014e-05,
      "loss": 0.6588,
      "step": 1093
    },
    {
      "epoch": 0.7673154480098194,
      "grad_norm": 1.5538511276245117,
      "learning_rate": 4.7350688395542825e-05,
      "loss": 0.0162,
      "step": 1094
    },
    {
      "epoch": 0.7680168332456602,
      "grad_norm": 75.00251007080078,
      "learning_rate": 4.734418901572079e-05,
      "loss": 1.4034,
      "step": 1095
    },
    {
      "epoch": 0.768718218481501,
      "grad_norm": 3.7650375366210938,
      "learning_rate": 4.733768212059293e-05,
      "loss": 0.0454,
      "step": 1096
    },
    {
      "epoch": 0.7694196037173417,
      "grad_norm": 5.952811241149902,
      "learning_rate": 4.733116771234778e-05,
      "loss": 0.0766,
      "step": 1097
    },
    {
      "epoch": 0.7701209889531825,
      "grad_norm": 15.600150108337402,
      "learning_rate": 4.7324645793176425e-05,
      "loss": 0.3215,
      "step": 1098
    },
    {
      "epoch": 0.7708223741890233,
      "grad_norm": 9.213759422302246,
      "learning_rate": 4.731811636527246e-05,
      "loss": 0.2548,
      "step": 1099
    },
    {
      "epoch": 0.7715237594248641,
      "grad_norm": 7.49055814743042,
      "learning_rate": 4.731157943083202e-05,
      "loss": 0.1,
      "step": 1100
    },
    {
      "epoch": 0.7722251446607049,
      "grad_norm": 7.580368518829346,
      "learning_rate": 4.730503499205373e-05,
      "loss": 0.1067,
      "step": 1101
    },
    {
      "epoch": 0.7729265298965456,
      "grad_norm": 6.691344261169434,
      "learning_rate": 4.7298483051138796e-05,
      "loss": 0.0911,
      "step": 1102
    },
    {
      "epoch": 0.7736279151323865,
      "grad_norm": 39.10125732421875,
      "learning_rate": 4.72919236102909e-05,
      "loss": 0.7558,
      "step": 1103
    },
    {
      "epoch": 0.7743293003682272,
      "grad_norm": 4.285506725311279,
      "learning_rate": 4.728535667171626e-05,
      "loss": 0.051,
      "step": 1104
    },
    {
      "epoch": 0.7750306856040681,
      "grad_norm": 2.88086199760437,
      "learning_rate": 4.727878223762363e-05,
      "loss": 0.0324,
      "step": 1105
    },
    {
      "epoch": 0.7757320708399088,
      "grad_norm": 1.6831408739089966,
      "learning_rate": 4.7272200310224256e-05,
      "loss": 0.0172,
      "step": 1106
    },
    {
      "epoch": 0.7764334560757497,
      "grad_norm": 37.848419189453125,
      "learning_rate": 4.726561089173194e-05,
      "loss": 0.934,
      "step": 1107
    },
    {
      "epoch": 0.7771348413115904,
      "grad_norm": 38.1249885559082,
      "learning_rate": 4.725901398436297e-05,
      "loss": 0.9811,
      "step": 1108
    },
    {
      "epoch": 0.7778362265474312,
      "grad_norm": 51.93446731567383,
      "learning_rate": 4.725240959033618e-05,
      "loss": 1.5657,
      "step": 1109
    },
    {
      "epoch": 0.778537611783272,
      "grad_norm": 0.33258679509162903,
      "learning_rate": 4.7245797711872895e-05,
      "loss": 0.003,
      "step": 1110
    },
    {
      "epoch": 0.7792389970191127,
      "grad_norm": 61.720924377441406,
      "learning_rate": 4.723917835119699e-05,
      "loss": 1.6662,
      "step": 1111
    },
    {
      "epoch": 0.7799403822549535,
      "grad_norm": 37.942874908447266,
      "learning_rate": 4.723255151053483e-05,
      "loss": 1.0046,
      "step": 1112
    },
    {
      "epoch": 0.7806417674907943,
      "grad_norm": 0.8135581016540527,
      "learning_rate": 4.7225917192115297e-05,
      "loss": 0.0076,
      "step": 1113
    },
    {
      "epoch": 0.7813431527266351,
      "grad_norm": 36.66408920288086,
      "learning_rate": 4.721927539816981e-05,
      "loss": 0.9099,
      "step": 1114
    },
    {
      "epoch": 0.7820445379624759,
      "grad_norm": 1.1302094459533691,
      "learning_rate": 4.721262613093228e-05,
      "loss": 0.0115,
      "step": 1115
    },
    {
      "epoch": 0.7827459231983167,
      "grad_norm": 1.4476618766784668,
      "learning_rate": 4.720596939263914e-05,
      "loss": 0.0146,
      "step": 1116
    },
    {
      "epoch": 0.7834473084341574,
      "grad_norm": 1.0591875314712524,
      "learning_rate": 4.719930518552934e-05,
      "loss": 0.0106,
      "step": 1117
    },
    {
      "epoch": 0.7841486936699983,
      "grad_norm": 0.7351242899894714,
      "learning_rate": 4.7192633511844334e-05,
      "loss": 0.0069,
      "step": 1118
    },
    {
      "epoch": 0.784850078905839,
      "grad_norm": 0.43580272793769836,
      "learning_rate": 4.71859543738281e-05,
      "loss": 0.004,
      "step": 1119
    },
    {
      "epoch": 0.7855514641416799,
      "grad_norm": 0.256906658411026,
      "learning_rate": 4.71792677737271e-05,
      "loss": 0.0022,
      "step": 1120
    },
    {
      "epoch": 0.7862528493775206,
      "grad_norm": 0.13365890085697174,
      "learning_rate": 4.717257371379034e-05,
      "loss": 0.0012,
      "step": 1121
    },
    {
      "epoch": 0.7869542346133613,
      "grad_norm": 0.07990420609712601,
      "learning_rate": 4.71658721962693e-05,
      "loss": 0.0007,
      "step": 1122
    },
    {
      "epoch": 0.7876556198492022,
      "grad_norm": 0.0479491651058197,
      "learning_rate": 4.715916322341801e-05,
      "loss": 0.0004,
      "step": 1123
    },
    {
      "epoch": 0.7883570050850429,
      "grad_norm": 47.28627014160156,
      "learning_rate": 4.7152446797492963e-05,
      "loss": 2.0982,
      "step": 1124
    },
    {
      "epoch": 0.7890583903208838,
      "grad_norm": 96.63157653808594,
      "learning_rate": 4.71457229207532e-05,
      "loss": 4.5257,
      "step": 1125
    },
    {
      "epoch": 0.7897597755567245,
      "grad_norm": 47.81488800048828,
      "learning_rate": 4.713899159546024e-05,
      "loss": 2.3268,
      "step": 1126
    },
    {
      "epoch": 0.7904611607925653,
      "grad_norm": 47.34000015258789,
      "learning_rate": 4.713225282387811e-05,
      "loss": 2.1484,
      "step": 1127
    },
    {
      "epoch": 0.7911625460284061,
      "grad_norm": 46.76436996459961,
      "learning_rate": 4.7125506608273344e-05,
      "loss": 1.8822,
      "step": 1128
    },
    {
      "epoch": 0.7918639312642469,
      "grad_norm": 0.12850354611873627,
      "learning_rate": 4.711875295091499e-05,
      "loss": 0.0011,
      "step": 1129
    },
    {
      "epoch": 0.7925653165000877,
      "grad_norm": 126.00606536865234,
      "learning_rate": 4.71119918540746e-05,
      "loss": 3.7093,
      "step": 1130
    },
    {
      "epoch": 0.7932667017359285,
      "grad_norm": 0.4526232182979584,
      "learning_rate": 4.7105223320026205e-05,
      "loss": 0.0042,
      "step": 1131
    },
    {
      "epoch": 0.7939680869717692,
      "grad_norm": 33.449928283691406,
      "learning_rate": 4.709844735104636e-05,
      "loss": 0.7578,
      "step": 1132
    },
    {
      "epoch": 0.7946694722076101,
      "grad_norm": 1.6459629535675049,
      "learning_rate": 4.7091663949414105e-05,
      "loss": 0.0174,
      "step": 1133
    },
    {
      "epoch": 0.7953708574434508,
      "grad_norm": 34.26624298095703,
      "learning_rate": 4.7084873117411e-05,
      "loss": 0.7867,
      "step": 1134
    },
    {
      "epoch": 0.7960722426792916,
      "grad_norm": 3.238154888153076,
      "learning_rate": 4.7078074857321085e-05,
      "loss": 0.0382,
      "step": 1135
    },
    {
      "epoch": 0.7967736279151324,
      "grad_norm": 3.573115348815918,
      "learning_rate": 4.70712691714309e-05,
      "loss": 0.0431,
      "step": 1136
    },
    {
      "epoch": 0.7974750131509731,
      "grad_norm": 4.002458095550537,
      "learning_rate": 4.7064456062029506e-05,
      "loss": 0.0488,
      "step": 1137
    },
    {
      "epoch": 0.798176398386814,
      "grad_norm": 3.4627814292907715,
      "learning_rate": 4.7057635531408424e-05,
      "loss": 0.0394,
      "step": 1138
    },
    {
      "epoch": 0.7988777836226547,
      "grad_norm": 18.19850730895996,
      "learning_rate": 4.70508075818617e-05,
      "loss": 0.3679,
      "step": 1139
    },
    {
      "epoch": 0.7995791688584956,
      "grad_norm": 2.7357804775238037,
      "learning_rate": 4.704397221568586e-05,
      "loss": 0.0312,
      "step": 1140
    },
    {
      "epoch": 0.8002805540943363,
      "grad_norm": 51.47498321533203,
      "learning_rate": 4.703712943517993e-05,
      "loss": 1.0259,
      "step": 1141
    },
    {
      "epoch": 0.8009819393301771,
      "grad_norm": 1.7935067415237427,
      "learning_rate": 4.7030279242645434e-05,
      "loss": 0.0192,
      "step": 1142
    },
    {
      "epoch": 0.8016833245660179,
      "grad_norm": 28.79102325439453,
      "learning_rate": 4.702342164038639e-05,
      "loss": 0.6393,
      "step": 1143
    },
    {
      "epoch": 0.8023847098018587,
      "grad_norm": 1.230648398399353,
      "learning_rate": 4.7016556630709284e-05,
      "loss": 0.0126,
      "step": 1144
    },
    {
      "epoch": 0.8030860950376995,
      "grad_norm": 0.9287470579147339,
      "learning_rate": 4.700968421592313e-05,
      "loss": 0.0094,
      "step": 1145
    },
    {
      "epoch": 0.8037874802735402,
      "grad_norm": 62.944549560546875,
      "learning_rate": 4.700280439833941e-05,
      "loss": 1.3975,
      "step": 1146
    },
    {
      "epoch": 0.804488865509381,
      "grad_norm": 44.87819290161133,
      "learning_rate": 4.69959171802721e-05,
      "loss": 1.0857,
      "step": 1147
    },
    {
      "epoch": 0.8051902507452218,
      "grad_norm": 0.5283865928649902,
      "learning_rate": 4.6989022564037654e-05,
      "loss": 0.0049,
      "step": 1148
    },
    {
      "epoch": 0.8058916359810626,
      "grad_norm": 0.5992218852043152,
      "learning_rate": 4.698212055195505e-05,
      "loss": 0.0058,
      "step": 1149
    },
    {
      "epoch": 0.8065930212169033,
      "grad_norm": 0.5411283373832703,
      "learning_rate": 4.697521114634571e-05,
      "loss": 0.0052,
      "step": 1150
    },
    {
      "epoch": 0.8072944064527442,
      "grad_norm": 75.1710433959961,
      "learning_rate": 4.696829434953357e-05,
      "loss": 1.9684,
      "step": 1151
    },
    {
      "epoch": 0.8079957916885849,
      "grad_norm": 0.3737725615501404,
      "learning_rate": 4.6961370163845044e-05,
      "loss": 0.0035,
      "step": 1152
    },
    {
      "epoch": 0.8086971769244258,
      "grad_norm": 48.369232177734375,
      "learning_rate": 4.6954438591609027e-05,
      "loss": 1.2948,
      "step": 1153
    },
    {
      "epoch": 0.8093985621602665,
      "grad_norm": 0.4170537292957306,
      "learning_rate": 4.694749963515691e-05,
      "loss": 0.0036,
      "step": 1154
    },
    {
      "epoch": 0.8100999473961074,
      "grad_norm": 0.3476961553096771,
      "learning_rate": 4.6940553296822565e-05,
      "loss": 0.0031,
      "step": 1155
    },
    {
      "epoch": 0.8108013326319481,
      "grad_norm": 40.558204650878906,
      "learning_rate": 4.693359957894232e-05,
      "loss": 1.199,
      "step": 1156
    },
    {
      "epoch": 0.8115027178677889,
      "grad_norm": 78.08576202392578,
      "learning_rate": 4.692663848385503e-05,
      "loss": 2.1697,
      "step": 1157
    },
    {
      "epoch": 0.8122041031036297,
      "grad_norm": 0.5332819819450378,
      "learning_rate": 4.6919670013902015e-05,
      "loss": 0.0051,
      "step": 1158
    },
    {
      "epoch": 0.8129054883394704,
      "grad_norm": 0.6460719108581543,
      "learning_rate": 4.691269417142704e-05,
      "loss": 0.0063,
      "step": 1159
    },
    {
      "epoch": 0.8136068735753113,
      "grad_norm": 0.6999556422233582,
      "learning_rate": 4.69057109587764e-05,
      "loss": 0.0068,
      "step": 1160
    },
    {
      "epoch": 0.814308258811152,
      "grad_norm": 0.5912299156188965,
      "learning_rate": 4.6898720378298846e-05,
      "loss": 0.0057,
      "step": 1161
    },
    {
      "epoch": 0.8150096440469928,
      "grad_norm": 0.4620411992073059,
      "learning_rate": 4.6891722432345597e-05,
      "loss": 0.0044,
      "step": 1162
    },
    {
      "epoch": 0.8157110292828336,
      "grad_norm": 0.3710283935070038,
      "learning_rate": 4.6884717123270374e-05,
      "loss": 0.0034,
      "step": 1163
    },
    {
      "epoch": 0.8164124145186744,
      "grad_norm": 0.22289921343326569,
      "learning_rate": 4.687770445342936e-05,
      "loss": 0.002,
      "step": 1164
    },
    {
      "epoch": 0.8171137997545151,
      "grad_norm": 89.52765655517578,
      "learning_rate": 4.687068442518121e-05,
      "loss": 2.8479,
      "step": 1165
    },
    {
      "epoch": 0.817815184990356,
      "grad_norm": 0.15177088975906372,
      "learning_rate": 4.6863657040887065e-05,
      "loss": 0.0013,
      "step": 1166
    },
    {
      "epoch": 0.8185165702261967,
      "grad_norm": 0.13902179896831512,
      "learning_rate": 4.685662230291054e-05,
      "loss": 0.0012,
      "step": 1167
    },
    {
      "epoch": 0.8192179554620376,
      "grad_norm": 0.10139818489551544,
      "learning_rate": 4.68495802136177e-05,
      "loss": 0.0009,
      "step": 1168
    },
    {
      "epoch": 0.8199193406978783,
      "grad_norm": 0.08662685006856918,
      "learning_rate": 4.684253077537712e-05,
      "loss": 0.0007,
      "step": 1169
    },
    {
      "epoch": 0.820620725933719,
      "grad_norm": 0.07555234432220459,
      "learning_rate": 4.683547399055982e-05,
      "loss": 0.0006,
      "step": 1170
    },
    {
      "epoch": 0.8213221111695599,
      "grad_norm": 0.06445518881082535,
      "learning_rate": 4.682840986153929e-05,
      "loss": 0.0006,
      "step": 1171
    },
    {
      "epoch": 0.8220234964054006,
      "grad_norm": 46.72000503540039,
      "learning_rate": 4.68213383906915e-05,
      "loss": 2.1281,
      "step": 1172
    },
    {
      "epoch": 0.8227248816412415,
      "grad_norm": 46.803985595703125,
      "learning_rate": 4.6814259580394915e-05,
      "loss": 1.9621,
      "step": 1173
    },
    {
      "epoch": 0.8234262668770822,
      "grad_norm": 0.07187401503324509,
      "learning_rate": 4.68071734330304e-05,
      "loss": 0.0006,
      "step": 1174
    },
    {
      "epoch": 0.824127652112923,
      "grad_norm": 0.09642617404460907,
      "learning_rate": 4.680007995098136e-05,
      "loss": 0.0008,
      "step": 1175
    },
    {
      "epoch": 0.8248290373487638,
      "grad_norm": 0.11466558277606964,
      "learning_rate": 4.679297913663363e-05,
      "loss": 0.001,
      "step": 1176
    },
    {
      "epoch": 0.8255304225846046,
      "grad_norm": 0.11334109306335449,
      "learning_rate": 4.67858709923755e-05,
      "loss": 0.001,
      "step": 1177
    },
    {
      "epoch": 0.8262318078204454,
      "grad_norm": 0.1963043361902237,
      "learning_rate": 4.677875552059777e-05,
      "loss": 0.0017,
      "step": 1178
    },
    {
      "epoch": 0.8269331930562862,
      "grad_norm": 0.17632529139518738,
      "learning_rate": 4.677163272369365e-05,
      "loss": 0.0016,
      "step": 1179
    },
    {
      "epoch": 0.8276345782921269,
      "grad_norm": 124.80657958984375,
      "learning_rate": 4.6764502604058864e-05,
      "loss": 3.9859,
      "step": 1180
    },
    {
      "epoch": 0.8283359635279678,
      "grad_norm": 0.19632282853126526,
      "learning_rate": 4.675736516409156e-05,
      "loss": 0.0018,
      "step": 1181
    },
    {
      "epoch": 0.8290373487638085,
      "grad_norm": 43.057350158691406,
      "learning_rate": 4.675022040619238e-05,
      "loss": 1.2696,
      "step": 1182
    },
    {
      "epoch": 0.8297387339996493,
      "grad_norm": 39.46339416503906,
      "learning_rate": 4.674306833276439e-05,
      "loss": 1.093,
      "step": 1183
    },
    {
      "epoch": 0.8304401192354901,
      "grad_norm": 0.653581976890564,
      "learning_rate": 4.6735908946213156e-05,
      "loss": 0.0065,
      "step": 1184
    },
    {
      "epoch": 0.8311415044713308,
      "grad_norm": 0.8602274656295776,
      "learning_rate": 4.6728742248946677e-05,
      "loss": 0.0088,
      "step": 1185
    },
    {
      "epoch": 0.8318428897071717,
      "grad_norm": 0.9501532912254333,
      "learning_rate": 4.6721568243375425e-05,
      "loss": 0.0095,
      "step": 1186
    },
    {
      "epoch": 0.8325442749430124,
      "grad_norm": 0.8702390193939209,
      "learning_rate": 4.671438693191232e-05,
      "loss": 0.0088,
      "step": 1187
    },
    {
      "epoch": 0.8332456601788533,
      "grad_norm": 0.6245111227035522,
      "learning_rate": 4.6707198316972746e-05,
      "loss": 0.0061,
      "step": 1188
    },
    {
      "epoch": 0.833947045414694,
      "grad_norm": 0.4615590572357178,
      "learning_rate": 4.6700002400974544e-05,
      "loss": 0.0045,
      "step": 1189
    },
    {
      "epoch": 0.8346484306505348,
      "grad_norm": 0.3482006788253784,
      "learning_rate": 4.6692799186338e-05,
      "loss": 0.0033,
      "step": 1190
    },
    {
      "epoch": 0.8353498158863756,
      "grad_norm": 43.396461486816406,
      "learning_rate": 4.668558867548587e-05,
      "loss": 1.3748,
      "step": 1191
    },
    {
      "epoch": 0.8360512011222164,
      "grad_norm": 49.66410827636719,
      "learning_rate": 4.6678370870843357e-05,
      "loss": 1.7113,
      "step": 1192
    },
    {
      "epoch": 0.8367525863580572,
      "grad_norm": 0.1562856137752533,
      "learning_rate": 4.667114577483812e-05,
      "loss": 0.0014,
      "step": 1193
    },
    {
      "epoch": 0.8374539715938979,
      "grad_norm": 47.60147476196289,
      "learning_rate": 4.666391338990026e-05,
      "loss": 1.8113,
      "step": 1194
    },
    {
      "epoch": 0.8381553568297387,
      "grad_norm": 0.19547221064567566,
      "learning_rate": 4.665667371846234e-05,
      "loss": 0.0018,
      "step": 1195
    },
    {
      "epoch": 0.8388567420655795,
      "grad_norm": 0.23496605455875397,
      "learning_rate": 4.664942676295937e-05,
      "loss": 0.0022,
      "step": 1196
    },
    {
      "epoch": 0.8395581273014203,
      "grad_norm": 0.19859689474105835,
      "learning_rate": 4.6642172525828806e-05,
      "loss": 0.0018,
      "step": 1197
    },
    {
      "epoch": 0.840259512537261,
      "grad_norm": 0.21561066806316376,
      "learning_rate": 4.6634911009510574e-05,
      "loss": 0.002,
      "step": 1198
    },
    {
      "epoch": 0.8409608977731019,
      "grad_norm": 0.16780033707618713,
      "learning_rate": 4.662764221644702e-05,
      "loss": 0.0016,
      "step": 1199
    },
    {
      "epoch": 0.8416622830089426,
      "grad_norm": 0.170645073056221,
      "learning_rate": 4.6620366149082954e-05,
      "loss": 0.0015,
      "step": 1200
    },
    {
      "epoch": 0.8423636682447835,
      "grad_norm": 0.16811063885688782,
      "learning_rate": 4.661308280986563e-05,
      "loss": 0.0015,
      "step": 1201
    },
    {
      "epoch": 0.8430650534806242,
      "grad_norm": 42.41083908081055,
      "learning_rate": 4.6605792201244733e-05,
      "loss": 1.4838,
      "step": 1202
    },
    {
      "epoch": 0.8437664387164651,
      "grad_norm": 43.9367790222168,
      "learning_rate": 4.6598494325672426e-05,
      "loss": 1.4149,
      "step": 1203
    },
    {
      "epoch": 0.8444678239523058,
      "grad_norm": 0.1912125051021576,
      "learning_rate": 4.659118918560329e-05,
      "loss": 0.0018,
      "step": 1204
    },
    {
      "epoch": 0.8451692091881466,
      "grad_norm": 0.27999013662338257,
      "learning_rate": 4.6583876783494343e-05,
      "loss": 0.0025,
      "step": 1205
    },
    {
      "epoch": 0.8458705944239874,
      "grad_norm": 36.06639099121094,
      "learning_rate": 4.657655712180507e-05,
      "loss": 0.9947,
      "step": 1206
    },
    {
      "epoch": 0.8465719796598281,
      "grad_norm": 0.4413144588470459,
      "learning_rate": 4.6569230202997385e-05,
      "loss": 0.0042,
      "step": 1207
    },
    {
      "epoch": 0.847273364895669,
      "grad_norm": 0.530229926109314,
      "learning_rate": 4.656189602953565e-05,
      "loss": 0.0052,
      "step": 1208
    },
    {
      "epoch": 0.8479747501315097,
      "grad_norm": 0.6561914086341858,
      "learning_rate": 4.655455460388665e-05,
      "loss": 0.0066,
      "step": 1209
    },
    {
      "epoch": 0.8486761353673505,
      "grad_norm": 0.6459203362464905,
      "learning_rate": 4.654720592851962e-05,
      "loss": 0.0063,
      "step": 1210
    },
    {
      "epoch": 0.8493775206031913,
      "grad_norm": 37.68455505371094,
      "learning_rate": 4.653985000590624e-05,
      "loss": 1.0854,
      "step": 1211
    },
    {
      "epoch": 0.8500789058390321,
      "grad_norm": 0.4941748380661011,
      "learning_rate": 4.6532486838520616e-05,
      "loss": 0.0049,
      "step": 1212
    },
    {
      "epoch": 0.8507802910748729,
      "grad_norm": 38.626068115234375,
      "learning_rate": 4.6525116428839307e-05,
      "loss": 1.0544,
      "step": 1213
    },
    {
      "epoch": 0.8514816763107137,
      "grad_norm": 0.5088845491409302,
      "learning_rate": 4.651773877934129e-05,
      "loss": 0.005,
      "step": 1214
    },
    {
      "epoch": 0.8521830615465544,
      "grad_norm": 0.5165444016456604,
      "learning_rate": 4.651035389250798e-05,
      "loss": 0.005,
      "step": 1215
    },
    {
      "epoch": 0.8528844467823953,
      "grad_norm": 37.2906379699707,
      "learning_rate": 4.650296177082323e-05,
      "loss": 1.0227,
      "step": 1216
    },
    {
      "epoch": 0.853585832018236,
      "grad_norm": 0.5255476832389832,
      "learning_rate": 4.649556241677333e-05,
      "loss": 0.0052,
      "step": 1217
    },
    {
      "epoch": 0.8542872172540767,
      "grad_norm": 37.65691375732422,
      "learning_rate": 4.6488155832847006e-05,
      "loss": 1.1044,
      "step": 1218
    },
    {
      "epoch": 0.8549886024899176,
      "grad_norm": 35.80206298828125,
      "learning_rate": 4.648074202153539e-05,
      "loss": 0.8649,
      "step": 1219
    },
    {
      "epoch": 0.8556899877257583,
      "grad_norm": 0.8690975308418274,
      "learning_rate": 4.647332098533208e-05,
      "loss": 0.0089,
      "step": 1220
    },
    {
      "epoch": 0.8563913729615992,
      "grad_norm": 1.0327463150024414,
      "learning_rate": 4.646589272673308e-05,
      "loss": 0.0109,
      "step": 1221
    },
    {
      "epoch": 0.8570927581974399,
      "grad_norm": 32.14468765258789,
      "learning_rate": 4.645845724823684e-05,
      "loss": 0.7778,
      "step": 1222
    },
    {
      "epoch": 0.8577941434332808,
      "grad_norm": 1.0274231433868408,
      "learning_rate": 4.645101455234422e-05,
      "loss": 0.0106,
      "step": 1223
    },
    {
      "epoch": 0.8584955286691215,
      "grad_norm": 29.724233627319336,
      "learning_rate": 4.644356464155852e-05,
      "loss": 0.6951,
      "step": 1224
    },
    {
      "epoch": 0.8591969139049623,
      "grad_norm": 1.3864684104919434,
      "learning_rate": 4.6436107518385465e-05,
      "loss": 0.0151,
      "step": 1225
    },
    {
      "epoch": 0.8598982991408031,
      "grad_norm": 1.3250905275344849,
      "learning_rate": 4.6428643185333196e-05,
      "loss": 0.0144,
      "step": 1226
    },
    {
      "epoch": 0.8605996843766439,
      "grad_norm": 29.683332443237305,
      "learning_rate": 4.6421171644912295e-05,
      "loss": 0.7007,
      "step": 1227
    },
    {
      "epoch": 0.8613010696124846,
      "grad_norm": 29.98341178894043,
      "learning_rate": 4.6413692899635764e-05,
      "loss": 0.6981,
      "step": 1228
    },
    {
      "epoch": 0.8620024548483255,
      "grad_norm": 30.50311279296875,
      "learning_rate": 4.6406206952019014e-05,
      "loss": 0.7341,
      "step": 1229
    },
    {
      "epoch": 0.8627038400841662,
      "grad_norm": 1.5321385860443115,
      "learning_rate": 4.6398713804579896e-05,
      "loss": 0.0171,
      "step": 1230
    },
    {
      "epoch": 0.863405225320007,
      "grad_norm": 25.173595428466797,
      "learning_rate": 4.6391213459838676e-05,
      "loss": 0.5393,
      "step": 1231
    },
    {
      "epoch": 0.8641066105558478,
      "grad_norm": 1.9056795835494995,
      "learning_rate": 4.638370592031804e-05,
      "loss": 0.0216,
      "step": 1232
    },
    {
      "epoch": 0.8648079957916885,
      "grad_norm": 54.3871955871582,
      "learning_rate": 4.63761911885431e-05,
      "loss": 1.1726,
      "step": 1233
    },
    {
      "epoch": 0.8655093810275294,
      "grad_norm": 2.3975331783294678,
      "learning_rate": 4.6368669267041364e-05,
      "loss": 0.0284,
      "step": 1234
    },
    {
      "epoch": 0.8662107662633701,
      "grad_norm": 2.3505735397338867,
      "learning_rate": 4.636114015834279e-05,
      "loss": 0.027,
      "step": 1235
    },
    {
      "epoch": 0.866912151499211,
      "grad_norm": 19.353313446044922,
      "learning_rate": 4.635360386497973e-05,
      "loss": 0.4174,
      "step": 1236
    },
    {
      "epoch": 0.8676135367350517,
      "grad_norm": 2.5313189029693604,
      "learning_rate": 4.6346060389486976e-05,
      "loss": 0.0301,
      "step": 1237
    },
    {
      "epoch": 0.8683149219708926,
      "grad_norm": 2.249114751815796,
      "learning_rate": 4.633850973440171e-05,
      "loss": 0.0262,
      "step": 1238
    },
    {
      "epoch": 0.8690163072067333,
      "grad_norm": 1.087750792503357,
      "learning_rate": 4.633095190226355e-05,
      "loss": 0.0116,
      "step": 1239
    },
    {
      "epoch": 0.8697176924425741,
      "grad_norm": 0.7214330434799194,
      "learning_rate": 4.63233868956145e-05,
      "loss": 0.0075,
      "step": 1240
    },
    {
      "epoch": 0.8704190776784149,
      "grad_norm": 0.5183669924736023,
      "learning_rate": 4.6315814716999003e-05,
      "loss": 0.0051,
      "step": 1241
    },
    {
      "epoch": 0.8711204629142556,
      "grad_norm": 0.26819220185279846,
      "learning_rate": 4.630823536896392e-05,
      "loss": 0.0026,
      "step": 1242
    },
    {
      "epoch": 0.8718218481500964,
      "grad_norm": 42.74302291870117,
      "learning_rate": 4.6300648854058496e-05,
      "loss": 1.4745,
      "step": 1243
    },
    {
      "epoch": 0.8725232333859372,
      "grad_norm": 0.06916595250368118,
      "learning_rate": 4.62930551748344e-05,
      "loss": 0.0006,
      "step": 1244
    },
    {
      "epoch": 0.873224618621778,
      "grad_norm": 0.04199717566370964,
      "learning_rate": 4.6285454333845714e-05,
      "loss": 0.0004,
      "step": 1245
    },
    {
      "epoch": 0.8739260038576188,
      "grad_norm": 46.651554107666016,
      "learning_rate": 4.6277846333648926e-05,
      "loss": 2.1377,
      "step": 1246
    },
    {
      "epoch": 0.8746273890934596,
      "grad_norm": 0.019042061641812325,
      "learning_rate": 4.6270231176802936e-05,
      "loss": 0.0002,
      "step": 1247
    },
    {
      "epoch": 0.8753287743293003,
      "grad_norm": 43.70697784423828,
      "learning_rate": 4.626260886586903e-05,
      "loss": 1.9754,
      "step": 1248
    },
    {
      "epoch": 0.8760301595651412,
      "grad_norm": 0.03968697413802147,
      "learning_rate": 4.625497940341094e-05,
      "loss": 0.0003,
      "step": 1249
    },
    {
      "epoch": 0.8767315448009819,
      "grad_norm": 0.023654410615563393,
      "learning_rate": 4.624734279199476e-05,
      "loss": 0.0002,
      "step": 1250
    },
    {
      "epoch": 0.8774329300368228,
      "grad_norm": 0.05060436204075813,
      "learning_rate": 4.623969903418903e-05,
      "loss": 0.0004,
      "step": 1251
    },
    {
      "epoch": 0.8781343152726635,
      "grad_norm": 0.062442123889923096,
      "learning_rate": 4.623204813256465e-05,
      "loss": 0.0006,
      "step": 1252
    },
    {
      "epoch": 0.8788357005085043,
      "grad_norm": 0.05655084550380707,
      "learning_rate": 4.622439008969497e-05,
      "loss": 0.0005,
      "step": 1253
    },
    {
      "epoch": 0.8795370857443451,
      "grad_norm": 0.05976495519280434,
      "learning_rate": 4.621672490815569e-05,
      "loss": 0.0005,
      "step": 1254
    },
    {
      "epoch": 0.8802384709801858,
      "grad_norm": 0.06660816818475723,
      "learning_rate": 4.620905259052496e-05,
      "loss": 0.0006,
      "step": 1255
    },
    {
      "epoch": 0.8809398562160267,
      "grad_norm": 45.397789001464844,
      "learning_rate": 4.6201373139383284e-05,
      "loss": 1.7976,
      "step": 1256
    },
    {
      "epoch": 0.8816412414518674,
      "grad_norm": 44.1479606628418,
      "learning_rate": 4.619368655731362e-05,
      "loss": 1.7646,
      "step": 1257
    },
    {
      "epoch": 0.8823426266877082,
      "grad_norm": 0.13133244216442108,
      "learning_rate": 4.618599284690127e-05,
      "loss": 0.0012,
      "step": 1258
    },
    {
      "epoch": 0.883044011923549,
      "grad_norm": 82.86041259765625,
      "learning_rate": 4.6178292010733984e-05,
      "loss": 2.727,
      "step": 1259
    },
    {
      "epoch": 0.8837453971593898,
      "grad_norm": 36.786067962646484,
      "learning_rate": 4.6170584051401846e-05,
      "loss": 1.0486,
      "step": 1260
    },
    {
      "epoch": 0.8844467823952306,
      "grad_norm": 43.94260787963867,
      "learning_rate": 4.6162868971497396e-05,
      "loss": 1.3111,
      "step": 1261
    },
    {
      "epoch": 0.8851481676310714,
      "grad_norm": 85.85136413574219,
      "learning_rate": 4.615514677361554e-05,
      "loss": 2.2235,
      "step": 1262
    },
    {
      "epoch": 0.8858495528669121,
      "grad_norm": 35.138553619384766,
      "learning_rate": 4.6147417460353596e-05,
      "loss": 0.6498,
      "step": 1263
    },
    {
      "epoch": 0.886550938102753,
      "grad_norm": 5.97971773147583,
      "learning_rate": 4.613968103431124e-05,
      "loss": 0.0857,
      "step": 1264
    },
    {
      "epoch": 0.8872523233385937,
      "grad_norm": 7.489769458770752,
      "learning_rate": 4.6131937498090575e-05,
      "loss": 0.1154,
      "step": 1265
    },
    {
      "epoch": 0.8879537085744345,
      "grad_norm": 9.91996955871582,
      "learning_rate": 4.612418685429608e-05,
      "loss": 0.1685,
      "step": 1266
    },
    {
      "epoch": 0.8886550938102753,
      "grad_norm": 2.7866861820220947,
      "learning_rate": 4.611642910553463e-05,
      "loss": 0.2187,
      "step": 1267
    },
    {
      "epoch": 0.889356479046116,
      "grad_norm": 9.682534217834473,
      "learning_rate": 4.610866425441549e-05,
      "loss": 0.1669,
      "step": 1268
    },
    {
      "epoch": 0.8900578642819569,
      "grad_norm": 8.847615242004395,
      "learning_rate": 4.6100892303550305e-05,
      "loss": 0.1458,
      "step": 1269
    },
    {
      "epoch": 0.8907592495177976,
      "grad_norm": 6.194153308868408,
      "learning_rate": 4.609311325555311e-05,
      "loss": 0.0885,
      "step": 1270
    },
    {
      "epoch": 0.8914606347536385,
      "grad_norm": 11.383880615234375,
      "learning_rate": 4.6085327113040346e-05,
      "loss": 0.2945,
      "step": 1271
    },
    {
      "epoch": 0.8921620199894792,
      "grad_norm": 41.971961975097656,
      "learning_rate": 4.607753387863082e-05,
      "loss": 0.8504,
      "step": 1272
    },
    {
      "epoch": 0.89286340522532,
      "grad_norm": 15.673029899597168,
      "learning_rate": 4.6069733554945725e-05,
      "loss": 0.3576,
      "step": 1273
    },
    {
      "epoch": 0.8935647904611608,
      "grad_norm": 2.994936466217041,
      "learning_rate": 4.606192614460865e-05,
      "loss": 0.0371,
      "step": 1274
    },
    {
      "epoch": 0.8942661756970016,
      "grad_norm": 15.890610694885254,
      "learning_rate": 4.6054111650245544e-05,
      "loss": 0.3475,
      "step": 1275
    },
    {
      "epoch": 0.8949675609328424,
      "grad_norm": 3.3848869800567627,
      "learning_rate": 4.604629007448479e-05,
      "loss": 0.0433,
      "step": 1276
    },
    {
      "epoch": 0.8956689461686832,
      "grad_norm": 26.11835289001465,
      "learning_rate": 4.603846141995708e-05,
      "loss": 0.5389,
      "step": 1277
    },
    {
      "epoch": 0.8963703314045239,
      "grad_norm": 3.1392459869384766,
      "learning_rate": 4.6030625689295546e-05,
      "loss": 0.0383,
      "step": 1278
    },
    {
      "epoch": 0.8970717166403647,
      "grad_norm": 3.49643611907959,
      "learning_rate": 4.6022782885135674e-05,
      "loss": 0.0445,
      "step": 1279
    },
    {
      "epoch": 0.8977731018762055,
      "grad_norm": 2.069392204284668,
      "learning_rate": 4.601493301011534e-05,
      "loss": 0.0228,
      "step": 1280
    },
    {
      "epoch": 0.8984744871120462,
      "grad_norm": 1.4139840602874756,
      "learning_rate": 4.6007076066874786e-05,
      "loss": 0.016,
      "step": 1281
    },
    {
      "epoch": 0.8991758723478871,
      "grad_norm": 0.8678575158119202,
      "learning_rate": 4.599921205805663e-05,
      "loss": 0.0092,
      "step": 1282
    },
    {
      "epoch": 0.8998772575837278,
      "grad_norm": 0.3188234567642212,
      "learning_rate": 4.5991340986305896e-05,
      "loss": 0.0031,
      "step": 1283
    },
    {
      "epoch": 0.9005786428195687,
      "grad_norm": 0.13479161262512207,
      "learning_rate": 4.598346285426994e-05,
      "loss": 0.0012,
      "step": 1284
    },
    {
      "epoch": 0.9012800280554094,
      "grad_norm": 44.85018539428711,
      "learning_rate": 4.5975577664598535e-05,
      "loss": 1.8893,
      "step": 1285
    },
    {
      "epoch": 0.9019814132912503,
      "grad_norm": 0.030871881172060966,
      "learning_rate": 4.596768541994379e-05,
      "loss": 0.0003,
      "step": 1286
    },
    {
      "epoch": 0.902682798527091,
      "grad_norm": 0.025855490937829018,
      "learning_rate": 4.595978612296021e-05,
      "loss": 0.0002,
      "step": 1287
    },
    {
      "epoch": 0.9033841837629318,
      "grad_norm": 46.28804397583008,
      "learning_rate": 4.595187977630467e-05,
      "loss": 2.4086,
      "step": 1288
    },
    {
      "epoch": 0.9040855689987726,
      "grad_norm": 46.17903518676758,
      "learning_rate": 4.5943966382636413e-05,
      "loss": 2.5441,
      "step": 1289
    },
    {
      "epoch": 0.9047869542346134,
      "grad_norm": 48.56827926635742,
      "learning_rate": 4.593604594461704e-05,
      "loss": 2.5474,
      "step": 1290
    },
    {
      "epoch": 0.9054883394704542,
      "grad_norm": 47.86493682861328,
      "learning_rate": 4.592811846491055e-05,
      "loss": 2.1781,
      "step": 1291
    },
    {
      "epoch": 0.9061897247062949,
      "grad_norm": 43.444766998291016,
      "learning_rate": 4.592018394618328e-05,
      "loss": 1.8371,
      "step": 1292
    },
    {
      "epoch": 0.9068911099421357,
      "grad_norm": 46.283565521240234,
      "learning_rate": 4.5912242391103944e-05,
      "loss": 1.5648,
      "step": 1293
    },
    {
      "epoch": 0.9075924951779765,
      "grad_norm": 0.2855938971042633,
      "learning_rate": 4.5904293802343646e-05,
      "loss": 0.0028,
      "step": 1294
    },
    {
      "epoch": 0.9082938804138173,
      "grad_norm": 44.066627502441406,
      "learning_rate": 4.589633818257582e-05,
      "loss": 1.3621,
      "step": 1295
    },
    {
      "epoch": 0.908995265649658,
      "grad_norm": 1.1252837181091309,
      "learning_rate": 4.588837553447628e-05,
      "loss": 0.0122,
      "step": 1296
    },
    {
      "epoch": 0.9096966508854989,
      "grad_norm": 1.7556012868881226,
      "learning_rate": 4.588040586072321e-05,
      "loss": 0.0199,
      "step": 1297
    },
    {
      "epoch": 0.9103980361213396,
      "grad_norm": 2.0839264392852783,
      "learning_rate": 4.5872429163997163e-05,
      "loss": 0.0248,
      "step": 1298
    },
    {
      "epoch": 0.9110994213571805,
      "grad_norm": 2.153773784637451,
      "learning_rate": 4.5864445446981025e-05,
      "loss": 0.0254,
      "step": 1299
    },
    {
      "epoch": 0.9118008065930212,
      "grad_norm": 2.1063156127929688,
      "learning_rate": 4.5856454712360067e-05,
      "loss": 0.0235,
      "step": 1300
    },
    {
      "epoch": 0.912502191828862,
      "grad_norm": 1.26010000705719,
      "learning_rate": 4.584845696282192e-05,
      "loss": 0.0134,
      "step": 1301
    },
    {
      "epoch": 0.9132035770647028,
      "grad_norm": 0.7219848036766052,
      "learning_rate": 4.584045220105656e-05,
      "loss": 0.0075,
      "step": 1302
    },
| { | |
| "epoch": 0.9139049623005435, | |
| "grad_norm": 0.3715592324733734, | |
| "learning_rate": 4.583244042975634e-05, | |
| "loss": 0.0037, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.9146063475363844, | |
| "grad_norm": 0.16978369653224945, | |
| "learning_rate": 4.582442165161596e-05, | |
| "loss": 0.0017, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.9153077327722251, | |
| "grad_norm": 0.10813824832439423, | |
| "learning_rate": 4.5816395869332476e-05, | |
| "loss": 0.001, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.916009118008066, | |
| "grad_norm": 0.05026814341545105, | |
| "learning_rate": 4.5808363085605296e-05, | |
| "loss": 0.0005, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.9167105032439067, | |
| "grad_norm": 0.025502556934952736, | |
| "learning_rate": 4.58003233031362e-05, | |
| "loss": 0.0002, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.9174118884797475, | |
| "grad_norm": 45.29407501220703, | |
| "learning_rate": 4.579227652462928e-05, | |
| "loss": 2.3544, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.9181132737155883, | |
| "grad_norm": 0.012745014391839504, | |
| "learning_rate": 4.578422275279105e-05, | |
| "loss": 0.0001, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.9188146589514291, | |
| "grad_norm": 0.009426114149391651, | |
| "learning_rate": 4.577616199033033e-05, | |
| "loss": 0.0001, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.9195160441872698, | |
| "grad_norm": 0.008077832870185375, | |
| "learning_rate": 4.5768094239958285e-05, | |
| "loss": 0.0001, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.9202174294231107, | |
| "grad_norm": 0.006128115579485893, | |
| "learning_rate": 4.576001950438845e-05, | |
| "loss": 0.0001, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.9209188146589514, | |
| "grad_norm": 88.39704895019531, | |
| "learning_rate": 4.575193778633671e-05, | |
| "loss": 5.5453, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.9216201998947923, | |
| "grad_norm": 45.73112869262695, | |
| "learning_rate": 4.574384908852128e-05, | |
| "loss": 2.724, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.922321585130633, | |
| "grad_norm": 45.08832550048828, | |
| "learning_rate": 4.573575341366274e-05, | |
| "loss": 2.5745, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.9230229703664737, | |
| "grad_norm": 0.01642024703323841, | |
| "learning_rate": 4.572765076448402e-05, | |
| "loss": 0.0001, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.9237243556023146, | |
| "grad_norm": 0.0225964467972517, | |
| "learning_rate": 4.5719541143710375e-05, | |
| "loss": 0.0002, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.9244257408381553, | |
| "grad_norm": 0.03809621185064316, | |
| "learning_rate": 4.571142455406943e-05, | |
| "loss": 0.0003, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.9251271260739962, | |
| "grad_norm": 42.60374450683594, | |
| "learning_rate": 4.5703300998291124e-05, | |
| "loss": 1.805, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.9258285113098369, | |
| "grad_norm": 0.08730126917362213, | |
| "learning_rate": 4.569517047910777e-05, | |
| "loss": 0.0008, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.9265298965456777, | |
| "grad_norm": 119.11152648925781, | |
| "learning_rate": 4.5687032999254e-05, | |
| "loss": 4.0878, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.9272312817815185, | |
| "grad_norm": 0.3243617117404938, | |
| "learning_rate": 4.567888856146681e-05, | |
| "loss": 0.0031, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.9279326670173593, | |
| "grad_norm": 0.4152386784553528, | |
| "learning_rate": 4.567073716848551e-05, | |
| "loss": 0.004, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.9286340522532001, | |
| "grad_norm": 30.812305450439453, | |
| "learning_rate": 4.566257882305176e-05, | |
| "loss": 0.7747, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.9293354374890409, | |
| "grad_norm": 30.550561904907227, | |
| "learning_rate": 4.565441352790957e-05, | |
| "loss": 0.7148, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.9300368227248816, | |
| "grad_norm": 79.6876220703125, | |
| "learning_rate": 4.564624128580528e-05, | |
| "loss": 1.728, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.9307382079607224, | |
| "grad_norm": 3.2627832889556885, | |
| "learning_rate": 4.5638062099487545e-05, | |
| "loss": 0.0427, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.9314395931965632, | |
| "grad_norm": 10.727757453918457, | |
| "learning_rate": 4.5629875971707395e-05, | |
| "loss": 0.2789, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.932140978432404, | |
| "grad_norm": 5.946654319763184, | |
| "learning_rate": 4.562168290521817e-05, | |
| "loss": 0.0887, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.9328423636682448, | |
| "grad_norm": 1.3037784099578857, | |
| "learning_rate": 4.561348290277554e-05, | |
| "loss": 0.1909, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.9335437489040855, | |
| "grad_norm": 8.197032928466797, | |
| "learning_rate": 4.560527596713752e-05, | |
| "loss": 0.1388, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.9342451341399264, | |
| "grad_norm": 8.003156661987305, | |
| "learning_rate": 4.559706210106446e-05, | |
| "loss": 0.1299, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.9349465193757671, | |
| "grad_norm": 13.647798538208008, | |
| "learning_rate": 4.558884130731903e-05, | |
| "loss": 0.3574, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.935647904611608, | |
| "grad_norm": 5.755825996398926, | |
| "learning_rate": 4.558061358866623e-05, | |
| "loss": 0.0832, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.9363492898474487, | |
| "grad_norm": 21.87740135192871, | |
| "learning_rate": 4.55723789478734e-05, | |
| "loss": 0.4666, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.9370506750832895, | |
| "grad_norm": 9.851075172424316, | |
| "learning_rate": 4.5564137387710196e-05, | |
| "loss": 0.2624, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.9377520603191303, | |
| "grad_norm": 4.983445644378662, | |
| "learning_rate": 4.555588891094862e-05, | |
| "loss": 0.0688, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.9384534455549711, | |
| "grad_norm": 4.145101547241211, | |
| "learning_rate": 4.554763352036296e-05, | |
| "loss": 0.0537, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.9391548307908119, | |
| "grad_norm": 4.348505020141602, | |
| "learning_rate": 4.5539371218729894e-05, | |
| "loss": 0.058, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.9398562160266526, | |
| "grad_norm": 16.51106834411621, | |
| "learning_rate": 4.5531102008828354e-05, | |
| "loss": 0.3685, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9405576012624934, | |
| "grad_norm": 2.958770513534546, | |
| "learning_rate": 4.5522825893439645e-05, | |
| "loss": 0.0375, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.9412589864983342, | |
| "grad_norm": 1.705260157585144, | |
| "learning_rate": 4.5514542875347376e-05, | |
| "loss": 0.0194, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.941960371734175, | |
| "grad_norm": 28.18739891052246, | |
| "learning_rate": 4.550625295733748e-05, | |
| "loss": 0.6777, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.9426617569700158, | |
| "grad_norm": 39.53862380981445, | |
| "learning_rate": 4.5497956142198214e-05, | |
| "loss": 1.1208, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.9433631422058566, | |
| "grad_norm": 1.0771280527114868, | |
| "learning_rate": 4.5489652432720145e-05, | |
| "loss": 0.0118, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.9440645274416973, | |
| "grad_norm": 96.40424346923828, | |
| "learning_rate": 4.548134183169617e-05, | |
| "loss": 2.4673, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.9447659126775382, | |
| "grad_norm": 0.5970985293388367, | |
| "learning_rate": 4.547302434192149e-05, | |
| "loss": 0.0061, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.9454672979133789, | |
| "grad_norm": 36.97736358642578, | |
| "learning_rate": 4.546469996619364e-05, | |
| "loss": 0.9705, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.9461686831492198, | |
| "grad_norm": 0.6519697308540344, | |
| "learning_rate": 4.545636870731247e-05, | |
| "loss": 0.0066, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.9468700683850605, | |
| "grad_norm": 0.4174421429634094, | |
| "learning_rate": 4.544803056808013e-05, | |
| "loss": 0.0042, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9475714536209012, | |
| "grad_norm": 0.4273722469806671, | |
| "learning_rate": 4.5439685551301094e-05, | |
| "loss": 0.0043, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.9482728388567421, | |
| "grad_norm": 0.3083280324935913, | |
| "learning_rate": 4.543133365978214e-05, | |
| "loss": 0.003, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.9489742240925828, | |
| "grad_norm": 0.2918597161769867, | |
| "learning_rate": 4.542297489633238e-05, | |
| "loss": 0.0028, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.9496756093284237, | |
| "grad_norm": 47.360557556152344, | |
| "learning_rate": 4.541460926376322e-05, | |
| "loss": 1.7171, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.9503769945642644, | |
| "grad_norm": 81.7466049194336, | |
| "learning_rate": 4.5406236764888374e-05, | |
| "loss": 2.8326, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.9510783798001052, | |
| "grad_norm": 39.265384674072266, | |
| "learning_rate": 4.539785740252388e-05, | |
| "loss": 1.3337, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.951779765035946, | |
| "grad_norm": 0.28257325291633606, | |
| "learning_rate": 4.5389471179488074e-05, | |
| "loss": 0.0027, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.9524811502717868, | |
| "grad_norm": 41.259849548339844, | |
| "learning_rate": 4.538107809860159e-05, | |
| "loss": 1.2651, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.9531825355076275, | |
| "grad_norm": 0.5483337640762329, | |
| "learning_rate": 4.537267816268739e-05, | |
| "loss": 0.0055, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.9538839207434684, | |
| "grad_norm": 0.9649966359138489, | |
| "learning_rate": 4.536427137457072e-05, | |
| "loss": 0.0098, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.9545853059793091, | |
| "grad_norm": 28.19986915588379, | |
| "learning_rate": 4.5355857737079166e-05, | |
| "loss": 0.6184, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.95528669121515, | |
| "grad_norm": 1.310431957244873, | |
| "learning_rate": 4.534743725304257e-05, | |
| "loss": 0.0142, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.9559880764509907, | |
| "grad_norm": 1.170506477355957, | |
| "learning_rate": 4.5339009925293105e-05, | |
| "loss": 0.0125, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.9566894616868314, | |
| "grad_norm": 1.135798454284668, | |
| "learning_rate": 4.533057575666526e-05, | |
| "loss": 0.0118, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.9573908469226723, | |
| "grad_norm": 0.7438721060752869, | |
| "learning_rate": 4.532213474999578e-05, | |
| "loss": 0.0077, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.958092232158513, | |
| "grad_norm": 0.46775007247924805, | |
| "learning_rate": 4.531368690812376e-05, | |
| "loss": 0.0046, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.9587936173943539, | |
| "grad_norm": 0.2829347848892212, | |
| "learning_rate": 4.530523223389056e-05, | |
| "loss": 0.0027, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.9594950026301946, | |
| "grad_norm": 0.12059792876243591, | |
| "learning_rate": 4.529677073013984e-05, | |
| "loss": 0.0011, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.9601963878660355, | |
| "grad_norm": 41.76416015625, | |
| "learning_rate": 4.5288302399717575e-05, | |
| "loss": 1.5052, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.9608977731018762, | |
| "grad_norm": 0.05551628768444061, | |
| "learning_rate": 4.5279827245472025e-05, | |
| "loss": 0.0005, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.961599158337717, | |
| "grad_norm": 44.579280853271484, | |
| "learning_rate": 4.527134527025374e-05, | |
| "loss": 1.9268, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.9623005435735578, | |
| "grad_norm": 92.77808380126953, | |
| "learning_rate": 4.526285647691559e-05, | |
| "loss": 3.8084, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.9630019288093986, | |
| "grad_norm": 0.06515678018331528, | |
| "learning_rate": 4.5254360868312694e-05, | |
| "loss": 0.0006, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.9637033140452393, | |
| "grad_norm": 44.31393814086914, | |
| "learning_rate": 4.5245858447302506e-05, | |
| "loss": 1.8936, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.9644046992810801, | |
| "grad_norm": 42.123191833496094, | |
| "learning_rate": 4.523734921674475e-05, | |
| "loss": 1.5489, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.9651060845169209, | |
| "grad_norm": 78.27152252197266, | |
| "learning_rate": 4.522883317950144e-05, | |
| "loss": 1.4444, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.9658074697527617, | |
| "grad_norm": 0.4181966185569763, | |
| "learning_rate": 4.5220310338436885e-05, | |
| "loss": 0.0041, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.9665088549886025, | |
| "grad_norm": 61.79771423339844, | |
| "learning_rate": 4.521178069641768e-05, | |
| "loss": 1.5033, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.9672102402244432, | |
| "grad_norm": 1.026915431022644, | |
| "learning_rate": 4.520324425631271e-05, | |
| "loss": 0.011, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.9679116254602841, | |
| "grad_norm": 1.1025463342666626, | |
| "learning_rate": 4.519470102099315e-05, | |
| "loss": 0.0117, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.9686130106961248, | |
| "grad_norm": 1.5587780475616455, | |
| "learning_rate": 4.518615099333243e-05, | |
| "loss": 0.018, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.9693143959319657, | |
| "grad_norm": 1.4015796184539795, | |
| "learning_rate": 4.517759417620633e-05, | |
| "loss": 0.016, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.9700157811678064, | |
| "grad_norm": 1.1269611120224, | |
| "learning_rate": 4.516903057249284e-05, | |
| "loss": 0.0124, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.9707171664036472, | |
| "grad_norm": 64.7952651977539, | |
| "learning_rate": 4.516046018507229e-05, | |
| "loss": 1.7053, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.971418551639488, | |
| "grad_norm": 36.87698745727539, | |
| "learning_rate": 4.515188301682725e-05, | |
| "loss": 0.8805, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.9721199368753288, | |
| "grad_norm": 39.0102653503418, | |
| "learning_rate": 4.5143299070642584e-05, | |
| "loss": 1.2212, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.9728213221111696, | |
| "grad_norm": 62.35078811645508, | |
| "learning_rate": 4.5134708349405457e-05, | |
| "loss": 1.6357, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.9735227073470103, | |
| "grad_norm": 31.161195755004883, | |
| "learning_rate": 4.5126110856005285e-05, | |
| "loss": 0.7379, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.9742240925828511, | |
| "grad_norm": 1.4460724592208862, | |
| "learning_rate": 4.511750659333379e-05, | |
| "loss": 0.0166, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.9749254778186919, | |
| "grad_norm": 1.6871577501296997, | |
| "learning_rate": 4.5108895564284924e-05, | |
| "loss": 0.0201, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.9756268630545327, | |
| "grad_norm": 18.601655960083008, | |
| "learning_rate": 4.510027777175497e-05, | |
| "loss": 0.3773, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.9763282482903735, | |
| "grad_norm": 24.591615676879883, | |
| "learning_rate": 4.509165321864245e-05, | |
| "loss": 0.6012, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.9770296335262143, | |
| "grad_norm": 2.3374860286712646, | |
| "learning_rate": 4.5083021907848164e-05, | |
| "loss": 0.0286, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.977731018762055, | |
| "grad_norm": 3.1211438179016113, | |
| "learning_rate": 4.50743838422752e-05, | |
| "loss": 0.0388, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.9784324039978959, | |
| "grad_norm": 15.115840911865234, | |
| "learning_rate": 4.50657390248289e-05, | |
| "loss": 0.3451, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9791337892337366, | |
| "grad_norm": 13.081830978393555, | |
| "learning_rate": 4.50570874584169e-05, | |
| "loss": 0.3082, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.9798351744695775, | |
| "grad_norm": 3.7496583461761475, | |
| "learning_rate": 4.5048429145949076e-05, | |
| "loss": 0.0503, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.9805365597054182, | |
| "grad_norm": 22.880462646484375, | |
| "learning_rate": 4.503976409033759e-05, | |
| "loss": 0.4432, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.9812379449412589, | |
| "grad_norm": 4.944215774536133, | |
| "learning_rate": 4.503109229449688e-05, | |
| "loss": 0.0706, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.9819393301770998, | |
| "grad_norm": 4.719092845916748, | |
| "learning_rate": 4.502241376134364e-05, | |
| "loss": 0.0667, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9826407154129405, | |
| "grad_norm": 4.378533363342285, | |
| "learning_rate": 4.5013728493796826e-05, | |
| "loss": 0.0611, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.9833421006487814, | |
| "grad_norm": 4.234534740447998, | |
| "learning_rate": 4.5005036494777666e-05, | |
| "loss": 0.055, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.9840434858846221, | |
| "grad_norm": 2.942084789276123, | |
| "learning_rate": 4.4996337767209656e-05, | |
| "loss": 0.0382, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.9847448711204629, | |
| "grad_norm": 1.5875838994979858, | |
| "learning_rate": 4.498763231401855e-05, | |
| "loss": 0.018, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.9854462563563037, | |
| "grad_norm": 53.9989128112793, | |
| "learning_rate": 4.497892013813235e-05, | |
| "loss": 1.2552, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9861476415921445, | |
| "grad_norm": 28.49068832397461, | |
| "learning_rate": 4.497020124248135e-05, | |
| "loss": 0.7336, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.9868490268279853, | |
| "grad_norm": 0.4417467713356018, | |
| "learning_rate": 4.496147562999807e-05, | |
| "loss": 0.0045, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.9875504120638261, | |
| "grad_norm": 104.99906921386719, | |
| "learning_rate": 4.4952743303617325e-05, | |
| "loss": 3.0756, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.9882517972996668, | |
| "grad_norm": 0.3293454647064209, | |
| "learning_rate": 4.4944004266276164e-05, | |
| "loss": 0.0033, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.9889531825355077, | |
| "grad_norm": 41.633243560791016, | |
| "learning_rate": 4.4935258520913894e-05, | |
| "loss": 1.3174, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.9896545677713484, | |
| "grad_norm": 0.23380160331726074, | |
| "learning_rate": 4.4926506070472085e-05, | |
| "loss": 0.0023, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.9903559530071891, | |
| "grad_norm": 0.44888490438461304, | |
| "learning_rate": 4.4917746917894556e-05, | |
| "loss": 0.0047, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.99105733824303, | |
| "grad_norm": 33.958248138427734, | |
| "learning_rate": 4.490898106612739e-05, | |
| "loss": 0.9667, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.9917587234788707, | |
| "grad_norm": 0.3081722855567932, | |
| "learning_rate": 4.490020851811892e-05, | |
| "loss": 0.003, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.9924601087147116, | |
| "grad_norm": 32.04133224487305, | |
| "learning_rate": 4.4891429276819726e-05, | |
| "loss": 0.9003, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9931614939505523, | |
| "grad_norm": 43.6740608215332, | |
| "learning_rate": 4.488264334518264e-05, | |
| "loss": 0.9288, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.9938628791863932, | |
| "grad_norm": 29.450580596923828, | |
| "learning_rate": 4.487385072616275e-05, | |
| "loss": 0.7131, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.9945642644222339, | |
| "grad_norm": 1.1718950271606445, | |
| "learning_rate": 4.486505142271739e-05, | |
| "loss": 0.0137, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.9952656496580747, | |
| "grad_norm": 1.5927923917770386, | |
| "learning_rate": 4.485624543780613e-05, | |
| "loss": 0.0192, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.9959670348939155, | |
| "grad_norm": 24.835670471191406, | |
| "learning_rate": 4.4847432774390807e-05, | |
| "loss": 0.5717, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9966684201297563, | |
| "grad_norm": 1.8400554656982422, | |
| "learning_rate": 4.4838613435435505e-05, | |
| "loss": 0.022, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.997369805365597, | |
| "grad_norm": 2.3654353618621826, | |
| "learning_rate": 4.482978742390654e-05, | |
| "loss": 0.0298, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.9980711906014378, | |
| "grad_norm": 24.44013023376465, | |
| "learning_rate": 4.482095474277247e-05, | |
| "loss": 0.5608, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.9987725758372786, | |
| "grad_norm": 2.5648183822631836, | |
| "learning_rate": 4.48121153950041e-05, | |
| "loss": 0.034, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.9994739610731194, | |
| "grad_norm": 2.0194385051727295, | |
| "learning_rate": 4.4803269383574507e-05, | |
| "loss": 0.0249, | |
| "step": 1425 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1425, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.863941975950848e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
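
A minimal sketch for loading and sanity-checking a state file like the one above, assuming it is saved as trainer_state.json. The filename and the 1.0 loss-spike threshold are assumptions; the key names ("log_history", "loss", "grad_norm", "max_steps") come from the file itself. Hugging Face's Trainer can write bare NaN/Infinity tokens for grad_norm, which strict parsers such as JavaScript's JSON.parse reject; Python's json module accepts them by default, mapping them to float("nan") and float("inf"), so no preprocessing is needed:

import json
import math

# Load the trainer state; bare NaN/Infinity tokens parse without error.
with open("trainer_state.json") as f:  # filename is an assumption
    state = json.load(f)

history = state["log_history"]

# Steps whose gradient norm is NaN or Infinity (skipped/overflowed updates).
non_finite = [h["step"] for h in history
              if not math.isfinite(h.get("grad_norm", 0.0))]

# Steps where the loss spiked; the 1.0 cutoff is an illustrative assumption.
spikes = [h["step"] for h in history if h["loss"] > 1.0]

print(f"{len(history)} logged steps of {state['max_steps']} max_steps")
print(f"non-finite grad norms at steps: {non_finite}")
print(f"loss spikes (> 1.0) at {len(spikes)} steps, e.g. {spikes[:10]}")

A scan like this makes the instability in the tail of this run easy to quantify: long stretches of near-zero loss punctuated by spikes above 1.0 with gradient norms in the 40-120 range (e.g. steps 1288-1293, 1313, 1321, 1372, 1408).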