{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.091220068415051,
  "eval_steps": 500,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.004561003420752566, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.0969, "step": 1},
    {"epoch": 0.009122006841505131, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1202, "step": 2},
    {"epoch": 0.013683010262257697, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.9863, "step": 3},
    {"epoch": 0.018244013683010263, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1056, "step": 4},
    {"epoch": 0.02280501710376283, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.1682, "step": 5},
    {"epoch": 0.027366020524515394, "grad_norm": 126.99530029296875, "learning_rate": 0.0, "loss": 2.16, "step": 6},
    {"epoch": 0.03192702394526796, "grad_norm": 136.298583984375, "learning_rate": 3.0303030303030305e-06, "loss": 2.1097, "step": 7},
    {"epoch": 0.036488027366020526, "grad_norm": 118.9027328491211, "learning_rate": 6.060606060606061e-06, "loss": 2.0874, "step": 8},
    {"epoch": 0.04104903078677309, "grad_norm": 95.98336791992188, "learning_rate": 9.090909090909091e-06, "loss": 2.0198, "step": 9},
    {"epoch": 0.04561003420752566, "grad_norm": 91.16958618164062, "learning_rate": 1.2121212121212122e-05, "loss": 1.9522, "step": 10},
    {"epoch": 0.05017103762827822, "grad_norm": 66.85772705078125, "learning_rate": 1.5151515151515153e-05, "loss": 1.6778, "step": 11},
    {"epoch": 0.05473204104903079, "grad_norm": 53.472843170166016, "learning_rate": 1.8181818181818182e-05, "loss": 1.4714, "step": 12},
    {"epoch": 0.059293044469783354, "grad_norm": 49.19029235839844, "learning_rate": 2.1212121212121215e-05, "loss": 1.294, "step": 13},
    {"epoch": 0.06385404789053592, "grad_norm": 50.0140266418457, "learning_rate": 2.4242424242424244e-05, "loss": 1.4804, "step": 14},
    {"epoch": 0.06841505131128849, "grad_norm": 46.8694953918457, "learning_rate": 2.7272727272727273e-05, "loss": 1.1335, "step": 15},
    {"epoch": 0.07297605473204105, "grad_norm": 43.30156326293945, "learning_rate": 3.0303030303030306e-05, "loss": 1.2229, "step": 16},
    {"epoch": 0.07753705815279362, "grad_norm": 51.07203674316406, "learning_rate": 3.3333333333333335e-05, "loss": 1.2547, "step": 17},
    {"epoch": 0.08209806157354618, "grad_norm": 62.249114990234375, "learning_rate": 3.6363636363636364e-05, "loss": 1.0567, "step": 18},
    {"epoch": 0.08665906499429875, "grad_norm": 47.2119026184082, "learning_rate": 3.939393939393939e-05, "loss": 0.9491, "step": 19},
    {"epoch": 0.09122006841505131, "grad_norm": 42.218753814697266, "learning_rate": 4.242424242424243e-05, "loss": 0.8794, "step": 20},
    {"epoch": 0.09578107183580388, "grad_norm": 42.218753814697266, "learning_rate": 4.545454545454546e-05, "loss": 1.1536, "step": 21},
    {"epoch": 0.10034207525655645, "grad_norm": 44.523101806640625, "learning_rate": 4.545454545454546e-05, "loss": 0.7646, "step": 22},
    {"epoch": 0.10490307867730901, "grad_norm": 63.81179428100586, "learning_rate": 4.848484848484849e-05, "loss": 0.9237, "step": 23},
    {"epoch": 0.10946408209806158, "grad_norm": 46.185333251953125, "learning_rate": 5.151515151515152e-05, "loss": 0.9475, "step": 24},
    {"epoch": 0.11402508551881414, "grad_norm": 50.09680938720703, "learning_rate": 5.4545454545454546e-05, "loss": 0.6559, "step": 25},
    {"epoch": 0.11858608893956671, "grad_norm": 57.93541717529297, "learning_rate": 5.757575757575758e-05, "loss": 0.9655, "step": 26},
    {"epoch": 0.12314709236031927, "grad_norm": 44.12418746948242, "learning_rate": 6.060606060606061e-05, "loss": 0.991, "step": 27},
    {"epoch": 0.12770809578107184, "grad_norm": 55.63926315307617, "learning_rate": 6.363636363636364e-05, "loss": 0.9469, "step": 28},
    {"epoch": 0.1322690992018244, "grad_norm": 52.04874038696289, "learning_rate": 6.666666666666667e-05, "loss": 0.765, "step": 29},
    {"epoch": 0.13683010262257697, "grad_norm": 78.61589813232422, "learning_rate": 6.96969696969697e-05, "loss": 0.8077, "step": 30},
    {"epoch": 0.14139110604332952, "grad_norm": 78.61589813232422, "learning_rate": 7.272727272727273e-05, "loss": 0.6946, "step": 31},
    {"epoch": 0.1459521094640821, "grad_norm": 45.660404205322266, "learning_rate": 7.272727272727273e-05, "loss": 0.8566, "step": 32},
    {"epoch": 0.15051311288483465, "grad_norm": 45.660404205322266, "learning_rate": 7.575757575757576e-05, "loss": 0.9645, "step": 33},
    {"epoch": 0.15507411630558723, "grad_norm": 45.660404205322266, "learning_rate": 7.575757575757576e-05, "loss": 0.8577, "step": 34},
    {"epoch": 0.15963511972633979, "grad_norm": 44.082706451416016, "learning_rate": 7.575757575757576e-05, "loss": 0.6715, "step": 35},
    {"epoch": 0.16419612314709237, "grad_norm": 723.3299560546875, "learning_rate": 7.878787878787879e-05, "loss": 0.9595, "step": 36},
    {"epoch": 0.16875712656784492, "grad_norm": 102.72968292236328, "learning_rate": 8.181818181818183e-05, "loss": 0.8705, "step": 37},
    {"epoch": 0.1733181299885975, "grad_norm": 51.577972412109375, "learning_rate": 8.484848484848486e-05, "loss": 0.9124, "step": 38},
    {"epoch": 0.17787913340935005, "grad_norm": 79.64832305908203, "learning_rate": 8.787878787878789e-05, "loss": 0.8608, "step": 39},
    {"epoch": 0.18244013683010263, "grad_norm": 74.03942108154297, "learning_rate": 9.090909090909092e-05, "loss": 0.7678, "step": 40},
    {"epoch": 0.18700114025085518, "grad_norm": 75.6192855834961, "learning_rate": 9.393939393939395e-05, "loss": 0.8841, "step": 41},
    {"epoch": 0.19156214367160776, "grad_norm": 151.26239013671875, "learning_rate": 9.696969696969698e-05, "loss": 0.6354, "step": 42},
    {"epoch": 0.1961231470923603, "grad_norm": 63.19050598144531, "learning_rate": 0.0001, "loss": 1.0635, "step": 43},
    {"epoch": 0.2006841505131129, "grad_norm": 69.78765869140625, "learning_rate": 9.999978327420663e-05, "loss": 0.5772, "step": 44},
    {"epoch": 0.20524515393386544, "grad_norm": 74.76192474365234, "learning_rate": 9.99991330987053e-05, "loss": 0.8419, "step": 45},
    {"epoch": 0.20980615735461802, "grad_norm": 65.3372802734375, "learning_rate": 9.999804947913241e-05, "loss": 0.7743, "step": 46},
    {"epoch": 0.21436716077537057, "grad_norm": 84.05085754394531, "learning_rate": 9.999653242488188e-05, "loss": 0.8496, "step": 47},
    {"epoch": 0.21892816419612315, "grad_norm": 45.334293365478516, "learning_rate": 9.999458194910512e-05, "loss": 0.779, "step": 48},
    {"epoch": 0.2234891676168757, "grad_norm": 59.37651443481445, "learning_rate": 9.999219806871085e-05, "loss": 0.7776, "step": 49},
    {"epoch": 0.22805017103762829, "grad_norm": 44.242713928222656, "learning_rate": 9.998938080436503e-05, "loss": 0.7422, "step": 50},
    {"epoch": 0.23261117445838084, "grad_norm": 76.85882568359375, "learning_rate": 9.998613018049059e-05, "loss": 0.8527, "step": 51},
    {"epoch": 0.23717217787913342, "grad_norm": 53.421348571777344, "learning_rate": 9.99824462252673e-05, "loss": 0.6793, "step": 52},
    {"epoch": 0.24173318129988597, "grad_norm": 74.96648406982422, "learning_rate": 9.997832897063148e-05, "loss": 0.645, "step": 53},
    {"epoch": 0.24629418472063855, "grad_norm": 36.90277099609375, "learning_rate": 9.997377845227576e-05, "loss": 0.6156, "step": 54},
    {"epoch": 0.2508551881413911, "grad_norm": 45.15210723876953, "learning_rate": 9.996879470964868e-05, "loss": 0.689, "step": 55},
    {"epoch": 0.2554161915621437, "grad_norm": 84.45231628417969, "learning_rate": 9.996337778595453e-05, "loss": 1.1516, "step": 56},
    {"epoch": 0.25997719498289623, "grad_norm": 74.82112121582031, "learning_rate": 9.995752772815274e-05, "loss": 0.7793, "step": 57},
    {"epoch": 0.2645381984036488, "grad_norm": 46.81865692138672, "learning_rate": 9.995124458695768e-05, "loss": 0.677, "step": 58},
    {"epoch": 0.2690992018244014, "grad_norm": 50.14031219482422, "learning_rate": 9.994452841683808e-05, "loss": 0.7934, "step": 59},
    {"epoch": 0.27366020524515394, "grad_norm": 70.68441772460938, "learning_rate": 9.993737927601663e-05, "loss": 0.694, "step": 60},
    {"epoch": 0.2782212086659065, "grad_norm": 41.65163040161133, "learning_rate": 9.992979722646948e-05, "loss": 0.6657, "step": 61},
    {"epoch": 0.28278221208665905, "grad_norm": 43.47587203979492, "learning_rate": 9.992178233392564e-05, "loss": 0.6069, "step": 62},
    {"epoch": 0.28734321550741165, "grad_norm": 46.3960075378418, "learning_rate": 9.991333466786648e-05, "loss": 0.7959, "step": 63},
    {"epoch": 0.2919042189281642, "grad_norm": 36.781761169433594, "learning_rate": 9.990445430152507e-05, "loss": 0.6845, "step": 64},
    {"epoch": 0.29646522234891676, "grad_norm": 47.75035095214844, "learning_rate": 9.989514131188559e-05, "loss": 0.9177, "step": 65},
    {"epoch": 0.3010262257696693, "grad_norm": 45.957950592041016, "learning_rate": 9.988539577968265e-05, "loss": 0.7967, "step": 66},
    {"epoch": 0.3055872291904219, "grad_norm": 26.79014015197754, "learning_rate": 9.987521778940058e-05, "loss": 0.4885, "step": 67},
    {"epoch": 0.31014823261117447, "grad_norm": 46.98130416870117, "learning_rate": 9.986460742927271e-05, "loss": 0.7922, "step": 68},
    {"epoch": 0.314709236031927, "grad_norm": 29.98710823059082, "learning_rate": 9.985356479128056e-05, "loss": 0.6239, "step": 69},
    {"epoch": 0.31927023945267957, "grad_norm": 29.05475616455078, "learning_rate": 9.984208997115312e-05, "loss": 0.5977, "step": 70},
    {"epoch": 0.3238312428734322, "grad_norm": 30.07978057861328, "learning_rate": 9.9830183068366e-05, "loss": 0.6691, "step": 71},
    {"epoch": 0.32839224629418473, "grad_norm": 44.7830810546875, "learning_rate": 9.981784418614048e-05, "loss": 0.8664, "step": 72},
    {"epoch": 0.3329532497149373, "grad_norm": 36.961578369140625, "learning_rate": 9.980507343144273e-05, "loss": 0.6482, "step": 73},
    {"epoch": 0.33751425313568983, "grad_norm": 40.433258056640625, "learning_rate": 9.979187091498284e-05, "loss": 0.7933, "step": 74},
    {"epoch": 0.34207525655644244, "grad_norm": 21.32088279724121, "learning_rate": 9.977823675121383e-05, "loss": 0.562, "step": 75},
    {"epoch": 0.346636259977195, "grad_norm": 36.999900817871094, "learning_rate": 9.97641710583307e-05, "loss": 0.7949, "step": 76},
    {"epoch": 0.35119726339794755, "grad_norm": 34.943058013916016, "learning_rate": 9.974967395826941e-05, "loss": 0.6669, "step": 77},
    {"epoch": 0.3557582668187001, "grad_norm": 40.3181037902832, "learning_rate": 9.973474557670575e-05, "loss": 0.65, "step": 78},
    {"epoch": 0.3603192702394527, "grad_norm": 26.141529083251953, "learning_rate": 9.971938604305435e-05, "loss": 0.5017, "step": 79},
    {"epoch": 0.36488027366020526, "grad_norm": 27.214019775390625, "learning_rate": 9.970359549046749e-05, "loss": 0.5175, "step": 80},
    {"epoch": 0.3694412770809578, "grad_norm": 30.315534591674805, "learning_rate": 9.968737405583396e-05, "loss": 0.6422, "step": 81},
    {"epoch": 0.37400228050171036, "grad_norm": 38.341400146484375, "learning_rate": 9.967072187977795e-05, "loss": 0.5456, "step": 82},
    {"epoch": 0.37856328392246297, "grad_norm": 24.291261672973633, "learning_rate": 9.965363910665761e-05, "loss": 0.512, "step": 83},
    {"epoch": 0.3831242873432155, "grad_norm": 27.774852752685547, "learning_rate": 9.963612588456412e-05, "loss": 0.5651, "step": 84},
    {"epoch": 0.38768529076396807, "grad_norm": 28.017744064331055, "learning_rate": 9.961818236532012e-05, "loss": 0.5831, "step": 85},
    {"epoch": 0.3922462941847206, "grad_norm": 52.88800811767578, "learning_rate": 9.959980870447854e-05, "loss": 0.609, "step": 86},
    {"epoch": 0.39680729760547323, "grad_norm": 41.79977035522461, "learning_rate": 9.958100506132127e-05, "loss": 0.9048, "step": 87},
    {"epoch": 0.4013683010262258, "grad_norm": 22.837493896484375, "learning_rate": 9.956177159885765e-05, "loss": 0.526, "step": 88},
    {"epoch": 0.40592930444697833, "grad_norm": 32.65010070800781, "learning_rate": 9.954210848382318e-05, "loss": 0.7481, "step": 89},
    {"epoch": 0.4104903078677309, "grad_norm": 27.46462631225586, "learning_rate": 9.952201588667804e-05, "loss": 0.56, "step": 90},
    {"epoch": 0.4150513112884835, "grad_norm": 24.279617309570312, "learning_rate": 9.950149398160562e-05, "loss": 0.568, "step": 91},
    {"epoch": 0.41961231470923605, "grad_norm": 27.96613311767578, "learning_rate": 9.94805429465109e-05, "loss": 0.5389, "step": 92},
    {"epoch": 0.4241733181299886, "grad_norm": 29.904308319091797, "learning_rate": 9.945916296301913e-05, "loss": 0.6791, "step": 93},
    {"epoch": 0.42873432155074115, "grad_norm": 28.36408042907715, "learning_rate": 9.943735421647404e-05, "loss": 0.6191, "step": 94},
    {"epoch": 0.43329532497149376, "grad_norm": 28.37791633605957, "learning_rate": 9.941511689593633e-05, "loss": 0.6578, "step": 95},
    {"epoch": 0.4378563283922463, "grad_norm": 29.4257869720459, "learning_rate": 9.939245119418207e-05, "loss": 0.7219, "step": 96},
    {"epoch": 0.44241733181299886, "grad_norm": 31.684606552124023, "learning_rate": 9.936935730770093e-05, "loss": 0.7971, "step": 97},
    {"epoch": 0.4469783352337514, "grad_norm": 41.304290771484375, "learning_rate": 9.934583543669453e-05, "loss": 0.5916, "step": 98},
    {"epoch": 0.45153933865450396, "grad_norm": 45.91229248046875, "learning_rate": 9.932188578507476e-05, "loss": 0.6905, "step": 99},
    {"epoch": 0.45610034207525657, "grad_norm": 18.211179733276367, "learning_rate": 9.929750856046187e-05, "loss": 0.4074, "step": 100},
    {"epoch": 0.4606613454960091, "grad_norm": 22.817419052124023, "learning_rate": 9.92727039741828e-05, "loss": 0.5474, "step": 101},
    {"epoch": 0.4652223489167617, "grad_norm": 17.524913787841797, "learning_rate": 9.924747224126932e-05, "loss": 0.3943, "step": 102},
    {"epoch": 0.4697833523375142, "grad_norm": 29.555734634399414, "learning_rate": 9.922181358045607e-05, "loss": 0.4851, "step": 103},
    {"epoch": 0.47434435575826683, "grad_norm": 17.645509719848633, "learning_rate": 9.919572821417886e-05, "loss": 0.4485, "step": 104},
    {"epoch": 0.4789053591790194, "grad_norm": 27.566722869873047, "learning_rate": 9.916921636857253e-05, "loss": 0.5754, "step": 105},
    {"epoch": 0.48346636259977194, "grad_norm": 23.578996658325195, "learning_rate": 9.91422782734691e-05, "loss": 0.5258, "step": 106},
    {"epoch": 0.4880273660205245, "grad_norm": 82.11543273925781, "learning_rate": 9.911491416239578e-05, "loss": 0.6878, "step": 107},
    {"epoch": 0.4925883694412771, "grad_norm": 23.49557876586914, "learning_rate": 9.908712427257291e-05, "loss": 0.6356, "step": 108},
    {"epoch": 0.49714937286202965, "grad_norm": 35.3658447265625, "learning_rate": 9.905890884491195e-05, "loss": 0.5214, "step": 109},
    {"epoch": 0.5017103762827823, "grad_norm": 31.83234214782715, "learning_rate": 9.903026812401333e-05, "loss": 0.4928, "step": 110},
    {"epoch": 0.5062713797035348, "grad_norm": 27.706256866455078, "learning_rate": 9.900120235816435e-05, "loss": 0.7318, "step": 111},
    {"epoch": 0.5108323831242874, "grad_norm": 34.21133041381836, "learning_rate": 9.897171179933707e-05, "loss": 0.4351, "step": 112},
    {"epoch": 0.5153933865450399, "grad_norm": 27.63327407836914, "learning_rate": 9.894179670318606e-05, "loss": 0.4016, "step": 113},
    {"epoch": 0.5199543899657925, "grad_norm": 19.61988639831543, "learning_rate": 9.891145732904627e-05, "loss": 0.4805, "step": 114},
    {"epoch": 0.5245153933865451, "grad_norm": 32.44860076904297, "learning_rate": 9.88806939399307e-05, "loss": 0.6809, "step": 115},
    {"epoch": 0.5290763968072976, "grad_norm": 32.945152282714844, "learning_rate": 9.884950680252811e-05, "loss": 0.7838, "step": 116},
    {"epoch": 0.5336374002280502, "grad_norm": 26.530765533447266, "learning_rate": 9.881789618720081e-05, "loss": 0.6608, "step": 117},
    {"epoch": 0.5381984036488028, "grad_norm": 29.559206008911133, "learning_rate": 9.878586236798222e-05, "loss": 0.6232, "step": 118},
    {"epoch": 0.5427594070695553, "grad_norm": 18.236879348754883, "learning_rate": 9.875340562257453e-05, "loss": 0.4325, "step": 119},
    {"epoch": 0.5473204104903079, "grad_norm": 16.536705017089844, "learning_rate": 9.872052623234632e-05, "loss": 0.3459, "step": 120},
    {"epoch": 0.5518814139110604, "grad_norm": 32.01934051513672, "learning_rate": 9.868722448233004e-05, "loss": 0.6023, "step": 121},
    {"epoch": 0.556442417331813, "grad_norm": 19.95067024230957, "learning_rate": 9.865350066121961e-05, "loss": 0.4983, "step": 122},
    {"epoch": 0.5610034207525656, "grad_norm": 17.09720802307129, "learning_rate": 9.861935506136793e-05, "loss": 0.5208, "step": 123},
    {"epoch": 0.5655644241733181, "grad_norm": 40.02044677734375, "learning_rate": 9.85847879787843e-05, "loss": 0.4965, "step": 124},
    {"epoch": 0.5701254275940707, "grad_norm": 24.024381637573242, "learning_rate": 9.854979971313182e-05, "loss": 0.6857, "step": 125},
    {"epoch": 0.5746864310148233, "grad_norm": 23.170162200927734, "learning_rate": 9.85143905677249e-05, "loss": 0.4628, "step": 126},
    {"epoch": 0.5792474344355758, "grad_norm": 32.29564666748047, "learning_rate": 9.847856084952653e-05, "loss": 0.5976, "step": 127},
    {"epoch": 0.5838084378563284, "grad_norm": 22.218772888183594, "learning_rate": 9.844231086914571e-05, "loss": 0.4436, "step": 128},
    {"epoch": 0.5883694412770809, "grad_norm": 16.03778648376465, "learning_rate": 9.84056409408346e-05, "loss": 0.4686, "step": 129},
    {"epoch": 0.5929304446978335, "grad_norm": 23.267412185668945, "learning_rate": 9.836855138248605e-05, "loss": 0.5099, "step": 130},
    {"epoch": 0.5974914481185861, "grad_norm": 38.55447006225586, "learning_rate": 9.833104251563056e-05, "loss": 0.6403, "step": 131},
    {"epoch": 0.6020524515393386, "grad_norm": 23.0700740814209, "learning_rate": 9.829311466543373e-05, "loss": 0.4528, "step": 132},
    {"epoch": 0.6066134549600912, "grad_norm": 18.824100494384766, "learning_rate": 9.825476816069326e-05, "loss": 0.5267, "step": 133},
    {"epoch": 0.6111744583808438, "grad_norm": 30.157243728637695, "learning_rate": 9.821600333383625e-05, "loss": 0.5274, "step": 134},
    {"epoch": 0.6157354618015963, "grad_norm": 17.902389526367188, "learning_rate": 9.817682052091618e-05, "loss": 0.4983, "step": 135},
    {"epoch": 0.6202964652223489, "grad_norm": 23.980289459228516, "learning_rate": 9.813722006161013e-05, "loss": 0.5766, "step": 136},
    {"epoch": 0.6248574686431014, "grad_norm": 18.97156524658203, "learning_rate": 9.809720229921572e-05, "loss": 0.4777, "step": 137},
    {"epoch": 0.629418472063854, "grad_norm": 21.262067794799805, "learning_rate": 9.805676758064821e-05, "loss": 0.6721, "step": 138},
    {"epoch": 0.6339794754846066, "grad_norm": 17.919252395629883, "learning_rate": 9.801591625643745e-05, "loss": 0.3766, "step": 139},
    {"epoch": 0.6385404789053591, "grad_norm": 14.64062213897705, "learning_rate": 9.797464868072488e-05, "loss": 0.4098, "step": 140},
    {"epoch": 0.6431014823261118, "grad_norm": 24.484783172607422, "learning_rate": 9.79329652112604e-05, "loss": 0.6009, "step": 141},
    {"epoch": 0.6476624857468644, "grad_norm": 23.56806182861328, "learning_rate": 9.789086620939936e-05, "loss": 0.5756, "step": 142},
    {"epoch": 0.6522234891676169, "grad_norm": 35.71012496948242, "learning_rate": 9.784835204009932e-05, "loss": 0.6669, "step": 143},
    {"epoch": 0.6567844925883695, "grad_norm": 25.027629852294922, "learning_rate": 9.780542307191698e-05, "loss": 0.7502, "step": 144},
    {"epoch": 0.661345496009122, "grad_norm": 20.506362915039062, "learning_rate": 9.77620796770049e-05, "loss": 0.5575, "step": 145},
    {"epoch": 0.6659064994298746, "grad_norm": 18.51320457458496, "learning_rate": 9.771832223110839e-05, "loss": 0.4005, "step": 146},
    {"epoch": 0.6704675028506272, "grad_norm": 17.1466007232666, "learning_rate": 9.76741511135621e-05, "loss": 0.513, "step": 147},
    {"epoch": 0.6750285062713797, "grad_norm": 17.755672454833984, "learning_rate": 9.762956670728685e-05, "loss": 0.5208, "step": 148},
    {"epoch": 0.6795895096921323, "grad_norm": 17.082569122314453, "learning_rate": 9.758456939878629e-05, "loss": 0.5182, "step": 149},
    {"epoch": 0.6841505131128849, "grad_norm": 19.6417179107666, "learning_rate": 9.753915957814352e-05, "loss": 0.6026, "step": 150},
    {"epoch": 0.6887115165336374, "grad_norm": 16.423038482666016, "learning_rate": 9.74933376390177e-05, "loss": 0.4535, "step": 151},
    {"epoch": 0.69327251995439, "grad_norm": 12.587494850158691, "learning_rate": 9.744710397864067e-05, "loss": 0.3239, "step": 152},
    {"epoch": 0.6978335233751425, "grad_norm": 16.246538162231445, "learning_rate": 9.740045899781352e-05, "loss": 0.4221, "step": 153},
    {"epoch": 0.7023945267958951, "grad_norm": 20.31894302368164, "learning_rate": 9.735340310090307e-05, "loss": 0.3357, "step": 154},
    {"epoch": 0.7069555302166477, "grad_norm": 39.6623420715332, "learning_rate": 9.730593669583836e-05, "loss": 0.5047, "step": 155},
    {"epoch": 0.7115165336374002, "grad_norm": 16.25356101989746, "learning_rate": 9.725806019410717e-05, "loss": 0.5985, "step": 156},
    {"epoch": 0.7160775370581528, "grad_norm": 18.692333221435547, "learning_rate": 9.720977401075242e-05, "loss": 0.4652, "step": 157},
    {"epoch": 0.7206385404789054, "grad_norm": 15.554421424865723, "learning_rate": 9.716107856436855e-05, "loss": 0.3915, "step": 158},
    {"epoch": 0.7251995438996579, "grad_norm": 14.215270042419434, "learning_rate": 9.711197427709796e-05, "loss": 0.4865, "step": 159},
    {"epoch": 0.7297605473204105, "grad_norm": 21.14404296875, "learning_rate": 9.706246157462726e-05, "loss": 0.4058, "step": 160},
    {"epoch": 0.734321550741163, "grad_norm": 24.879043579101562, "learning_rate": 9.701254088618362e-05, "loss": 0.4697, "step": 161},
    {"epoch": 0.7388825541619156, "grad_norm": 20.374792098999023, "learning_rate": 9.696221264453109e-05, "loss": 0.3389, "step": 162},
    {"epoch": 0.7434435575826682, "grad_norm": 21.55912208557129, "learning_rate": 9.69114772859668e-05, "loss": 0.5669, "step": 163},
    {"epoch": 0.7480045610034207, "grad_norm": 13.084688186645508, "learning_rate": 9.686033525031719e-05, "loss": 0.3422, "step": 164},
    {"epoch": 0.7525655644241733, "grad_norm": 16.64484405517578, "learning_rate": 9.680878698093417e-05, "loss": 0.5166, "step": 165},
    {"epoch": 0.7571265678449259, "grad_norm": 18.273216247558594, "learning_rate": 9.675683292469132e-05, "loss": 0.5568, "step": 166},
    {"epoch": 0.7616875712656784, "grad_norm": 26.35822868347168, "learning_rate": 9.670447353198e-05, "loss": 0.6115, "step": 167},
    {"epoch": 0.766248574686431, "grad_norm": 18.36164665222168, "learning_rate": 9.665170925670548e-05, "loss": 0.3441, "step": 168},
    {"epoch": 0.7708095781071835, "grad_norm": 17.316587448120117, "learning_rate": 9.659854055628291e-05, "loss": 0.451, "step": 169},
    {"epoch": 0.7753705815279361, "grad_norm": 23.489328384399414, "learning_rate": 9.654496789163345e-05, "loss": 0.5535, "step": 170},
    {"epoch": 0.7799315849486887, "grad_norm": 19.27407455444336, "learning_rate": 9.649099172718021e-05, "loss": 0.5019, "step": 171},
    {"epoch": 0.7844925883694412, "grad_norm": 11.64967155456543, "learning_rate": 9.643661253084431e-05, "loss": 0.3258, "step": 172},
    {"epoch": 0.7890535917901939, "grad_norm": 19.625473022460938, "learning_rate": 9.638183077404069e-05, "loss": 0.3288, "step": 173},
    {"epoch": 0.7936145952109465, "grad_norm": 18.685232162475586, "learning_rate": 9.632664693167416e-05, "loss": 0.3644, "step": 174},
    {"epoch": 0.798175598631699, "grad_norm": 26.637842178344727, "learning_rate": 9.627106148213522e-05, "loss": 0.644, "step": 175},
    {"epoch": 0.8027366020524516, "grad_norm": 23.631614685058594, "learning_rate": 9.621507490729585e-05, "loss": 0.3727, "step": 176},
    {"epoch": 0.8072976054732041, "grad_norm": 15.275004386901855, "learning_rate": 9.615868769250546e-05, "loss": 0.3924, "step": 177},
    {"epoch": 0.8118586088939567, "grad_norm": 15.47768497467041, "learning_rate": 9.610190032658663e-05, "loss": 0.4487, "step": 178},
    {"epoch": 0.8164196123147093, "grad_norm": 14.71181583404541, "learning_rate": 9.604471330183083e-05, "loss": 0.32, "step": 179},
    {"epoch": 0.8209806157354618, "grad_norm": 9.841964721679688, "learning_rate": 9.598712711399416e-05, "loss": 0.2505, "step": 180},
    {"epoch": 0.8255416191562144, "grad_norm": 14.165433883666992, "learning_rate": 9.592914226229314e-05, "loss": 0.4393, "step": 181},
    {"epoch": 0.830102622576967, "grad_norm": 23.092065811157227, "learning_rate": 9.587075924940028e-05, "loss": 0.4625, "step": 182},
    {"epoch": 0.8346636259977195, "grad_norm": 17.662593841552734, "learning_rate": 9.581197858143978e-05, "loss": 0.4665, "step": 183},
    {"epoch": 0.8392246294184721, "grad_norm": 16.949716567993164, "learning_rate": 9.575280076798309e-05, "loss": 0.4784, "step": 184},
    {"epoch": 0.8437856328392246, "grad_norm": 18.952489852905273, "learning_rate": 9.569322632204458e-05, "loss": 0.3888, "step": 185},
    {"epoch": 0.8483466362599772, "grad_norm": 16.350954055786133, "learning_rate": 9.563325576007701e-05, "loss": 0.3935, "step": 186},
    {"epoch": 0.8529076396807298, "grad_norm": 16.852394104003906, "learning_rate": 9.557288960196707e-05, "loss": 0.4025, "step": 187},
    {"epoch": 0.8574686431014823, "grad_norm": 14.821525573730469, "learning_rate": 9.551212837103092e-05, "loss": 0.3752, "step": 188},
    {"epoch": 0.8620296465222349, "grad_norm": 20.433399200439453, "learning_rate": 9.545097259400958e-05, "loss": 0.3219, "step": 189},
    {"epoch": 0.8665906499429875, "grad_norm": 11.883235931396484, "learning_rate": 9.538942280106443e-05, "loss": 0.3892, "step": 190},
    {"epoch": 0.87115165336374, "grad_norm": 14.423933029174805, "learning_rate": 9.53274795257726e-05, "loss": 0.3798, "step": 191},
    {"epoch": 0.8757126567844926, "grad_norm": 15.010958671569824, "learning_rate": 9.526514330512225e-05, "loss": 0.3801, "step": 192},
    {"epoch": 0.8802736602052451, "grad_norm": 21.800418853759766, "learning_rate": 9.520241467950811e-05, "loss": 0.4404, "step": 193},
    {"epoch": 0.8848346636259977, "grad_norm": 15.904304504394531, "learning_rate": 9.513929419272662e-05, "loss": 0.3278, "step": 194},
    {"epoch": 0.8893956670467503, "grad_norm": 10.985480308532715, "learning_rate": 9.507578239197126e-05, "loss": 0.2883, "step": 195},
    {"epoch": 0.8939566704675028, "grad_norm": 10.487696647644043, "learning_rate": 9.501187982782785e-05, "loss": 0.2636, "step": 196},
    {"epoch": 0.8985176738882554, "grad_norm": 19.759944915771484, "learning_rate": 9.494758705426978e-05, "loss": 0.3749, "step": 197},
    {"epoch": 0.9030786773090079, "grad_norm": 17.322166442871094, "learning_rate": 9.48829046286531e-05, "loss": 0.4068, "step": 198},
    {"epoch": 0.9076396807297605, "grad_norm": 15.3864107131958, "learning_rate": 9.481783311171183e-05, "loss": 0.3576, "step": 199},
    {"epoch": 0.9122006841505131, "grad_norm": 13.966897964477539, "learning_rate": 9.475237306755302e-05, "loss": 0.4239, "step": 200},
    {"epoch": 0.9167616875712656, "grad_norm": 14.596879005432129, "learning_rate": 9.468652506365187e-05, "loss": 0.3637, "step": 201},
    {"epoch": 0.9213226909920182, "grad_norm": 20.099353790283203, "learning_rate": 9.46202896708468e-05, "loss": 0.5008, "step": 202},
    {"epoch": 0.9258836944127709, "grad_norm": 14.773473739624023, "learning_rate": 9.455366746333454e-05, "loss": 0.3506, "step": 203},
    {"epoch": 0.9304446978335233, "grad_norm": 18.689729690551758, "learning_rate": 9.448665901866514e-05, "loss": 0.4078, "step": 204},
    {"epoch": 0.935005701254276, "grad_norm": 13.453817367553711, "learning_rate": 9.441926491773691e-05, "loss": 0.3253, "step": 205},
    {"epoch": 0.9395667046750285, "grad_norm": 14.93052864074707, "learning_rate": 9.435148574479144e-05, "loss": 0.3576, "step": 206},
    {"epoch": 0.9441277080957811, "grad_norm": 11.697999000549316, "learning_rate": 9.428332208740857e-05, "loss": 0.3115, "step": 207},
    {"epoch": 0.9486887115165337, "grad_norm": 13.518777847290039, "learning_rate": 9.421477453650118e-05, "loss": 0.364, "step": 208},
    {"epoch": 0.9532497149372862, "grad_norm": 10.434165000915527, "learning_rate": 9.414584368631019e-05, "loss": 0.2677, "step": 209},
    {"epoch": 0.9578107183580388, "grad_norm": 16.765907287597656, "learning_rate": 9.407653013439928e-05, "loss": 0.5504, "step": 210},
    {"epoch": 0.9623717217787914, "grad_norm": 10.962894439697266, "learning_rate": 9.400683448164987e-05, "loss": 0.2913, "step": 211},
    {"epoch": 0.9669327251995439, "grad_norm": 27.222328186035156, "learning_rate": 9.393675733225578e-05, "loss": 0.6258, "step": 212},
    {"epoch": 0.9714937286202965, "grad_norm": 17.89396095275879, "learning_rate": 9.386629929371804e-05, "loss": 0.3468, "step": 213},
    {"epoch": 0.976054732041049, "grad_norm": 11.917913436889648, "learning_rate": 9.379546097683962e-05, "loss": 0.3384, "step": 214},
    {"epoch": 0.9806157354618016, "grad_norm": 16.30259895324707, "learning_rate": 9.372424299572013e-05, "loss": 0.4395, "step": 215},
    {"epoch": 0.9851767388825542, "grad_norm": 19.039505004882812, "learning_rate": 9.365264596775051e-05, "loss": 0.4235, "step": 216},
    {"epoch": 0.9897377423033067, "grad_norm": 21.45336151123047, "learning_rate": 9.35806705136077e-05, "loss": 0.3146, "step": 217},
    {"epoch": 0.9942987457240593, "grad_norm": 13.630745887756348, "learning_rate": 9.350831725724916e-05, "loss": 0.3927, "step": 218},
    {"epoch": 0.9988597491448119, "grad_norm": 13.76926326751709, "learning_rate": 9.343558682590756e-05, "loss": 0.3581, "step": 219},
    {"epoch": 1.0, "grad_norm": 14.069113731384277, "learning_rate": 9.336247985008534e-05, "loss": 0.2267, "step": 220},
    {"epoch": 1.0045610034207526, "grad_norm": 9.834161758422852, "learning_rate": 9.328899696354918e-05, "loss": 0.2113, "step": 221},
    {"epoch": 1.0091220068415052, "grad_norm": 8.437716484069824, "learning_rate": 9.321513880332458e-05, "loss": 0.2404, "step": 222},
    {"epoch": 1.0136830102622576, "grad_norm": 10.78850269317627, "learning_rate": 9.314090600969024e-05, "loss": 0.1706, "step": 223},
    {"epoch": 1.0182440136830102, "grad_norm": 10.366409301757812, "learning_rate": 9.306629922617261e-05, "loss": 0.2395, "step": 224},
    {"epoch": 1.0228050171037628, "grad_norm": 14.871321678161621, "learning_rate": 9.29913190995403e-05, "loss": 0.2993, "step": 225},
    {"epoch": 1.0273660205245154, "grad_norm": 12.021495819091797, "learning_rate": 9.291596627979836e-05, "loss": 0.2149, "step": 226},
    {"epoch": 1.031927023945268, "grad_norm": 14.372687339782715, "learning_rate": 9.284024142018281e-05, "loss": 0.2743, "step": 227},
    {"epoch": 1.0364880273660204, "grad_norm": 16.323156356811523, "learning_rate": 9.276414517715484e-05, "loss": 0.343, "step": 228},
    {"epoch": 1.041049030786773, "grad_norm": 14.0962495803833, "learning_rate": 9.268767821039521e-05, "loss": 0.2017, "step": 229},
    {"epoch": 1.0456100342075256, "grad_norm": 13.36721420288086, "learning_rate": 9.261084118279847e-05, "loss": 0.2844, "step": 230},
    {"epoch": 1.0501710376282782, "grad_norm": 16.017093658447266, "learning_rate": 9.253363476046725e-05, "loss": 0.2139, "step": 231},
    {"epoch": 1.0547320410490308, "grad_norm": 13.20304012298584, "learning_rate": 9.245605961270649e-05, "loss": 0.1957, "step": 232},
    {"epoch": 1.0592930444697835, "grad_norm": 11.656867027282715, "learning_rate": 9.23781164120176e-05, "loss": 0.2862, "step": 233},
    {"epoch": 1.0638540478905358, "grad_norm": 22.73741340637207, "learning_rate": 9.229980583409266e-05, "loss": 0.5163, "step": 234},
    {"epoch": 1.0684150513112884, "grad_norm": 13.677047729492188, "learning_rate": 9.222112855780856e-05, "loss": 0.304, "step": 235},
    {"epoch": 1.072976054732041, "grad_norm": 18.445669174194336, "learning_rate": 9.214208526522109e-05, "loss": 0.4152, "step": 236},
    {"epoch": 1.0775370581527937, "grad_norm": 7.721029758453369, "learning_rate": 9.206267664155907e-05, "loss": 0.1688, "step": 237},
    {"epoch": 1.0820980615735463, "grad_norm": 14.50108528137207, "learning_rate": 9.198290337521838e-05, "loss": 0.3409, "step": 238},
    {"epoch": 1.0866590649942987, "grad_norm": 9.616985321044922, "learning_rate": 9.190276615775599e-05, "loss": 0.212, "step": 239},
    {"epoch": 1.0912200684150513, "grad_norm": 10.23328685760498, "learning_rate": 9.182226568388401e-05, "loss": 0.2361, "step": 240},
    {"epoch": 1.0957810718358039, "grad_norm": 8.819774627685547, "learning_rate": 9.174140265146356e-05, "loss": 0.2378, "step": 241},
    {"epoch": 1.1003420752565565, "grad_norm": 9.800360679626465, "learning_rate": 9.166017776149887e-05, "loss": 0.1975, "step": 242},
    {"epoch": 1.104903078677309, "grad_norm": 14.380069732666016, "learning_rate": 9.157859171813107e-05, "loss": 0.1747, "step": 243},
    {"epoch": 1.1094640820980617, "grad_norm": 11.026459693908691, "learning_rate": 9.149664522863217e-05, "loss": 0.2154, "step": 244},
    {"epoch": 1.114025085518814, "grad_norm": 14.39684009552002, "learning_rate": 9.141433900339887e-05, "loss": 0.2274, "step": 245},
    {"epoch": 1.1185860889395667, "grad_norm": 12.926016807556152, "learning_rate": 9.133167375594647e-05, "loss": 0.2368, "step": 246},
    {"epoch": 1.1231470923603193, "grad_norm": 11.235928535461426, "learning_rate": 9.12486502029026e-05, "loss": 0.1921, "step": 247},
    {"epoch": 1.127708095781072, "grad_norm": 8.581406593322754, "learning_rate": 9.11652690640011e-05, "loss": 0.1573, "step": 248},
    {"epoch": 1.1322690992018245, "grad_norm": 12.511028289794922, "learning_rate": 9.10815310620757e-05, "loss": 0.2574, "step": 249},
    {"epoch": 1.1368301026225769, "grad_norm": 13.827507019042969, "learning_rate": 9.099743692305379e-05, "loss": 0.2751, "step": 250},
    {"epoch": 1.1413911060433295, "grad_norm": 16.066164016723633, "learning_rate": 9.091298737595014e-05, "loss": 0.2848, "step": 251},
    {"epoch": 1.145952109464082, "grad_norm": 19.021018981933594, "learning_rate": 9.082818315286055e-05, "loss": 0.2962, "step": 252},
    {"epoch": 1.1505131128848347, "grad_norm": 10.119819641113281, "learning_rate": 9.074302498895552e-05, "loss": 0.2158, "step": 253},
    {"epoch": 1.1550741163055873, "grad_norm": 11.308869361877441, "learning_rate": 9.065751362247388e-05, "loss": 0.2406, "step": 254},
    {"epoch": 1.1596351197263397, "grad_norm": 13.046134948730469, "learning_rate": 9.057164979471635e-05, "loss": 0.2534, "step": 255},
    {"epoch": 1.1641961231470923, "grad_norm": 12.656744003295898, "learning_rate": 9.048543425003923e-05, "loss": 0.2888, "step": 256},
    {"epoch": 1.168757126567845, "grad_norm": 11.619269371032715, "learning_rate": 9.039886773584779e-05, "loss": 0.2209, "step": 257},
    {"epoch": 1.1733181299885975, "grad_norm": 9.45288372039795, "learning_rate": 9.031195100258987e-05, "loss": 0.1455, "step": 258},
    {"epoch": 1.1778791334093501, "grad_norm": 8.505230903625488, "learning_rate": 9.02246848037494e-05, "loss": 0.1666, "step": 259},
    {"epoch": 1.1824401368301025, "grad_norm": 18.330678939819336, "learning_rate": 9.013706989583983e-05, "loss": 0.2517, "step": 260},
    {"epoch": 1.1870011402508551, "grad_norm": 14.25640869140625, "learning_rate": 9.00491070383976e-05, "loss": 0.3547, "step": 261},
    {"epoch": 1.1915621436716077, "grad_norm": 9.561500549316406, "learning_rate": 8.996079699397547e-05, "loss": 0.2168, "step": 262},
    {"epoch": 1.1961231470923603, "grad_norm": 12.185916900634766, "learning_rate": 8.987214052813604e-05, "loss": 0.1639, "step": 263},
    {"epoch": 1.200684150513113, "grad_norm": 10.289037704467773, "learning_rate": 8.978313840944503e-05, "loss": 0.1805, "step": 264},
    {"epoch": 1.2052451539338653, "grad_norm": 10.741447448730469, "learning_rate": 8.969379140946464e-05, "loss": 0.2754, "step": 265},
    {"epoch": 1.209806157354618, "grad_norm": 10.507063865661621, "learning_rate": 8.960410030274681e-05, "loss": 0.2606, "step": 266},
    {"epoch": 1.2143671607753705, "grad_norm": 15.26578140258789, "learning_rate": 8.951406586682662e-05, "loss": 0.3271, "step": 267},
    {"epoch": 1.2189281641961232, "grad_norm": 12.20109748840332, "learning_rate": 8.942368888221545e-05, "loss": 0.2345, "step": 268},
    {"epoch": 1.2234891676168758, "grad_norm": 9.984328269958496, "learning_rate": 8.933297013239424e-05, "loss": 0.1968, "step": 269},
    {"epoch": 1.2280501710376284, "grad_norm": 12.456114768981934, "learning_rate": 8.924191040380671e-05, "loss": 0.2624, "step": 270},
    {"epoch": 1.2326111744583808, "grad_norm": 11.822625160217285, "learning_rate": 8.915051048585256e-05, "loss": 0.2642, "step": 271},
    {"epoch": 1.2371721778791334, "grad_norm": 14.391879081726074, "learning_rate": 8.905877117088054e-05, "loss": 0.2378, "step": 272},
    {"epoch": 1.241733181299886, "grad_norm": 10.977947235107422, "learning_rate": 8.896669325418172e-05, "loss": 0.2302, "step": 273},
    {"epoch": 1.2462941847206386, "grad_norm": 10.284666061401367, "learning_rate": 8.887427753398248e-05, "loss": 0.2304, "step": 274},
    {"epoch": 1.2508551881413912, "grad_norm": 12.455047607421875, "learning_rate": 8.87815248114376e-05, "loss": 0.2586, "step": 275},
    {"epoch": 1.2554161915621438, "grad_norm": 8.457530975341797, "learning_rate": 8.868843589062339e-05, "loss": 0.1605, "step": 276},
    {"epoch": 1.2599771949828962, "grad_norm": 9.678699493408203, "learning_rate": 8.859501157853066e-05, "loss": 0.1834, "step": 277},
    {"epoch": 1.2645381984036488, "grad_norm": 10.811609268188477, "learning_rate": 8.850125268505774e-05, "loss": 0.2116, "step": 278},
    {"epoch": 1.2690992018244014, "grad_norm": 17.398542404174805, "learning_rate": 8.840716002300347e-05, "loss": 0.2112, "step": 279},
    {"epoch": 1.273660205245154, "grad_norm": 20.152509689331055, "learning_rate": 8.831273440806009e-05, "loss": 0.2475, "step": 280},
    {"epoch": 1.2782212086659066, "grad_norm": 9.431918144226074, "learning_rate": 8.821797665880625e-05, "loss": 0.1543, "step": 281},
    {"epoch": 1.282782212086659, "grad_norm": 16.22393798828125, "learning_rate": 8.812288759669994e-05, "loss": 0.2396, "step": 282},
    {"epoch": 1.2873432155074116, "grad_norm": 22.462894439697266, "learning_rate": 8.802746804607118e-05, "loss": 0.4583, "step": 283},
    {"epoch": 1.2919042189281642, "grad_norm": 9.348506927490234, "learning_rate": 8.793171883411515e-05, "loss": 0.1537, "step": 284},
    {"epoch": 1.2964652223489168, "grad_norm": 11.54738998413086, "learning_rate": 8.783564079088477e-05, "loss": 0.1724, "step": 285},
    {"epoch": 1.3010262257696694, "grad_norm": 14.7079439163208, "learning_rate": 8.773923474928365e-05, "loss": 0.2363, "step": 286},
    {"epoch": 1.3055872291904218, "grad_norm": 10.920402526855469, "learning_rate": 8.764250154505885e-05, "loss": 0.2441, "step": 287},
    {"epoch": 1.3101482326111744, "grad_norm": 13.201237678527832, "learning_rate": 8.754544201679353e-05, "loss": 0.2623, "step": 288},
    {"epoch": 1.314709236031927, "grad_norm": 8.716439247131348, "learning_rate": 8.744805700589989e-05, "loss": 0.2039, "step": 289},
    {"epoch": 1.3192702394526796, "grad_norm": 18.450355529785156, "learning_rate": 8.735034735661162e-05, "loss": 0.2247, "step": 290},
    {"epoch": 1.3238312428734322, "grad_norm": 9.224489212036133, "learning_rate": 8.725231391597681e-05, "loss": 0.2552, "step": 291},
    {"epoch": 1.3283922462941846, "grad_norm": 6.751110553741455, "learning_rate": 8.715395753385048e-05, "loss": 0.1663, "step": 292},
    {"epoch": 1.3329532497149372, "grad_norm": 12.301689147949219, "learning_rate": 8.705527906288718e-05, "loss": 0.2175, "step": 293},
    {"epoch": 1.3375142531356898, "grad_norm": 10.624826431274414, "learning_rate": 8.695627935853373e-05, "loss": 0.1919, "step": 294},
    {"epoch": 1.3420752565564424, "grad_norm": 11.931551933288574, "learning_rate": 8.68569592790217e-05, "loss": 0.2923, "step": 295},
    {"epoch": 1.346636259977195, "grad_norm": 10.668767929077148, "learning_rate": 8.675731968536002e-05, "loss": 0.2388, "step": 296},
    {"epoch": 1.3511972633979474, "grad_norm": 12.659479141235352, "learning_rate": 8.66573614413275e-05, "loss": 0.2413, "step": 297},
    {"epoch": 1.3557582668187, "grad_norm": 17.342519760131836, "learning_rate": 8.655708541346533e-05, "loss": 0.2613, "step": 298},
    {"epoch": 1.3603192702394526, "grad_norm": 11.727388381958008, "learning_rate": 8.645649247106955e-05, "loss": 0.2109, "step": 299},
    {"epoch": 1.3648802736602053, "grad_norm": 12.136419296264648, "learning_rate": 8.635558348618359e-05, "loss": 0.2467, "step": 300},
    {"epoch": 1.3694412770809579, "grad_norm": 10.73682975769043, "learning_rate": 8.625435933359062e-05, "loss": 0.1937, "step": 301},
    {"epoch": 1.3740022805017102, "grad_norm": 11.996789932250977, "learning_rate": 8.615282089080609e-05, "loss": 0.2655, "step": 302},
    {"epoch": 1.378563283922463, "grad_norm": 15.302202224731445, "learning_rate": 8.605096903806991e-05, "loss": 0.2487, "step": 303},
    {"epoch": 1.3831242873432155, "grad_norm": 9.55948543548584, "learning_rate": 8.594880465833908e-05, "loss": 0.1708, "step": 304},
    {"epoch": 1.387685290763968, "grad_norm": 12.158270835876465, "learning_rate": 8.584632863727982e-05, "loss": 0.2452, "step": 305},
    {"epoch": 1.3922462941847207, "grad_norm": 8.76557731628418, "learning_rate": 8.574354186326001e-05, "loss": 0.193, "step": 306},
    {"epoch": 1.3968072976054733, "grad_norm": 9.248729705810547, "learning_rate": 8.564044522734147e-05, "loss": 0.2264, "step": 307},
    {"epoch": 1.401368301026226, "grad_norm": 9.079157829284668, "learning_rate": 8.55370396232722e-05, "loss": 0.1809, "step": 308},
    {"epoch": 1.4059293044469783, "grad_norm": 9.482975959777832, "learning_rate": 8.543332594747865e-05, "loss": 0.1772, "step": 309},
    {"epoch": 1.4104903078677309, "grad_norm": 9.198452949523926, "learning_rate": 8.532930509905799e-05, "loss": 0.2047, "step": 310},
    {"epoch": 1.4150513112884835, "grad_norm": 8.783437728881836, "learning_rate": 8.522497797977024e-05, "loss": 0.2247, "step": 311},
    {"epoch": 1.419612314709236, "grad_norm": 14.365094184875488, "learning_rate": 8.512034549403053e-05, "loss": 0.2208, "step": 312},
    {"epoch": 1.4241733181299887, "grad_norm": 14.370891571044922, "learning_rate": 8.501540854890118e-05, "loss": 0.2326, "step": 313},
    {"epoch": 1.428734321550741, "grad_norm": 10.391968727111816, "learning_rate": 8.491016805408387e-05, "loss": 0.1751, "step": 314},
    {"epoch": 1.4332953249714937, "grad_norm": 11.512178421020508, "learning_rate": 8.480462492191186e-05, "loss": 0.2978, "step": 315},
    {"epoch": 1.4378563283922463, "grad_norm": 12.58578872680664, "learning_rate": 8.469878006734185e-05, "loss": 0.2706, "step": 316},
    {"epoch": 1.442417331812999, "grad_norm": 8.530269622802734, "learning_rate": 8.459263440794627e-05, "loss": 0.1755, "step": 317},
    {"epoch": 1.4469783352337515, "grad_norm": 8.248932838439941, "learning_rate": 8.448618886390522e-05, "loss": 0.1483, "step": 318},
    {"epoch": 1.451539338654504, "grad_norm": 18.134685516357422, "learning_rate": 8.437944435799848e-05, "loss": 0.1938, "step": 319},
    {"epoch": 1.4561003420752565, "grad_norm": 8.072942733764648, "learning_rate": 8.427240181559754e-05, "loss": 0.1573, "step": 320},
    {"epoch": 1.4606613454960091, "grad_norm": 11.139336585998535, "learning_rate": 8.416506216465765e-05, "loss": 0.2272, "step": 321},
    {"epoch": 1.4652223489167617, "grad_norm": 12.186053276062012, "learning_rate": 8.405742633570961e-05, "loss": 0.1716, "step": 322},
    {"epoch": 1.4697833523375143, "grad_norm": 10.145633697509766, "learning_rate": 8.394949526185185e-05, "loss": 0.1913, "step": 323},
    {"epoch": 1.4743443557582667, "grad_norm": 9.266544342041016, "learning_rate": 8.384126987874228e-05, "loss": 0.1642, "step": 324},
    {"epoch": 1.4789053591790193, "grad_norm": 11.050301551818848, "learning_rate": 8.373275112459016e-05, "loss": 0.2253, "step": 325},
    {"epoch": 1.483466362599772, "grad_norm": 8.425420761108398, "learning_rate": 8.362393994014805e-05, "loss": 0.1826, "step": 326},
    {"epoch": 1.4880273660205245, "grad_norm": 14.498648643493652, "learning_rate": 8.35148372687035e-05, "loss": 0.2432, "step": 327},
    {"epoch": 1.4925883694412772, "grad_norm": 14.982590675354004, "learning_rate": 8.340544405607111e-05, "loss": 0.1724, "step": 328},
    {"epoch": 1.4971493728620295, "grad_norm": 11.120532035827637, "learning_rate": 8.329576125058406e-05, "loss": 0.1461, "step": 329},
    {"epoch": 1.5017103762827824, "grad_norm": 17.683189392089844, "learning_rate": 8.318578980308609e-05, "loss": 0.3342, "step": 330},
    {"epoch": 1.5062713797035348, "grad_norm": 17.191091537475586, "learning_rate": 8.307553066692314e-05, "loss": 0.2188, "step": 331},
    {"epoch": 1.5108323831242874, "grad_norm": 8.582752227783203, "learning_rate": 8.29649847979352e-05, "loss": 0.1141, "step": 332},
    {"epoch": 1.51539338654504, "grad_norm": 9.13406753540039, "learning_rate": 8.28541531544479e-05, "loss": 0.1767, "step": 333},
    {"epoch": 1.5199543899657924, "grad_norm": 8.726181030273438, "learning_rate": 8.274303669726426e-05, "loss": 0.1348, "step": 334},
    {"epoch": 1.5245153933865452, "grad_norm": 10.707447052001953, "learning_rate": 8.263163638965639e-05, "loss": 0.2005, "step": 335},
    {"epoch": 1.5290763968072976, "grad_norm": 12.0310640335083, "learning_rate": 8.25199531973571e-05, "loss": 0.1985, "step": 336},
    {"epoch": 1.5336374002280502, "grad_norm": 9.672492027282715, "learning_rate": 8.24079880885515e-05, "loss": 0.2014, "step": 337},
    {"epoch": 1.5381984036488028, "grad_norm": 9.297097206115723, "learning_rate": 8.22957420338687e-05, "loss": 0.1302, "step": 338},
    {"epoch": 1.5427594070695552, "grad_norm": 25.988061904907227, "learning_rate": 8.218321600637329e-05, "loss": 0.2899, "step": 339},
    {"epoch": 1.547320410490308, "grad_norm": 9.74842643737793, "learning_rate": 8.2070410981557e-05, "loss": 0.1612, "step": 340},
    {"epoch": 1.5518814139110604, "grad_norm": 10.73891544342041, "learning_rate": 8.195732793733014e-05, "loss": 0.2282, "step": 341},
    {"epoch": 1.556442417331813, "grad_norm": 15.269837379455566, "learning_rate": 8.184396785401322e-05, "loss": 0.1585, "step": 342},
    {"epoch": 1.5610034207525656, "grad_norm": 7.805790901184082, "learning_rate": 8.173033171432841e-05,
| "loss": 0.1117, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.565564424173318, | |
| "grad_norm": 9.819446563720703, | |
| "learning_rate": 8.1616420503391e-05, | |
| "loss": 0.2143, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.5701254275940708, | |
| "grad_norm": 8.949931144714355, | |
| "learning_rate": 8.15022352087009e-05, | |
| "loss": 0.2139, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.5746864310148232, | |
| "grad_norm": 14.177704811096191, | |
| "learning_rate": 8.138777682013403e-05, | |
| "loss": 0.2733, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.5792474344355758, | |
| "grad_norm": 10.694663047790527, | |
| "learning_rate": 8.127304632993382e-05, | |
| "loss": 0.1532, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.5838084378563284, | |
| "grad_norm": 14.421151161193848, | |
| "learning_rate": 8.115804473270253e-05, | |
| "loss": 0.1349, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.5883694412770808, | |
| "grad_norm": 9.572623252868652, | |
| "learning_rate": 8.104277302539264e-05, | |
| "loss": 0.1852, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.5929304446978336, | |
| "grad_norm": 8.018699645996094, | |
| "learning_rate": 8.092723220729825e-05, | |
| "loss": 0.1398, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.597491448118586, | |
| "grad_norm": 10.331695556640625, | |
| "learning_rate": 8.081142328004637e-05, | |
| "loss": 0.1678, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.6020524515393386, | |
| "grad_norm": 8.879880905151367, | |
| "learning_rate": 8.069534724758827e-05, | |
| "loss": 0.1527, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.6066134549600912, | |
| "grad_norm": 11.865134239196777, | |
| "learning_rate": 8.057900511619076e-05, | |
| "loss": 0.174, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.6111744583808438, | |
| "grad_norm": 20.736913681030273, | |
| "learning_rate": 8.046239789442749e-05, | |
| "loss": 0.14, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.6157354618015964, | |
| "grad_norm": 8.29340648651123, | |
| "learning_rate": 8.034552659317012e-05, | |
| "loss": 0.1924, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.6202964652223488, | |
| "grad_norm": 14.969886779785156, | |
| "learning_rate": 8.02283922255797e-05, | |
| "loss": 0.1776, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.6248574686431014, | |
| "grad_norm": 41.689517974853516, | |
| "learning_rate": 8.011099580709778e-05, | |
| "loss": 0.1337, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.629418472063854, | |
| "grad_norm": 9.815425872802734, | |
| "learning_rate": 7.999333835543763e-05, | |
| "loss": 0.1959, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.6339794754846066, | |
| "grad_norm": 12.40318775177002, | |
| "learning_rate": 7.987542089057542e-05, | |
| "loss": 0.1968, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.6385404789053593, | |
| "grad_norm": 8.287771224975586, | |
| "learning_rate": 7.975724443474143e-05, | |
| "loss": 0.1082, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.6431014823261116, | |
| "grad_norm": 9.289151191711426, | |
| "learning_rate": 7.963881001241107e-05, | |
| "loss": 0.1176, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.6476624857468645, | |
| "grad_norm": 12.972766876220703, | |
| "learning_rate": 7.952011865029614e-05, | |
| "loss": 0.2185, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.6522234891676169, | |
| "grad_norm": 11.908880233764648, | |
| "learning_rate": 7.940117137733579e-05, | |
| "loss": 0.177, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.6567844925883695, | |
| "grad_norm": 8.70804500579834, | |
| "learning_rate": 7.928196922468772e-05, | |
| "loss": 0.143, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.661345496009122, | |
| "grad_norm": 11.03876781463623, | |
| "learning_rate": 7.916251322571918e-05, | |
| "loss": 0.1837, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.6659064994298745, | |
| "grad_norm": 12.949993133544922, | |
| "learning_rate": 7.904280441599801e-05, | |
| "loss": 0.1652, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.6704675028506273, | |
| "grad_norm": 10.857973098754883, | |
| "learning_rate": 7.892284383328367e-05, | |
| "loss": 0.1575, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6750285062713797, | |
| "grad_norm": 8.548442840576172, | |
| "learning_rate": 7.88026325175183e-05, | |
| "loss": 0.125, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.6795895096921323, | |
| "grad_norm": 7.094759464263916, | |
| "learning_rate": 7.868217151081755e-05, | |
| "loss": 0.132, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.6841505131128849, | |
| "grad_norm": 8.124651908874512, | |
| "learning_rate": 7.856146185746175e-05, | |
| "loss": 0.164, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6887115165336373, | |
| "grad_norm": 10.263216018676758, | |
| "learning_rate": 7.844050460388671e-05, | |
| "loss": 0.1476, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.69327251995439, | |
| "grad_norm": 12.262899398803711, | |
| "learning_rate": 7.831930079867469e-05, | |
| "loss": 0.1952, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6978335233751425, | |
| "grad_norm": 8.35619831085205, | |
| "learning_rate": 7.819785149254532e-05, | |
| "loss": 0.1429, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.702394526795895, | |
| "grad_norm": 10.857168197631836, | |
| "learning_rate": 7.807615773834652e-05, | |
| "loss": 0.1307, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.7069555302166477, | |
| "grad_norm": 7.174655914306641, | |
| "learning_rate": 7.795422059104527e-05, | |
| "loss": 0.1304, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.7115165336374, | |
| "grad_norm": 14.329642295837402, | |
| "learning_rate": 7.78320411077186e-05, | |
| "loss": 0.1997, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.716077537058153, | |
| "grad_norm": 8.973917961120605, | |
| "learning_rate": 7.77096203475443e-05, | |
| "loss": 0.1583, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.7206385404789053, | |
| "grad_norm": 6.451292514801025, | |
| "learning_rate": 7.758695937179185e-05, | |
| "loss": 0.1201, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.725199543899658, | |
| "grad_norm": 6.503537178039551, | |
| "learning_rate": 7.746405924381313e-05, | |
| "loss": 0.0973, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.7297605473204105, | |
| "grad_norm": 6.6744608879089355, | |
| "learning_rate": 7.734092102903323e-05, | |
| "loss": 0.1019, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.734321550741163, | |
| "grad_norm": 9.03148365020752, | |
| "learning_rate": 7.721754579494127e-05, | |
| "loss": 0.1592, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.7388825541619157, | |
| "grad_norm": 7.3868632316589355, | |
| "learning_rate": 7.709393461108107e-05, | |
| "loss": 0.1457, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.7434435575826681, | |
| "grad_norm": 11.139805793762207, | |
| "learning_rate": 7.697008854904191e-05, | |
| "loss": 0.1512, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.7480045610034207, | |
| "grad_norm": 9.616064071655273, | |
| "learning_rate": 7.68460086824492e-05, | |
| "loss": 0.194, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.7525655644241733, | |
| "grad_norm": 8.999774932861328, | |
| "learning_rate": 7.672169608695525e-05, | |
| "loss": 0.1654, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.757126567844926, | |
| "grad_norm": 12.37429141998291, | |
| "learning_rate": 7.659715184022994e-05, | |
| "loss": 0.2122, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.7616875712656785, | |
| "grad_norm": 9.472933769226074, | |
| "learning_rate": 7.647237702195123e-05, | |
| "loss": 0.1587, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.766248574686431, | |
| "grad_norm": 10.54593563079834, | |
| "learning_rate": 7.634737271379603e-05, | |
| "loss": 0.2103, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.7708095781071835, | |
| "grad_norm": 6.688052654266357, | |
| "learning_rate": 7.622213999943062e-05, | |
| "loss": 0.0989, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.7753705815279361, | |
| "grad_norm": 10.251477241516113, | |
| "learning_rate": 7.609667996450141e-05, | |
| "loss": 0.2219, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7799315849486887, | |
| "grad_norm": 6.267465591430664, | |
| "learning_rate": 7.59709936966254e-05, | |
| "loss": 0.0958, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.7844925883694414, | |
| "grad_norm": 8.224940299987793, | |
| "learning_rate": 7.584508228538085e-05, | |
| "loss": 0.1312, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.7890535917901937, | |
| "grad_norm": 12.063385963439941, | |
| "learning_rate": 7.571894682229775e-05, | |
| "loss": 0.1833, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.7936145952109466, | |
| "grad_norm": 8.511308670043945, | |
| "learning_rate": 7.559258840084848e-05, | |
| "loss": 0.1442, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.798175598631699, | |
| "grad_norm": 8.30827522277832, | |
| "learning_rate": 7.546600811643816e-05, | |
| "loss": 0.1438, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.8027366020524516, | |
| "grad_norm": 11.280699729919434, | |
| "learning_rate": 7.533920706639531e-05, | |
| "loss": 0.2558, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.8072976054732042, | |
| "grad_norm": 6.188623905181885, | |
| "learning_rate": 7.521218634996226e-05, | |
| "loss": 0.1072, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.8118586088939566, | |
| "grad_norm": 15.961888313293457, | |
| "learning_rate": 7.508494706828564e-05, | |
| "loss": 0.1619, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.8164196123147094, | |
| "grad_norm": 9.33893871307373, | |
| "learning_rate": 7.49574903244068e-05, | |
| "loss": 0.1995, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.8209806157354618, | |
| "grad_norm": 12.080733299255371, | |
| "learning_rate": 7.482981722325232e-05, | |
| "loss": 0.1647, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.8255416191562144, | |
| "grad_norm": 8.13494873046875, | |
| "learning_rate": 7.470192887162435e-05, | |
| "loss": 0.1278, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.830102622576967, | |
| "grad_norm": 9.168209075927734, | |
| "learning_rate": 7.457382637819108e-05, | |
| "loss": 0.1244, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.8346636259977194, | |
| "grad_norm": 8.094377517700195, | |
| "learning_rate": 7.444551085347707e-05, | |
| "loss": 0.1066, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.8392246294184722, | |
| "grad_norm": 9.4691743850708, | |
| "learning_rate": 7.43169834098537e-05, | |
| "loss": 0.1378, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.8437856328392246, | |
| "grad_norm": 9.721671104431152, | |
| "learning_rate": 7.418824516152943e-05, | |
| "loss": 0.1247, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.8483466362599772, | |
| "grad_norm": 8.888439178466797, | |
| "learning_rate": 7.405929722454026e-05, | |
| "loss": 0.1256, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.8529076396807298, | |
| "grad_norm": 7.914821147918701, | |
| "learning_rate": 7.393014071673992e-05, | |
| "loss": 0.1293, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.8574686431014822, | |
| "grad_norm": 10.728132247924805, | |
| "learning_rate": 7.380077675779027e-05, | |
| "loss": 0.1738, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.862029646522235, | |
| "grad_norm": 13.084773063659668, | |
| "learning_rate": 7.36712064691516e-05, | |
| "loss": 0.1478, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.8665906499429874, | |
| "grad_norm": 6.1122660636901855, | |
| "learning_rate": 7.354143097407283e-05, | |
| "loss": 0.084, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.87115165336374, | |
| "grad_norm": 16.69349479675293, | |
| "learning_rate": 7.341145139758185e-05, | |
| "loss": 0.1161, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.8757126567844926, | |
| "grad_norm": 18.03197479248047, | |
| "learning_rate": 7.328126886647575e-05, | |
| "loss": 0.2555, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.880273660205245, | |
| "grad_norm": 5.904569149017334, | |
| "learning_rate": 7.315088450931103e-05, | |
| "loss": 0.0978, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.8848346636259978, | |
| "grad_norm": 8.850961685180664, | |
| "learning_rate": 7.302029945639377e-05, | |
| "loss": 0.1636, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.8893956670467502, | |
| "grad_norm": 8.666600227355957, | |
| "learning_rate": 7.288951483976998e-05, | |
| "loss": 0.1544, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.8939566704675028, | |
| "grad_norm": 8.048266410827637, | |
| "learning_rate": 7.275853179321565e-05, | |
| "loss": 0.1148, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8985176738882554, | |
| "grad_norm": 9.665177345275879, | |
| "learning_rate": 7.262735145222696e-05, | |
| "loss": 0.1452, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.9030786773090078, | |
| "grad_norm": 6.529131889343262, | |
| "learning_rate": 7.249597495401043e-05, | |
| "loss": 0.0976, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.9076396807297606, | |
| "grad_norm": 6.697221755981445, | |
| "learning_rate": 7.236440343747313e-05, | |
| "loss": 0.1207, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.912200684150513, | |
| "grad_norm": 7.641704559326172, | |
| "learning_rate": 7.223263804321269e-05, | |
| "loss": 0.1102, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.9167616875712656, | |
| "grad_norm": 5.448543071746826, | |
| "learning_rate": 7.21006799135075e-05, | |
| "loss": 0.0969, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.9213226909920182, | |
| "grad_norm": 12.550832748413086, | |
| "learning_rate": 7.196853019230676e-05, | |
| "loss": 0.1629, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.9258836944127709, | |
| "grad_norm": 7.084536552429199, | |
| "learning_rate": 7.183619002522062e-05, | |
| "loss": 0.1378, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.9304446978335235, | |
| "grad_norm": 8.91976547241211, | |
| "learning_rate": 7.170366055951017e-05, | |
| "loss": 0.1177, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.9350057012542758, | |
| "grad_norm": 9.238527297973633, | |
| "learning_rate": 7.157094294407756e-05, | |
| "loss": 0.1522, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.9395667046750285, | |
| "grad_norm": 8.187129974365234, | |
| "learning_rate": 7.143803832945601e-05, | |
| "loss": 0.1134, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.944127708095781, | |
| "grad_norm": 7.621769905090332, | |
| "learning_rate": 7.130494786779987e-05, | |
| "loss": 0.1011, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.9486887115165337, | |
| "grad_norm": 11.57784652709961, | |
| "learning_rate": 7.117167271287453e-05, | |
| "loss": 0.1254, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.9532497149372863, | |
| "grad_norm": 8.105171203613281, | |
| "learning_rate": 7.103821402004654e-05, | |
| "loss": 0.0994, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.9578107183580387, | |
| "grad_norm": 10.137523651123047, | |
| "learning_rate": 7.090457294627358e-05, | |
| "loss": 0.0976, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.9623717217787915, | |
| "grad_norm": 5.640718936920166, | |
| "learning_rate": 7.077075065009433e-05, | |
| "loss": 0.0887, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.9669327251995439, | |
| "grad_norm": 10.016772270202637, | |
| "learning_rate": 7.063674829161853e-05, | |
| "loss": 0.1036, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.9714937286202965, | |
| "grad_norm": 8.870481491088867, | |
| "learning_rate": 7.050256703251688e-05, | |
| "loss": 0.0973, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.976054732041049, | |
| "grad_norm": 7.390217304229736, | |
| "learning_rate": 7.036820803601099e-05, | |
| "loss": 0.0966, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.9806157354618015, | |
| "grad_norm": 9.348631858825684, | |
| "learning_rate": 7.023367246686323e-05, | |
| "loss": 0.1119, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.9851767388825543, | |
| "grad_norm": 7.239314556121826, | |
| "learning_rate": 7.009896149136674e-05, | |
| "loss": 0.1167, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.9897377423033067, | |
| "grad_norm": 7.269038200378418, | |
| "learning_rate": 6.996407627733526e-05, | |
| "loss": 0.123, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.9942987457240593, | |
| "grad_norm": 10.885858535766602, | |
| "learning_rate": 6.982901799409294e-05, | |
| "loss": 0.1401, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.998859749144812, | |
| "grad_norm": 13.753951072692871, | |
| "learning_rate": 6.969378781246436e-05, | |
| "loss": 0.0994, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 21.25026512145996, | |
| "learning_rate": 6.955838690476426e-05, | |
| "loss": 0.1222, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.0045610034207524, | |
| "grad_norm": 6.62643575668335, | |
| "learning_rate": 6.942281644478739e-05, | |
| "loss": 0.0698, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.009122006841505, | |
| "grad_norm": 7.465953826904297, | |
| "learning_rate": 6.928707760779838e-05, | |
| "loss": 0.0795, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.0136830102622576, | |
| "grad_norm": 7.87470006942749, | |
| "learning_rate": 6.915117157052149e-05, | |
| "loss": 0.0733, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.0182440136830104, | |
| "grad_norm": 7.7158966064453125, | |
| "learning_rate": 6.90150995111305e-05, | |
| "loss": 0.0761, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.022805017103763, | |
| "grad_norm": 9.388237953186035, | |
| "learning_rate": 6.887886260923842e-05, | |
| "loss": 0.111, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.027366020524515, | |
| "grad_norm": 10.703797340393066, | |
| "learning_rate": 6.874246204588724e-05, | |
| "loss": 0.1158, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.031927023945268, | |
| "grad_norm": 6.636610507965088, | |
| "learning_rate": 6.860589900353778e-05, | |
| "loss": 0.078, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.0364880273660204, | |
| "grad_norm": 6.754958629608154, | |
| "learning_rate": 6.84691746660594e-05, | |
| "loss": 0.0676, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.0410490307867732, | |
| "grad_norm": 8.061761856079102, | |
| "learning_rate": 6.833229021871974e-05, | |
| "loss": 0.0781, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.0456100342075256, | |
| "grad_norm": 5.99964714050293, | |
| "learning_rate": 6.819524684817438e-05, | |
| "loss": 0.0645, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.050171037628278, | |
| "grad_norm": 6.639948844909668, | |
| "learning_rate": 6.805804574245666e-05, | |
| "loss": 0.0721, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.054732041049031, | |
| "grad_norm": 6.918362140655518, | |
| "learning_rate": 6.792068809096734e-05, | |
| "loss": 0.1027, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.0592930444697832, | |
| "grad_norm": 6.9611616134643555, | |
| "learning_rate": 6.778317508446423e-05, | |
| "loss": 0.0902, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.063854047890536, | |
| "grad_norm": 5.7177019119262695, | |
| "learning_rate": 6.764550791505197e-05, | |
| "loss": 0.0544, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.0684150513112884, | |
| "grad_norm": 7.697108745574951, | |
| "learning_rate": 6.750768777617162e-05, | |
| "loss": 0.0673, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.072976054732041, | |
| "grad_norm": 3.653858184814453, | |
| "learning_rate": 6.736971586259033e-05, | |
| "loss": 0.0413, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.0775370581527937, | |
| "grad_norm": 6.587297439575195, | |
| "learning_rate": 6.723159337039097e-05, | |
| "loss": 0.0537, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.082098061573546, | |
| "grad_norm": 5.920407295227051, | |
| "learning_rate": 6.709332149696185e-05, | |
| "loss": 0.0555, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.086659064994299, | |
| "grad_norm": 5.50054407119751, | |
| "learning_rate": 6.695490144098621e-05, | |
| "loss": 0.0756, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.0912200684150513, | |
| "grad_norm": 8.171920776367188, | |
| "learning_rate": 6.681633440243194e-05, | |
| "loss": 0.0817, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.095781071835804, | |
| "grad_norm": 7.142725944519043, | |
| "learning_rate": 6.667762158254104e-05, | |
| "loss": 0.0579, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.1003420752565565, | |
| "grad_norm": 6.230417251586914, | |
| "learning_rate": 6.653876418381937e-05, | |
| "loss": 0.0778, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.104903078677309, | |
| "grad_norm": 7.222645282745361, | |
| "learning_rate": 6.639976341002614e-05, | |
| "loss": 0.0471, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.1094640820980617, | |
| "grad_norm": 8.968223571777344, | |
| "learning_rate": 6.626062046616345e-05, | |
| "loss": 0.0631, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.114025085518814, | |
| "grad_norm": 6.115957736968994, | |
| "learning_rate": 6.612133655846592e-05, | |
| "loss": 0.0605, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.118586088939567, | |
| "grad_norm": 12.084288597106934, | |
| "learning_rate": 6.598191289439016e-05, | |
| "loss": 0.1068, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.1231470923603193, | |
| "grad_norm": 6.62805700302124, | |
| "learning_rate": 6.584235068260432e-05, | |
| "loss": 0.0812, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.1277080957810717, | |
| "grad_norm": 9.849166870117188, | |
| "learning_rate": 6.570265113297764e-05, | |
| "loss": 0.0972, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.1322690992018245, | |
| "grad_norm": 8.004566192626953, | |
| "learning_rate": 6.556281545656999e-05, | |
| "loss": 0.0602, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.136830102622577, | |
| "grad_norm": 5.589608192443848, | |
| "learning_rate": 6.542284486562124e-05, | |
| "loss": 0.0537, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.1413911060433297, | |
| "grad_norm": 5.782744884490967, | |
| "learning_rate": 6.528274057354092e-05, | |
| "loss": 0.071, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.145952109464082, | |
| "grad_norm": 5.03806734085083, | |
| "learning_rate": 6.514250379489753e-05, | |
| "loss": 0.052, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.1505131128848345, | |
| "grad_norm": 8.631730079650879, | |
| "learning_rate": 6.500213574540823e-05, | |
| "loss": 0.0711, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.1550741163055873, | |
| "grad_norm": 3.648717164993286, | |
| "learning_rate": 6.486163764192806e-05, | |
| "loss": 0.0558, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.1596351197263397, | |
| "grad_norm": 5.878966808319092, | |
| "learning_rate": 6.472101070243952e-05, | |
| "loss": 0.0377, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.1641961231470925, | |
| "grad_norm": 6.6274919509887695, | |
| "learning_rate": 6.458025614604203e-05, | |
| "loss": 0.063, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.168757126567845, | |
| "grad_norm": 5.117002964019775, | |
| "learning_rate": 6.44393751929413e-05, | |
| "loss": 0.0675, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.1733181299885973, | |
| "grad_norm": 4.451428413391113, | |
| "learning_rate": 6.429836906443879e-05, | |
| "loss": 0.0437, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.17787913340935, | |
| "grad_norm": 7.2544755935668945, | |
| "learning_rate": 6.415723898292112e-05, | |
| "loss": 0.0816, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.1824401368301025, | |
| "grad_norm": 7.115444183349609, | |
| "learning_rate": 6.401598617184939e-05, | |
| "loss": 0.0632, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.1870011402508553, | |
| "grad_norm": 6.341275215148926, | |
| "learning_rate": 6.387461185574874e-05, | |
| "loss": 0.045, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.1915621436716077, | |
| "grad_norm": 4.9018025398254395, | |
| "learning_rate": 6.373311726019763e-05, | |
| "loss": 0.0449, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.19612314709236, | |
| "grad_norm": 6.2423906326293945, | |
| "learning_rate": 6.359150361181715e-05, | |
| "loss": 0.0609, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.200684150513113, | |
| "grad_norm": 7.57888126373291, | |
| "learning_rate": 6.344977213826054e-05, | |
| "loss": 0.094, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.2052451539338653, | |
| "grad_norm": 7.0582475662231445, | |
| "learning_rate": 6.330792406820242e-05, | |
| "loss": 0.0598, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.209806157354618, | |
| "grad_norm": 6.263000011444092, | |
| "learning_rate": 6.316596063132822e-05, | |
| "loss": 0.0594, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.2143671607753705, | |
| "grad_norm": 5.4891862869262695, | |
| "learning_rate": 6.302388305832351e-05, | |
| "loss": 0.0512, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.2189281641961234, | |
| "grad_norm": 7.57410192489624, | |
| "learning_rate": 6.288169258086322e-05, | |
| "loss": 0.0746, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.2234891676168758, | |
| "grad_norm": 7.4229631423950195, | |
| "learning_rate": 6.273939043160118e-05, | |
| "loss": 0.0609, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.228050171037628, | |
| "grad_norm": 12.584153175354004, | |
| "learning_rate": 6.259697784415918e-05, | |
| "loss": 0.1267, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.232611174458381, | |
| "grad_norm": 8.015351295471191, | |
| "learning_rate": 6.245445605311649e-05, | |
| "loss": 0.0611, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.2371721778791334, | |
| "grad_norm": 8.479742050170898, | |
| "learning_rate": 6.231182629399901e-05, | |
| "loss": 0.052, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.241733181299886, | |
| "grad_norm": 7.191579341888428, | |
| "learning_rate": 6.21690898032687e-05, | |
| "loss": 0.0738, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.2462941847206386, | |
| "grad_norm": 6.246610641479492, | |
| "learning_rate": 6.202624781831268e-05, | |
| "loss": 0.0577, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.250855188141391, | |
| "grad_norm": 4.082911968231201, | |
| "learning_rate": 6.188330157743267e-05, | |
| "loss": 0.0404, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.255416191562144, | |
| "grad_norm": 5.735588550567627, | |
| "learning_rate": 6.174025231983416e-05, | |
| "loss": 0.0529, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.259977194982896, | |
| "grad_norm": 9.69885540008545, | |
| "learning_rate": 6.159710128561575e-05, | |
| "loss": 0.0574, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.264538198403649, | |
| "grad_norm": 7.707938194274902, | |
| "learning_rate": 6.145384971575823e-05, | |
| "loss": 0.0704, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.2690992018244014, | |
| "grad_norm": 5.515017032623291, | |
| "learning_rate": 6.131049885211404e-05, | |
| "loss": 0.0501, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.2736602052451538, | |
| "grad_norm": 7.536128997802734, | |
| "learning_rate": 6.116704993739635e-05, | |
| "loss": 0.0669, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.2782212086659066, | |
| "grad_norm": 7.009504795074463, | |
| "learning_rate": 6.102350421516837e-05, | |
| "loss": 0.0609, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.282782212086659, | |
| "grad_norm": 6.848779678344727, | |
| "learning_rate": 6.087986292983252e-05, | |
| "loss": 0.0742, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.287343215507412, | |
| "grad_norm": 6.385640621185303, | |
| "learning_rate": 6.073612732661966e-05, | |
| "loss": 0.0537, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.291904218928164, | |
| "grad_norm": 6.395091533660889, | |
| "learning_rate": 6.059229865157829e-05, | |
| "loss": 0.042, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.2964652223489166, | |
| "grad_norm": 9.047046661376953, | |
| "learning_rate": 6.044837815156377e-05, | |
| "loss": 0.0676, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.3010262257696694, | |
| "grad_norm": 4.578718662261963, | |
| "learning_rate": 6.030436707422745e-05, | |
| "loss": 0.0509, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.305587229190422, | |
| "grad_norm": 5.892753601074219, | |
| "learning_rate": 6.016026666800597e-05, | |
| "loss": 0.0484, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.3101482326111746, | |
| "grad_norm": 5.961977958679199, | |
| "learning_rate": 6.001607818211031e-05, | |
| "loss": 0.0653, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.314709236031927, | |
| "grad_norm": 5.6413397789001465, | |
| "learning_rate": 5.987180286651503e-05, | |
| "loss": 0.0468, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.3192702394526794, | |
| "grad_norm": 5.839052677154541, | |
| "learning_rate": 5.9727441971947395e-05, | |
| "loss": 0.0458, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.3238312428734322, | |
| "grad_norm": 3.717437982559204, | |
| "learning_rate": 5.958299674987663e-05, | |
| "loss": 0.0322, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.3283922462941846, | |
| "grad_norm": 5.86605978012085, | |
| "learning_rate": 5.943846845250291e-05, | |
| "loss": 0.0425, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.3329532497149374, | |
| "grad_norm": 3.296215534210205, | |
| "learning_rate": 5.9293858332746644e-05, | |
| "loss": 0.034, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.33751425313569, | |
| "grad_norm": 4.8171186447143555, | |
| "learning_rate": 5.9149167644237555e-05, | |
| "loss": 0.0427, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.342075256556442, | |
| "grad_norm": 6.483091354370117, | |
| "learning_rate": 5.90043976413038e-05, | |
| "loss": 0.0545, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.346636259977195, | |
| "grad_norm": 4.027348041534424, | |
| "learning_rate": 5.885954957896115e-05, | |
| "loss": 0.0376, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.3511972633979474, | |
| "grad_norm": 3.8197691440582275, | |
| "learning_rate": 5.871462471290202e-05, | |
| "loss": 0.0287, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.3557582668187003, | |
| "grad_norm": 15.101433753967285, | |
| "learning_rate": 5.8569624299484716e-05, | |
| "loss": 0.0699, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.3603192702394526, | |
| "grad_norm": 6.565672397613525, | |
| "learning_rate": 5.842454959572239e-05, | |
| "loss": 0.0715, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.364880273660205, | |
| "grad_norm": 5.648789405822754, | |
| "learning_rate": 5.827940185927227e-05, | |
| "loss": 0.066, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.369441277080958, | |
| "grad_norm": 6.39064359664917, | |
| "learning_rate": 5.813418234842467e-05, | |
| "loss": 0.0425, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.3740022805017102, | |
| "grad_norm": 6.164553165435791, | |
| "learning_rate": 5.798889232209217e-05, | |
| "loss": 0.0491, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.378563283922463, | |
| "grad_norm": 6.937675476074219, | |
| "learning_rate": 5.78435330397986e-05, | |
| "loss": 0.0354, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.3831242873432155, | |
| "grad_norm": 5.974575996398926, | |
| "learning_rate": 5.769810576166818e-05, | |
| "loss": 0.0504, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.387685290763968, | |
| "grad_norm": 6.108855247497559, | |
| "learning_rate": 5.755261174841461e-05, | |
| "loss": 0.0597, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.3922462941847207, | |
| "grad_norm": 5.981025695800781, | |
| "learning_rate": 5.740705226133013e-05, | |
| "loss": 0.062, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.396807297605473, | |
| "grad_norm": 6.320438861846924, | |
| "learning_rate": 5.726142856227452e-05, | |
| "loss": 0.0499, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.401368301026226, | |
| "grad_norm": 4.965454578399658, | |
| "learning_rate": 5.7115741913664264e-05, | |
| "loss": 0.0432, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.4059293044469783, | |
| "grad_norm": 7.857591152191162, | |
| "learning_rate": 5.696999357846153e-05, | |
| "loss": 0.0564, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.4104903078677307, | |
| "grad_norm": 5.360653877258301, | |
| "learning_rate": 5.682418482016329e-05, | |
| "loss": 0.037, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.4150513112884835, | |
| "grad_norm": 5.187353610992432, | |
| "learning_rate": 5.6678316902790266e-05, | |
| "loss": 0.0434, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.419612314709236, | |
| "grad_norm": 7.093838691711426, | |
| "learning_rate": 5.653239109087608e-05, | |
| "loss": 0.0816, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.4241733181299887, | |
| "grad_norm": 8.862817764282227, | |
| "learning_rate": 5.6386408649456205e-05, | |
| "loss": 0.1088, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.428734321550741, | |
| "grad_norm": 5.698467254638672, | |
| "learning_rate": 5.624037084405708e-05, | |
| "loss": 0.0764, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.433295324971494, | |
| "grad_norm": 7.893596649169922, | |
| "learning_rate": 5.609427894068507e-05, | |
| "loss": 0.0743, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.4378563283922463, | |
| "grad_norm": 3.882078170776367, | |
| "learning_rate": 5.594813420581554e-05, | |
| "loss": 0.0395, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.4424173318129987, | |
| "grad_norm": 5.990970611572266, | |
| "learning_rate": 5.580193790638181e-05, | |
| "loss": 0.0434, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.4469783352337515, | |
| "grad_norm": 6.1614789962768555, | |
| "learning_rate": 5.565569130976422e-05, | |
| "loss": 0.043, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.451539338654504, | |
| "grad_norm": 4.174839973449707, | |
| "learning_rate": 5.5509395683779185e-05, | |
| "loss": 0.0583, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.4561003420752567, | |
| "grad_norm": 3.416801929473877, | |
| "learning_rate": 5.536305229666815e-05, | |
| "loss": 0.034, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.460661345496009, | |
| "grad_norm": 5.814635276794434, | |
| "learning_rate": 5.521666241708655e-05, | |
| "loss": 0.0409, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.4652223489167615, | |
| "grad_norm": 4.838456153869629, | |
| "learning_rate": 5.5070227314092896e-05, | |
| "loss": 0.0428, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.4697833523375143, | |
| "grad_norm": 7.684220790863037, | |
| "learning_rate": 5.492374825713775e-05, | |
| "loss": 0.0663, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.4743443557582667, | |
| "grad_norm": 3.3523683547973633, | |
| "learning_rate": 5.47772265160527e-05, | |
| "loss": 0.0315, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.4789053591790196, | |
| "grad_norm": 5.440591812133789, | |
| "learning_rate": 5.46306633610394e-05, | |
| "loss": 0.053, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.483466362599772, | |
| "grad_norm": 4.606085300445557, | |
| "learning_rate": 5.448406006265846e-05, | |
| "loss": 0.0345, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.4880273660205243, | |
| "grad_norm": 6.1201887130737305, | |
| "learning_rate": 5.433741789181853e-05, | |
| "loss": 0.0673, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.492588369441277, | |
| "grad_norm": 7.997361660003662, | |
| "learning_rate": 5.419073811976525e-05, | |
| "loss": 0.0764, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.4971493728620295, | |
| "grad_norm": 4.388640880584717, | |
| "learning_rate": 5.4044022018070214e-05, | |
| "loss": 0.0414, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.5017103762827824, | |
| "grad_norm": 4.9629645347595215, | |
| "learning_rate": 5.3897270858619966e-05, | |
| "loss": 0.0424, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.5062713797035348, | |
| "grad_norm": 7.596857070922852, | |
| "learning_rate": 5.3750485913604965e-05, | |
| "loss": 0.0453, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.5108323831242876, | |
| "grad_norm": 5.5651068687438965, | |
| "learning_rate": 5.360366845550856e-05, | |
| "loss": 0.0339, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.51539338654504, | |
| "grad_norm": 3.2136380672454834, | |
| "learning_rate": 5.345681975709594e-05, | |
| "loss": 0.0224, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.5199543899657924, | |
| "grad_norm": 4.0387864112854, | |
| "learning_rate": 5.330994109140315e-05, | |
| "loss": 0.0296, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.524515393386545, | |
| "grad_norm": 5.669864654541016, | |
| "learning_rate": 5.316303373172601e-05, | |
| "loss": 0.0543, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.5290763968072976, | |
| "grad_norm": 3.9306421279907227, | |
| "learning_rate": 5.301609895160906e-05, | |
| "loss": 0.0374, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.5336374002280504, | |
| "grad_norm": 3.963334321975708, | |
| "learning_rate": 5.286913802483459e-05, | |
| "loss": 0.0304, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.538198403648803, | |
| "grad_norm": 4.443750858306885, | |
| "learning_rate": 5.2722152225411503e-05, | |
| "loss": 0.0397, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.542759407069555, | |
| "grad_norm": 5.408681869506836, | |
| "learning_rate": 5.25751428275644e-05, | |
| "loss": 0.0408, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.547320410490308, | |
| "grad_norm": 8.279979705810547, | |
| "learning_rate": 5.242811110572242e-05, | |
| "loss": 0.0392, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.5518814139110604, | |
| "grad_norm": 4.709146022796631, | |
| "learning_rate": 5.228105833450819e-05, | |
| "loss": 0.0377, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.556442417331813, | |
| "grad_norm": 7.52549409866333, | |
| "learning_rate": 5.213398578872688e-05, | |
| "loss": 0.0353, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.5610034207525656, | |
| "grad_norm": 5.7986602783203125, | |
| "learning_rate": 5.198689474335503e-05, | |
| "loss": 0.0564, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.565564424173318, | |
| "grad_norm": 6.1219611167907715, | |
| "learning_rate": 5.183978647352961e-05, | |
| "loss": 0.0441, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.570125427594071, | |
| "grad_norm": 4.516667366027832, | |
| "learning_rate": 5.169266225453686e-05, | |
| "loss": 0.0316, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.574686431014823, | |
| "grad_norm": 5.2199625968933105, | |
| "learning_rate": 5.154552336180132e-05, | |
| "loss": 0.0369, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.579247434435576, | |
| "grad_norm": 4.977226257324219, | |
| "learning_rate": 5.139837107087468e-05, | |
| "loss": 0.0383, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.5838084378563284, | |
| "grad_norm": 3.4498722553253174, | |
| "learning_rate": 5.1251206657424864e-05, | |
| "loss": 0.0215, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.588369441277081, | |
| "grad_norm": 4.04592227935791, | |
| "learning_rate": 5.110403139722484e-05, | |
| "loss": 0.0249, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.5929304446978336, | |
| "grad_norm": 6.6897969245910645, | |
| "learning_rate": 5.0956846566141595e-05, | |
| "loss": 0.0463, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.597491448118586, | |
| "grad_norm": 5.076176166534424, | |
| "learning_rate": 5.080965344012508e-05, | |
| "loss": 0.0426, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.602052451539339, | |
| "grad_norm": 11.544487953186035, | |
| "learning_rate": 5.066245329519721e-05, | |
| "loss": 0.0356, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.6066134549600912, | |
| "grad_norm": 6.120387077331543, | |
| "learning_rate": 5.0515247407440705e-05, | |
| "loss": 0.0451, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.6111744583808436, | |
| "grad_norm": 5.812496185302734, | |
| "learning_rate": 5.036803705298808e-05, | |
| "loss": 0.0293, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.6157354618015964, | |
| "grad_norm": 4.080401420593262, | |
| "learning_rate": 5.022082350801055e-05, | |
| "loss": 0.032, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.620296465222349, | |
| "grad_norm": 4.283697128295898, | |
| "learning_rate": 5.007360804870702e-05, | |
| "loss": 0.0161, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.6248574686431017, | |
| "grad_norm": 5.630773544311523, | |
| "learning_rate": 4.9926391951292985e-05, | |
| "loss": 0.0428, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.629418472063854, | |
| "grad_norm": 5.993396759033203, | |
| "learning_rate": 4.977917649198945e-05, | |
| "loss": 0.038, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.6339794754846064, | |
| "grad_norm": 5.899278163909912, | |
| "learning_rate": 4.963196294701194e-05, | |
| "loss": 0.048, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.6385404789053593, | |
| "grad_norm": 5.6876091957092285, | |
| "learning_rate": 4.9484752592559306e-05, | |
| "loss": 0.0358, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.6431014823261116, | |
| "grad_norm": 8.28043270111084, | |
| "learning_rate": 4.9337546704802806e-05, | |
| "loss": 0.0446, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.6476624857468645, | |
| "grad_norm": 3.0159778594970703, | |
| "learning_rate": 4.919034655987493e-05, | |
| "loss": 0.0202, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.652223489167617, | |
| "grad_norm": 3.556821823120117, | |
| "learning_rate": 4.904315343385844e-05, | |
| "loss": 0.0359, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.6567844925883692, | |
| "grad_norm": 9.480207443237305, | |
| "learning_rate": 4.889596860277519e-05, | |
| "loss": 0.0292, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.661345496009122, | |
| "grad_norm": 4.381405830383301, | |
| "learning_rate": 4.8748793342575134e-05, | |
| "loss": 0.0432, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.6659064994298745, | |
| "grad_norm": 3.772207260131836, | |
| "learning_rate": 4.860162892912532e-05, | |
| "loss": 0.0172, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.6704675028506273, | |
| "grad_norm": 4.178829193115234, | |
| "learning_rate": 4.84544766381987e-05, | |
| "loss": 0.0344, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.6750285062713797, | |
| "grad_norm": 12.805524826049805, | |
| "learning_rate": 4.830733774546315e-05, | |
| "loss": 0.0377, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.679589509692132, | |
| "grad_norm": 5.325319290161133, | |
| "learning_rate": 4.8160213526470403e-05, | |
| "loss": 0.0533, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.684150513112885, | |
| "grad_norm": 6.815293788909912, | |
| "learning_rate": 4.801310525664498e-05, | |
| "loss": 0.0256, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.6887115165336373, | |
| "grad_norm": 5.803644180297852, | |
| "learning_rate": 4.7866014211273135e-05, | |
| "loss": 0.0179, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.69327251995439, | |
| "grad_norm": 4.223587512969971, | |
| "learning_rate": 4.7718941665491825e-05, | |
| "loss": 0.0337, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.6978335233751425, | |
| "grad_norm": 3.5225670337677, | |
| "learning_rate": 4.7571888894277604e-05, | |
| "loss": 0.0251, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.702394526795895, | |
| "grad_norm": 4.653651714324951, | |
| "learning_rate": 4.7424857172435596e-05, | |
| "loss": 0.0309, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.7069555302166477, | |
| "grad_norm": 4.962986469268799, | |
| "learning_rate": 4.72778477745885e-05, | |
| "loss": 0.0375, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.7115165336374, | |
| "grad_norm": 7.884991645812988, | |
| "learning_rate": 4.713086197516542e-05, | |
| "loss": 0.0619, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.716077537058153, | |
| "grad_norm": 6.011470317840576, | |
| "learning_rate": 4.698390104839096e-05, | |
| "loss": 0.0304, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.7206385404789053, | |
| "grad_norm": 4.3046159744262695, | |
| "learning_rate": 4.683696626827401e-05, | |
| "loss": 0.0251, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.7251995438996577, | |
| "grad_norm": 4.003452301025391, | |
| "learning_rate": 4.669005890859686e-05, | |
| "loss": 0.0231, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.7297605473204105, | |
| "grad_norm": 4.7901530265808105, | |
| "learning_rate": 4.654318024290407e-05, | |
| "loss": 0.0425, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.734321550741163, | |
| "grad_norm": 4.104437351226807, | |
| "learning_rate": 4.639633154449146e-05, | |
| "loss": 0.0284, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.7388825541619157, | |
| "grad_norm": 3.93487811088562, | |
| "learning_rate": 4.624951408639503e-05, | |
| "loss": 0.0294, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.743443557582668, | |
| "grad_norm": 6.138600826263428, | |
| "learning_rate": 4.610272914138004e-05, | |
| "loss": 0.0315, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.7480045610034205, | |
| "grad_norm": 6.667906761169434, | |
| "learning_rate": 4.59559779819298e-05, | |
| "loss": 0.0395, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.7525655644241733, | |
| "grad_norm": 4.121731281280518, | |
| "learning_rate": 4.5809261880234764e-05, | |
| "loss": 0.0319, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.757126567844926, | |
| "grad_norm": 3.8120853900909424, | |
| "learning_rate": 4.566258210818148e-05, | |
| "loss": 0.029, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.7616875712656785, | |
| "grad_norm": 3.287109851837158, | |
| "learning_rate": 4.5515939937341556e-05, | |
| "loss": 0.0224, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.766248574686431, | |
| "grad_norm": 3.322906970977783, | |
| "learning_rate": 4.5369336638960616e-05, | |
| "loss": 0.0233, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.7708095781071833, | |
| "grad_norm": 1.5358067750930786, | |
| "learning_rate": 4.522277348394731e-05, | |
| "loss": 0.0088, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.775370581527936, | |
| "grad_norm": 4.789572715759277, | |
| "learning_rate": 4.507625174286226e-05, | |
| "loss": 0.0357, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.779931584948689, | |
| "grad_norm": 3.2839534282684326, | |
| "learning_rate": 4.492977268590711e-05, | |
| "loss": 0.0237, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.7844925883694414, | |
| "grad_norm": 4.322288513183594, | |
| "learning_rate": 4.478333758291347e-05, | |
| "loss": 0.0387, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.7890535917901937, | |
| "grad_norm": 6.475346088409424, | |
| "learning_rate": 4.4636947703331864e-05, | |
| "loss": 0.0426, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.7936145952109466, | |
| "grad_norm": 4.861753940582275, | |
| "learning_rate": 4.449060431622082e-05, | |
| "loss": 0.0286, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.798175598631699, | |
| "grad_norm": 2.6957809925079346, | |
| "learning_rate": 4.434430869023579e-05, | |
| "loss": 0.0222, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.802736602052452, | |
| "grad_norm": 6.314004898071289, | |
| "learning_rate": 4.419806209361822e-05, | |
| "loss": 0.033, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.807297605473204, | |
| "grad_norm": 5.230919361114502, | |
| "learning_rate": 4.405186579418448e-05, | |
| "loss": 0.0195, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.8118586088939566, | |
| "grad_norm": 4.417494773864746, | |
| "learning_rate": 4.390572105931492e-05, | |
| "loss": 0.0372, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.8164196123147094, | |
| "grad_norm": 6.1748552322387695, | |
| "learning_rate": 4.375962915594292e-05, | |
| "loss": 0.0284, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.8209806157354618, | |
| "grad_norm": 8.319523811340332, | |
| "learning_rate": 4.36135913505438e-05, | |
| "loss": 0.0392, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.8255416191562146, | |
| "grad_norm": 6.181365489959717, | |
| "learning_rate": 4.346760890912394e-05, | |
| "loss": 0.0521, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.830102622576967, | |
| "grad_norm": 4.259495735168457, | |
| "learning_rate": 4.3321683097209745e-05, | |
| "loss": 0.0181, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.8346636259977194, | |
| "grad_norm": 6.773824214935303, | |
| "learning_rate": 4.317581517983673e-05, | |
| "loss": 0.0229, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.839224629418472, | |
| "grad_norm": 7.4220356941223145, | |
| "learning_rate": 4.303000642153847e-05, | |
| "loss": 0.0316, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.8437856328392246, | |
| "grad_norm": 6.611977577209473, | |
| "learning_rate": 4.288425808633575e-05, | |
| "loss": 0.0333, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.8483466362599774, | |
| "grad_norm": 4.086733818054199, | |
| "learning_rate": 4.27385714377255e-05, | |
| "loss": 0.022, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.85290763968073, | |
| "grad_norm": 3.454923629760742, | |
| "learning_rate": 4.259294773866987e-05, | |
| "loss": 0.0273, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.857468643101482, | |
| "grad_norm": 2.6385574340820312, | |
| "learning_rate": 4.2447388251585384e-05, | |
| "loss": 0.0167, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.862029646522235, | |
| "grad_norm": 3.3853583335876465, | |
| "learning_rate": 4.230189423833183e-05, | |
| "loss": 0.0261, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.8665906499429874, | |
| "grad_norm": 2.6701831817626953, | |
| "learning_rate": 4.215646696020141e-05, | |
| "loss": 0.0251, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.8711516533637402, | |
| "grad_norm": 2.350428342819214, | |
| "learning_rate": 4.201110767790784e-05, | |
| "loss": 0.013, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.8757126567844926, | |
| "grad_norm": 4.5163373947143555, | |
| "learning_rate": 4.186581765157534e-05, | |
| "loss": 0.0333, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.880273660205245, | |
| "grad_norm": 4.357926368713379, | |
| "learning_rate": 4.172059814072776e-05, | |
| "loss": 0.0204, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.884834663625998, | |
| "grad_norm": 5.049153804779053, | |
| "learning_rate": 4.157545040427763e-05, | |
| "loss": 0.0508, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.88939566704675, | |
| "grad_norm": 3.518669366836548, | |
| "learning_rate": 4.143037570051529e-05, | |
| "loss": 0.0267, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.893956670467503, | |
| "grad_norm": 2.3187649250030518, | |
| "learning_rate": 4.1285375287097976e-05, | |
| "loss": 0.0144, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.8985176738882554, | |
| "grad_norm": 5.340627193450928, | |
| "learning_rate": 4.114045042103887e-05, | |
| "loss": 0.0221, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.903078677309008, | |
| "grad_norm": 5.597445487976074, | |
| "learning_rate": 4.099560235869621e-05, | |
| "loss": 0.0217, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.9076396807297606, | |
| "grad_norm": 10.440146446228027, | |
| "learning_rate": 4.085083235576246e-05, | |
| "loss": 0.0383, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.912200684150513, | |
| "grad_norm": 6.332849502563477, | |
| "learning_rate": 4.070614166725337e-05, | |
| "loss": 0.0341, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.916761687571266, | |
| "grad_norm": 3.814603090286255, | |
| "learning_rate": 4.056153154749711e-05, | |
| "loss": 0.0336, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.9213226909920182, | |
| "grad_norm": 6.715717315673828, | |
| "learning_rate": 4.04170032501234e-05, | |
| "loss": 0.0396, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.9258836944127706, | |
| "grad_norm": 5.1199140548706055, | |
| "learning_rate": 4.02725580280526e-05, | |
| "loss": 0.0311, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.9304446978335235, | |
| "grad_norm": 2.676060438156128, | |
| "learning_rate": 4.012819713348499e-05, | |
| "loss": 0.0188, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.935005701254276, | |
| "grad_norm": 7.360265254974365, | |
| "learning_rate": 3.9983921817889694e-05, | |
| "loss": 0.0318, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.9395667046750287, | |
| "grad_norm": 2.801821708679199, | |
| "learning_rate": 3.9839733331994036e-05, | |
| "loss": 0.0176, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.944127708095781, | |
| "grad_norm": 6.362011432647705, | |
| "learning_rate": 3.9695632925772555e-05, | |
| "loss": 0.0353, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.9486887115165334, | |
| "grad_norm": 3.604642152786255, | |
| "learning_rate": 3.955162184843625e-05, | |
| "loss": 0.0298, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.9532497149372863, | |
| "grad_norm": 4.035106182098389, | |
| "learning_rate": 3.940770134842172e-05, | |
| "loss": 0.0312, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.9578107183580387, | |
| "grad_norm": 3.8275413513183594, | |
| "learning_rate": 3.9263872673380356e-05, | |
| "loss": 0.0242, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.9623717217787915, | |
| "grad_norm": 6.57460355758667, | |
| "learning_rate": 3.912013707016748e-05, | |
| "loss": 0.0362, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.966932725199544, | |
| "grad_norm": 2.4332528114318848, | |
| "learning_rate": 3.897649578483163e-05, | |
| "loss": 0.0124, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.9714937286202963, | |
| "grad_norm": 2.740542411804199, | |
| "learning_rate": 3.883295006260366e-05, | |
| "loss": 0.0141, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.976054732041049, | |
| "grad_norm": 4.145965576171875, | |
| "learning_rate": 3.868950114788597e-05, | |
| "loss": 0.0227, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.9806157354618015, | |
| "grad_norm": 3.8210062980651855, | |
| "learning_rate": 3.8546150284241784e-05, | |
| "loss": 0.0256, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.9851767388825543, | |
| "grad_norm": 2.960423469543457, | |
| "learning_rate": 3.840289871438427e-05, | |
| "loss": 0.0234, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.9897377423033067, | |
| "grad_norm": 2.305687189102173, | |
| "learning_rate": 3.8259747680165835e-05, | |
| "loss": 0.0167, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.994298745724059, | |
| "grad_norm": 3.4394383430480957, | |
| "learning_rate": 3.811669842256733e-05, | |
| "loss": 0.0188, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.998859749144812, | |
| "grad_norm": 4.973351955413818, | |
| "learning_rate": 3.7973752181687335e-05, | |
| "loss": 0.0154, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 8.8117036819458, | |
| "learning_rate": 3.78309101967313e-05, | |
| "loss": 0.0315, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.0045610034207524, | |
| "grad_norm": 2.305928945541382, | |
| "learning_rate": 3.768817370600098e-05, | |
| "loss": 0.0068, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.009122006841505, | |
| "grad_norm": 3.3252930641174316, | |
| "learning_rate": 3.754554394688353e-05, | |
| "loss": 0.0249, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.0136830102622576, | |
| "grad_norm": 2.8702948093414307, | |
| "learning_rate": 3.740302215584083e-05, | |
| "loss": 0.0141, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.0182440136830104, | |
| "grad_norm": 1.2875800132751465, | |
| "learning_rate": 3.726060956839884e-05, | |
| "loss": 0.0088, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.022805017103763, | |
| "grad_norm": 2.241077184677124, | |
| "learning_rate": 3.7118307419136784e-05, | |
| "loss": 0.0158, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.027366020524515, | |
| "grad_norm": 3.223511219024658, | |
| "learning_rate": 3.697611694167652e-05, | |
| "loss": 0.0172, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.031927023945268, | |
| "grad_norm": 1.6573007106781006, | |
| "learning_rate": 3.683403936867179e-05, | |
| "loss": 0.0061, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.0364880273660204, | |
| "grad_norm": 2.708397388458252, | |
| "learning_rate": 3.6692075931797586e-05, | |
| "loss": 0.0189, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.0410490307867732, | |
| "grad_norm": 1.0548919439315796, | |
| "learning_rate": 3.6550227861739474e-05, | |
| "loss": 0.0057, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.0456100342075256, | |
| "grad_norm": 2.2885780334472656, | |
| "learning_rate": 3.640849638818286e-05, | |
| "loss": 0.0114, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.050171037628278, | |
| "grad_norm": 2.5884759426116943, | |
| "learning_rate": 3.6266882739802385e-05, | |
| "loss": 0.0135, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.054732041049031, | |
| "grad_norm": 1.9603959321975708, | |
| "learning_rate": 3.612538814425127e-05, | |
| "loss": 0.0078, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.0592930444697832, | |
| "grad_norm": 1.8305693864822388, | |
| "learning_rate": 3.598401382815062e-05, | |
| "loss": 0.0191, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.063854047890536, | |
| "grad_norm": 6.010728359222412, | |
| "learning_rate": 3.584276101707892e-05, | |
| "loss": 0.0228, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.0684150513112884, | |
| "grad_norm": 0.7543220520019531, | |
| "learning_rate": 3.570163093556123e-05, | |
| "loss": 0.0049, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.072976054732041, | |
| "grad_norm": 3.3827199935913086, | |
| "learning_rate": 3.556062480705871e-05, | |
| "loss": 0.0205, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.0775370581527937, | |
| "grad_norm": 3.6950876712799072, | |
| "learning_rate": 3.541974385395799e-05, | |
| "loss": 0.013, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.082098061573546, | |
| "grad_norm": 2.4483461380004883, | |
| "learning_rate": 3.527898929756049e-05, | |
| "loss": 0.0112, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.086659064994299, | |
| "grad_norm": 1.083269715309143, | |
| "learning_rate": 3.5138362358071955e-05, | |
| "loss": 0.0051, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.0912200684150513, | |
| "grad_norm": 3.507185459136963, | |
| "learning_rate": 3.4997864254591786e-05, | |
| "loss": 0.0184, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.095781071835804, | |
| "grad_norm": 1.244012713432312, | |
| "learning_rate": 3.4857496205102474e-05, | |
| "loss": 0.0058, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.1003420752565565, | |
| "grad_norm": 1.7855976819992065, | |
| "learning_rate": 3.47172594264591e-05, | |
| "loss": 0.0081, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.104903078677309, | |
| "grad_norm": 3.3493175506591797, | |
| "learning_rate": 3.457715513437878e-05, | |
| "loss": 0.0099, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.1094640820980617, | |
| "grad_norm": 2.2510809898376465, | |
| "learning_rate": 3.443718454343003e-05, | |
| "loss": 0.0103, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.114025085518814, | |
| "grad_norm": 2.8858158588409424, | |
| "learning_rate": 3.429734886702235e-05, | |
| "loss": 0.0201, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.118586088939567, | |
| "grad_norm": 2.9078762531280518, | |
| "learning_rate": 3.415764931739569e-05, | |
| "loss": 0.0126, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.1231470923603193, | |
| "grad_norm": 11.870075225830078, | |
| "learning_rate": 3.401808710560984e-05, | |
| "loss": 0.0367, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.1277080957810717, | |
| "grad_norm": 2.278918504714966, | |
| "learning_rate": 3.3878663441534074e-05, | |
| "loss": 0.0131, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.1322690992018245, | |
| "grad_norm": 2.479530096054077, | |
| "learning_rate": 3.3739379533836545e-05, | |
| "loss": 0.0157, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.136830102622577, | |
| "grad_norm": 5.716989517211914, | |
| "learning_rate": 3.360023658997387e-05, | |
| "loss": 0.0121, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.1413911060433297, | |
| "grad_norm": 3.942920446395874, | |
| "learning_rate": 3.346123581618064e-05, | |
| "loss": 0.0119, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.145952109464082, | |
| "grad_norm": 3.193537712097168, | |
| "learning_rate": 3.332237841745898e-05, | |
| "loss": 0.0214, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.1505131128848345, | |
| "grad_norm": 6.671420574188232, | |
| "learning_rate": 3.318366559756807e-05, | |
| "loss": 0.0141, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.1550741163055873, | |
| "grad_norm": 1.345292329788208, | |
| "learning_rate": 3.304509855901379e-05, | |
| "loss": 0.0052, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.1596351197263397, | |
| "grad_norm": 1.9885728359222412, | |
| "learning_rate": 3.290667850303816e-05, | |
| "loss": 0.0139, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.1641961231470925, | |
| "grad_norm": 2.2747621536254883, | |
| "learning_rate": 3.276840662960904e-05, | |
| "loss": 0.0111, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.168757126567845, | |
| "grad_norm": 1.8984365463256836, | |
| "learning_rate": 3.26302841374097e-05, | |
| "loss": 0.0094, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.1733181299885973, | |
| "grad_norm": 1.0688873529434204, | |
| "learning_rate": 3.2492312223828395e-05, | |
| "loss": 0.0066, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.17787913340935, | |
| "grad_norm": 3.808332681655884, | |
| "learning_rate": 3.235449208494804e-05, | |
| "loss": 0.0267, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.1824401368301025, | |
| "grad_norm": 2.2355008125305176, | |
| "learning_rate": 3.221682491553578e-05, | |
| "loss": 0.0098, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.1870011402508553, | |
| "grad_norm": 2.873753547668457, | |
| "learning_rate": 3.207931190903267e-05, | |
| "loss": 0.0211, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.1915621436716077, | |
| "grad_norm": 1.0170818567276, | |
| "learning_rate": 3.194195425754333e-05, | |
| "loss": 0.0042, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.19612314709236, | |
| "grad_norm": 1.7715719938278198, | |
| "learning_rate": 3.180475315182563e-05, | |
| "loss": 0.0042, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.200684150513113, | |
| "grad_norm": 3.1615145206451416, | |
| "learning_rate": 3.166770978128027e-05, | |
| "loss": 0.0096, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.2052451539338653, | |
| "grad_norm": 1.4706147909164429, | |
| "learning_rate": 3.1530825333940606e-05, | |
| "loss": 0.0069, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.209806157354618, | |
| "grad_norm": 1.4655320644378662, | |
| "learning_rate": 3.139410099646223e-05, | |
| "loss": 0.007, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.2143671607753705, | |
| "grad_norm": 2.010169506072998, | |
| "learning_rate": 3.1257537954112784e-05, | |
| "loss": 0.0061, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.2189281641961234, | |
| "grad_norm": 2.4863297939300537, | |
| "learning_rate": 3.112113739076161e-05, | |
| "loss": 0.0135, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.2234891676168758, | |
| "grad_norm": 0.9651201963424683, | |
| "learning_rate": 3.09849004888695e-05, | |
| "loss": 0.0059, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.228050171037628, | |
| "grad_norm": 1.4260286092758179, | |
| "learning_rate": 3.084882842947851e-05, | |
| "loss": 0.0058, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.232611174458381, | |
| "grad_norm": 2.2575788497924805, | |
| "learning_rate": 3.071292239220164e-05, | |
| "loss": 0.0072, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.2371721778791334, | |
| "grad_norm": 2.315737247467041, | |
| "learning_rate": 3.057718355521262e-05, | |
| "loss": 0.0072, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.241733181299886, | |
| "grad_norm": 1.3366587162017822, | |
| "learning_rate": 3.0441613095235755e-05, | |
| "loss": 0.0079, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.2462941847206386, | |
| "grad_norm": 1.586226224899292, | |
| "learning_rate": 3.0306212187535653e-05, | |
| "loss": 0.0087, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.250855188141391, | |
| "grad_norm": 1.7931245565414429, | |
| "learning_rate": 3.0170982005907066e-05, | |
| "loss": 0.0077, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.255416191562144, | |
| "grad_norm": 3.0318455696105957, | |
| "learning_rate": 3.003592372266476e-05, | |
| "loss": 0.0055, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.259977194982896, | |
| "grad_norm": 1.366331934928894, | |
| "learning_rate": 2.990103850863327e-05, | |
| "loss": 0.0047, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.264538198403649, | |
| "grad_norm": 0.7723425030708313, | |
| "learning_rate": 2.9766327533136774e-05, | |
| "loss": 0.0049, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.2690992018244014, | |
| "grad_norm": 3.0485169887542725, | |
| "learning_rate": 2.963179196398902e-05, | |
| "loss": 0.007, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.2736602052451538, | |
| "grad_norm": 1.4431509971618652, | |
| "learning_rate": 2.9497432967483124e-05, | |
| "loss": 0.0051, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.2782212086659066, | |
| "grad_norm": 1.8810582160949707, | |
| "learning_rate": 2.9363251708381477e-05, | |
| "loss": 0.0069, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.282782212086659, | |
| "grad_norm": 0.9738725423812866, | |
| "learning_rate": 2.9229249349905684e-05, | |
| "loss": 0.0049, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.287343215507412, | |
| "grad_norm": 2.8499152660369873, | |
| "learning_rate": 2.9095427053726442e-05, | |
| "loss": 0.0093, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.291904218928164, | |
| "grad_norm": 2.7052316665649414, | |
| "learning_rate": 2.896178597995347e-05, | |
| "loss": 0.0109, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.2964652223489166, | |
| "grad_norm": 0.7764227986335754, | |
| "learning_rate": 2.882832728712551e-05, | |
| "loss": 0.0046, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.3010262257696694, | |
| "grad_norm": 2.247260570526123, | |
| "learning_rate": 2.869505213220014e-05, | |
| "loss": 0.0078, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.305587229190422, | |
| "grad_norm": 0.8493082523345947, | |
| "learning_rate": 2.8561961670543995e-05, | |
| "loss": 0.0049, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.3101482326111746, | |
| "grad_norm": 0.709747850894928, | |
| "learning_rate": 2.8429057055922448e-05, | |
| "loss": 0.0039, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.314709236031927, | |
| "grad_norm": 1.864812970161438, | |
| "learning_rate": 2.8296339440489837e-05, | |
| "loss": 0.0094, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.3192702394526794, | |
| "grad_norm": 2.6070640087127686, | |
| "learning_rate": 2.8163809974779405e-05, | |
| "loss": 0.0127, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.3238312428734322, | |
| "grad_norm": 0.6544182896614075, | |
| "learning_rate": 2.8031469807693257e-05, | |
| "loss": 0.0029, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.3283922462941846, | |
| "grad_norm": 2.191878318786621, | |
| "learning_rate": 2.789932008649252e-05, | |
| "loss": 0.0075, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.3329532497149374, | |
| "grad_norm": 1.73975670337677, | |
| "learning_rate": 2.776736195678734e-05, | |
| "loss": 0.0083, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.33751425313569, | |
| "grad_norm": 1.4151902198791504, | |
| "learning_rate": 2.7635596562526865e-05, | |
| "loss": 0.0071, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.342075256556442, | |
| "grad_norm": 2.084052562713623, | |
| "learning_rate": 2.7504025045989577e-05, | |
| "loss": 0.0098, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.346636259977195, | |
| "grad_norm": 1.386006474494934, | |
| "learning_rate": 2.737264854777306e-05, | |
| "loss": 0.0083, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.3511972633979474, | |
| "grad_norm": 2.138157606124878, | |
| "learning_rate": 2.724146820678436e-05, | |
| "loss": 0.0061, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.3557582668187003, | |
| "grad_norm": 0.871894896030426, | |
| "learning_rate": 2.7110485160230037e-05, | |
| "loss": 0.0047, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.3603192702394526, | |
| "grad_norm": 2.6974539756774902, | |
| "learning_rate": 2.6979700543606245e-05, | |
| "loss": 0.0063, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.364880273660205, | |
| "grad_norm": 1.140648603439331, | |
| "learning_rate": 2.6849115490689013e-05, | |
| "loss": 0.0046, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.369441277080958, | |
| "grad_norm": 5.994167804718018, | |
| "learning_rate": 2.6718731133524265e-05, | |
| "loss": 0.0116, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.3740022805017102, | |
| "grad_norm": 2.2735085487365723, | |
| "learning_rate": 2.6588548602418156e-05, | |
| "loss": 0.0092, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.378563283922463, | |
| "grad_norm": 1.1353788375854492, | |
| "learning_rate": 2.6458569025927183e-05, | |
| "loss": 0.0038, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.3831242873432155, | |
| "grad_norm": 1.2895104885101318, | |
| "learning_rate": 2.6328793530848405e-05, | |
| "loss": 0.0053, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.387685290763968, | |
| "grad_norm": 1.2267273664474487, | |
| "learning_rate": 2.6199223242209747e-05, | |
| "loss": 0.0054, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.3922462941847207, | |
| "grad_norm": 1.287307858467102, | |
| "learning_rate": 2.6069859283260097e-05, | |
| "loss": 0.005, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.396807297605473, | |
| "grad_norm": 0.8949470520019531, | |
| "learning_rate": 2.5940702775459747e-05, | |
| "loss": 0.0046, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.401368301026226, | |
| "grad_norm": 3.3357739448547363, | |
| "learning_rate": 2.5811754838470583e-05, | |
| "loss": 0.0076, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.4059293044469783, | |
| "grad_norm": 7.779567718505859, | |
| "learning_rate": 2.5683016590146318e-05, | |
| "loss": 0.0072, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.4104903078677307, | |
| "grad_norm": 2.3621115684509277, | |
| "learning_rate": 2.5554489146522958e-05, | |
| "loss": 0.0091, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.4150513112884835, | |
| "grad_norm": 1.8572039604187012, | |
| "learning_rate": 2.542617362180893e-05, | |
| "loss": 0.0074, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.419612314709236, | |
| "grad_norm": 2.4851315021514893, | |
| "learning_rate": 2.5298071128375644e-05, | |
| "loss": 0.0045, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.4241733181299887, | |
| "grad_norm": 2.945042371749878, | |
| "learning_rate": 2.5170182776747687e-05, | |
| "loss": 0.009, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.428734321550741, | |
| "grad_norm": 2.0146474838256836, | |
| "learning_rate": 2.5042509675593195e-05, | |
| "loss": 0.0115, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.433295324971494, | |
| "grad_norm": 0.28727975487709045, | |
| "learning_rate": 2.491505293171438e-05, | |
| "loss": 0.0022, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.4378563283922463, | |
| "grad_norm": 4.880654335021973, | |
| "learning_rate": 2.478781365003775e-05, | |
| "loss": 0.0218, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.4424173318129987, | |
| "grad_norm": 0.8212169408798218, | |
| "learning_rate": 2.46607929336047e-05, | |
| "loss": 0.0037, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.4469783352337515, | |
| "grad_norm": 2.413165330886841, | |
| "learning_rate": 2.4533991883561868e-05, | |
| "loss": 0.0115, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.451539338654504, | |
| "grad_norm": 1.3072090148925781, | |
| "learning_rate": 2.440741159915153e-05, | |
| "loss": 0.0057, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.4561003420752567, | |
| "grad_norm": 2.672323703765869, | |
| "learning_rate": 2.4281053177702256e-05, | |
| "loss": 0.0105, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.460661345496009, | |
| "grad_norm": 1.8092498779296875, | |
| "learning_rate": 2.4154917714619164e-05, | |
| "loss": 0.0055, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.4652223489167615, | |
| "grad_norm": 2.4887306690216064, | |
| "learning_rate": 2.40290063033746e-05, | |
| "loss": 0.0068, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.4697833523375143, | |
| "grad_norm": 1.1760387420654297, | |
| "learning_rate": 2.3903320035498605e-05, | |
| "loss": 0.0049, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.4743443557582667, | |
| "grad_norm": 1.3489493131637573, | |
| "learning_rate": 2.3777860000569384e-05, | |
| "loss": 0.004, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.4789053591790196, | |
| "grad_norm": 4.36347770690918, | |
| "learning_rate": 2.365262728620398e-05, | |
| "loss": 0.0042, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.483466362599772, | |
| "grad_norm": 0.5808774828910828, | |
| "learning_rate": 2.352762297804879e-05, | |
| "loss": 0.0029, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.4880273660205243, | |
| "grad_norm": 1.9665743112564087, | |
| "learning_rate": 2.340284815977007e-05, | |
| "loss": 0.0088, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.492588369441277, | |
| "grad_norm": 5.437419891357422, | |
| "learning_rate": 2.327830391304475e-05, | |
| "loss": 0.0414, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.4971493728620295, | |
| "grad_norm": 0.9246713519096375, | |
| "learning_rate": 2.315399131755081e-05, | |
| "loss": 0.0055, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.5017103762827824, | |
| "grad_norm": 1.88973069190979, | |
| "learning_rate": 2.3029911450958113e-05, | |
| "loss": 0.007, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.5062713797035348, | |
| "grad_norm": 5.172039985656738, | |
| "learning_rate": 2.2906065388918934e-05, | |
| "loss": 0.0104, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.5108323831242876, | |
| "grad_norm": 2.3819446563720703, | |
| "learning_rate": 2.278245420505873e-05, | |
| "loss": 0.0125, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.51539338654504, | |
| "grad_norm": 0.6151133179664612, | |
| "learning_rate": 2.2659078970966784e-05, | |
| "loss": 0.0036, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.5199543899657924, | |
| "grad_norm": 1.6557809114456177, | |
| "learning_rate": 2.2535940756186897e-05, | |
| "loss": 0.0095, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.524515393386545, | |
| "grad_norm": 1.1430513858795166, | |
| "learning_rate": 2.2413040628208165e-05, | |
| "loss": 0.0049, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.5290763968072976, | |
| "grad_norm": 1.5872365236282349, | |
| "learning_rate": 2.22903796524557e-05, | |
| "loss": 0.0045, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.5336374002280504, | |
| "grad_norm": 1.7567164897918701, | |
| "learning_rate": 2.2167958892281404e-05, | |
| "loss": 0.0072, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.538198403648803, | |
| "grad_norm": 3.414562702178955, | |
| "learning_rate": 2.2045779408954738e-05, | |
| "loss": 0.015, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.542759407069555, | |
| "grad_norm": 1.1695743799209595, | |
| "learning_rate": 2.192384226165349e-05, | |
| "loss": 0.0063, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.547320410490308, | |
| "grad_norm": 1.2887126207351685, | |
| "learning_rate": 2.180214850745467e-05, | |
| "loss": 0.0042, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.5518814139110604, | |
| "grad_norm": 6.278741359710693, | |
| "learning_rate": 2.1680699201325326e-05, | |
| "loss": 0.0096, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.556442417331813, | |
| "grad_norm": 1.6705055236816406, | |
| "learning_rate": 2.1559495396113307e-05, | |
| "loss": 0.0064, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.5610034207525656, | |
| "grad_norm": 1.6865471601486206, | |
| "learning_rate": 2.1438538142538273e-05, | |
| "loss": 0.0066, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.565564424173318, | |
| "grad_norm": 1.117870569229126, | |
| "learning_rate": 2.131782848918245e-05, | |
| "loss": 0.0039, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.570125427594071, | |
| "grad_norm": 0.6459174752235413, | |
| "learning_rate": 2.119736748248172e-05, | |
| "loss": 0.0031, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.574686431014823, | |
| "grad_norm": 2.9996261596679688, | |
| "learning_rate": 2.1077156166716323e-05, | |
| "loss": 0.007, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.579247434435576, | |
| "grad_norm": 2.0634710788726807, | |
| "learning_rate": 2.0957195584001986e-05, | |
| "loss": 0.0076, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.5838084378563284, | |
| "grad_norm": 3.3125040531158447, | |
| "learning_rate": 2.083748677428083e-05, | |
| "loss": 0.0137, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.588369441277081, | |
| "grad_norm": 4.13361120223999, | |
| "learning_rate": 2.0718030775312285e-05, | |
| "loss": 0.0193, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.5929304446978336, | |
| "grad_norm": 1.7809717655181885, | |
| "learning_rate": 2.0598828622664213e-05, | |
| "loss": 0.006, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.597491448118586, | |
| "grad_norm": 12.555645942687988, | |
| "learning_rate": 2.0479881349703883e-05, | |
| "loss": 0.0169, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.602052451539339, | |
| "grad_norm": 2.071539878845215, | |
| "learning_rate": 2.0361189987588918e-05, | |
| "loss": 0.0072, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.6066134549600912, | |
| "grad_norm": 1.3284027576446533, | |
| "learning_rate": 2.024275556525858e-05, | |
| "loss": 0.006, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.6111744583808436, | |
| "grad_norm": 0.8479968905448914, | |
| "learning_rate": 2.012457910942458e-05, | |
| "loss": 0.0039, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.6157354618015964, | |
| "grad_norm": 2.979787826538086, | |
| "learning_rate": 2.0006661644562375e-05, | |
| "loss": 0.0092, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.620296465222349, | |
| "grad_norm": 0.8467869758605957, | |
| "learning_rate": 1.988900419290224e-05, | |
| "loss": 0.0033, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.6248574686431017, | |
| "grad_norm": 0.7672457098960876, | |
| "learning_rate": 1.9771607774420307e-05, | |
| "loss": 0.0038, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.629418472063854, | |
| "grad_norm": 1.3988791704177856, | |
| "learning_rate": 1.9654473406829903e-05, | |
| "loss": 0.0059, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.6339794754846064, | |
| "grad_norm": 1.741504430770874, | |
| "learning_rate": 1.953760210557254e-05, | |
| "loss": 0.0096, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.6385404789053593, | |
| "grad_norm": 4.00645637512207, | |
| "learning_rate": 1.942099488380923e-05, | |
| "loss": 0.0098, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.6431014823261116, | |
| "grad_norm": 0.9082571864128113, | |
| "learning_rate": 1.9304652752411734e-05, | |
| "loss": 0.0038, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.6476624857468645, | |
| "grad_norm": 0.9427306652069092, | |
| "learning_rate": 1.9188576719953633e-05, | |
| "loss": 0.0051, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.652223489167617, | |
| "grad_norm": 0.5667453408241272, | |
| "learning_rate": 1.9072767792701768e-05, | |
| "loss": 0.0029, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.6567844925883692, | |
| "grad_norm": 1.584375262260437, | |
| "learning_rate": 1.895722697460737e-05, | |
| "loss": 0.0042, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.661345496009122, | |
| "grad_norm": 4.02076530456543, | |
| "learning_rate": 1.884195526729748e-05, | |
| "loss": 0.0092, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.6659064994298745, | |
| "grad_norm": 2.488560438156128, | |
| "learning_rate": 1.8726953670066193e-05, | |
| "loss": 0.0041, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.6704675028506273, | |
| "grad_norm": 1.546533226966858, | |
| "learning_rate": 1.861222317986598e-05, | |
| "loss": 0.0074, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.6750285062713797, | |
| "grad_norm": 2.984379529953003, | |
| "learning_rate": 1.8497764791299117e-05, | |
| "loss": 0.0091, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.679589509692132, | |
| "grad_norm": 1.3167816400527954, | |
| "learning_rate": 1.8383579496609004e-05, | |
| "loss": 0.0063, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.684150513112885, | |
| "grad_norm": 2.830770492553711, | |
| "learning_rate": 1.8269668285671587e-05, | |
| "loss": 0.0059, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.6887115165336373, | |
| "grad_norm": 1.9150404930114746, | |
| "learning_rate": 1.8156032145986784e-05, | |
| "loss": 0.003, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.69327251995439, | |
| "grad_norm": 0.5943277478218079, | |
| "learning_rate": 1.8042672062669863e-05, | |
| "loss": 0.0028, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.6978335233751425, | |
| "grad_norm": 0.7251271605491638, | |
| "learning_rate": 1.7929589018443016e-05, | |
| "loss": 0.004, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.702394526795895, | |
| "grad_norm": 0.5687354207038879, | |
| "learning_rate": 1.7816783993626712e-05, | |
| "loss": 0.0032, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.7069555302166477, | |
| "grad_norm": 0.7953961491584778, | |
| "learning_rate": 1.7704257966131304e-05, | |
| "loss": 0.0041, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.7115165336374, | |
| "grad_norm": 1.1048918962478638, | |
| "learning_rate": 1.759201191144852e-05, | |
| "loss": 0.0035, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.716077537058153, | |
| "grad_norm": 1.2720906734466553, | |
| "learning_rate": 1.7480046802642906e-05, | |
| "loss": 0.0043, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.7206385404789053, | |
| "grad_norm": 4.515064239501953, | |
| "learning_rate": 1.7368363610343617e-05, | |
| "loss": 0.009, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.7251995438996577, | |
| "grad_norm": 2.149350166320801, | |
| "learning_rate": 1.725696330273575e-05, | |
| "loss": 0.007, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.7297605473204105, | |
| "grad_norm": 0.4950422942638397, | |
| "learning_rate": 1.714584684555211e-05, | |
| "loss": 0.0028, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.734321550741163, | |
| "grad_norm": 1.4185466766357422, | |
| "learning_rate": 1.703501520206482e-05, | |
| "loss": 0.005, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.7388825541619157, | |
| "grad_norm": 3.5513916015625, | |
| "learning_rate": 1.692446933307687e-05, | |
| "loss": 0.0053, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.743443557582668, | |
| "grad_norm": 2.450361728668213, | |
| "learning_rate": 1.6814210196913927e-05, | |
| "loss": 0.0083, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.7480045610034205, | |
| "grad_norm": 0.6304205656051636, | |
| "learning_rate": 1.6704238749415957e-05, | |
| "loss": 0.0042, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.7525655644241733, | |
| "grad_norm": 2.1942145824432373, | |
| "learning_rate": 1.6594555943928887e-05, | |
| "loss": 0.0046, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 3.757126567844926, | |
| "grad_norm": 0.7358046770095825, | |
| "learning_rate": 1.6485162731296495e-05, | |
| "loss": 0.0027, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.7616875712656785, | |
| "grad_norm": 3.021874189376831, | |
| "learning_rate": 1.6376060059851963e-05, | |
| "loss": 0.0092, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 3.766248574686431, | |
| "grad_norm": 0.618632435798645, | |
| "learning_rate": 1.6267248875409835e-05, | |
| "loss": 0.0033, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.7708095781071833, | |
| "grad_norm": 1.160000205039978, | |
| "learning_rate": 1.6158730121257737e-05, | |
| "loss": 0.0047, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 3.775370581527936, | |
| "grad_norm": 1.2812681198120117, | |
| "learning_rate": 1.6050504738148152e-05, | |
| "loss": 0.005, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.779931584948689, | |
| "grad_norm": 1.2367734909057617, | |
| "learning_rate": 1.5942573664290412e-05, | |
| "loss": 0.0058, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 3.7844925883694414, | |
| "grad_norm": 1.0198251008987427, | |
| "learning_rate": 1.5834937835342366e-05, | |
| "loss": 0.0039, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.7890535917901937, | |
| "grad_norm": 0.9254816174507141, | |
| "learning_rate": 1.5727598184402464e-05, | |
| "loss": 0.003, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 3.7936145952109466, | |
| "grad_norm": 1.487305998802185, | |
| "learning_rate": 1.562055564200154e-05, | |
| "loss": 0.0049, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.798175598631699, | |
| "grad_norm": 0.8291229009628296, | |
| "learning_rate": 1.5513811136094787e-05, | |
| "loss": 0.0042, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 3.802736602052452, | |
| "grad_norm": 0.4022439122200012, | |
| "learning_rate": 1.5407365592053735e-05, | |
| "loss": 0.0023, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.807297605473204, | |
| "grad_norm": 1.640064001083374, | |
| "learning_rate": 1.5301219932658156e-05, | |
| "loss": 0.0057, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 3.8118586088939566, | |
| "grad_norm": 1.2258837223052979, | |
| "learning_rate": 1.5195375078088147e-05, | |
| "loss": 0.0034, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.8164196123147094, | |
| "grad_norm": 1.264310359954834, | |
| "learning_rate": 1.5089831945916133e-05, | |
| "loss": 0.0057, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 3.8209806157354618, | |
| "grad_norm": 2.701324939727783, | |
| "learning_rate": 1.4984591451098845e-05, | |
| "loss": 0.0086, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.8255416191562146, | |
| "grad_norm": 3.1908340454101562, | |
| "learning_rate": 1.4879654505969498e-05, | |
| "loss": 0.0085, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 3.830102622576967, | |
| "grad_norm": 0.7210344672203064, | |
| "learning_rate": 1.4775022020229756e-05, | |
| "loss": 0.0028, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.8346636259977194, | |
| "grad_norm": 2.616572380065918, | |
| "learning_rate": 1.4670694900942005e-05, | |
| "loss": 0.0069, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 3.839224629418472, | |
| "grad_norm": 1.0061376094818115, | |
| "learning_rate": 1.4566674052521357e-05, | |
| "loss": 0.0035, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.8437856328392246, | |
| "grad_norm": 0.5519426465034485, | |
| "learning_rate": 1.4462960376727813e-05, | |
| "loss": 0.0025, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 3.8483466362599774, | |
| "grad_norm": 1.0319393873214722, | |
| "learning_rate": 1.4359554772658552e-05, | |
| "loss": 0.0037, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.85290763968073, | |
| "grad_norm": 5.387468338012695, | |
| "learning_rate": 1.4256458136739998e-05, | |
| "loss": 0.0131, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 3.857468643101482, | |
| "grad_norm": 1.1421221494674683, | |
| "learning_rate": 1.415367136272019e-05, | |
| "loss": 0.0038, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.862029646522235, | |
| "grad_norm": 1.2745294570922852, | |
| "learning_rate": 1.4051195341660939e-05, | |
| "loss": 0.0063, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 3.8665906499429874, | |
| "grad_norm": 3.796590805053711, | |
| "learning_rate": 1.3949030961930077e-05, | |
| "loss": 0.0102, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.8711516533637402, | |
| "grad_norm": 1.4126759767532349, | |
| "learning_rate": 1.3847179109193925e-05, | |
| "loss": 0.0043, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.8757126567844926, | |
| "grad_norm": 0.6939073801040649, | |
| "learning_rate": 1.374564066640937e-05, | |
| "loss": 0.0038, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.880273660205245, | |
| "grad_norm": 0.8315445780754089, | |
| "learning_rate": 1.3644416513816416e-05, | |
| "loss": 0.0051, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 3.884834663625998, | |
| "grad_norm": 0.7290100455284119, | |
| "learning_rate": 1.3543507528930472e-05, | |
| "loss": 0.0035, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.88939566704675, | |
| "grad_norm": 3.279763698577881, | |
| "learning_rate": 1.3442914586534688e-05, | |
| "loss": 0.0112, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 3.893956670467503, | |
| "grad_norm": 0.7284213900566101, | |
| "learning_rate": 1.3342638558672504e-05, | |
| "loss": 0.0032, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.8985176738882554, | |
| "grad_norm": 1.461885690689087, | |
| "learning_rate": 1.3242680314639993e-05, | |
| "loss": 0.0046, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 3.903078677309008, | |
| "grad_norm": 0.36676260828971863, | |
| "learning_rate": 1.31430407209783e-05, | |
| "loss": 0.0023, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.9076396807297606, | |
| "grad_norm": 0.5633653402328491, | |
| "learning_rate": 1.3043720641466289e-05, | |
| "loss": 0.0025, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 3.912200684150513, | |
| "grad_norm": 0.9612581133842468, | |
| "learning_rate": 1.2944720937112836e-05, | |
| "loss": 0.0023, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.916761687571266, | |
| "grad_norm": 2.462862491607666, | |
| "learning_rate": 1.284604246614955e-05, | |
| "loss": 0.0048, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 3.9213226909920182, | |
| "grad_norm": 0.6819082498550415, | |
| "learning_rate": 1.2747686084023192e-05, | |
| "loss": 0.003, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.9258836944127706, | |
| "grad_norm": 0.4367881417274475, | |
| "learning_rate": 1.2649652643388382e-05, | |
| "loss": 0.0024, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.9304446978335235, | |
| "grad_norm": 0.6573525667190552, | |
| "learning_rate": 1.2551942994100136e-05, | |
| "loss": 0.0029, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.935005701254276, | |
| "grad_norm": 1.2858328819274902, | |
| "learning_rate": 1.2454557983206477e-05, | |
| "loss": 0.0039, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.9395667046750287, | |
| "grad_norm": 0.46625810861587524, | |
| "learning_rate": 1.2357498454941175e-05, | |
| "loss": 0.0029, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.944127708095781, | |
| "grad_norm": 0.39558151364326477, | |
| "learning_rate": 1.2260765250716356e-05, | |
| "loss": 0.0021, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.9486887115165334, | |
| "grad_norm": 0.9131205677986145, | |
| "learning_rate": 1.2164359209115234e-05, | |
| "loss": 0.0032, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.9532497149372863, | |
| "grad_norm": 0.8359415531158447, | |
| "learning_rate": 1.2068281165884864e-05, | |
| "loss": 0.0038, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.9578107183580387, | |
| "grad_norm": 0.5636485815048218, | |
| "learning_rate": 1.1972531953928823e-05, | |
| "loss": 0.0025, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.9623717217787915, | |
| "grad_norm": 1.0502080917358398, | |
| "learning_rate": 1.1877112403300079e-05, | |
| "loss": 0.0032, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.966932725199544, | |
| "grad_norm": 1.4225945472717285, | |
| "learning_rate": 1.1782023341193754e-05, | |
| "loss": 0.005, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.9714937286202963, | |
| "grad_norm": 0.8865280747413635, | |
| "learning_rate": 1.1687265591939927e-05, | |
| "loss": 0.0036, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.976054732041049, | |
| "grad_norm": 0.7002689242362976, | |
| "learning_rate": 1.1592839976996555e-05, | |
| "loss": 0.0034, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.9806157354618015, | |
| "grad_norm": 1.3850862979888916, | |
| "learning_rate": 1.1498747314942255e-05, | |
| "loss": 0.0052, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.9851767388825543, | |
| "grad_norm": 1.3568379878997803, | |
| "learning_rate": 1.1404988421469348e-05, | |
| "loss": 0.0037, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.9897377423033067, | |
| "grad_norm": 0.815382719039917, | |
| "learning_rate": 1.1311564109376621e-05, | |
| "loss": 0.0036, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.994298745724059, | |
| "grad_norm": 2.1018550395965576, | |
| "learning_rate": 1.121847518856241e-05, | |
| "loss": 0.0048, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.998859749144812, | |
| "grad_norm": 0.6212396025657654, | |
| "learning_rate": 1.1125722466017547e-05, | |
| "loss": 0.0025, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.9469783306121826, | |
| "learning_rate": 1.1033306745818283e-05, | |
| "loss": 0.0028, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.004561003420752, | |
| "grad_norm": 0.6264204382896423, | |
| "learning_rate": 1.0941228829119453e-05, | |
| "loss": 0.0033, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.009122006841505, | |
| "grad_norm": 0.27060914039611816, | |
| "learning_rate": 1.0849489514147459e-05, | |
| "loss": 0.0016, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.013683010262258, | |
| "grad_norm": 0.47667694091796875, | |
| "learning_rate": 1.0758089596193282e-05, | |
| "loss": 0.0028, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.01824401368301, | |
| "grad_norm": 0.9456324577331543, | |
| "learning_rate": 1.066702986760577e-05, | |
| "loss": 0.0029, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.022805017103763, | |
| "grad_norm": 1.1689949035644531, | |
| "learning_rate": 1.057631111778456e-05, | |
| "loss": 0.0034, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.027366020524515, | |
| "grad_norm": 0.25507575273513794, | |
| "learning_rate": 1.0485934133173387e-05, | |
| "loss": 0.002, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.031927023945268, | |
| "grad_norm": 0.37119409441947937, | |
| "learning_rate": 1.0395899697253208e-05, | |
| "loss": 0.0022, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.036488027366021, | |
| "grad_norm": 0.3143307864665985, | |
| "learning_rate": 1.0306208590535382e-05, | |
| "loss": 0.0021, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.041049030786773, | |
| "grad_norm": 0.3163100481033325, | |
| "learning_rate": 1.0216861590554983e-05, | |
| "loss": 0.0021, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.045610034207526, | |
| "grad_norm": 0.4894810914993286, | |
| "learning_rate": 1.012785947186397e-05, | |
| "loss": 0.0024, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.050171037628278, | |
| "grad_norm": 0.3152639865875244, | |
| "learning_rate": 1.0039203006024527e-05, | |
| "loss": 0.0025, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.05473204104903, | |
| "grad_norm": 1.619958519935608, | |
| "learning_rate": 9.95089296160241e-06, | |
| "loss": 0.0054, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.059293044469784, | |
| "grad_norm": 0.5192055106163025, | |
| "learning_rate": 9.862930104160162e-06, | |
| "loss": 0.0027, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.063854047890536, | |
| "grad_norm": 2.080965518951416, | |
| "learning_rate": 9.775315196250612e-06, | |
| "loss": 0.0042, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.068415051311288, | |
| "grad_norm": 0.3620293140411377, | |
| "learning_rate": 9.688048997410143e-06, | |
| "loss": 0.0022, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.072976054732041, | |
| "grad_norm": 0.37922972440719604, | |
| "learning_rate": 9.601132264152223e-06, | |
| "loss": 0.002, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.077537058152793, | |
| "grad_norm": 0.3833453059196472, | |
| "learning_rate": 9.51456574996078e-06, | |
| "loss": 0.0022, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.0820980615735465, | |
| "grad_norm": 0.41054201126098633, | |
| "learning_rate": 9.428350205283648e-06, | |
| "loss": 0.0024, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.086659064994299, | |
| "grad_norm": 0.3207608461380005, | |
| "learning_rate": 9.342486377526133e-06, | |
| "loss": 0.002, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.091220068415051, | |
| "grad_norm": 0.4284456968307495, | |
| "learning_rate": 9.256975011044483e-06, | |
| "loss": 0.0022, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.659402853631918e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
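
The block above is the raw `trainer_state.json` that a Hugging Face `Trainer` run writes next to its checkpoints: with `"logging_steps": 1.0` there is one `log_history` entry per optimizer step, and this section's entries run through step 900 of a planned 1100-step, 5-epoch run. As a quick illustration only (none of this code comes from the log itself), here is a minimal Python sketch for loading such a file and plotting its loss, learning-rate, and gradient-norm curves; the input path `trainer_state.json` and the output name `training_curves.png` are assumed for the example.

```python
# Minimal sketch: visualize the curves recorded in a trainer_state.json.
# Assumptions: the JSON above is saved as "trainer_state.json" in the
# working directory, and matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training-loss entries; evaluation entries (if any) use
# different keys such as "eval_loss" and would lack "loss"/"grad_norm".
history = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in history]

fig, axes = plt.subplots(3, 1, sharex=True, figsize=(8, 9))
for ax, key in zip(axes, ("loss", "learning_rate", "grad_norm")):
    ax.plot(steps, [e[key] for e in history])
    ax.set_ylabel(key)

# The logged losses span a couple of orders of magnitude (roughly 0.05
# down to 0.002 in epochs 3-4 alone), so a log scale keeps them readable.
axes[0].set_yscale("log")
axes[-1].set_xlabel("step")
fig.tight_layout()
fig.savefig("training_curves.png", dpi=150)
```

Plotted this way, the entries above tell a simple story: by epochs 3-4 the loss settles into the 0.002-0.05 range while the learning rate decays from about 4.3e-05 at step 627 to 9.3e-06 at step 900, with the occasional grad-norm spike (e.g. 11.87 at step 687, 12.56 at step 791) standing out against an otherwise shrinking baseline.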