| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.993258426966292, | |
| "eval_steps": 500, | |
| "global_step": 333, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008988764044943821, | |
| "grad_norm": 5.937067934578121, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 0.8925, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.017977528089887642, | |
| "grad_norm": 5.923591025846198, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 0.8908, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.02696629213483146, | |
| "grad_norm": 5.45957942819648, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 0.8716, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.035955056179775284, | |
| "grad_norm": 3.974971561642825, | |
| "learning_rate": 9.411764705882354e-06, | |
| "loss": 0.8395, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0449438202247191, | |
| "grad_norm": 2.1772095757152767, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 0.7954, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05393258426966292, | |
| "grad_norm": 5.329495741252824, | |
| "learning_rate": 1.4117647058823532e-05, | |
| "loss": 0.8391, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.06292134831460675, | |
| "grad_norm": 7.0082332084809424, | |
| "learning_rate": 1.647058823529412e-05, | |
| "loss": 0.8077, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.07191011235955057, | |
| "grad_norm": 7.179904932078677, | |
| "learning_rate": 1.8823529411764708e-05, | |
| "loss": 0.8269, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.08089887640449438, | |
| "grad_norm": 3.944730501336432, | |
| "learning_rate": 2.1176470588235296e-05, | |
| "loss": 0.7655, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0898876404494382, | |
| "grad_norm": 3.094924202191644, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 0.7356, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09887640449438202, | |
| "grad_norm": 2.1418931101275476, | |
| "learning_rate": 2.5882352941176475e-05, | |
| "loss": 0.7015, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.10786516853932585, | |
| "grad_norm": 1.8786098685363002, | |
| "learning_rate": 2.8235294117647063e-05, | |
| "loss": 0.6907, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.11685393258426967, | |
| "grad_norm": 1.217510887771457, | |
| "learning_rate": 3.0588235294117644e-05, | |
| "loss": 0.6657, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.1258426966292135, | |
| "grad_norm": 1.645912176172542, | |
| "learning_rate": 3.294117647058824e-05, | |
| "loss": 0.6501, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.1348314606741573, | |
| "grad_norm": 1.1214763643282593, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 0.6431, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.14382022471910114, | |
| "grad_norm": 1.3566330254296206, | |
| "learning_rate": 3.7647058823529415e-05, | |
| "loss": 0.6305, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.15280898876404495, | |
| "grad_norm": 1.5612433580633596, | |
| "learning_rate": 4e-05, | |
| "loss": 0.6239, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.16179775280898875, | |
| "grad_norm": 1.6192637419680402, | |
| "learning_rate": 4.235294117647059e-05, | |
| "loss": 0.6169, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1707865168539326, | |
| "grad_norm": 1.2309268080406381, | |
| "learning_rate": 4.470588235294118e-05, | |
| "loss": 0.6097, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.1797752808988764, | |
| "grad_norm": 1.862538444377319, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 0.6134, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.18876404494382024, | |
| "grad_norm": 1.5958037318215281, | |
| "learning_rate": 4.941176470588236e-05, | |
| "loss": 0.6007, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.19775280898876405, | |
| "grad_norm": 1.357215808802562, | |
| "learning_rate": 5.176470588235295e-05, | |
| "loss": 0.5961, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.20674157303370785, | |
| "grad_norm": 1.3402016135380905, | |
| "learning_rate": 5.411764705882354e-05, | |
| "loss": 0.5872, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.2157303370786517, | |
| "grad_norm": 1.9136536863131055, | |
| "learning_rate": 5.6470588235294126e-05, | |
| "loss": 0.5779, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.2247191011235955, | |
| "grad_norm": 1.0205411195750105, | |
| "learning_rate": 5.8823529411764714e-05, | |
| "loss": 0.5728, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.23370786516853934, | |
| "grad_norm": 2.6452559257739265, | |
| "learning_rate": 6.117647058823529e-05, | |
| "loss": 0.5948, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.24269662921348314, | |
| "grad_norm": 1.7949903848826065, | |
| "learning_rate": 6.352941176470589e-05, | |
| "loss": 0.5869, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.251685393258427, | |
| "grad_norm": 1.9069324360886062, | |
| "learning_rate": 6.588235294117648e-05, | |
| "loss": 0.5816, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2606741573033708, | |
| "grad_norm": 1.7027526272642033, | |
| "learning_rate": 6.823529411764707e-05, | |
| "loss": 0.5674, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.2696629213483146, | |
| "grad_norm": 1.6647039397669456, | |
| "learning_rate": 7.058823529411765e-05, | |
| "loss": 0.5801, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2786516853932584, | |
| "grad_norm": 1.2647930520355923, | |
| "learning_rate": 7.294117647058824e-05, | |
| "loss": 0.5622, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2876404494382023, | |
| "grad_norm": 1.8668167622362088, | |
| "learning_rate": 7.529411764705883e-05, | |
| "loss": 0.5628, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2966292134831461, | |
| "grad_norm": 1.6151821153814043, | |
| "learning_rate": 7.764705882352942e-05, | |
| "loss": 0.569, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3056179775280899, | |
| "grad_norm": 1.9972629519815388, | |
| "learning_rate": 8e-05, | |
| "loss": 0.5559, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.3146067415730337, | |
| "grad_norm": 1.930654248915306, | |
| "learning_rate": 7.999779207981935e-05, | |
| "loss": 0.5578, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.3235955056179775, | |
| "grad_norm": 1.1040009369498223, | |
| "learning_rate": 7.999116856302298e-05, | |
| "loss": 0.5553, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3325842696629214, | |
| "grad_norm": 1.9114787612288138, | |
| "learning_rate": 7.998013018082072e-05, | |
| "loss": 0.5518, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3415730337078652, | |
| "grad_norm": 2.5196506278998476, | |
| "learning_rate": 7.996467815180588e-05, | |
| "loss": 0.5587, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.350561797752809, | |
| "grad_norm": 1.5471557908374793, | |
| "learning_rate": 7.994481418182082e-05, | |
| "loss": 0.5394, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.3595505617977528, | |
| "grad_norm": 2.747162602890951, | |
| "learning_rate": 7.992054046376854e-05, | |
| "loss": 0.5602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3685393258426966, | |
| "grad_norm": 1.9963506460084293, | |
| "learning_rate": 7.989185967737066e-05, | |
| "loss": 0.5489, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.3775280898876405, | |
| "grad_norm": 2.356173456359232, | |
| "learning_rate": 7.985877498887149e-05, | |
| "loss": 0.554, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3865168539325843, | |
| "grad_norm": 1.9651033697846167, | |
| "learning_rate": 7.982129005068865e-05, | |
| "loss": 0.5432, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3955056179775281, | |
| "grad_norm": 1.7900792925575555, | |
| "learning_rate": 7.977940900100967e-05, | |
| "loss": 0.5476, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.4044943820224719, | |
| "grad_norm": 1.8559704498293903, | |
| "learning_rate": 7.973313646333532e-05, | |
| "loss": 0.535, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4134831460674157, | |
| "grad_norm": 1.4415920297319014, | |
| "learning_rate": 7.968247754596908e-05, | |
| "loss": 0.5339, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.42247191011235957, | |
| "grad_norm": 1.4135208728964568, | |
| "learning_rate": 7.962743784145323e-05, | |
| "loss": 0.5381, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.4314606741573034, | |
| "grad_norm": 1.6688537146906361, | |
| "learning_rate": 7.956802342595152e-05, | |
| "loss": 0.5328, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.4404494382022472, | |
| "grad_norm": 1.3978532275006461, | |
| "learning_rate": 7.950424085857827e-05, | |
| "loss": 0.5265, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.449438202247191, | |
| "grad_norm": 1.7334227968766784, | |
| "learning_rate": 7.943609718067437e-05, | |
| "loss": 0.5245, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4584269662921348, | |
| "grad_norm": 0.7953199977805888, | |
| "learning_rate": 7.936359991502993e-05, | |
| "loss": 0.5167, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.46741573033707867, | |
| "grad_norm": 1.6871487499435203, | |
| "learning_rate": 7.92867570650537e-05, | |
| "loss": 0.5325, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.4764044943820225, | |
| "grad_norm": 1.304827694741159, | |
| "learning_rate": 7.920557711388967e-05, | |
| "loss": 0.5371, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.4853932584269663, | |
| "grad_norm": 1.6036182086909745, | |
| "learning_rate": 7.912006902348045e-05, | |
| "loss": 0.5245, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.4943820224719101, | |
| "grad_norm": 1.0607757354012914, | |
| "learning_rate": 7.903024223357797e-05, | |
| "loss": 0.5289, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.503370786516854, | |
| "grad_norm": 2.0583378114050728, | |
| "learning_rate": 7.893610666070134e-05, | |
| "loss": 0.5346, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.5123595505617977, | |
| "grad_norm": 1.4305584900288937, | |
| "learning_rate": 7.883767269704209e-05, | |
| "loss": 0.5251, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5213483146067416, | |
| "grad_norm": 1.5466083404792839, | |
| "learning_rate": 7.873495120931697e-05, | |
| "loss": 0.5242, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.5303370786516854, | |
| "grad_norm": 1.3322841661439688, | |
| "learning_rate": 7.86279535375683e-05, | |
| "loss": 0.5266, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.5393258426966292, | |
| "grad_norm": 1.2220900027281376, | |
| "learning_rate": 7.851669149391198e-05, | |
| "loss": 0.5228, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5483146067415731, | |
| "grad_norm": 1.5982554174133687, | |
| "learning_rate": 7.84011773612336e-05, | |
| "loss": 0.5231, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5573033707865168, | |
| "grad_norm": 1.146536447272186, | |
| "learning_rate": 7.828142389183239e-05, | |
| "loss": 0.5195, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.5662921348314607, | |
| "grad_norm": 1.0879405901836858, | |
| "learning_rate": 7.815744430601344e-05, | |
| "loss": 0.5129, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.5752808988764045, | |
| "grad_norm": 1.6921788651579457, | |
| "learning_rate": 7.802925229062823e-05, | |
| "loss": 0.521, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.5842696629213483, | |
| "grad_norm": 1.0795726495535796, | |
| "learning_rate": 7.789686199756365e-05, | |
| "loss": 0.5172, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.5932584269662922, | |
| "grad_norm": 1.1065195406196553, | |
| "learning_rate": 7.776028804217968e-05, | |
| "loss": 0.5134, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.6022471910112359, | |
| "grad_norm": 1.2749574533792616, | |
| "learning_rate": 7.761954550169593e-05, | |
| "loss": 0.5161, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.6112359550561798, | |
| "grad_norm": 1.3329895959967077, | |
| "learning_rate": 7.74746499135272e-05, | |
| "loss": 0.5153, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.6202247191011236, | |
| "grad_norm": 1.233521082461672, | |
| "learning_rate": 7.732561727356811e-05, | |
| "loss": 0.5086, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.6292134831460674, | |
| "grad_norm": 1.1760347925547088, | |
| "learning_rate": 7.717246403442735e-05, | |
| "loss": 0.5137, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6382022471910113, | |
| "grad_norm": 1.1780036452883795, | |
| "learning_rate": 7.701520710361129e-05, | |
| "loss": 0.4999, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.647191011235955, | |
| "grad_norm": 0.948808130697301, | |
| "learning_rate": 7.685386384165748e-05, | |
| "loss": 0.5036, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.6561797752808989, | |
| "grad_norm": 1.7675174539861906, | |
| "learning_rate": 7.668845206021812e-05, | |
| "loss": 0.5132, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.6651685393258427, | |
| "grad_norm": 0.7501534484404624, | |
| "learning_rate": 7.651899002009375e-05, | |
| "loss": 0.5044, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.6741573033707865, | |
| "grad_norm": 1.2580366817929822, | |
| "learning_rate": 7.634549642921725e-05, | |
| "loss": 0.5158, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.6831460674157304, | |
| "grad_norm": 1.2116010404784998, | |
| "learning_rate": 7.616799044058867e-05, | |
| "loss": 0.5132, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.6921348314606741, | |
| "grad_norm": 1.504394845229427, | |
| "learning_rate": 7.598649165016073e-05, | |
| "loss": 0.5051, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.701123595505618, | |
| "grad_norm": 0.8286673874695757, | |
| "learning_rate": 7.58010200946755e-05, | |
| "loss": 0.5, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.7101123595505618, | |
| "grad_norm": 1.6610859144234769, | |
| "learning_rate": 7.561159624945257e-05, | |
| "loss": 0.5094, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.7191011235955056, | |
| "grad_norm": 0.8748352440236109, | |
| "learning_rate": 7.541824102612839e-05, | |
| "loss": 0.5009, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7280898876404495, | |
| "grad_norm": 1.4412943546400492, | |
| "learning_rate": 7.5220975770348e-05, | |
| "loss": 0.501, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.7370786516853932, | |
| "grad_norm": 1.017225970339285, | |
| "learning_rate": 7.501982225940833e-05, | |
| "loss": 0.5009, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.7460674157303371, | |
| "grad_norm": 1.3512165456613465, | |
| "learning_rate": 7.48148026998542e-05, | |
| "loss": 0.503, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.755056179775281, | |
| "grad_norm": 1.0549525315803745, | |
| "learning_rate": 7.460593972502674e-05, | |
| "loss": 0.4953, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.7640449438202247, | |
| "grad_norm": 1.1196254022912724, | |
| "learning_rate": 7.439325639256483e-05, | |
| "loss": 0.4897, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.7730337078651686, | |
| "grad_norm": 1.1317585655215339, | |
| "learning_rate": 7.417677618185955e-05, | |
| "loss": 0.4969, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.7820224719101123, | |
| "grad_norm": 1.2763979132088588, | |
| "learning_rate": 7.39565229914622e-05, | |
| "loss": 0.5046, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.7910112359550562, | |
| "grad_norm": 1.2131663136230482, | |
| "learning_rate": 7.373252113644596e-05, | |
| "loss": 0.5006, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.121494784629588, | |
| "learning_rate": 7.350479534572166e-05, | |
| "loss": 0.4949, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.8089887640449438, | |
| "grad_norm": 0.9241654008148638, | |
| "learning_rate": 7.327337075930775e-05, | |
| "loss": 0.5003, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.8179775280898877, | |
| "grad_norm": 1.4368488374102792, | |
| "learning_rate": 7.303827292555495e-05, | |
| "loss": 0.4907, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.8269662921348314, | |
| "grad_norm": 1.2319104612634353, | |
| "learning_rate": 7.279952779832584e-05, | |
| "loss": 0.5043, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.8359550561797753, | |
| "grad_norm": 0.8671468092587438, | |
| "learning_rate": 7.255716173412966e-05, | |
| "loss": 0.486, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.8449438202247191, | |
| "grad_norm": 1.3364076805186555, | |
| "learning_rate": 7.23112014892126e-05, | |
| "loss": 0.4932, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.8539325842696629, | |
| "grad_norm": 1.0720511804076949, | |
| "learning_rate": 7.20616742166041e-05, | |
| "loss": 0.49, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.8629213483146068, | |
| "grad_norm": 0.8784137232709633, | |
| "learning_rate": 7.180860746311917e-05, | |
| "loss": 0.4911, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.8719101123595505, | |
| "grad_norm": 1.0650244396836273, | |
| "learning_rate": 7.155202916631743e-05, | |
| "loss": 0.4922, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.8808988764044944, | |
| "grad_norm": 1.15396517981509, | |
| "learning_rate": 7.129196765141886e-05, | |
| "loss": 0.4899, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.8898876404494382, | |
| "grad_norm": 0.8409463006756092, | |
| "learning_rate": 7.10284516281768e-05, | |
| "loss": 0.4935, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.898876404494382, | |
| "grad_norm": 0.8987420745372244, | |
| "learning_rate": 7.076151018770854e-05, | |
| "loss": 0.4933, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9078651685393259, | |
| "grad_norm": 1.3476943698616615, | |
| "learning_rate": 7.049117279928374e-05, | |
| "loss": 0.4941, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.9168539325842696, | |
| "grad_norm": 1.0470417246613994, | |
| "learning_rate": 7.021746930707117e-05, | |
| "loss": 0.4878, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.9258426966292135, | |
| "grad_norm": 1.222327504764566, | |
| "learning_rate": 6.994042992684406e-05, | |
| "loss": 0.478, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.9348314606741573, | |
| "grad_norm": 0.8143177540157983, | |
| "learning_rate": 6.966008524264429e-05, | |
| "loss": 0.4868, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.9438202247191011, | |
| "grad_norm": 1.3154017535872904, | |
| "learning_rate": 6.937646620340618e-05, | |
| "loss": 0.4921, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.952808988764045, | |
| "grad_norm": 1.2828038188846893, | |
| "learning_rate": 6.908960411953973e-05, | |
| "loss": 0.4871, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.9617977528089887, | |
| "grad_norm": 0.7484103692531322, | |
| "learning_rate": 6.879953065947416e-05, | |
| "loss": 0.4856, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.9707865168539326, | |
| "grad_norm": 1.393234551386416, | |
| "learning_rate": 6.850627784616178e-05, | |
| "loss": 0.4884, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.9797752808988764, | |
| "grad_norm": 0.6778085483847683, | |
| "learning_rate": 6.82098780535428e-05, | |
| "loss": 0.4844, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.9887640449438202, | |
| "grad_norm": 0.95356400631233, | |
| "learning_rate": 6.791036400297142e-05, | |
| "loss": 0.4766, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.9977528089887641, | |
| "grad_norm": 0.9616360385718218, | |
| "learning_rate": 6.760776875960347e-05, | |
| "loss": 0.4796, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.006741573033708, | |
| "grad_norm": 1.8748279594605497, | |
| "learning_rate": 6.730212572874618e-05, | |
| "loss": 0.8205, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.0157303370786517, | |
| "grad_norm": 0.7712220221008119, | |
| "learning_rate": 6.699346865217031e-05, | |
| "loss": 0.4547, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.0247191011235954, | |
| "grad_norm": 1.1476060733360436, | |
| "learning_rate": 6.668183160438531e-05, | |
| "loss": 0.4598, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.0337078651685394, | |
| "grad_norm": 1.091336171209942, | |
| "learning_rate": 6.636724898887751e-05, | |
| "loss": 0.4566, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.0426966292134832, | |
| "grad_norm": 1.356832256792843, | |
| "learning_rate": 6.604975553431219e-05, | |
| "loss": 0.4636, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.051685393258427, | |
| "grad_norm": 0.8230787378003628, | |
| "learning_rate": 6.572938629069959e-05, | |
| "loss": 0.4539, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.060674157303371, | |
| "grad_norm": 0.7577254786527922, | |
| "learning_rate": 6.540617662552565e-05, | |
| "loss": 0.4626, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.0696629213483146, | |
| "grad_norm": 0.9663188263281904, | |
| "learning_rate": 6.508016221984747e-05, | |
| "loss": 0.4507, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.0786516853932584, | |
| "grad_norm": 1.4024310383300067, | |
| "learning_rate": 6.475137906435435e-05, | |
| "loss": 0.4613, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.0876404494382022, | |
| "grad_norm": 0.7663106953730127, | |
| "learning_rate": 6.441986345539446e-05, | |
| "loss": 0.4656, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.0966292134831461, | |
| "grad_norm": 1.1216710559366627, | |
| "learning_rate": 6.408565199096798e-05, | |
| "loss": 0.4529, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.1056179775280899, | |
| "grad_norm": 0.8823684903577822, | |
| "learning_rate": 6.374878156668676e-05, | |
| "loss": 0.4588, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.1146067415730336, | |
| "grad_norm": 0.7542533792179187, | |
| "learning_rate": 6.340928937170118e-05, | |
| "loss": 0.4551, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.1235955056179776, | |
| "grad_norm": 1.1622893113229344, | |
| "learning_rate": 6.30672128845947e-05, | |
| "loss": 0.4587, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.1325842696629214, | |
| "grad_norm": 0.6982046729289911, | |
| "learning_rate": 6.272258986924624e-05, | |
| "loss": 0.4527, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.1415730337078651, | |
| "grad_norm": 0.5854116152358985, | |
| "learning_rate": 6.237545837066133e-05, | |
| "loss": 0.4518, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.1505617977528089, | |
| "grad_norm": 0.47197050837846644, | |
| "learning_rate": 6.202585671077204e-05, | |
| "loss": 0.4474, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.1595505617977528, | |
| "grad_norm": 0.4631791319657276, | |
| "learning_rate": 6.167382348420637e-05, | |
| "loss": 0.4573, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.1685393258426966, | |
| "grad_norm": 0.6398622174196904, | |
| "learning_rate": 6.131939755402755e-05, | |
| "loss": 0.4551, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.1775280898876406, | |
| "grad_norm": 1.110637394887812, | |
| "learning_rate": 6.09626180474438e-05, | |
| "loss": 0.4524, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.1865168539325843, | |
| "grad_norm": 1.377350137912211, | |
| "learning_rate": 6.060352435148874e-05, | |
| "loss": 0.4559, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.195505617977528, | |
| "grad_norm": 0.5283491845810788, | |
| "learning_rate": 6.024215610867327e-05, | |
| "loss": 0.4603, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.2044943820224718, | |
| "grad_norm": 0.9966520169090612, | |
| "learning_rate": 5.9878553212609184e-05, | |
| "loss": 0.4516, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.2134831460674158, | |
| "grad_norm": 1.4576011324058094, | |
| "learning_rate": 5.95127558036051e-05, | |
| "loss": 0.4596, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.2224719101123596, | |
| "grad_norm": 0.697821502136658, | |
| "learning_rate": 5.9144804264235066e-05, | |
| "loss": 0.4576, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.2314606741573033, | |
| "grad_norm": 1.5590501658526488, | |
| "learning_rate": 5.8774739214880554e-05, | |
| "loss": 0.4602, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.2404494382022473, | |
| "grad_norm": 0.6045793781300405, | |
| "learning_rate": 5.840260150924609e-05, | |
| "loss": 0.45, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.249438202247191, | |
| "grad_norm": 1.417376379462992, | |
| "learning_rate": 5.802843222984919e-05, | |
| "loss": 0.4564, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.2584269662921348, | |
| "grad_norm": 0.7383419101529723, | |
| "learning_rate": 5.765227268348501e-05, | |
| "loss": 0.4533, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.2674157303370785, | |
| "grad_norm": 1.4264362126630619, | |
| "learning_rate": 5.727416439666622e-05, | |
| "loss": 0.457, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.2764044943820225, | |
| "grad_norm": 0.8638040394017062, | |
| "learning_rate": 5.689414911103867e-05, | |
| "loss": 0.4516, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.2853932584269663, | |
| "grad_norm": 1.42829306450921, | |
| "learning_rate": 5.651226877877326e-05, | |
| "loss": 0.4556, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.29438202247191, | |
| "grad_norm": 1.0514839616322087, | |
| "learning_rate": 5.612856555793459e-05, | |
| "loss": 0.4554, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.303370786516854, | |
| "grad_norm": 1.2796676473517508, | |
| "learning_rate": 5.574308180782693e-05, | |
| "loss": 0.4491, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.3123595505617978, | |
| "grad_norm": 1.0932581128919143, | |
| "learning_rate": 5.5355860084317787e-05, | |
| "loss": 0.4575, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.3213483146067415, | |
| "grad_norm": 0.8711585447910877, | |
| "learning_rate": 5.496694313514009e-05, | |
| "loss": 0.4536, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.3303370786516853, | |
| "grad_norm": 0.976493438245937, | |
| "learning_rate": 5.457637389517285e-05, | |
| "loss": 0.4616, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.3393258426966292, | |
| "grad_norm": 0.600498943325478, | |
| "learning_rate": 5.4184195481701425e-05, | |
| "loss": 0.4474, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.348314606741573, | |
| "grad_norm": 0.7706578711873172, | |
| "learning_rate": 5.3790451189657486e-05, | |
| "loss": 0.4511, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.357303370786517, | |
| "grad_norm": 0.5178771915217835, | |
| "learning_rate": 5.339518448683945e-05, | |
| "loss": 0.4514, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.3662921348314607, | |
| "grad_norm": 0.7334914676346533, | |
| "learning_rate": 5.2998439009113814e-05, | |
| "loss": 0.4522, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.3752808988764045, | |
| "grad_norm": 0.5701908032527899, | |
| "learning_rate": 5.260025855559792e-05, | |
| "loss": 0.457, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.3842696629213482, | |
| "grad_norm": 0.45968308679963693, | |
| "learning_rate": 5.2200687083824706e-05, | |
| "loss": 0.4501, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.3932584269662922, | |
| "grad_norm": 0.5556354013625224, | |
| "learning_rate": 5.179976870488999e-05, | |
| "loss": 0.4508, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.402247191011236, | |
| "grad_norm": 0.4445793572850935, | |
| "learning_rate": 5.1397547678582745e-05, | |
| "loss": 0.4507, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.4112359550561797, | |
| "grad_norm": 0.5201992901363155, | |
| "learning_rate": 5.099406840849902e-05, | |
| "loss": 0.4535, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.4202247191011237, | |
| "grad_norm": 0.5297850125662515, | |
| "learning_rate": 5.058937543713999e-05, | |
| "loss": 0.4436, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.4292134831460674, | |
| "grad_norm": 0.3969459180847379, | |
| "learning_rate": 5.018351344099453e-05, | |
| "loss": 0.4532, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.4382022471910112, | |
| "grad_norm": 0.47432246499785363, | |
| "learning_rate": 4.9776527225607274e-05, | |
| "loss": 0.4511, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.447191011235955, | |
| "grad_norm": 0.31975394190454653, | |
| "learning_rate": 4.93684617206321e-05, | |
| "loss": 0.4485, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.456179775280899, | |
| "grad_norm": 0.4164002143535295, | |
| "learning_rate": 4.89593619748722e-05, | |
| "loss": 0.4481, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.4651685393258427, | |
| "grad_norm": 0.367528177243067, | |
| "learning_rate": 4.8549273151306795e-05, | |
| "loss": 0.4446, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.4741573033707867, | |
| "grad_norm": 0.36494718836358464, | |
| "learning_rate": 4.8138240522105365e-05, | |
| "loss": 0.4516, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.4831460674157304, | |
| "grad_norm": 0.2981123577566662, | |
| "learning_rate": 4.7726309463629733e-05, | |
| "loss": 0.439, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.4921348314606742, | |
| "grad_norm": 0.31072790704633446, | |
| "learning_rate": 4.731352545142478e-05, | |
| "loss": 0.449, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.501123595505618, | |
| "grad_norm": 0.3992742056784971, | |
| "learning_rate": 4.689993405519802e-05, | |
| "loss": 0.4433, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.5101123595505617, | |
| "grad_norm": 0.2594367011131309, | |
| "learning_rate": 4.648558093378899e-05, | |
| "loss": 0.4501, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.5191011235955056, | |
| "grad_norm": 0.2830467959001486, | |
| "learning_rate": 4.607051183012862e-05, | |
| "loss": 0.4422, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.5280898876404494, | |
| "grad_norm": 0.28137593410743117, | |
| "learning_rate": 4.5654772566189415e-05, | |
| "loss": 0.4439, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.5370786516853934, | |
| "grad_norm": 0.30222361671491843, | |
| "learning_rate": 4.5238409037926905e-05, | |
| "loss": 0.4464, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.5460674157303371, | |
| "grad_norm": 0.2984953677611448, | |
| "learning_rate": 4.4821467210212924e-05, | |
| "loss": 0.4515, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.5550561797752809, | |
| "grad_norm": 0.29477762612841285, | |
| "learning_rate": 4.4403993111761265e-05, | |
| "loss": 0.4487, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.5640449438202246, | |
| "grad_norm": 0.27738094537303265, | |
| "learning_rate": 4.398603283004626e-05, | |
| "loss": 0.4349, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.5730337078651684, | |
| "grad_norm": 0.27179761514817713, | |
| "learning_rate": 4.356763250621496e-05, | |
| "loss": 0.439, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.5820224719101124, | |
| "grad_norm": 0.30145150102869067, | |
| "learning_rate": 4.314883832999326e-05, | |
| "loss": 0.4399, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.5910112359550563, | |
| "grad_norm": 0.27593285844535975, | |
| "learning_rate": 4.272969653458685e-05, | |
| "loss": 0.449, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.30712340293439133, | |
| "learning_rate": 4.231025339157714e-05, | |
| "loss": 0.4368, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.6089887640449438, | |
| "grad_norm": 0.26832779433391346, | |
| "learning_rate": 4.189055520581315e-05, | |
| "loss": 0.4447, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.6179775280898876, | |
| "grad_norm": 0.2800675843384406, | |
| "learning_rate": 4.147064831029959e-05, | |
| "loss": 0.445, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.6269662921348313, | |
| "grad_norm": 0.3067111758990106, | |
| "learning_rate": 4.105057906108189e-05, | |
| "loss": 0.4452, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.6359550561797753, | |
| "grad_norm": 0.24971049417224553, | |
| "learning_rate": 4.063039383212866e-05, | |
| "loss": 0.4425, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.644943820224719, | |
| "grad_norm": 0.2020310973856206, | |
| "learning_rate": 4.021013901021225e-05, | |
| "loss": 0.4411, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.653932584269663, | |
| "grad_norm": 0.2574200192037008, | |
| "learning_rate": 3.978986098978777e-05, | |
| "loss": 0.4517, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.6629213483146068, | |
| "grad_norm": 0.2736980359380189, | |
| "learning_rate": 3.936960616787135e-05, | |
| "loss": 0.4446, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.6719101123595506, | |
| "grad_norm": 0.24658217170903196, | |
| "learning_rate": 3.8949420938918124e-05, | |
| "loss": 0.4364, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.6808988764044943, | |
| "grad_norm": 0.2767599168622161, | |
| "learning_rate": 3.852935168970042e-05, | |
| "loss": 0.4357, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.689887640449438, | |
| "grad_norm": 0.2548602216487942, | |
| "learning_rate": 3.810944479418686e-05, | |
| "loss": 0.4399, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.698876404494382, | |
| "grad_norm": 0.2640517946445997, | |
| "learning_rate": 3.768974660842287e-05, | |
| "loss": 0.4475, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.7078651685393258, | |
| "grad_norm": 0.25913290546087026, | |
| "learning_rate": 3.727030346541317e-05, | |
| "loss": 0.437, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.7168539325842698, | |
| "grad_norm": 0.21742230610782412, | |
| "learning_rate": 3.685116167000675e-05, | |
| "loss": 0.4376, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.7258426966292135, | |
| "grad_norm": 0.2076811227564877, | |
| "learning_rate": 3.6432367493785056e-05, | |
| "loss": 0.438, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.7348314606741573, | |
| "grad_norm": 0.3321278391017944, | |
| "learning_rate": 3.601396716995375e-05, | |
| "loss": 0.4388, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.743820224719101, | |
| "grad_norm": 0.2478941601472815, | |
| "learning_rate": 3.559600688823875e-05, | |
| "loss": 0.4452, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.7528089887640448, | |
| "grad_norm": 0.28877121684016926, | |
| "learning_rate": 3.517853278978708e-05, | |
| "loss": 0.442, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.7617977528089888, | |
| "grad_norm": 0.2393813520807247, | |
| "learning_rate": 3.4761590962073115e-05, | |
| "loss": 0.4437, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.7707865168539327, | |
| "grad_norm": 0.26771357483054375, | |
| "learning_rate": 3.434522743381061e-05, | |
| "loss": 0.4496, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.7797752808988765, | |
| "grad_norm": 0.2506533050953217, | |
| "learning_rate": 3.39294881698714e-05, | |
| "loss": 0.4443, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.7887640449438202, | |
| "grad_norm": 0.21252098175250356, | |
| "learning_rate": 3.3514419066211025e-05, | |
| "loss": 0.4383, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.797752808988764, | |
| "grad_norm": 0.3227331899101581, | |
| "learning_rate": 3.310006594480199e-05, | |
| "loss": 0.4373, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.8067415730337077, | |
| "grad_norm": 0.2595288336777384, | |
| "learning_rate": 3.268647454857524e-05, | |
| "loss": 0.4432, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.8157303370786517, | |
| "grad_norm": 0.22248085130320008, | |
| "learning_rate": 3.227369053637028e-05, | |
| "loss": 0.4378, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.8247191011235955, | |
| "grad_norm": 0.26022739350380103, | |
| "learning_rate": 3.1861759477894656e-05, | |
| "loss": 0.4511, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.8337078651685395, | |
| "grad_norm": 0.2525774955154011, | |
| "learning_rate": 3.145072684869322e-05, | |
| "loss": 0.4408, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.8426966292134832, | |
| "grad_norm": 0.19933336701791182, | |
| "learning_rate": 3.104063802512782e-05, | |
| "loss": 0.4428, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.851685393258427, | |
| "grad_norm": 0.2618315402075057, | |
| "learning_rate": 3.063153827936792e-05, | |
| "loss": 0.4387, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.8606741573033707, | |
| "grad_norm": 0.2612229535126854, | |
| "learning_rate": 3.0223472774392753e-05, | |
| "loss": 0.4373, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.8696629213483145, | |
| "grad_norm": 0.23175557173041528, | |
| "learning_rate": 2.9816486559005482e-05, | |
| "loss": 0.4428, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.8786516853932584, | |
| "grad_norm": 0.2659832710112894, | |
| "learning_rate": 2.9410624562860026e-05, | |
| "loss": 0.4374, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.8876404494382022, | |
| "grad_norm": 0.2394604361123856, | |
| "learning_rate": 2.9005931591500974e-05, | |
| "loss": 0.4377, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.8966292134831462, | |
| "grad_norm": 0.21085090202042212, | |
| "learning_rate": 2.860245232141726e-05, | |
| "loss": 0.4446, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.90561797752809, | |
| "grad_norm": 0.24320843039780787, | |
| "learning_rate": 2.8200231295110012e-05, | |
| "loss": 0.4449, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.9146067415730337, | |
| "grad_norm": 0.18825252205454282, | |
| "learning_rate": 2.7799312916175294e-05, | |
| "loss": 0.4474, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.9235955056179774, | |
| "grad_norm": 0.21020622489218485, | |
| "learning_rate": 2.7399741444402087e-05, | |
| "loss": 0.4396, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.9325842696629212, | |
| "grad_norm": 0.19967691714665464, | |
| "learning_rate": 2.7001560990886196e-05, | |
| "loss": 0.432, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.9415730337078652, | |
| "grad_norm": 0.1723917394030639, | |
| "learning_rate": 2.6604815513160556e-05, | |
| "loss": 0.4347, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.9505617977528091, | |
| "grad_norm": 0.2286024751356636, | |
| "learning_rate": 2.6209548810342517e-05, | |
| "loss": 0.4272, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.9595505617977529, | |
| "grad_norm": 0.1609850927229707, | |
| "learning_rate": 2.5815804518298575e-05, | |
| "loss": 0.4373, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.9685393258426966, | |
| "grad_norm": 0.17225954372932897, | |
| "learning_rate": 2.542362610482715e-05, | |
| "loss": 0.4346, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.9775280898876404, | |
| "grad_norm": 0.14538818432484127, | |
| "learning_rate": 2.503305686485991e-05, | |
| "loss": 0.444, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.9865168539325841, | |
| "grad_norm": 0.17741790417981537, | |
| "learning_rate": 2.464413991568222e-05, | |
| "loss": 0.4413, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.9955056179775281, | |
| "grad_norm": 0.19631454210187727, | |
| "learning_rate": 2.4256918192173088e-05, | |
| "loss": 0.4383, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 2.004494382022472, | |
| "grad_norm": 0.2777440257511273, | |
| "learning_rate": 2.3871434442065414e-05, | |
| "loss": 0.7545, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 2.013483146067416, | |
| "grad_norm": 0.2471458608915711, | |
| "learning_rate": 2.3487731221226754e-05, | |
| "loss": 0.4154, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 2.0224719101123596, | |
| "grad_norm": 0.1821697922847147, | |
| "learning_rate": 2.3105850888961348e-05, | |
| "loss": 0.4165, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.0314606741573034, | |
| "grad_norm": 0.24585959006825425, | |
| "learning_rate": 2.272583560333379e-05, | |
| "loss": 0.4141, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 2.040449438202247, | |
| "grad_norm": 0.1702964012359119, | |
| "learning_rate": 2.2347727316515e-05, | |
| "loss": 0.4173, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 2.049438202247191, | |
| "grad_norm": 0.24547374442583564, | |
| "learning_rate": 2.1971567770150814e-05, | |
| "loss": 0.4039, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 2.0584269662921346, | |
| "grad_norm": 0.19448387676230083, | |
| "learning_rate": 2.1597398490753917e-05, | |
| "loss": 0.4126, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 2.067415730337079, | |
| "grad_norm": 0.1711476507488459, | |
| "learning_rate": 2.1225260785119456e-05, | |
| "loss": 0.4112, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.0764044943820226, | |
| "grad_norm": 0.21251938925846103, | |
| "learning_rate": 2.0855195735764947e-05, | |
| "loss": 0.4136, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 2.0853932584269663, | |
| "grad_norm": 0.19044392751354922, | |
| "learning_rate": 2.0487244196394912e-05, | |
| "loss": 0.4158, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 2.09438202247191, | |
| "grad_norm": 0.1815645592240795, | |
| "learning_rate": 2.0121446787390822e-05, | |
| "loss": 0.414, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 2.103370786516854, | |
| "grad_norm": 0.2175948364317461, | |
| "learning_rate": 1.9757843891326736e-05, | |
| "loss": 0.409, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 2.1123595505617976, | |
| "grad_norm": 0.16627200046068014, | |
| "learning_rate": 1.939647564851127e-05, | |
| "loss": 0.4118, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.121348314606742, | |
| "grad_norm": 0.18089353116200013, | |
| "learning_rate": 1.9037381952556217e-05, | |
| "loss": 0.4082, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 2.1303370786516855, | |
| "grad_norm": 0.1938341058307494, | |
| "learning_rate": 1.8680602445972463e-05, | |
| "loss": 0.4208, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 2.1393258426966293, | |
| "grad_norm": 0.15296391756902566, | |
| "learning_rate": 1.832617651579365e-05, | |
| "loss": 0.4143, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 2.148314606741573, | |
| "grad_norm": 0.2007977943202684, | |
| "learning_rate": 1.797414328922797e-05, | |
| "loss": 0.4126, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 2.157303370786517, | |
| "grad_norm": 0.14549535548231118, | |
| "learning_rate": 1.7624541629338676e-05, | |
| "loss": 0.4107, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.1662921348314605, | |
| "grad_norm": 0.1643469642395592, | |
| "learning_rate": 1.7277410130753775e-05, | |
| "loss": 0.4147, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 2.1752808988764043, | |
| "grad_norm": 0.1743166147174567, | |
| "learning_rate": 1.6932787115405318e-05, | |
| "loss": 0.4079, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 2.1842696629213485, | |
| "grad_norm": 0.15594946749382455, | |
| "learning_rate": 1.6590710628298826e-05, | |
| "loss": 0.41, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 2.1932584269662923, | |
| "grad_norm": 0.16475084403286017, | |
| "learning_rate": 1.6251218433313254e-05, | |
| "loss": 0.4091, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.202247191011236, | |
| "grad_norm": 0.13895524394350253, | |
| "learning_rate": 1.591434800903203e-05, | |
| "loss": 0.408, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.2112359550561798, | |
| "grad_norm": 0.14999957095600605, | |
| "learning_rate": 1.558013654460555e-05, | |
| "loss": 0.409, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 2.2202247191011235, | |
| "grad_norm": 0.1411289442089014, | |
| "learning_rate": 1.5248620935645666e-05, | |
| "loss": 0.4108, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 2.2292134831460673, | |
| "grad_norm": 0.13130064830617885, | |
| "learning_rate": 1.4919837780152544e-05, | |
| "loss": 0.4086, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 2.238202247191011, | |
| "grad_norm": 0.1362901361388582, | |
| "learning_rate": 1.4593823374474374e-05, | |
| "loss": 0.4134, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 2.247191011235955, | |
| "grad_norm": 0.1641330954517184, | |
| "learning_rate": 1.4270613709300429e-05, | |
| "loss": 0.4105, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.256179775280899, | |
| "grad_norm": 0.11707105305267158, | |
| "learning_rate": 1.3950244465687833e-05, | |
| "loss": 0.4113, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 2.2651685393258427, | |
| "grad_norm": 0.14883912799444574, | |
| "learning_rate": 1.3632751011122497e-05, | |
| "loss": 0.4117, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 2.2741573033707865, | |
| "grad_norm": 0.14576734750428819, | |
| "learning_rate": 1.3318168395614697e-05, | |
| "loss": 0.4086, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 2.2831460674157302, | |
| "grad_norm": 0.1325863043302826, | |
| "learning_rate": 1.3006531347829699e-05, | |
| "loss": 0.4075, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 2.292134831460674, | |
| "grad_norm": 0.14803632152992002, | |
| "learning_rate": 1.2697874271253844e-05, | |
| "loss": 0.4092, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.3011235955056177, | |
| "grad_norm": 0.1484465193910235, | |
| "learning_rate": 1.2392231240396542e-05, | |
| "loss": 0.4103, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.310112359550562, | |
| "grad_norm": 0.13481968569964842, | |
| "learning_rate": 1.2089635997028592e-05, | |
| "loss": 0.4165, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 2.3191011235955057, | |
| "grad_norm": 0.14559693501049936, | |
| "learning_rate": 1.1790121946457212e-05, | |
| "loss": 0.4166, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 2.3280898876404494, | |
| "grad_norm": 0.12726242281070635, | |
| "learning_rate": 1.1493722153838239e-05, | |
| "loss": 0.407, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 2.337078651685393, | |
| "grad_norm": 0.13666677240818353, | |
| "learning_rate": 1.120046934052585e-05, | |
| "loss": 0.413, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.346067415730337, | |
| "grad_norm": 0.1267204820522753, | |
| "learning_rate": 1.0910395880460274e-05, | |
| "loss": 0.4112, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 2.355056179775281, | |
| "grad_norm": 0.13846022363503335, | |
| "learning_rate": 1.062353379659383e-05, | |
| "loss": 0.4182, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 2.364044943820225, | |
| "grad_norm": 0.13931951845911503, | |
| "learning_rate": 1.0339914757355718e-05, | |
| "loss": 0.4058, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 2.3730337078651687, | |
| "grad_norm": 0.14081173124556376, | |
| "learning_rate": 1.0059570073155953e-05, | |
| "loss": 0.4224, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 2.3820224719101124, | |
| "grad_norm": 0.11818135028408412, | |
| "learning_rate": 9.782530692928832e-06, | |
| "loss": 0.4175, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.391011235955056, | |
| "grad_norm": 0.12675733200213124, | |
| "learning_rate": 9.508827200716273e-06, | |
| "loss": 0.4087, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.12595759152751707, | |
| "learning_rate": 9.238489812291469e-06, | |
| "loss": 0.4081, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 2.4089887640449437, | |
| "grad_norm": 0.12527623811173608, | |
| "learning_rate": 8.971548371823205e-06, | |
| "loss": 0.4082, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.417977528089888, | |
| "grad_norm": 0.122998316643011, | |
| "learning_rate": 8.708032348581144e-06, | |
| "loss": 0.4073, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 2.4269662921348316, | |
| "grad_norm": 0.10564845207022606, | |
| "learning_rate": 8.447970833682584e-06, | |
| "loss": 0.4113, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.4359550561797754, | |
| "grad_norm": 0.10685572250478023, | |
| "learning_rate": 8.191392536880852e-06, | |
| "loss": 0.4071, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 2.444943820224719, | |
| "grad_norm": 0.11554548264247637, | |
| "learning_rate": 7.938325783395924e-06, | |
| "loss": 0.4019, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 2.453932584269663, | |
| "grad_norm": 0.11306318481457633, | |
| "learning_rate": 7.68879851078741e-06, | |
| "loss": 0.4116, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 2.4629213483146066, | |
| "grad_norm": 0.10283238222894554, | |
| "learning_rate": 7.442838265870347e-06, | |
| "loss": 0.4097, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 2.4719101123595504, | |
| "grad_norm": 0.11264537827542116, | |
| "learning_rate": 7.2004722016741605e-06, | |
| "loss": 0.4086, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.4808988764044946, | |
| "grad_norm": 0.1184428903128185, | |
| "learning_rate": 6.961727074445055e-06, | |
| "loss": 0.4048, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 2.4898876404494383, | |
| "grad_norm": 0.10381057720149338, | |
| "learning_rate": 6.726629240692255e-06, | |
| "loss": 0.406, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 2.498876404494382, | |
| "grad_norm": 0.11411996855242165, | |
| "learning_rate": 6.4952046542783395e-06, | |
| "loss": 0.4084, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.507865168539326, | |
| "grad_norm": 0.11085772018759905, | |
| "learning_rate": 6.2674788635540415e-06, | |
| "loss": 0.4088, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 2.5168539325842696, | |
| "grad_norm": 0.11581754129424132, | |
| "learning_rate": 6.04347700853781e-06, | |
| "loss": 0.4091, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.5258426966292133, | |
| "grad_norm": 0.10915675557144788, | |
| "learning_rate": 5.823223818140458e-06, | |
| "loss": 0.4135, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 2.534831460674157, | |
| "grad_norm": 0.10472009125683931, | |
| "learning_rate": 5.606743607435183e-06, | |
| "loss": 0.4192, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.5438202247191013, | |
| "grad_norm": 0.10196405389390528, | |
| "learning_rate": 5.394060274973267e-06, | |
| "loss": 0.4143, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 2.552808988764045, | |
| "grad_norm": 0.09299288202132956, | |
| "learning_rate": 5.185197300145817e-06, | |
| "loss": 0.4073, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.561797752808989, | |
| "grad_norm": 0.10859218890312015, | |
| "learning_rate": 4.980177740591678e-06, | |
| "loss": 0.408, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.5707865168539326, | |
| "grad_norm": 0.1030110328672013, | |
| "learning_rate": 4.779024229652005e-06, | |
| "loss": 0.4086, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.5797752808988763, | |
| "grad_norm": 0.09705963975610957, | |
| "learning_rate": 4.581758973871609e-06, | |
| "loss": 0.4053, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 2.58876404494382, | |
| "grad_norm": 0.08893242924745905, | |
| "learning_rate": 4.3884037505474455e-06, | |
| "loss": 0.4051, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.597752808988764, | |
| "grad_norm": 0.09476330135285733, | |
| "learning_rate": 4.198979905324496e-06, | |
| "loss": 0.4062, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 2.606741573033708, | |
| "grad_norm": 0.1125307729702412, | |
| "learning_rate": 4.0135083498392905e-06, | |
| "loss": 0.4143, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.6157303370786518, | |
| "grad_norm": 0.10546064232164497, | |
| "learning_rate": 3.832009559411338e-06, | |
| "loss": 0.407, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.6247191011235955, | |
| "grad_norm": 0.09832745095728368, | |
| "learning_rate": 3.654503570782755e-06, | |
| "loss": 0.4094, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.6337078651685393, | |
| "grad_norm": 0.09906441503157463, | |
| "learning_rate": 3.481009979906258e-06, | |
| "loss": 0.408, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 2.642696629213483, | |
| "grad_norm": 0.09565058554084314, | |
| "learning_rate": 3.311547939781887e-06, | |
| "loss": 0.4064, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.6516853932584272, | |
| "grad_norm": 0.09922131509032601, | |
| "learning_rate": 3.14613615834253e-06, | |
| "loss": 0.4044, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.6606741573033705, | |
| "grad_norm": 0.10360258898579061, | |
| "learning_rate": 2.9847928963887198e-06, | |
| "loss": 0.4054, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.6696629213483147, | |
| "grad_norm": 0.09576331652157899, | |
| "learning_rate": 2.8275359655726586e-06, | |
| "loss": 0.4065, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 2.6786516853932585, | |
| "grad_norm": 0.08804843115482651, | |
| "learning_rate": 2.6743827264319012e-06, | |
| "loss": 0.4027, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.6876404494382022, | |
| "grad_norm": 0.0940651708680553, | |
| "learning_rate": 2.5253500864728155e-06, | |
| "loss": 0.4048, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 2.696629213483146, | |
| "grad_norm": 0.09087393252432437, | |
| "learning_rate": 2.3804544983040724e-06, | |
| "loss": 0.4025, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.7056179775280897, | |
| "grad_norm": 0.09939517236482039, | |
| "learning_rate": 2.23971195782033e-06, | |
| "loss": 0.406, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 2.714606741573034, | |
| "grad_norm": 0.15328626125954675, | |
| "learning_rate": 2.1031380024363645e-06, | |
| "loss": 0.4121, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.7235955056179773, | |
| "grad_norm": 0.08802675816187339, | |
| "learning_rate": 1.9707477093717786e-06, | |
| "loss": 0.4083, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 2.7325842696629215, | |
| "grad_norm": 0.08803502781546772, | |
| "learning_rate": 1.8425556939865696e-06, | |
| "loss": 0.4146, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.741573033707865, | |
| "grad_norm": 0.08953466563820135, | |
| "learning_rate": 1.7185761081676222e-06, | |
| "loss": 0.4076, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.750561797752809, | |
| "grad_norm": 0.08902482730609233, | |
| "learning_rate": 1.5988226387664151e-06, | |
| "loss": 0.4072, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.7595505617977527, | |
| "grad_norm": 0.08914869211011264, | |
| "learning_rate": 1.4833085060880349e-06, | |
| "loss": 0.4116, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.7685393258426965, | |
| "grad_norm": 0.0839377567882791, | |
| "learning_rate": 1.3720464624317108e-06, | |
| "loss": 0.4028, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.7775280898876407, | |
| "grad_norm": 0.0860599340838286, | |
| "learning_rate": 1.2650487906830234e-06, | |
| "loss": 0.408, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.7865168539325844, | |
| "grad_norm": 0.08469381729317074, | |
| "learning_rate": 1.1623273029579195e-06, | |
| "loss": 0.4124, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.795505617977528, | |
| "grad_norm": 0.08378981417352441, | |
| "learning_rate": 1.063893339298674e-06, | |
| "loss": 0.4153, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.804494382022472, | |
| "grad_norm": 0.08792748870981947, | |
| "learning_rate": 9.697577664220303e-07, | |
| "loss": 0.4128, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.8134831460674157, | |
| "grad_norm": 0.08428338920338808, | |
| "learning_rate": 8.799309765195452e-07, | |
| "loss": 0.4047, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.8224719101123594, | |
| "grad_norm": 0.08341919328157262, | |
| "learning_rate": 7.944228861103264e-07, | |
| "loss": 0.4035, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.831460674157303, | |
| "grad_norm": 0.08371596387722409, | |
| "learning_rate": 7.132429349463011e-07, | |
| "loss": 0.4059, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.8404494382022474, | |
| "grad_norm": 0.0792325512831226, | |
| "learning_rate": 6.364000849700791e-07, | |
| "loss": 0.4039, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.849438202247191, | |
| "grad_norm": 0.08312852243276637, | |
| "learning_rate": 5.639028193256257e-07, | |
| "loss": 0.4067, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.858426966292135, | |
| "grad_norm": 0.08209771251775913, | |
| "learning_rate": 4.957591414217344e-07, | |
| "loss": 0.4077, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.8674157303370786, | |
| "grad_norm": 0.08673033811521172, | |
| "learning_rate": 4.3197657404848935e-07, | |
| "loss": 0.4163, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.8764044943820224, | |
| "grad_norm": 0.07889124751001583, | |
| "learning_rate": 3.725621585467698e-07, | |
| "loss": 0.4074, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.885393258426966, | |
| "grad_norm": 0.08351100735021297, | |
| "learning_rate": 3.1752245403092963e-07, | |
| "loss": 0.4047, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.89438202247191, | |
| "grad_norm": 0.0841384102489667, | |
| "learning_rate": 2.6686353666468323e-07, | |
| "loss": 0.4015, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.903370786516854, | |
| "grad_norm": 0.08172285090606024, | |
| "learning_rate": 2.2059099899033098e-07, | |
| "loss": 0.4007, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.912359550561798, | |
| "grad_norm": 0.08042881118540746, | |
| "learning_rate": 1.7870994931135977e-07, | |
| "loss": 0.4088, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.9213483146067416, | |
| "grad_norm": 0.08582343232361876, | |
| "learning_rate": 1.412250111285074e-07, | |
| "loss": 0.4081, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.9303370786516854, | |
| "grad_norm": 0.0810315622092791, | |
| "learning_rate": 1.0814032262935315e-07, | |
| "loss": 0.4079, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.939325842696629, | |
| "grad_norm": 0.07928028852132449, | |
| "learning_rate": 7.945953623146096e-08, | |
| "loss": 0.4076, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.9483146067415733, | |
| "grad_norm": 0.08125928562829822, | |
| "learning_rate": 5.518581817918645e-08, | |
| "loss": 0.4127, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.9573033707865166, | |
| "grad_norm": 0.07827386715600733, | |
| "learning_rate": 3.532184819412532e-08, | |
| "loss": 0.4074, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.966292134831461, | |
| "grad_norm": 0.08205445804803274, | |
| "learning_rate": 1.9869819179292315e-08, | |
| "loss": 0.4099, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.9752808988764046, | |
| "grad_norm": 0.08077759984225538, | |
| "learning_rate": 8.83143697702149e-09, | |
| "loss": 0.4092, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.9842696629213483, | |
| "grad_norm": 0.08245700392469553, | |
| "learning_rate": 2.2079201806501916e-09, | |
| "loss": 0.4106, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.993258426966292, | |
| "grad_norm": 0.08229489202929206, | |
| "learning_rate": 0.0, | |
| "loss": 0.4064, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.993258426966292, | |
| "step": 333, | |
| "total_flos": 7.998180122580484e+18, | |
| "train_loss": 0.4748834180939305, | |
| "train_runtime": 32625.0257, | |
| "train_samples_per_second": 5.234, | |
| "train_steps_per_second": 0.01 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 333, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.998180122580484e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
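The record above matches the layout of a Hugging Face `Trainer` checkpoint state file (`trainer_state.json`): each per-step entry in `log_history` carries `epoch`, `step`, `loss`, `grad_norm`, and `learning_rate`, and the trailing summary entry reports `train_loss`, `train_runtime`, and throughput. As a minimal sketch of how such a file can be inspected, the snippet below loads it and plots the loss and learning-rate curves; the file path and the use of matplotlib are illustrative assumptions, not part of the original log.

```python
# Minimal sketch: read trainer_state.json and plot loss / learning rate per step.
# Assumptions (not stated in the original file): the JSON above is saved as
# "trainer_state.json" in the current directory and matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step records; the trailing summary entry has "train_loss"
# and runtime fields instead of "loss"/"learning_rate", so it is filtered out.
history = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

fig, loss_ax = plt.subplots()
loss_ax.plot(steps, losses, label="train loss")
loss_ax.set_xlabel("step")
loss_ax.set_ylabel("loss")

lr_ax = loss_ax.twinx()  # second y-axis for the learning-rate schedule
lr_ax.plot(steps, lrs, color="tab:orange", label="learning rate")
lr_ax.set_ylabel("learning rate")

fig.tight_layout()
plt.show()
```

A quick sanity check against the summary block: the filtered history should contain 333 entries (`max_steps`), and the mean of the plotted losses should sit close to the reported `train_loss` of roughly 0.475.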