| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 250, | |
| "global_step": 1272, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0015723270440251573, | |
| "grad_norm": 6.609381042259111, | |
| "learning_rate": 1.794871794871795e-08, | |
| "loss": 2.2121, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0031446540880503146, | |
| "grad_norm": 6.5863108918091955, | |
| "learning_rate": 3.58974358974359e-08, | |
| "loss": 2.1816, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0047169811320754715, | |
| "grad_norm": 6.617655516365913, | |
| "learning_rate": 5.384615384615385e-08, | |
| "loss": 2.0071, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.006289308176100629, | |
| "grad_norm": 6.602018175612389, | |
| "learning_rate": 7.17948717948718e-08, | |
| "loss": 2.1686, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007861635220125786, | |
| "grad_norm": 6.591379792761674, | |
| "learning_rate": 8.974358974358973e-08, | |
| "loss": 2.1805, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009433962264150943, | |
| "grad_norm": 6.7248109628555, | |
| "learning_rate": 1.076923076923077e-07, | |
| "loss": 2.2243, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0110062893081761, | |
| "grad_norm": 6.3995094275133715, | |
| "learning_rate": 1.2564102564102563e-07, | |
| "loss": 2.3763, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.012578616352201259, | |
| "grad_norm": 6.296067719776998, | |
| "learning_rate": 1.435897435897436e-07, | |
| "loss": 2.298, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.014150943396226415, | |
| "grad_norm": 6.778527459516128, | |
| "learning_rate": 1.6153846153846155e-07, | |
| "loss": 2.1494, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.015723270440251572, | |
| "grad_norm": 6.594906718693546, | |
| "learning_rate": 1.7948717948717946e-07, | |
| "loss": 2.0825, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01729559748427673, | |
| "grad_norm": 6.7082225379425, | |
| "learning_rate": 1.9743589743589741e-07, | |
| "loss": 2.3567, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.018867924528301886, | |
| "grad_norm": 6.907609205305907, | |
| "learning_rate": 2.153846153846154e-07, | |
| "loss": 1.9809, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.020440251572327043, | |
| "grad_norm": 5.9359307337140335, | |
| "learning_rate": 2.333333333333333e-07, | |
| "loss": 2.226, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0220125786163522, | |
| "grad_norm": 6.466867096117202, | |
| "learning_rate": 2.5128205128205126e-07, | |
| "loss": 2.1425, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02358490566037736, | |
| "grad_norm": 6.755674157390508, | |
| "learning_rate": 2.692307692307692e-07, | |
| "loss": 2.2433, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.025157232704402517, | |
| "grad_norm": 6.002945550670174, | |
| "learning_rate": 2.871794871794872e-07, | |
| "loss": 2.3029, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.026729559748427674, | |
| "grad_norm": 6.733820715319282, | |
| "learning_rate": 3.0512820512820514e-07, | |
| "loss": 2.6192, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02830188679245283, | |
| "grad_norm": 6.814207366203309, | |
| "learning_rate": 3.230769230769231e-07, | |
| "loss": 2.1716, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.029874213836477988, | |
| "grad_norm": 6.649348126638431, | |
| "learning_rate": 3.41025641025641e-07, | |
| "loss": 2.0726, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.031446540880503145, | |
| "grad_norm": 6.419164036165898, | |
| "learning_rate": 3.589743589743589e-07, | |
| "loss": 2.2484, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0330188679245283, | |
| "grad_norm": 6.381550340344472, | |
| "learning_rate": 3.7692307692307687e-07, | |
| "loss": 2.0979, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03459119496855346, | |
| "grad_norm": 6.470517353012837, | |
| "learning_rate": 3.9487179487179483e-07, | |
| "loss": 2.0564, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.036163522012578615, | |
| "grad_norm": 6.817629340287558, | |
| "learning_rate": 4.128205128205128e-07, | |
| "loss": 2.2869, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.03773584905660377, | |
| "grad_norm": 6.415896720614442, | |
| "learning_rate": 4.307692307692308e-07, | |
| "loss": 2.1393, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03930817610062893, | |
| "grad_norm": 6.758474027008109, | |
| "learning_rate": 4.4871794871794876e-07, | |
| "loss": 1.9729, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.040880503144654086, | |
| "grad_norm": 5.796971726063335, | |
| "learning_rate": 4.666666666666666e-07, | |
| "loss": 2.1908, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04245283018867924, | |
| "grad_norm": 5.790656105640873, | |
| "learning_rate": 4.846153846153846e-07, | |
| "loss": 2.0891, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0440251572327044, | |
| "grad_norm": 6.190761522380566, | |
| "learning_rate": 5.025641025641025e-07, | |
| "loss": 1.962, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04559748427672956, | |
| "grad_norm": 6.653844953656282, | |
| "learning_rate": 5.205128205128205e-07, | |
| "loss": 2.1133, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.04716981132075472, | |
| "grad_norm": 6.3047026890345395, | |
| "learning_rate": 5.384615384615384e-07, | |
| "loss": 2.2912, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04874213836477988, | |
| "grad_norm": 6.8087836989796875, | |
| "learning_rate": 5.564102564102564e-07, | |
| "loss": 2.2732, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.050314465408805034, | |
| "grad_norm": 6.756218951330409, | |
| "learning_rate": 5.743589743589744e-07, | |
| "loss": 2.1865, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.05188679245283019, | |
| "grad_norm": 5.892752864062046, | |
| "learning_rate": 5.923076923076923e-07, | |
| "loss": 1.9986, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.05345911949685535, | |
| "grad_norm": 5.9596785550097495, | |
| "learning_rate": 6.102564102564103e-07, | |
| "loss": 2.3515, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.055031446540880505, | |
| "grad_norm": 5.817501243045476, | |
| "learning_rate": 6.282051282051282e-07, | |
| "loss": 2.1328, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05660377358490566, | |
| "grad_norm": 5.555025157362233, | |
| "learning_rate": 6.461538461538462e-07, | |
| "loss": 2.0956, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05817610062893082, | |
| "grad_norm": 4.66564680385535, | |
| "learning_rate": 6.64102564102564e-07, | |
| "loss": 2.1224, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.059748427672955975, | |
| "grad_norm": 4.682038309064788, | |
| "learning_rate": 6.82051282051282e-07, | |
| "loss": 1.8465, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.06132075471698113, | |
| "grad_norm": 4.520420535983458, | |
| "learning_rate": 7e-07, | |
| "loss": 2.2189, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.06289308176100629, | |
| "grad_norm": 4.7915678520719105, | |
| "learning_rate": 6.999988639134823e-07, | |
| "loss": 2.1286, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06446540880503145, | |
| "grad_norm": 4.612604930012734, | |
| "learning_rate": 6.999954556613048e-07, | |
| "loss": 2.0893, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0660377358490566, | |
| "grad_norm": 4.247600840809702, | |
| "learning_rate": 6.999897752655936e-07, | |
| "loss": 2.1122, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06761006289308176, | |
| "grad_norm": 3.9712862450759245, | |
| "learning_rate": 6.999818227632253e-07, | |
| "loss": 2.1013, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06918238993710692, | |
| "grad_norm": 4.081791414690391, | |
| "learning_rate": 6.99971598205827e-07, | |
| "loss": 2.0214, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.07075471698113207, | |
| "grad_norm": 3.7509350102589827, | |
| "learning_rate": 6.999591016597756e-07, | |
| "loss": 2.1367, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.07232704402515723, | |
| "grad_norm": 3.2808902294971616, | |
| "learning_rate": 6.999443332061978e-07, | |
| "loss": 2.2945, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07389937106918239, | |
| "grad_norm": 3.3036022925167607, | |
| "learning_rate": 6.999272929409694e-07, | |
| "loss": 2.0151, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.07547169811320754, | |
| "grad_norm": 3.3210132249336812, | |
| "learning_rate": 6.999079809747144e-07, | |
| "loss": 2.0188, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0770440251572327, | |
| "grad_norm": 3.0229418180275007, | |
| "learning_rate": 6.998863974328045e-07, | |
| "loss": 2.0217, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07861635220125786, | |
| "grad_norm": 2.9951351846113115, | |
| "learning_rate": 6.998625424553584e-07, | |
| "loss": 2.0772, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08018867924528301, | |
| "grad_norm": 2.991842161373637, | |
| "learning_rate": 6.99836416197241e-07, | |
| "loss": 1.9351, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.08176100628930817, | |
| "grad_norm": 3.330478724889984, | |
| "learning_rate": 6.998080188280617e-07, | |
| "loss": 2.2126, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 2.9629299494694283, | |
| "learning_rate": 6.99777350532174e-07, | |
| "loss": 2.3491, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.08490566037735849, | |
| "grad_norm": 2.9627317429014983, | |
| "learning_rate": 6.997444115086743e-07, | |
| "loss": 1.9852, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08647798742138364, | |
| "grad_norm": 2.840879727132188, | |
| "learning_rate": 6.997092019714002e-07, | |
| "loss": 1.939, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0880503144654088, | |
| "grad_norm": 2.9520960895336614, | |
| "learning_rate": 6.996717221489292e-07, | |
| "loss": 2.1034, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08962264150943396, | |
| "grad_norm": 3.015493625809345, | |
| "learning_rate": 6.996319722845775e-07, | |
| "loss": 1.9913, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.09119496855345911, | |
| "grad_norm": 2.694895109102356, | |
| "learning_rate": 6.995899526363981e-07, | |
| "loss": 1.9909, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.09276729559748427, | |
| "grad_norm": 2.746013591310517, | |
| "learning_rate": 6.995456634771794e-07, | |
| "loss": 2.0852, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.09433962264150944, | |
| "grad_norm": 2.5054959395703302, | |
| "learning_rate": 6.994991050944431e-07, | |
| "loss": 2.1756, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0959119496855346, | |
| "grad_norm": 2.7415517103140172, | |
| "learning_rate": 6.994502777904428e-07, | |
| "loss": 2.0021, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.09748427672955975, | |
| "grad_norm": 2.591902596902185, | |
| "learning_rate": 6.993991818821612e-07, | |
| "loss": 1.8463, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09905660377358491, | |
| "grad_norm": 2.5850508327560617, | |
| "learning_rate": 6.993458177013095e-07, | |
| "loss": 1.7361, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.10062893081761007, | |
| "grad_norm": 2.74240553360653, | |
| "learning_rate": 6.992901855943236e-07, | |
| "loss": 1.9652, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.10220125786163523, | |
| "grad_norm": 2.7901671418215286, | |
| "learning_rate": 6.992322859223628e-07, | |
| "loss": 1.9407, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.10377358490566038, | |
| "grad_norm": 2.592826985818942, | |
| "learning_rate": 6.991721190613075e-07, | |
| "loss": 1.8256, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.10534591194968554, | |
| "grad_norm": 2.4593316329973978, | |
| "learning_rate": 6.991096854017562e-07, | |
| "loss": 1.9612, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1069182389937107, | |
| "grad_norm": 2.731581032864078, | |
| "learning_rate": 6.990449853490233e-07, | |
| "loss": 1.8444, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10849056603773585, | |
| "grad_norm": 2.8559336852628707, | |
| "learning_rate": 6.989780193231367e-07, | |
| "loss": 1.8695, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.11006289308176101, | |
| "grad_norm": 2.7165119219767035, | |
| "learning_rate": 6.989087877588348e-07, | |
| "loss": 1.9658, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11163522012578617, | |
| "grad_norm": 2.829079631550892, | |
| "learning_rate": 6.988372911055634e-07, | |
| "loss": 2.1087, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.11320754716981132, | |
| "grad_norm": 2.507099458574326, | |
| "learning_rate": 6.987635298274733e-07, | |
| "loss": 1.8418, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11477987421383648, | |
| "grad_norm": 2.4685464129673726, | |
| "learning_rate": 6.986875044034171e-07, | |
| "loss": 1.8306, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.11635220125786164, | |
| "grad_norm": 2.5785937060649187, | |
| "learning_rate": 6.986092153269459e-07, | |
| "loss": 1.9845, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1179245283018868, | |
| "grad_norm": 2.4185902663784278, | |
| "learning_rate": 6.985286631063063e-07, | |
| "loss": 2.1262, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.11949685534591195, | |
| "grad_norm": 2.6484961851922395, | |
| "learning_rate": 6.984458482644373e-07, | |
| "loss": 1.9226, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.12106918238993711, | |
| "grad_norm": 2.624404494631164, | |
| "learning_rate": 6.983607713389663e-07, | |
| "loss": 1.9014, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.12264150943396226, | |
| "grad_norm": 2.5143123431618717, | |
| "learning_rate": 6.982734328822063e-07, | |
| "loss": 1.7563, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.12421383647798742, | |
| "grad_norm": 2.288866534494279, | |
| "learning_rate": 6.981838334611518e-07, | |
| "loss": 2.1576, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.12578616352201258, | |
| "grad_norm": 2.4924549190125225, | |
| "learning_rate": 6.980919736574753e-07, | |
| "loss": 1.9265, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12735849056603774, | |
| "grad_norm": 2.6450039556661724, | |
| "learning_rate": 6.979978540675234e-07, | |
| "loss": 2.2224, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1289308176100629, | |
| "grad_norm": 2.3901087100576786, | |
| "learning_rate": 6.979014753023135e-07, | |
| "loss": 1.7917, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.13050314465408805, | |
| "grad_norm": 2.2742892989609835, | |
| "learning_rate": 6.978028379875291e-07, | |
| "loss": 1.7802, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.1320754716981132, | |
| "grad_norm": 2.314703480458537, | |
| "learning_rate": 6.977019427635158e-07, | |
| "loss": 2.0916, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.13364779874213836, | |
| "grad_norm": 2.304575288629416, | |
| "learning_rate": 6.975987902852778e-07, | |
| "loss": 2.0544, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.13522012578616352, | |
| "grad_norm": 2.2373974568043353, | |
| "learning_rate": 6.974933812224731e-07, | |
| "loss": 1.8365, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.13679245283018868, | |
| "grad_norm": 2.3841316516671265, | |
| "learning_rate": 6.973857162594091e-07, | |
| "loss": 1.9519, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.13836477987421383, | |
| "grad_norm": 2.268505399665297, | |
| "learning_rate": 6.972757960950384e-07, | |
| "loss": 2.0843, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.139937106918239, | |
| "grad_norm": 2.389940820234018, | |
| "learning_rate": 6.971636214429544e-07, | |
| "loss": 2.1255, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.14150943396226415, | |
| "grad_norm": 2.2702701401800294, | |
| "learning_rate": 6.970491930313862e-07, | |
| "loss": 1.7951, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1430817610062893, | |
| "grad_norm": 2.34707263205848, | |
| "learning_rate": 6.969325116031943e-07, | |
| "loss": 2.0553, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.14465408805031446, | |
| "grad_norm": 2.138066100619543, | |
| "learning_rate": 6.968135779158653e-07, | |
| "loss": 1.8837, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.14622641509433962, | |
| "grad_norm": 2.148707619444591, | |
| "learning_rate": 6.96692392741508e-07, | |
| "loss": 1.8783, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.14779874213836477, | |
| "grad_norm": 2.2157095129853266, | |
| "learning_rate": 6.965689568668468e-07, | |
| "loss": 1.9493, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14937106918238993, | |
| "grad_norm": 2.6174396924182117, | |
| "learning_rate": 6.964432710932181e-07, | |
| "loss": 1.9476, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1509433962264151, | |
| "grad_norm": 2.423080479151295, | |
| "learning_rate": 6.963153362365641e-07, | |
| "loss": 1.9149, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.15251572327044025, | |
| "grad_norm": 2.305911644784875, | |
| "learning_rate": 6.961851531274282e-07, | |
| "loss": 1.8686, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1540880503144654, | |
| "grad_norm": 2.5427101543409116, | |
| "learning_rate": 6.960527226109489e-07, | |
| "loss": 1.8722, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.15566037735849056, | |
| "grad_norm": 2.2044082229047075, | |
| "learning_rate": 6.959180455468553e-07, | |
| "loss": 1.9485, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.15723270440251572, | |
| "grad_norm": 2.337924598657549, | |
| "learning_rate": 6.9578112280946e-07, | |
| "loss": 1.8701, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15880503144654087, | |
| "grad_norm": 2.231091232316695, | |
| "learning_rate": 6.956419552876552e-07, | |
| "loss": 2.1111, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.16037735849056603, | |
| "grad_norm": 2.1902798087554647, | |
| "learning_rate": 6.955005438849058e-07, | |
| "loss": 1.9696, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1619496855345912, | |
| "grad_norm": 2.391741538055505, | |
| "learning_rate": 6.953568895192436e-07, | |
| "loss": 1.9091, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.16352201257861634, | |
| "grad_norm": 2.440486302600468, | |
| "learning_rate": 6.952109931232616e-07, | |
| "loss": 1.8899, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1650943396226415, | |
| "grad_norm": 3.1036603066171664, | |
| "learning_rate": 6.95062855644108e-07, | |
| "loss": 1.9706, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 2.176053941318332, | |
| "learning_rate": 6.9491247804348e-07, | |
| "loss": 2.0294, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.16823899371069181, | |
| "grad_norm": 2.2955252073229415, | |
| "learning_rate": 6.947598612976173e-07, | |
| "loss": 1.8521, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.16981132075471697, | |
| "grad_norm": 2.3179610819374856, | |
| "learning_rate": 6.946050063972961e-07, | |
| "loss": 2.0428, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.17138364779874213, | |
| "grad_norm": 2.3353177807842442, | |
| "learning_rate": 6.944479143478225e-07, | |
| "loss": 1.6969, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.17295597484276728, | |
| "grad_norm": 2.1751580486972903, | |
| "learning_rate": 6.942885861690258e-07, | |
| "loss": 2.1661, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.17452830188679244, | |
| "grad_norm": 2.3502010826835873, | |
| "learning_rate": 6.941270228952526e-07, | |
| "loss": 1.8967, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.1761006289308176, | |
| "grad_norm": 2.080568954254506, | |
| "learning_rate": 6.939632255753589e-07, | |
| "loss": 2.0775, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.17767295597484276, | |
| "grad_norm": 2.0874560399766278, | |
| "learning_rate": 6.937971952727045e-07, | |
| "loss": 1.8397, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1792452830188679, | |
| "grad_norm": 2.235119037617934, | |
| "learning_rate": 6.936289330651452e-07, | |
| "loss": 2.0157, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.18081761006289307, | |
| "grad_norm": 2.21645540026438, | |
| "learning_rate": 6.934584400450265e-07, | |
| "loss": 1.7553, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.18238993710691823, | |
| "grad_norm": 2.278410487078488, | |
| "learning_rate": 6.932857173191757e-07, | |
| "loss": 1.8963, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.18396226415094338, | |
| "grad_norm": 2.288449048592226, | |
| "learning_rate": 6.931107660088955e-07, | |
| "loss": 2.0707, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.18553459119496854, | |
| "grad_norm": 2.540412196668586, | |
| "learning_rate": 6.929335872499565e-07, | |
| "loss": 1.9994, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1871069182389937, | |
| "grad_norm": 2.295470373319651, | |
| "learning_rate": 6.927541821925892e-07, | |
| "loss": 1.9994, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 2.4658706344060457, | |
| "learning_rate": 6.925725520014778e-07, | |
| "loss": 2.1002, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.19025157232704404, | |
| "grad_norm": 1.9874120823661252, | |
| "learning_rate": 6.923886978557511e-07, | |
| "loss": 1.891, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1918238993710692, | |
| "grad_norm": 2.1896911235137266, | |
| "learning_rate": 6.922026209489765e-07, | |
| "loss": 2.0324, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.19339622641509435, | |
| "grad_norm": 2.3459367974500687, | |
| "learning_rate": 6.920143224891506e-07, | |
| "loss": 1.9346, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1949685534591195, | |
| "grad_norm": 2.231755148378703, | |
| "learning_rate": 6.918238036986926e-07, | |
| "loss": 1.8345, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.19654088050314467, | |
| "grad_norm": 2.279350328924178, | |
| "learning_rate": 6.91631065814436e-07, | |
| "loss": 1.8852, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.19811320754716982, | |
| "grad_norm": 2.411938466651685, | |
| "learning_rate": 6.914361100876199e-07, | |
| "loss": 1.8085, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.19968553459119498, | |
| "grad_norm": 2.2484290713562403, | |
| "learning_rate": 6.912389377838822e-07, | |
| "loss": 1.7151, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.20125786163522014, | |
| "grad_norm": 2.27114158109643, | |
| "learning_rate": 6.910395501832502e-07, | |
| "loss": 1.9463, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2028301886792453, | |
| "grad_norm": 2.1360495919260765, | |
| "learning_rate": 6.908379485801327e-07, | |
| "loss": 2.0075, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.20440251572327045, | |
| "grad_norm": 2.234634251477468, | |
| "learning_rate": 6.906341342833119e-07, | |
| "loss": 1.8222, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2059748427672956, | |
| "grad_norm": 2.354620761146764, | |
| "learning_rate": 6.904281086159346e-07, | |
| "loss": 1.8145, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.20754716981132076, | |
| "grad_norm": 2.009458317182769, | |
| "learning_rate": 6.902198729155034e-07, | |
| "loss": 1.8962, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.20911949685534592, | |
| "grad_norm": 2.2143437574616938, | |
| "learning_rate": 6.900094285338686e-07, | |
| "loss": 1.9396, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.21069182389937108, | |
| "grad_norm": 2.2176657548464904, | |
| "learning_rate": 6.897967768372188e-07, | |
| "loss": 1.8453, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.21226415094339623, | |
| "grad_norm": 2.30003499713903, | |
| "learning_rate": 6.895819192060725e-07, | |
| "loss": 1.899, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2138364779874214, | |
| "grad_norm": 2.090050155082603, | |
| "learning_rate": 6.893648570352687e-07, | |
| "loss": 1.899, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.21540880503144655, | |
| "grad_norm": 2.2138416672788406, | |
| "learning_rate": 6.891455917339585e-07, | |
| "loss": 1.6803, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2169811320754717, | |
| "grad_norm": 2.275731103613008, | |
| "learning_rate": 6.889241247255951e-07, | |
| "loss": 1.8866, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.21855345911949686, | |
| "grad_norm": 2.2018144326894764, | |
| "learning_rate": 6.887004574479256e-07, | |
| "loss": 1.7657, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.22012578616352202, | |
| "grad_norm": 2.2574275757278586, | |
| "learning_rate": 6.884745913529804e-07, | |
| "loss": 1.8978, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.22169811320754718, | |
| "grad_norm": 2.4947452295545323, | |
| "learning_rate": 6.882465279070651e-07, | |
| "loss": 2.1121, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.22327044025157233, | |
| "grad_norm": 2.321836578962341, | |
| "learning_rate": 6.880162685907497e-07, | |
| "loss": 1.8622, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2248427672955975, | |
| "grad_norm": 2.234624773604752, | |
| "learning_rate": 6.877838148988602e-07, | |
| "loss": 1.8192, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.22641509433962265, | |
| "grad_norm": 2.1827223385186834, | |
| "learning_rate": 6.87549168340468e-07, | |
| "loss": 1.9317, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2279874213836478, | |
| "grad_norm": 2.2058319167479987, | |
| "learning_rate": 6.873123304388804e-07, | |
| "loss": 2.1388, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.22955974842767296, | |
| "grad_norm": 2.336708828184526, | |
| "learning_rate": 6.870733027316308e-07, | |
| "loss": 2.0724, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.23113207547169812, | |
| "grad_norm": 2.352880258314341, | |
| "learning_rate": 6.868320867704689e-07, | |
| "loss": 1.7697, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.23270440251572327, | |
| "grad_norm": 2.062384155984072, | |
| "learning_rate": 6.865886841213497e-07, | |
| "loss": 2.1101, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.23427672955974843, | |
| "grad_norm": 2.1530391868129626, | |
| "learning_rate": 6.863430963644248e-07, | |
| "loss": 1.9232, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.2358490566037736, | |
| "grad_norm": 2.1758733285873113, | |
| "learning_rate": 6.860953250940309e-07, | |
| "loss": 1.8156, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23742138364779874, | |
| "grad_norm": 2.3117896294416593, | |
| "learning_rate": 6.8584537191868e-07, | |
| "loss": 1.8781, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.2389937106918239, | |
| "grad_norm": 2.00639335375149, | |
| "learning_rate": 6.855932384610488e-07, | |
| "loss": 1.8219, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.24056603773584906, | |
| "grad_norm": 2.10561171383, | |
| "learning_rate": 6.853389263579684e-07, | |
| "loss": 1.7392, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.24213836477987422, | |
| "grad_norm": 2.4444601099856795, | |
| "learning_rate": 6.850824372604132e-07, | |
| "loss": 1.9414, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.24371069182389937, | |
| "grad_norm": 2.4242659961108495, | |
| "learning_rate": 6.848237728334909e-07, | |
| "loss": 1.839, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.24528301886792453, | |
| "grad_norm": 2.2032265431848597, | |
| "learning_rate": 6.845629347564309e-07, | |
| "loss": 1.8687, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2468553459119497, | |
| "grad_norm": 2.2129417804509024, | |
| "learning_rate": 6.842999247225737e-07, | |
| "loss": 1.8612, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.24842767295597484, | |
| "grad_norm": 2.305447975246766, | |
| "learning_rate": 6.840347444393605e-07, | |
| "loss": 2.0208, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.122283205654406, | |
| "learning_rate": 6.837673956283212e-07, | |
| "loss": 1.8356, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.25157232704402516, | |
| "grad_norm": 2.332633354247406, | |
| "learning_rate": 6.834978800250636e-07, | |
| "loss": 1.902, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2531446540880503, | |
| "grad_norm": 2.376667609853809, | |
| "learning_rate": 6.832261993792623e-07, | |
| "loss": 2.062, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.25471698113207547, | |
| "grad_norm": 2.220933142644897, | |
| "learning_rate": 6.829523554546471e-07, | |
| "loss": 2.0558, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2562893081761006, | |
| "grad_norm": 2.033225497961393, | |
| "learning_rate": 6.826763500289916e-07, | |
| "loss": 1.8403, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.2578616352201258, | |
| "grad_norm": 2.1916641547727576, | |
| "learning_rate": 6.823981848941018e-07, | |
| "loss": 2.0203, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.25943396226415094, | |
| "grad_norm": 2.247501037485472, | |
| "learning_rate": 6.821178618558043e-07, | |
| "loss": 1.775, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2610062893081761, | |
| "grad_norm": 2.1659369555105723, | |
| "learning_rate": 6.818353827339348e-07, | |
| "loss": 2.1447, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.26257861635220126, | |
| "grad_norm": 1.8752494947917147, | |
| "learning_rate": 6.815507493623258e-07, | |
| "loss": 1.8795, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.2641509433962264, | |
| "grad_norm": 2.1874497821008663, | |
| "learning_rate": 6.812639635887953e-07, | |
| "loss": 1.9524, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.26572327044025157, | |
| "grad_norm": 2.1976673497867862, | |
| "learning_rate": 6.809750272751346e-07, | |
| "loss": 1.8337, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.2672955974842767, | |
| "grad_norm": 2.2148302103458386, | |
| "learning_rate": 6.806839422970957e-07, | |
| "loss": 1.8816, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2688679245283019, | |
| "grad_norm": 2.1811128155566775, | |
| "learning_rate": 6.803907105443801e-07, | |
| "loss": 1.8994, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.27044025157232704, | |
| "grad_norm": 2.309399739378513, | |
| "learning_rate": 6.800953339206256e-07, | |
| "loss": 1.8565, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2720125786163522, | |
| "grad_norm": 2.2522360613496737, | |
| "learning_rate": 6.797978143433946e-07, | |
| "loss": 1.827, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.27358490566037735, | |
| "grad_norm": 2.1425200823912856, | |
| "learning_rate": 6.794981537441612e-07, | |
| "loss": 1.8159, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2751572327044025, | |
| "grad_norm": 2.2841020729940946, | |
| "learning_rate": 6.791963540682988e-07, | |
| "loss": 1.8514, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.27672955974842767, | |
| "grad_norm": 2.225194894385099, | |
| "learning_rate": 6.788924172750679e-07, | |
| "loss": 1.9328, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2783018867924528, | |
| "grad_norm": 2.09421585410522, | |
| "learning_rate": 6.785863453376026e-07, | |
| "loss": 1.7486, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.279874213836478, | |
| "grad_norm": 2.333617044046103, | |
| "learning_rate": 6.782781402428983e-07, | |
| "loss": 1.772, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.28144654088050314, | |
| "grad_norm": 2.178844778690441, | |
| "learning_rate": 6.779678039917989e-07, | |
| "loss": 1.7756, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2830188679245283, | |
| "grad_norm": 2.1517711064133644, | |
| "learning_rate": 6.776553385989832e-07, | |
| "loss": 1.9176, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.28459119496855345, | |
| "grad_norm": 2.2680430076464133, | |
| "learning_rate": 6.773407460929527e-07, | |
| "loss": 1.893, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2861635220125786, | |
| "grad_norm": 2.3746361775333686, | |
| "learning_rate": 6.770240285160175e-07, | |
| "loss": 1.943, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.28773584905660377, | |
| "grad_norm": 2.1349785564403216, | |
| "learning_rate": 6.76705187924284e-07, | |
| "loss": 1.7189, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.2893081761006289, | |
| "grad_norm": 2.1525208745450097, | |
| "learning_rate": 6.763842263876403e-07, | |
| "loss": 1.9378, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2908805031446541, | |
| "grad_norm": 2.1312473298021097, | |
| "learning_rate": 6.760611459897444e-07, | |
| "loss": 1.6941, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.29245283018867924, | |
| "grad_norm": 2.1276458698264302, | |
| "learning_rate": 6.757359488280091e-07, | |
| "loss": 1.8998, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2940251572327044, | |
| "grad_norm": 2.304634105409158, | |
| "learning_rate": 6.754086370135895e-07, | |
| "loss": 1.9326, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.29559748427672955, | |
| "grad_norm": 1.9930028532322692, | |
| "learning_rate": 6.750792126713684e-07, | |
| "loss": 1.8537, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2971698113207547, | |
| "grad_norm": 2.1595911760439668, | |
| "learning_rate": 6.747476779399436e-07, | |
| "loss": 1.8215, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.29874213836477986, | |
| "grad_norm": 2.218806657186074, | |
| "learning_rate": 6.744140349716127e-07, | |
| "loss": 1.7037, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.300314465408805, | |
| "grad_norm": 2.216745170195917, | |
| "learning_rate": 6.740782859323604e-07, | |
| "loss": 2.0752, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.3018867924528302, | |
| "grad_norm": 2.077203113815276, | |
| "learning_rate": 6.737404330018436e-07, | |
| "loss": 1.9836, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.30345911949685533, | |
| "grad_norm": 2.192942695759785, | |
| "learning_rate": 6.734004783733772e-07, | |
| "loss": 1.6992, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.3050314465408805, | |
| "grad_norm": 2.5819835895165806, | |
| "learning_rate": 6.730584242539209e-07, | |
| "loss": 2.4884, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.30660377358490565, | |
| "grad_norm": 2.127248431265692, | |
| "learning_rate": 6.727142728640633e-07, | |
| "loss": 1.9178, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3081761006289308, | |
| "grad_norm": 1.9502319695954797, | |
| "learning_rate": 6.72368026438009e-07, | |
| "loss": 1.871, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.30974842767295596, | |
| "grad_norm": 2.2313690170600045, | |
| "learning_rate": 6.720196872235629e-07, | |
| "loss": 1.6974, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.3113207547169811, | |
| "grad_norm": 2.1737546254948574, | |
| "learning_rate": 6.716692574821164e-07, | |
| "loss": 1.9516, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.3128930817610063, | |
| "grad_norm": 2.089055152215892, | |
| "learning_rate": 6.713167394886324e-07, | |
| "loss": 1.7015, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.31446540880503143, | |
| "grad_norm": 2.1241406637075047, | |
| "learning_rate": 6.709621355316306e-07, | |
| "loss": 1.913, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3160377358490566, | |
| "grad_norm": 2.1582457787965343, | |
| "learning_rate": 6.706054479131726e-07, | |
| "loss": 1.9263, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.31761006289308175, | |
| "grad_norm": 2.3223231854168134, | |
| "learning_rate": 6.702466789488468e-07, | |
| "loss": 1.7648, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3191823899371069, | |
| "grad_norm": 2.0439680852310778, | |
| "learning_rate": 6.698858309677537e-07, | |
| "loss": 1.8036, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.32075471698113206, | |
| "grad_norm": 2.134474063516094, | |
| "learning_rate": 6.695229063124907e-07, | |
| "loss": 1.8974, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3223270440251572, | |
| "grad_norm": 2.1079010765204083, | |
| "learning_rate": 6.691579073391366e-07, | |
| "loss": 1.8955, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3238993710691824, | |
| "grad_norm": 2.027683856704508, | |
| "learning_rate": 6.687908364172367e-07, | |
| "loss": 1.9092, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.32547169811320753, | |
| "grad_norm": 2.246810336956572, | |
| "learning_rate": 6.684216959297871e-07, | |
| "loss": 1.679, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.3270440251572327, | |
| "grad_norm": 2.413641384514312, | |
| "learning_rate": 6.680504882732195e-07, | |
| "loss": 1.7324, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.32861635220125784, | |
| "grad_norm": 2.183679584470248, | |
| "learning_rate": 6.676772158573852e-07, | |
| "loss": 1.7184, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.330188679245283, | |
| "grad_norm": 1.953756725302565, | |
| "learning_rate": 6.673018811055401e-07, | |
| "loss": 1.8537, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.33176100628930816, | |
| "grad_norm": 2.7587554774622616, | |
| "learning_rate": 6.669244864543286e-07, | |
| "loss": 1.7957, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 2.0690029827560874, | |
| "learning_rate": 6.665450343537673e-07, | |
| "loss": 1.5945, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.33490566037735847, | |
| "grad_norm": 1.9370210759038746, | |
| "learning_rate": 6.661635272672305e-07, | |
| "loss": 1.8212, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.33647798742138363, | |
| "grad_norm": 2.3157909721510634, | |
| "learning_rate": 6.657799676714325e-07, | |
| "loss": 1.8253, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3380503144654088, | |
| "grad_norm": 2.408718498079195, | |
| "learning_rate": 6.653943580564128e-07, | |
| "loss": 1.9183, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.33962264150943394, | |
| "grad_norm": 2.2428481931189115, | |
| "learning_rate": 6.650067009255193e-07, | |
| "loss": 1.9667, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3411949685534591, | |
| "grad_norm": 2.3060581158229754, | |
| "learning_rate": 6.646169987953921e-07, | |
| "loss": 1.7651, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.34276729559748426, | |
| "grad_norm": 2.272099574457665, | |
| "learning_rate": 6.642252541959475e-07, | |
| "loss": 1.9895, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3443396226415094, | |
| "grad_norm": 2.2813406057736016, | |
| "learning_rate": 6.638314696703613e-07, | |
| "loss": 2.1897, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.34591194968553457, | |
| "grad_norm": 2.18496346075307, | |
| "learning_rate": 6.634356477750522e-07, | |
| "loss": 1.8511, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3474842767295597, | |
| "grad_norm": 1.9232426664702658, | |
| "learning_rate": 6.630377910796655e-07, | |
| "loss": 1.8494, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.3490566037735849, | |
| "grad_norm": 1.9818215355449529, | |
| "learning_rate": 6.626379021670561e-07, | |
| "loss": 1.9395, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.35062893081761004, | |
| "grad_norm": 2.244779710254365, | |
| "learning_rate": 6.622359836332723e-07, | |
| "loss": 1.9374, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.3522012578616352, | |
| "grad_norm": 2.1262049775384333, | |
| "learning_rate": 6.618320380875379e-07, | |
| "loss": 1.879, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.35377358490566035, | |
| "grad_norm": 1.9601702919734405, | |
| "learning_rate": 6.614260681522365e-07, | |
| "loss": 1.7618, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3553459119496855, | |
| "grad_norm": 2.177142901528055, | |
| "learning_rate": 6.610180764628937e-07, | |
| "loss": 1.7007, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.35691823899371067, | |
| "grad_norm": 2.207301297000178, | |
| "learning_rate": 6.606080656681599e-07, | |
| "loss": 2.0777, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3584905660377358, | |
| "grad_norm": 2.325333907285728, | |
| "learning_rate": 6.601960384297937e-07, | |
| "loss": 1.8572, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.360062893081761, | |
| "grad_norm": 2.176590475529821, | |
| "learning_rate": 6.597819974226442e-07, | |
| "loss": 1.911, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.36163522012578614, | |
| "grad_norm": 2.231029983251665, | |
| "learning_rate": 6.593659453346336e-07, | |
| "loss": 1.8546, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3632075471698113, | |
| "grad_norm": 2.09941453798109, | |
| "learning_rate": 6.589478848667402e-07, | |
| "loss": 1.7259, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.36477987421383645, | |
| "grad_norm": 2.207729891460013, | |
| "learning_rate": 6.585278187329803e-07, | |
| "loss": 2.0615, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3663522012578616, | |
| "grad_norm": 2.160194675811721, | |
| "learning_rate": 6.581057496603907e-07, | |
| "loss": 1.5577, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.36792452830188677, | |
| "grad_norm": 2.1933688787719117, | |
| "learning_rate": 6.576816803890115e-07, | |
| "loss": 1.8231, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3694968553459119, | |
| "grad_norm": 2.0304475960191275, | |
| "learning_rate": 6.572556136718678e-07, | |
| "loss": 1.7399, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3710691823899371, | |
| "grad_norm": 2.0372413629068973, | |
| "learning_rate": 6.568275522749514e-07, | |
| "loss": 1.7089, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.37264150943396224, | |
| "grad_norm": 2.2572731149937297, | |
| "learning_rate": 6.563974989772047e-07, | |
| "loss": 2.0235, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.3742138364779874, | |
| "grad_norm": 2.0112586745579155, | |
| "learning_rate": 6.559654565704999e-07, | |
| "loss": 1.6913, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3757861635220126, | |
| "grad_norm": 2.020887524725217, | |
| "learning_rate": 6.555314278596232e-07, | |
| "loss": 1.9021, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 2.062052722629033, | |
| "learning_rate": 6.550954156622559e-07, | |
| "loss": 1.8555, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3789308176100629, | |
| "grad_norm": 2.146665256772467, | |
| "learning_rate": 6.546574228089551e-07, | |
| "loss": 1.9002, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3805031446540881, | |
| "grad_norm": 2.238196549782288, | |
| "learning_rate": 6.542174521431369e-07, | |
| "loss": 1.9807, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.38207547169811323, | |
| "grad_norm": 2.111526491095184, | |
| "learning_rate": 6.537755065210571e-07, | |
| "loss": 1.9268, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3836477987421384, | |
| "grad_norm": 1.9805520877642875, | |
| "learning_rate": 6.533315888117923e-07, | |
| "loss": 1.8807, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.38522012578616355, | |
| "grad_norm": 2.1505066292288313, | |
| "learning_rate": 6.528857018972223e-07, | |
| "loss": 1.8132, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.3867924528301887, | |
| "grad_norm": 2.1682916632827913, | |
| "learning_rate": 6.524378486720107e-07, | |
| "loss": 1.8423, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.38836477987421386, | |
| "grad_norm": 2.131056377037645, | |
| "learning_rate": 6.519880320435858e-07, | |
| "loss": 1.9015, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.389937106918239, | |
| "grad_norm": 2.3204970045437094, | |
| "learning_rate": 6.515362549321227e-07, | |
| "loss": 1.9385, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3915094339622642, | |
| "grad_norm": 2.051436724730336, | |
| "learning_rate": 6.510825202705237e-07, | |
| "loss": 1.7837, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.39308176100628933, | |
| "grad_norm": 2.1183882128382785, | |
| "learning_rate": 6.506268310043991e-07, | |
| "loss": 1.9752, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.39308176100628933, | |
| "eval_sat2_MCTS_chains_SFT_val_loss": 1.7381622791290283, | |
| "eval_sat2_MCTS_chains_SFT_val_runtime": 91.8844, | |
| "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.188, | |
| "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.404, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3946540880503145, | |
| "grad_norm": 2.162981802199106, | |
| "learning_rate": 6.501691900920484e-07, | |
| "loss": 1.9521, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.39622641509433965, | |
| "grad_norm": 1.9032699491782559, | |
| "learning_rate": 6.497096005044415e-07, | |
| "loss": 1.6293, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3977987421383648, | |
| "grad_norm": 2.160690442124648, | |
| "learning_rate": 6.492480652251983e-07, | |
| "loss": 1.6815, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.39937106918238996, | |
| "grad_norm": 2.200251276319585, | |
| "learning_rate": 6.487845872505703e-07, | |
| "loss": 1.6191, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.4009433962264151, | |
| "grad_norm": 2.1153136111562834, | |
| "learning_rate": 6.483191695894209e-07, | |
| "loss": 1.7317, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.4025157232704403, | |
| "grad_norm": 2.136580109576303, | |
| "learning_rate": 6.478518152632057e-07, | |
| "loss": 1.6782, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.40408805031446543, | |
| "grad_norm": 1.993099001469681, | |
| "learning_rate": 6.473825273059529e-07, | |
| "loss": 1.9167, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.4056603773584906, | |
| "grad_norm": 2.348061223274077, | |
| "learning_rate": 6.469113087642439e-07, | |
| "loss": 1.8473, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.40723270440251574, | |
| "grad_norm": 1.9562289450639117, | |
| "learning_rate": 6.46438162697193e-07, | |
| "loss": 1.8061, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.4088050314465409, | |
| "grad_norm": 2.340728555526634, | |
| "learning_rate": 6.459630921764282e-07, | |
| "loss": 1.5497, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.41037735849056606, | |
| "grad_norm": 2.2351691072279096, | |
| "learning_rate": 6.454861002860705e-07, | |
| "loss": 2.1132, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.4119496855345912, | |
| "grad_norm": 2.1290065996236613, | |
| "learning_rate": 6.450071901227147e-07, | |
| "loss": 1.7072, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.41352201257861637, | |
| "grad_norm": 2.3103776756030054, | |
| "learning_rate": 6.445263647954086e-07, | |
| "loss": 2.0191, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.41509433962264153, | |
| "grad_norm": 2.1783834129569137, | |
| "learning_rate": 6.440436274256333e-07, | |
| "loss": 1.6563, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 2.3188145114913747, | |
| "learning_rate": 6.435589811472823e-07, | |
| "loss": 2.1497, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.41823899371069184, | |
| "grad_norm": 2.2490188310531303, | |
| "learning_rate": 6.430724291066422e-07, | |
| "loss": 1.6207, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.419811320754717, | |
| "grad_norm": 2.0324238162321877, | |
| "learning_rate": 6.425839744623711e-07, | |
| "loss": 1.8477, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.42138364779874216, | |
| "grad_norm": 2.1650874047734505, | |
| "learning_rate": 6.420936203854793e-07, | |
| "loss": 1.7936, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4229559748427673, | |
| "grad_norm": 2.157646176152135, | |
| "learning_rate": 6.416013700593074e-07, | |
| "loss": 2.1013, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.42452830188679247, | |
| "grad_norm": 2.1369610613705863, | |
| "learning_rate": 6.411072266795066e-07, | |
| "loss": 1.6905, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4261006289308176, | |
| "grad_norm": 2.108532284547873, | |
| "learning_rate": 6.406111934540178e-07, | |
| "loss": 1.78, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.4276729559748428, | |
| "grad_norm": 2.142315122232749, | |
| "learning_rate": 6.401132736030504e-07, | |
| "loss": 1.738, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.42924528301886794, | |
| "grad_norm": 2.2549541806802296, | |
| "learning_rate": 6.396134703590617e-07, | |
| "loss": 1.7392, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.4308176100628931, | |
| "grad_norm": 2.0286792453580573, | |
| "learning_rate": 6.391117869667358e-07, | |
| "loss": 1.8195, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.43238993710691825, | |
| "grad_norm": 2.0337872779297017, | |
| "learning_rate": 6.386082266829629e-07, | |
| "loss": 1.8084, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4339622641509434, | |
| "grad_norm": 2.095428278939121, | |
| "learning_rate": 6.381027927768171e-07, | |
| "loss": 1.943, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.43553459119496857, | |
| "grad_norm": 2.013844627037566, | |
| "learning_rate": 6.375954885295369e-07, | |
| "loss": 1.6934, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.4371069182389937, | |
| "grad_norm": 2.2579768491043497, | |
| "learning_rate": 6.37086317234502e-07, | |
| "loss": 1.8158, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4386792452830189, | |
| "grad_norm": 2.3262366087078354, | |
| "learning_rate": 6.365752821972134e-07, | |
| "loss": 1.83, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.44025157232704404, | |
| "grad_norm": 2.0303047010962376, | |
| "learning_rate": 6.36062386735271e-07, | |
| "loss": 1.7752, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4418238993710692, | |
| "grad_norm": 2.1138830916579425, | |
| "learning_rate": 6.355476341783529e-07, | |
| "loss": 1.9532, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.44339622641509435, | |
| "grad_norm": 2.100950656642803, | |
| "learning_rate": 6.35031027868193e-07, | |
| "loss": 1.7841, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4449685534591195, | |
| "grad_norm": 2.175666972234393, | |
| "learning_rate": 6.345125711585594e-07, | |
| "loss": 1.7535, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.44654088050314467, | |
| "grad_norm": 2.090692135911591, | |
| "learning_rate": 6.339922674152333e-07, | |
| "loss": 1.9545, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.4481132075471698, | |
| "grad_norm": 2.2150355818462875, | |
| "learning_rate": 6.334701200159867e-07, | |
| "loss": 1.7806, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.449685534591195, | |
| "grad_norm": 2.1792703239922218, | |
| "learning_rate": 6.3294613235056e-07, | |
| "loss": 1.8228, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.45125786163522014, | |
| "grad_norm": 2.3237367502125794, | |
| "learning_rate": 6.324203078206408e-07, | |
| "loss": 1.7555, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.4528301886792453, | |
| "grad_norm": 2.119235492747239, | |
| "learning_rate": 6.318926498398415e-07, | |
| "loss": 2.1297, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.45440251572327045, | |
| "grad_norm": 2.066839274608312, | |
| "learning_rate": 6.31363161833677e-07, | |
| "loss": 1.802, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.4559748427672956, | |
| "grad_norm": 2.1311539498302716, | |
| "learning_rate": 6.308318472395429e-07, | |
| "loss": 1.8139, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.45754716981132076, | |
| "grad_norm": 2.191273523825869, | |
| "learning_rate": 6.302987095066923e-07, | |
| "loss": 1.8633, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.4591194968553459, | |
| "grad_norm": 2.2656122854422462, | |
| "learning_rate": 6.297637520962143e-07, | |
| "loss": 1.7441, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4606918238993711, | |
| "grad_norm": 1.93728883044019, | |
| "learning_rate": 6.292269784810113e-07, | |
| "loss": 1.9895, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.46226415094339623, | |
| "grad_norm": 2.3187477308368343, | |
| "learning_rate": 6.286883921457763e-07, | |
| "loss": 1.83, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4638364779874214, | |
| "grad_norm": 2.202481261207392, | |
| "learning_rate": 6.281479965869702e-07, | |
| "loss": 1.8797, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.46540880503144655, | |
| "grad_norm": 2.0842105854347657, | |
| "learning_rate": 6.276057953127994e-07, | |
| "loss": 1.7532, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.4669811320754717, | |
| "grad_norm": 2.207130976094581, | |
| "learning_rate": 6.270617918431928e-07, | |
| "loss": 1.7917, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.46855345911949686, | |
| "grad_norm": 2.032959806862764, | |
| "learning_rate": 6.26515989709779e-07, | |
| "loss": 1.8665, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.470125786163522, | |
| "grad_norm": 1.9531386309076453, | |
| "learning_rate": 6.259683924558633e-07, | |
| "loss": 1.8778, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.4716981132075472, | |
| "grad_norm": 2.0714051206159896, | |
| "learning_rate": 6.254190036364051e-07, | |
| "loss": 1.7126, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.47327044025157233, | |
| "grad_norm": 2.2881658756108907, | |
| "learning_rate": 6.24867826817994e-07, | |
| "loss": 1.8592, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.4748427672955975, | |
| "grad_norm": 2.0634196311190345, | |
| "learning_rate": 6.243148655788276e-07, | |
| "loss": 2.3116, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.47641509433962265, | |
| "grad_norm": 2.1436128024329024, | |
| "learning_rate": 6.237601235086879e-07, | |
| "loss": 1.7473, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.4779874213836478, | |
| "grad_norm": 2.2670611315983793, | |
| "learning_rate": 6.232036042089171e-07, | |
| "loss": 1.9504, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.47955974842767296, | |
| "grad_norm": 2.138063229224171, | |
| "learning_rate": 6.226453112923962e-07, | |
| "loss": 1.8807, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4811320754716981, | |
| "grad_norm": 2.169257324016606, | |
| "learning_rate": 6.220852483835196e-07, | |
| "loss": 1.9444, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4827044025157233, | |
| "grad_norm": 2.230348996182296, | |
| "learning_rate": 6.215234191181725e-07, | |
| "loss": 1.8807, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.48427672955974843, | |
| "grad_norm": 2.2607928616037967, | |
| "learning_rate": 6.20959827143707e-07, | |
| "loss": 1.9083, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4858490566037736, | |
| "grad_norm": 2.3264562054360756, | |
| "learning_rate": 6.203944761189192e-07, | |
| "loss": 1.7524, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.48742138364779874, | |
| "grad_norm": 1.9976309357346456, | |
| "learning_rate": 6.198273697140243e-07, | |
| "loss": 1.9204, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4889937106918239, | |
| "grad_norm": 2.24371014902543, | |
| "learning_rate": 6.19258511610633e-07, | |
| "loss": 1.7535, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.49056603773584906, | |
| "grad_norm": 2.2058395033512554, | |
| "learning_rate": 6.186879055017288e-07, | |
| "loss": 1.7674, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4921383647798742, | |
| "grad_norm": 2.024400868779697, | |
| "learning_rate": 6.181155550916422e-07, | |
| "loss": 1.6954, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.4937106918238994, | |
| "grad_norm": 2.269535233545749, | |
| "learning_rate": 6.175414640960283e-07, | |
| "loss": 1.7337, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.49528301886792453, | |
| "grad_norm": 1.9745297525246102, | |
| "learning_rate": 6.169656362418414e-07, | |
| "loss": 1.8684, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4968553459119497, | |
| "grad_norm": 1.976330025219871, | |
| "learning_rate": 6.163880752673117e-07, | |
| "loss": 2.1739, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.49842767295597484, | |
| "grad_norm": 2.1760376472956025, | |
| "learning_rate": 6.158087849219204e-07, | |
| "loss": 1.9162, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.143628134207709, | |
| "learning_rate": 6.152277689663759e-07, | |
| "loss": 1.7411, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.5015723270440252, | |
| "grad_norm": 1.9712614846347265, | |
| "learning_rate": 6.146450311725888e-07, | |
| "loss": 1.7378, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.5031446540880503, | |
| "grad_norm": 2.1616472374659357, | |
| "learning_rate": 6.140605753236483e-07, | |
| "loss": 1.741, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5047169811320755, | |
| "grad_norm": 2.2014372084510074, | |
| "learning_rate": 6.134744052137967e-07, | |
| "loss": 1.7833, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.5062893081761006, | |
| "grad_norm": 1.982079271247313, | |
| "learning_rate": 6.128865246484048e-07, | |
| "loss": 1.8626, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.5078616352201258, | |
| "grad_norm": 2.254872366939666, | |
| "learning_rate": 6.122969374439483e-07, | |
| "loss": 1.8039, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.5094339622641509, | |
| "grad_norm": 2.016915503448893, | |
| "learning_rate": 6.11705647427982e-07, | |
| "loss": 1.5489, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5110062893081762, | |
| "grad_norm": 2.1164070023469788, | |
| "learning_rate": 6.111126584391148e-07, | |
| "loss": 1.7177, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5125786163522013, | |
| "grad_norm": 2.1940717974335846, | |
| "learning_rate": 6.105179743269858e-07, | |
| "loss": 1.844, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5141509433962265, | |
| "grad_norm": 2.2525569223241186, | |
| "learning_rate": 6.099215989522382e-07, | |
| "loss": 2.4854, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.5157232704402516, | |
| "grad_norm": 2.0259464742758784, | |
| "learning_rate": 6.093235361864952e-07, | |
| "loss": 2.1479, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5172955974842768, | |
| "grad_norm": 2.3113158826769347, | |
| "learning_rate": 6.087237899123342e-07, | |
| "loss": 1.6807, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.5188679245283019, | |
| "grad_norm": 2.032892966587123, | |
| "learning_rate": 6.081223640232616e-07, | |
| "loss": 2.0318, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5204402515723271, | |
| "grad_norm": 2.3650596675106472, | |
| "learning_rate": 6.075192624236881e-07, | |
| "loss": 1.9068, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.5220125786163522, | |
| "grad_norm": 2.2695747251563225, | |
| "learning_rate": 6.069144890289028e-07, | |
| "loss": 1.6508, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5235849056603774, | |
| "grad_norm": 2.13682292483943, | |
| "learning_rate": 6.063080477650479e-07, | |
| "loss": 1.9008, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.5251572327044025, | |
| "grad_norm": 2.108983919386453, | |
| "learning_rate": 6.056999425690935e-07, | |
| "loss": 1.7042, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5267295597484277, | |
| "grad_norm": 2.1512302698424075, | |
| "learning_rate": 6.050901773888115e-07, | |
| "loss": 1.664, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5283018867924528, | |
| "grad_norm": 2.039353542540568, | |
| "learning_rate": 6.044787561827507e-07, | |
| "loss": 1.7934, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.529874213836478, | |
| "grad_norm": 2.0719588798830655, | |
| "learning_rate": 6.038656829202103e-07, | |
| "loss": 1.9121, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.5314465408805031, | |
| "grad_norm": 1.9491996764419244, | |
| "learning_rate": 6.032509615812147e-07, | |
| "loss": 1.9204, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.5330188679245284, | |
| "grad_norm": 2.1113652450511897, | |
| "learning_rate": 6.026345961564875e-07, | |
| "loss": 1.8414, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.5345911949685535, | |
| "grad_norm": 2.2032131432104767, | |
| "learning_rate": 6.020165906474257e-07, | |
| "loss": 1.7114, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5361635220125787, | |
| "grad_norm": 2.1791413791352565, | |
| "learning_rate": 6.013969490660731e-07, | |
| "loss": 2.2435, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.5377358490566038, | |
| "grad_norm": 2.0891510363928614, | |
| "learning_rate": 6.007756754350954e-07, | |
| "loss": 1.7701, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.539308176100629, | |
| "grad_norm": 2.0697597215600196, | |
| "learning_rate": 6.001527737877532e-07, | |
| "loss": 1.8208, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.5408805031446541, | |
| "grad_norm": 2.186994903601224, | |
| "learning_rate": 5.995282481678758e-07, | |
| "loss": 1.8373, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5424528301886793, | |
| "grad_norm": 2.2604938659796545, | |
| "learning_rate": 5.989021026298354e-07, | |
| "loss": 1.7995, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5440251572327044, | |
| "grad_norm": 2.265552317615986, | |
| "learning_rate": 5.982743412385207e-07, | |
| "loss": 1.7546, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5455974842767296, | |
| "grad_norm": 2.15564901479827, | |
| "learning_rate": 5.976449680693104e-07, | |
| "loss": 1.8596, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5471698113207547, | |
| "grad_norm": 2.0408737176330707, | |
| "learning_rate": 5.970139872080463e-07, | |
| "loss": 1.7603, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5487421383647799, | |
| "grad_norm": 2.0704318452346153, | |
| "learning_rate": 5.963814027510079e-07, | |
| "loss": 1.9277, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.550314465408805, | |
| "grad_norm": 2.2298784446231243, | |
| "learning_rate": 5.957472188048845e-07, | |
| "loss": 1.7991, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5518867924528302, | |
| "grad_norm": 2.0268792215372455, | |
| "learning_rate": 5.951114394867494e-07, | |
| "loss": 1.7636, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5534591194968553, | |
| "grad_norm": 2.0724518345088527, | |
| "learning_rate": 5.944740689240333e-07, | |
| "loss": 1.8476, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5550314465408805, | |
| "grad_norm": 2.2252899856685375, | |
| "learning_rate": 5.938351112544964e-07, | |
| "loss": 1.9069, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5566037735849056, | |
| "grad_norm": 2.0991045116850784, | |
| "learning_rate": 5.931945706262028e-07, | |
| "loss": 1.8448, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5581761006289309, | |
| "grad_norm": 2.049481500083905, | |
| "learning_rate": 5.92552451197493e-07, | |
| "loss": 1.8783, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.559748427672956, | |
| "grad_norm": 2.1757357613660893, | |
| "learning_rate": 5.919087571369567e-07, | |
| "loss": 1.8135, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5613207547169812, | |
| "grad_norm": 2.261669343627697, | |
| "learning_rate": 5.912634926234063e-07, | |
| "loss": 1.9118, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.5628930817610063, | |
| "grad_norm": 2.3346020516981976, | |
| "learning_rate": 5.906166618458491e-07, | |
| "loss": 1.9321, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5644654088050315, | |
| "grad_norm": 1.9754635648300218, | |
| "learning_rate": 5.89968269003461e-07, | |
| "loss": 1.7947, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 2.1082779863127232, | |
| "learning_rate": 5.893183183055581e-07, | |
| "loss": 2.1433, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5676100628930818, | |
| "grad_norm": 2.2719804428628474, | |
| "learning_rate": 5.886668139715704e-07, | |
| "loss": 1.7544, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.5691823899371069, | |
| "grad_norm": 2.063124532930253, | |
| "learning_rate": 5.880137602310138e-07, | |
| "loss": 1.8496, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5707547169811321, | |
| "grad_norm": 2.178454726609103, | |
| "learning_rate": 5.873591613234628e-07, | |
| "loss": 1.8731, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.5723270440251572, | |
| "grad_norm": 2.2292005546265092, | |
| "learning_rate": 5.867030214985232e-07, | |
| "loss": 1.6673, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5738993710691824, | |
| "grad_norm": 2.1915150883178467, | |
| "learning_rate": 5.860453450158042e-07, | |
| "loss": 1.8147, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5754716981132075, | |
| "grad_norm": 2.068884145719646, | |
| "learning_rate": 5.853861361448906e-07, | |
| "loss": 1.8357, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5770440251572327, | |
| "grad_norm": 2.2335775263207656, | |
| "learning_rate": 5.847253991653161e-07, | |
| "loss": 1.7674, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5786163522012578, | |
| "grad_norm": 2.028099342557415, | |
| "learning_rate": 5.840631383665337e-07, | |
| "loss": 1.9504, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5801886792452831, | |
| "grad_norm": 2.171456280936142, | |
| "learning_rate": 5.833993580478899e-07, | |
| "loss": 1.7799, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5817610062893082, | |
| "grad_norm": 2.188440801596699, | |
| "learning_rate": 5.827340625185951e-07, | |
| "loss": 1.7664, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 2.061808710093214, | |
| "learning_rate": 5.820672560976968e-07, | |
| "loss": 1.8846, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5849056603773585, | |
| "grad_norm": 2.080838024759326, | |
| "learning_rate": 5.813989431140509e-07, | |
| "loss": 1.8208, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5864779874213837, | |
| "grad_norm": 2.1013196605531266, | |
| "learning_rate": 5.807291279062938e-07, | |
| "loss": 1.7698, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5880503144654088, | |
| "grad_norm": 2.1083157749408405, | |
| "learning_rate": 5.800578148228141e-07, | |
| "loss": 2.0087, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.589622641509434, | |
| "grad_norm": 2.327452355557212, | |
| "learning_rate": 5.793850082217248e-07, | |
| "loss": 2.2253, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5911949685534591, | |
| "grad_norm": 2.2687807974688656, | |
| "learning_rate": 5.787107124708343e-07, | |
| "loss": 1.7986, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5927672955974843, | |
| "grad_norm": 2.0752387638298835, | |
| "learning_rate": 5.780349319476189e-07, | |
| "loss": 1.6402, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5943396226415094, | |
| "grad_norm": 2.2870254808353954, | |
| "learning_rate": 5.773576710391937e-07, | |
| "loss": 1.8418, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5959119496855346, | |
| "grad_norm": 2.146779959111506, | |
| "learning_rate": 5.766789341422841e-07, | |
| "loss": 1.8692, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5974842767295597, | |
| "grad_norm": 2.2844314903310585, | |
| "learning_rate": 5.75998725663198e-07, | |
| "loss": 1.713, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5990566037735849, | |
| "grad_norm": 2.08256027270516, | |
| "learning_rate": 5.753170500177962e-07, | |
| "loss": 2.0656, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.60062893081761, | |
| "grad_norm": 2.1961855730334676, | |
| "learning_rate": 5.746339116314646e-07, | |
| "loss": 1.6188, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.6022012578616353, | |
| "grad_norm": 2.0468929231050748, | |
| "learning_rate": 5.739493149390851e-07, | |
| "loss": 1.8984, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.6037735849056604, | |
| "grad_norm": 2.0935907404848746, | |
| "learning_rate": 5.732632643850064e-07, | |
| "loss": 1.6483, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.6053459119496856, | |
| "grad_norm": 2.0252018235406575, | |
| "learning_rate": 5.725757644230159e-07, | |
| "loss": 1.9005, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6069182389937107, | |
| "grad_norm": 2.001163026327367, | |
| "learning_rate": 5.718868195163106e-07, | |
| "loss": 1.829, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.6084905660377359, | |
| "grad_norm": 2.1924828073774476, | |
| "learning_rate": 5.711964341374676e-07, | |
| "loss": 1.6139, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.610062893081761, | |
| "grad_norm": 2.228468039969882, | |
| "learning_rate": 5.705046127684158e-07, | |
| "loss": 1.8581, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.6116352201257862, | |
| "grad_norm": 2.1871856796652906, | |
| "learning_rate": 5.698113599004058e-07, | |
| "loss": 1.8884, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.6132075471698113, | |
| "grad_norm": 2.119012481165798, | |
| "learning_rate": 5.691166800339823e-07, | |
| "loss": 1.7771, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6147798742138365, | |
| "grad_norm": 2.1752727454942677, | |
| "learning_rate": 5.684205776789531e-07, | |
| "loss": 1.7475, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.6163522012578616, | |
| "grad_norm": 1.9703566361856617, | |
| "learning_rate": 5.677230573543612e-07, | |
| "loss": 1.995, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.6179245283018868, | |
| "grad_norm": 1.9783136401244312, | |
| "learning_rate": 5.670241235884547e-07, | |
| "loss": 1.6578, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.6194968553459119, | |
| "grad_norm": 2.1003352712507084, | |
| "learning_rate": 5.663237809186581e-07, | |
| "loss": 1.7801, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.6210691823899371, | |
| "grad_norm": 2.2674486565881344, | |
| "learning_rate": 5.656220338915417e-07, | |
| "loss": 1.7259, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6226415094339622, | |
| "grad_norm": 2.1909929939479196, | |
| "learning_rate": 5.649188870627932e-07, | |
| "loss": 1.791, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.6242138364779874, | |
| "grad_norm": 2.1847378593727855, | |
| "learning_rate": 5.642143449971877e-07, | |
| "loss": 1.6481, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.6257861635220126, | |
| "grad_norm": 2.296413505588846, | |
| "learning_rate": 5.635084122685582e-07, | |
| "loss": 1.6404, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6273584905660378, | |
| "grad_norm": 2.3710100879322815, | |
| "learning_rate": 5.628010934597652e-07, | |
| "loss": 1.8934, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.6289308176100629, | |
| "grad_norm": 2.1015013913914884, | |
| "learning_rate": 5.620923931626681e-07, | |
| "loss": 1.7094, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6305031446540881, | |
| "grad_norm": 2.2084477731115095, | |
| "learning_rate": 5.613823159780947e-07, | |
| "loss": 1.775, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.6320754716981132, | |
| "grad_norm": 1.9632035650078112, | |
| "learning_rate": 5.606708665158114e-07, | |
| "loss": 1.8468, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.6336477987421384, | |
| "grad_norm": 2.3789325797797045, | |
| "learning_rate": 5.59958049394493e-07, | |
| "loss": 1.9285, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.6352201257861635, | |
| "grad_norm": 2.155496601332782, | |
| "learning_rate": 5.592438692416937e-07, | |
| "loss": 2.0406, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6367924528301887, | |
| "grad_norm": 2.237193083014261, | |
| "learning_rate": 5.585283306938159e-07, | |
| "loss": 1.7627, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6383647798742138, | |
| "grad_norm": 2.093924681792703, | |
| "learning_rate": 5.578114383960806e-07, | |
| "loss": 1.5612, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.639937106918239, | |
| "grad_norm": 1.9118398198078217, | |
| "learning_rate": 5.570931970024976e-07, | |
| "loss": 1.7692, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.6415094339622641, | |
| "grad_norm": 2.065885602117071, | |
| "learning_rate": 5.563736111758344e-07, | |
| "loss": 2.1028, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6430817610062893, | |
| "grad_norm": 2.0965149313714035, | |
| "learning_rate": 5.55652685587587e-07, | |
| "loss": 2.112, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.6446540880503144, | |
| "grad_norm": 2.19620885008707, | |
| "learning_rate": 5.549304249179487e-07, | |
| "loss": 1.8514, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6462264150943396, | |
| "grad_norm": 2.157564335495825, | |
| "learning_rate": 5.542068338557801e-07, | |
| "loss": 1.9672, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.6477987421383647, | |
| "grad_norm": 2.339822370018209, | |
| "learning_rate": 5.534819170985786e-07, | |
| "loss": 1.9801, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.64937106918239, | |
| "grad_norm": 2.53967666106126, | |
| "learning_rate": 5.527556793524481e-07, | |
| "loss": 2.0856, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.6509433962264151, | |
| "grad_norm": 2.027714420058619, | |
| "learning_rate": 5.520281253320678e-07, | |
| "loss": 1.92, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6525157232704403, | |
| "grad_norm": 2.2931114820343717, | |
| "learning_rate": 5.512992597606626e-07, | |
| "loss": 1.7635, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6540880503144654, | |
| "grad_norm": 2.103312242134212, | |
| "learning_rate": 5.505690873699716e-07, | |
| "loss": 1.8141, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6556603773584906, | |
| "grad_norm": 2.0874926904070032, | |
| "learning_rate": 5.498376129002176e-07, | |
| "loss": 1.6943, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.6572327044025157, | |
| "grad_norm": 2.064459999298948, | |
| "learning_rate": 5.491048411000766e-07, | |
| "loss": 1.8251, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6588050314465409, | |
| "grad_norm": 2.043867970270658, | |
| "learning_rate": 5.483707767266467e-07, | |
| "loss": 1.9847, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.660377358490566, | |
| "grad_norm": 2.0798717919218332, | |
| "learning_rate": 5.47635424545417e-07, | |
| "loss": 1.7778, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6619496855345912, | |
| "grad_norm": 2.2400310365423897, | |
| "learning_rate": 5.468987893302375e-07, | |
| "loss": 1.9559, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6635220125786163, | |
| "grad_norm": 2.099625244938322, | |
| "learning_rate": 5.461608758632872e-07, | |
| "loss": 1.736, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6650943396226415, | |
| "grad_norm": 2.1409980099649455, | |
| "learning_rate": 5.454216889350435e-07, | |
| "loss": 1.7748, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.0325650663365105, | |
| "learning_rate": 5.44681233344251e-07, | |
| "loss": 1.7192, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6682389937106918, | |
| "grad_norm": 2.0307579619789213, | |
| "learning_rate": 5.4393951389789e-07, | |
| "loss": 1.8139, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6698113207547169, | |
| "grad_norm": 1.9960345481446478, | |
| "learning_rate": 5.431965354111465e-07, | |
| "loss": 1.64, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6713836477987422, | |
| "grad_norm": 2.2621592250213154, | |
| "learning_rate": 5.424523027073794e-07, | |
| "loss": 1.8895, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6729559748427673, | |
| "grad_norm": 1.9753545804161157, | |
| "learning_rate": 5.417068206180899e-07, | |
| "loss": 1.7637, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6745283018867925, | |
| "grad_norm": 2.2178958295189606, | |
| "learning_rate": 5.409600939828906e-07, | |
| "loss": 2.0521, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6761006289308176, | |
| "grad_norm": 1.8923979616131885, | |
| "learning_rate": 5.402121276494731e-07, | |
| "loss": 1.7683, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6776729559748428, | |
| "grad_norm": 2.0677112128365893, | |
| "learning_rate": 5.394629264735772e-07, | |
| "loss": 1.8395, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6792452830188679, | |
| "grad_norm": 2.0153045160857603, | |
| "learning_rate": 5.387124953189594e-07, | |
| "loss": 1.8411, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6808176100628931, | |
| "grad_norm": 2.18275253208093, | |
| "learning_rate": 5.379608390573607e-07, | |
| "loss": 1.8287, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6823899371069182, | |
| "grad_norm": 2.091662651103415, | |
| "learning_rate": 5.372079625684757e-07, | |
| "loss": 1.8308, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6839622641509434, | |
| "grad_norm": 2.1060091304508495, | |
| "learning_rate": 5.364538707399207e-07, | |
| "loss": 1.7317, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6855345911949685, | |
| "grad_norm": 2.2183309272293545, | |
| "learning_rate": 5.356985684672016e-07, | |
| "loss": 1.7259, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6871069182389937, | |
| "grad_norm": 2.076351332712323, | |
| "learning_rate": 5.349420606536826e-07, | |
| "loss": 1.8949, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6886792452830188, | |
| "grad_norm": 2.133834879680037, | |
| "learning_rate": 5.341843522105541e-07, | |
| "loss": 1.881, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.690251572327044, | |
| "grad_norm": 1.866508697696801, | |
| "learning_rate": 5.334254480568012e-07, | |
| "loss": 1.6093, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6918238993710691, | |
| "grad_norm": 2.026325564739705, | |
| "learning_rate": 5.326653531191709e-07, | |
| "loss": 1.7457, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6933962264150944, | |
| "grad_norm": 2.030947206901835, | |
| "learning_rate": 5.319040723321411e-07, | |
| "loss": 1.8016, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6949685534591195, | |
| "grad_norm": 2.2635231796009165, | |
| "learning_rate": 5.31141610637888e-07, | |
| "loss": 1.8274, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6965408805031447, | |
| "grad_norm": 2.160129609647997, | |
| "learning_rate": 5.303779729862541e-07, | |
| "loss": 1.841, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.6981132075471698, | |
| "grad_norm": 2.3857343889060085, | |
| "learning_rate": 5.296131643347164e-07, | |
| "loss": 2.0521, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.699685534591195, | |
| "grad_norm": 2.2373548879864, | |
| "learning_rate": 5.288471896483535e-07, | |
| "loss": 1.644, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7012578616352201, | |
| "grad_norm": 2.16265297512191, | |
| "learning_rate": 5.280800538998141e-07, | |
| "loss": 1.8585, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.7028301886792453, | |
| "grad_norm": 2.0560711051498277, | |
| "learning_rate": 5.273117620692847e-07, | |
| "loss": 1.6482, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.7044025157232704, | |
| "grad_norm": 1.9542683421441711, | |
| "learning_rate": 5.265423191444563e-07, | |
| "loss": 2.0183, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.7059748427672956, | |
| "grad_norm": 2.1353978544627044, | |
| "learning_rate": 5.257717301204932e-07, | |
| "loss": 1.8363, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.7075471698113207, | |
| "grad_norm": 2.1447188396187995, | |
| "learning_rate": 5.25e-07, | |
| "loss": 1.6944, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7091194968553459, | |
| "grad_norm": 2.3547091263057056, | |
| "learning_rate": 5.242271337929891e-07, | |
| "loss": 1.7432, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.710691823899371, | |
| "grad_norm": 2.4270796532966337, | |
| "learning_rate": 5.234531365168486e-07, | |
| "loss": 1.6861, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.7122641509433962, | |
| "grad_norm": 2.3217701732544302, | |
| "learning_rate": 5.22678013196309e-07, | |
| "loss": 1.6177, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.7138364779874213, | |
| "grad_norm": 2.1886144838681982, | |
| "learning_rate": 5.219017688634111e-07, | |
| "loss": 1.6984, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.7154088050314465, | |
| "grad_norm": 1.9865673398125758, | |
| "learning_rate": 5.211244085574735e-07, | |
| "loss": 1.7418, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.7169811320754716, | |
| "grad_norm": 2.1224471005243677, | |
| "learning_rate": 5.203459373250593e-07, | |
| "loss": 1.7137, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.7185534591194969, | |
| "grad_norm": 2.010328916041691, | |
| "learning_rate": 5.195663602199438e-07, | |
| "loss": 1.5998, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.720125786163522, | |
| "grad_norm": 2.1617887359262964, | |
| "learning_rate": 5.187856823030815e-07, | |
| "loss": 1.7791, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.7216981132075472, | |
| "grad_norm": 2.088298143518636, | |
| "learning_rate": 5.180039086425733e-07, | |
| "loss": 1.717, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.7232704402515723, | |
| "grad_norm": 2.182318441419754, | |
| "learning_rate": 5.172210443136335e-07, | |
| "loss": 1.7386, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7248427672955975, | |
| "grad_norm": 2.24576929264229, | |
| "learning_rate": 5.164370943985573e-07, | |
| "loss": 1.6459, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.7264150943396226, | |
| "grad_norm": 2.04417918075731, | |
| "learning_rate": 5.156520639866867e-07, | |
| "loss": 1.8024, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.7279874213836478, | |
| "grad_norm": 2.112329388375878, | |
| "learning_rate": 5.14865958174379e-07, | |
| "loss": 2.0854, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.7295597484276729, | |
| "grad_norm": 2.016676563210969, | |
| "learning_rate": 5.140787820649725e-07, | |
| "loss": 1.7524, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7311320754716981, | |
| "grad_norm": 2.60378450644644, | |
| "learning_rate": 5.132905407687537e-07, | |
| "loss": 2.0874, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7327044025157232, | |
| "grad_norm": 2.0550298802585427, | |
| "learning_rate": 5.125012394029245e-07, | |
| "loss": 1.7928, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7342767295597484, | |
| "grad_norm": 2.109873492408979, | |
| "learning_rate": 5.117108830915686e-07, | |
| "loss": 1.7396, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.7358490566037735, | |
| "grad_norm": 2.075833532198868, | |
| "learning_rate": 5.109194769656182e-07, | |
| "loss": 1.8012, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.7374213836477987, | |
| "grad_norm": 2.2968922617399143, | |
| "learning_rate": 5.10127026162821e-07, | |
| "loss": 1.677, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.7389937106918238, | |
| "grad_norm": 2.1646580219404767, | |
| "learning_rate": 5.093335358277063e-07, | |
| "loss": 1.8778, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7405660377358491, | |
| "grad_norm": 2.077818575976857, | |
| "learning_rate": 5.085390111115525e-07, | |
| "loss": 1.7151, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.7421383647798742, | |
| "grad_norm": 2.1792667307239117, | |
| "learning_rate": 5.077434571723527e-07, | |
| "loss": 1.7331, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7437106918238994, | |
| "grad_norm": 2.007840890946543, | |
| "learning_rate": 5.069468791747818e-07, | |
| "loss": 1.6337, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.7452830188679245, | |
| "grad_norm": 2.0523492740975886, | |
| "learning_rate": 5.061492822901629e-07, | |
| "loss": 1.7339, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.7468553459119497, | |
| "grad_norm": 2.1645747688452492, | |
| "learning_rate": 5.053506716964335e-07, | |
| "loss": 1.9892, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7484276729559748, | |
| "grad_norm": 2.30254590731602, | |
| "learning_rate": 5.04551052578112e-07, | |
| "loss": 1.8834, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.3644242948478915, | |
| "learning_rate": 5.03750430126264e-07, | |
| "loss": 1.8034, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.7515723270440252, | |
| "grad_norm": 2.1460399637911096, | |
| "learning_rate": 5.029488095384689e-07, | |
| "loss": 1.8283, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7531446540880503, | |
| "grad_norm": 2.178578800063943, | |
| "learning_rate": 5.021461960187858e-07, | |
| "loss": 1.6951, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 2.0849853089545154, | |
| "learning_rate": 5.013425947777198e-07, | |
| "loss": 1.8294, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7562893081761006, | |
| "grad_norm": 2.0454648568366327, | |
| "learning_rate": 5.005380110321882e-07, | |
| "loss": 1.8758, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.7578616352201258, | |
| "grad_norm": 2.016873990373029, | |
| "learning_rate": 4.997324500054869e-07, | |
| "loss": 1.8153, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7594339622641509, | |
| "grad_norm": 2.3068593437311353, | |
| "learning_rate": 4.989259169272557e-07, | |
| "loss": 2.0439, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.7610062893081762, | |
| "grad_norm": 2.23324104378393, | |
| "learning_rate": 4.981184170334456e-07, | |
| "loss": 1.6537, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7625786163522013, | |
| "grad_norm": 2.194801843258506, | |
| "learning_rate": 4.973099555662832e-07, | |
| "loss": 1.7096, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7641509433962265, | |
| "grad_norm": 2.031449083549838, | |
| "learning_rate": 4.965005377742386e-07, | |
| "loss": 1.774, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7657232704402516, | |
| "grad_norm": 2.1043645294914577, | |
| "learning_rate": 4.956901689119894e-07, | |
| "loss": 1.8458, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7672955974842768, | |
| "grad_norm": 1.953254357792575, | |
| "learning_rate": 4.948788542403877e-07, | |
| "loss": 1.8636, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7688679245283019, | |
| "grad_norm": 2.146268467480728, | |
| "learning_rate": 4.940665990264263e-07, | |
| "loss": 1.9093, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7704402515723271, | |
| "grad_norm": 1.9666125611299663, | |
| "learning_rate": 4.932534085432032e-07, | |
| "loss": 1.7225, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7720125786163522, | |
| "grad_norm": 2.318583475321128, | |
| "learning_rate": 4.924392880698882e-07, | |
| "loss": 2.1748, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7735849056603774, | |
| "grad_norm": 2.0092675503475004, | |
| "learning_rate": 4.91624242891689e-07, | |
| "loss": 1.9759, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7751572327044025, | |
| "grad_norm": 2.1097488696357076, | |
| "learning_rate": 4.90808278299816e-07, | |
| "loss": 1.9701, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7767295597484277, | |
| "grad_norm": 2.142041500023684, | |
| "learning_rate": 4.899913995914485e-07, | |
| "loss": 1.6193, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7783018867924528, | |
| "grad_norm": 2.2559396370443427, | |
| "learning_rate": 4.891736120696999e-07, | |
| "loss": 1.7115, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.779874213836478, | |
| "grad_norm": 2.1488219924141596, | |
| "learning_rate": 4.883549210435841e-07, | |
| "loss": 1.9703, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7814465408805031, | |
| "grad_norm": 2.1397162032458232, | |
| "learning_rate": 4.8753533182798e-07, | |
| "loss": 1.7644, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7830188679245284, | |
| "grad_norm": 2.191504611692712, | |
| "learning_rate": 4.867148497435977e-07, | |
| "loss": 1.6881, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7845911949685535, | |
| "grad_norm": 2.000204406082109, | |
| "learning_rate": 4.858934801169436e-07, | |
| "loss": 1.6802, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7861635220125787, | |
| "grad_norm": 2.100996106775111, | |
| "learning_rate": 4.850712282802863e-07, | |
| "loss": 1.7683, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7861635220125787, | |
| "eval_sat2_MCTS_chains_SFT_val_loss": 1.7106590270996094, | |
| "eval_sat2_MCTS_chains_SFT_val_runtime": 92.0806, | |
| "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.164, | |
| "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.401, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7877358490566038, | |
| "grad_norm": 2.1566781241616413, | |
| "learning_rate": 4.842480995716212e-07, | |
| "loss": 1.7176, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.789308176100629, | |
| "grad_norm": 2.1925936773589934, | |
| "learning_rate": 4.834240993346361e-07, | |
| "loss": 1.8595, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7908805031446541, | |
| "grad_norm": 2.249072634890456, | |
| "learning_rate": 4.825992329186777e-07, | |
| "loss": 1.7456, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7924528301886793, | |
| "grad_norm": 2.1147929947398025, | |
| "learning_rate": 4.817735056787149e-07, | |
| "loss": 1.6531, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7940251572327044, | |
| "grad_norm": 2.0853013840737935, | |
| "learning_rate": 4.809469229753053e-07, | |
| "loss": 1.707, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7955974842767296, | |
| "grad_norm": 1.9814812102879638, | |
| "learning_rate": 4.8011949017456e-07, | |
| "loss": 1.8545, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7971698113207547, | |
| "grad_norm": 2.242933290063341, | |
| "learning_rate": 4.792912126481094e-07, | |
| "loss": 1.7509, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7987421383647799, | |
| "grad_norm": 2.057417183985654, | |
| "learning_rate": 4.784620957730669e-07, | |
| "loss": 1.8915, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.800314465408805, | |
| "grad_norm": 2.1269232658429, | |
| "learning_rate": 4.776321449319959e-07, | |
| "loss": 1.6437, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.8018867924528302, | |
| "grad_norm": 2.0386799297363574, | |
| "learning_rate": 4.76801365512873e-07, | |
| "loss": 1.7157, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8034591194968553, | |
| "grad_norm": 2.1312494355819007, | |
| "learning_rate": 4.759697629090542e-07, | |
| "loss": 1.8856, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.8050314465408805, | |
| "grad_norm": 2.036024812845296, | |
| "learning_rate": 4.751373425192395e-07, | |
| "loss": 1.6698, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.8066037735849056, | |
| "grad_norm": 2.3974518115445242, | |
| "learning_rate": 4.743041097474381e-07, | |
| "loss": 1.9648, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.8081761006289309, | |
| "grad_norm": 2.1241520800303846, | |
| "learning_rate": 4.734700700029329e-07, | |
| "loss": 1.7318, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.809748427672956, | |
| "grad_norm": 2.1431917628718162, | |
| "learning_rate": 4.7263522870024566e-07, | |
| "loss": 1.6973, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.8113207547169812, | |
| "grad_norm": 2.234624716136248, | |
| "learning_rate": 4.7179959125910164e-07, | |
| "loss": 1.7548, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.8128930817610063, | |
| "grad_norm": 2.161266418044944, | |
| "learning_rate": 4.709631631043949e-07, | |
| "loss": 1.605, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.8144654088050315, | |
| "grad_norm": 1.9836121455186402, | |
| "learning_rate": 4.701259496661527e-07, | |
| "loss": 1.8285, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.8160377358490566, | |
| "grad_norm": 2.173970653905368, | |
| "learning_rate": 4.6928795637949986e-07, | |
| "loss": 1.7383, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.8176100628930818, | |
| "grad_norm": 2.161721549869913, | |
| "learning_rate": 4.6844918868462445e-07, | |
| "loss": 1.5783, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8191823899371069, | |
| "grad_norm": 2.159956411342309, | |
| "learning_rate": 4.676096520267417e-07, | |
| "loss": 1.8492, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.8207547169811321, | |
| "grad_norm": 2.0881573745933553, | |
| "learning_rate": 4.6676935185605884e-07, | |
| "loss": 1.7813, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.8223270440251572, | |
| "grad_norm": 2.1031987642542935, | |
| "learning_rate": 4.659282936277399e-07, | |
| "loss": 2.0953, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.8238993710691824, | |
| "grad_norm": 2.205744158912345, | |
| "learning_rate": 4.6508648280187014e-07, | |
| "loss": 1.7749, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.8254716981132075, | |
| "grad_norm": 2.084552384171812, | |
| "learning_rate": 4.642439248434205e-07, | |
| "loss": 1.8273, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.8270440251572327, | |
| "grad_norm": 2.061048329367607, | |
| "learning_rate": 4.6340062522221245e-07, | |
| "loss": 1.759, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.8286163522012578, | |
| "grad_norm": 2.069167397462438, | |
| "learning_rate": 4.6255658941288197e-07, | |
| "loss": 1.8557, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.8301886792452831, | |
| "grad_norm": 2.144369600536378, | |
| "learning_rate": 4.6171182289484464e-07, | |
| "loss": 1.7656, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.8317610062893082, | |
| "grad_norm": 2.1514775909224375, | |
| "learning_rate": 4.608663311522597e-07, | |
| "loss": 1.8321, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 2.1170537274734507, | |
| "learning_rate": 4.6002011967399414e-07, | |
| "loss": 1.6458, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8349056603773585, | |
| "grad_norm": 2.357949753261125, | |
| "learning_rate": 4.591731939535879e-07, | |
| "loss": 1.6988, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.8364779874213837, | |
| "grad_norm": 2.010756013038571, | |
| "learning_rate": 4.583255594892175e-07, | |
| "loss": 1.6346, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.8380503144654088, | |
| "grad_norm": 2.0259740544494553, | |
| "learning_rate": 4.574772217836605e-07, | |
| "loss": 1.667, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.839622641509434, | |
| "grad_norm": 2.3278850594778793, | |
| "learning_rate": 4.5662818634426e-07, | |
| "loss": 1.867, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.8411949685534591, | |
| "grad_norm": 2.127352798058412, | |
| "learning_rate": 4.557784586828886e-07, | |
| "loss": 1.7475, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8427672955974843, | |
| "grad_norm": 2.0508704544909104, | |
| "learning_rate": 4.5492804431591275e-07, | |
| "loss": 1.9647, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.8443396226415094, | |
| "grad_norm": 1.9847000051548063, | |
| "learning_rate": 4.5407694876415707e-07, | |
| "loss": 1.7637, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.8459119496855346, | |
| "grad_norm": 2.146458433753693, | |
| "learning_rate": 4.532251775528685e-07, | |
| "loss": 1.9054, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8474842767295597, | |
| "grad_norm": 2.037086599135811, | |
| "learning_rate": 4.5237273621167977e-07, | |
| "loss": 1.9655, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.8490566037735849, | |
| "grad_norm": 2.338765254967049, | |
| "learning_rate": 4.5151963027457467e-07, | |
| "loss": 1.9605, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.85062893081761, | |
| "grad_norm": 2.2366764493368443, | |
| "learning_rate": 4.5066586527985104e-07, | |
| "loss": 1.8306, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.8522012578616353, | |
| "grad_norm": 1.9395938337987975, | |
| "learning_rate": 4.498114467700855e-07, | |
| "loss": 1.8506, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.8537735849056604, | |
| "grad_norm": 2.11126192377906, | |
| "learning_rate": 4.4895638029209737e-07, | |
| "loss": 2.0131, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.8553459119496856, | |
| "grad_norm": 1.9448520113288943, | |
| "learning_rate": 4.48100671396912e-07, | |
| "loss": 1.6417, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8569182389937107, | |
| "grad_norm": 2.17917918305954, | |
| "learning_rate": 4.4724432563972586e-07, | |
| "loss": 1.596, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.8584905660377359, | |
| "grad_norm": 2.2139144251618204, | |
| "learning_rate": 4.463873485798697e-07, | |
| "loss": 1.6793, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.860062893081761, | |
| "grad_norm": 2.1238330080137815, | |
| "learning_rate": 4.455297457807724e-07, | |
| "loss": 2.0614, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.8616352201257862, | |
| "grad_norm": 2.3023567382067873, | |
| "learning_rate": 4.4467152280992516e-07, | |
| "loss": 1.7894, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8632075471698113, | |
| "grad_norm": 2.041517573942539, | |
| "learning_rate": 4.4381268523884577e-07, | |
| "loss": 1.9789, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.8647798742138365, | |
| "grad_norm": 2.295893853414247, | |
| "learning_rate": 4.4295323864304123e-07, | |
| "loss": 1.6548, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8663522012578616, | |
| "grad_norm": 2.1107449778410152, | |
| "learning_rate": 4.420931886019727e-07, | |
| "loss": 1.8173, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.8679245283018868, | |
| "grad_norm": 1.9372269175313137, | |
| "learning_rate": 4.412325406990188e-07, | |
| "loss": 1.7247, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8694968553459119, | |
| "grad_norm": 1.8111270901761425, | |
| "learning_rate": 4.4037130052143943e-07, | |
| "loss": 1.7852, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.8710691823899371, | |
| "grad_norm": 2.0430528853223997, | |
| "learning_rate": 4.3950947366033957e-07, | |
| "loss": 1.8733, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8726415094339622, | |
| "grad_norm": 2.1440645400252416, | |
| "learning_rate": 4.386470657106326e-07, | |
| "loss": 1.7904, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8742138364779874, | |
| "grad_norm": 2.1458981176838026, | |
| "learning_rate": 4.377840822710047e-07, | |
| "loss": 1.8855, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8757861635220126, | |
| "grad_norm": 2.986445993481532, | |
| "learning_rate": 4.3692052894387775e-07, | |
| "loss": 1.7439, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8773584905660378, | |
| "grad_norm": 1.9740082789953175, | |
| "learning_rate": 4.360564113353734e-07, | |
| "loss": 1.7215, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8789308176100629, | |
| "grad_norm": 2.0624678582939984, | |
| "learning_rate": 4.3519173505527654e-07, | |
| "loss": 1.7609, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8805031446540881, | |
| "grad_norm": 2.1742856227918868, | |
| "learning_rate": 4.3432650571699923e-07, | |
| "loss": 1.7963, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8820754716981132, | |
| "grad_norm": 2.328267217087019, | |
| "learning_rate": 4.334607289375434e-07, | |
| "loss": 1.8014, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8836477987421384, | |
| "grad_norm": 2.2240480876262256, | |
| "learning_rate": 4.325944103374652e-07, | |
| "loss": 1.9335, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8852201257861635, | |
| "grad_norm": 2.0812109009351483, | |
| "learning_rate": 4.3172755554083857e-07, | |
| "loss": 1.6568, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8867924528301887, | |
| "grad_norm": 2.419224994100306, | |
| "learning_rate": 4.308601701752178e-07, | |
| "loss": 1.8096, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8883647798742138, | |
| "grad_norm": 2.291019726545779, | |
| "learning_rate": 4.299922598716017e-07, | |
| "loss": 1.6433, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.889937106918239, | |
| "grad_norm": 2.1682347425403634, | |
| "learning_rate": 4.2912383026439746e-07, | |
| "loss": 1.7372, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8915094339622641, | |
| "grad_norm": 2.2978680672025713, | |
| "learning_rate": 4.2825488699138295e-07, | |
| "loss": 1.8202, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8930817610062893, | |
| "grad_norm": 2.094968531097701, | |
| "learning_rate": 4.2738543569367104e-07, | |
| "loss": 1.6626, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8946540880503144, | |
| "grad_norm": 2.052297146112, | |
| "learning_rate": 4.2651548201567244e-07, | |
| "loss": 1.7142, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8962264150943396, | |
| "grad_norm": 2.318005287614226, | |
| "learning_rate": 4.2564503160505965e-07, | |
| "loss": 1.7043, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8977987421383647, | |
| "grad_norm": 2.2399641315697876, | |
| "learning_rate": 4.2477409011272947e-07, | |
| "loss": 1.7997, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.89937106918239, | |
| "grad_norm": 2.2279042572355157, | |
| "learning_rate": 4.239026631927671e-07, | |
| "loss": 1.8412, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.9009433962264151, | |
| "grad_norm": 2.2532345047567843, | |
| "learning_rate": 4.2303075650240874e-07, | |
| "loss": 1.7016, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.9025157232704403, | |
| "grad_norm": 2.0723779334034917, | |
| "learning_rate": 4.221583757020058e-07, | |
| "loss": 1.7483, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.9040880503144654, | |
| "grad_norm": 2.1869209764537527, | |
| "learning_rate": 4.21285526454987e-07, | |
| "loss": 1.7919, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.9056603773584906, | |
| "grad_norm": 2.2496647478161544, | |
| "learning_rate": 4.2041221442782234e-07, | |
| "loss": 1.714, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.9072327044025157, | |
| "grad_norm": 2.068857000999971, | |
| "learning_rate": 4.195384452899863e-07, | |
| "loss": 1.8463, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.9088050314465409, | |
| "grad_norm": 2.0359248854430034, | |
| "learning_rate": 4.1866422471392097e-07, | |
| "loss": 1.7426, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.910377358490566, | |
| "grad_norm": 2.228425159773193, | |
| "learning_rate": 4.1778955837499877e-07, | |
| "loss": 2.0427, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.9119496855345912, | |
| "grad_norm": 2.284187655683882, | |
| "learning_rate": 4.1691445195148624e-07, | |
| "loss": 1.847, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9135220125786163, | |
| "grad_norm": 2.1771730650916448, | |
| "learning_rate": 4.1603891112450694e-07, | |
| "loss": 1.9315, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.9150943396226415, | |
| "grad_norm": 2.1626615626582293, | |
| "learning_rate": 4.151629415780043e-07, | |
| "loss": 1.6456, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 2.305840039876449, | |
| "learning_rate": 4.142865489987052e-07, | |
| "loss": 1.6958, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.9182389937106918, | |
| "grad_norm": 2.154256346851341, | |
| "learning_rate": 4.1340973907608293e-07, | |
| "loss": 2.0085, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.9198113207547169, | |
| "grad_norm": 2.2041638948692537, | |
| "learning_rate": 4.125325175023197e-07, | |
| "loss": 2.1236, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9213836477987422, | |
| "grad_norm": 2.103411483270433, | |
| "learning_rate": 4.116548899722706e-07, | |
| "loss": 1.7799, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.9229559748427673, | |
| "grad_norm": 2.0063388038701904, | |
| "learning_rate": 4.1077686218342565e-07, | |
| "loss": 1.6889, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.9245283018867925, | |
| "grad_norm": 2.175245741274736, | |
| "learning_rate": 4.098984398358738e-07, | |
| "loss": 1.6925, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.9261006289308176, | |
| "grad_norm": 2.0336428356844634, | |
| "learning_rate": 4.090196286322654e-07, | |
| "loss": 1.8607, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.9276729559748428, | |
| "grad_norm": 2.037205174603859, | |
| "learning_rate": 4.0814043427777475e-07, | |
| "loss": 1.9117, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9292452830188679, | |
| "grad_norm": 2.30170118004087, | |
| "learning_rate": 4.072608624800641e-07, | |
| "loss": 1.9196, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.9308176100628931, | |
| "grad_norm": 2.1986833773630816, | |
| "learning_rate": 4.063809189492457e-07, | |
| "loss": 2.1265, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.9323899371069182, | |
| "grad_norm": 2.1016581569566033, | |
| "learning_rate": 4.055006093978452e-07, | |
| "loss": 1.8248, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.9339622641509434, | |
| "grad_norm": 2.318998947765389, | |
| "learning_rate": 4.0461993954076433e-07, | |
| "loss": 1.9183, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.9355345911949685, | |
| "grad_norm": 2.1280276120226396, | |
| "learning_rate": 4.037389150952441e-07, | |
| "loss": 1.5676, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.9371069182389937, | |
| "grad_norm": 2.074586892066852, | |
| "learning_rate": 4.028575417808272e-07, | |
| "loss": 1.5338, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.9386792452830188, | |
| "grad_norm": 2.0027829031033106, | |
| "learning_rate": 4.019758253193213e-07, | |
| "loss": 2.0435, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.940251572327044, | |
| "grad_norm": 1.9483550382264585, | |
| "learning_rate": 4.010937714347617e-07, | |
| "loss": 1.8436, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9418238993710691, | |
| "grad_norm": 2.1386021253495495, | |
| "learning_rate": 4.002113858533744e-07, | |
| "loss": 1.8209, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 2.2603117565548883, | |
| "learning_rate": 3.9932867430353857e-07, | |
| "loss": 1.6182, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9449685534591195, | |
| "grad_norm": 2.1949498745608036, | |
| "learning_rate": 3.9844564251574946e-07, | |
| "loss": 1.7724, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.9465408805031447, | |
| "grad_norm": 2.0756341905016047, | |
| "learning_rate": 3.9756229622258145e-07, | |
| "loss": 1.9824, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.9481132075471698, | |
| "grad_norm": 2.211419527471263, | |
| "learning_rate": 3.9667864115865083e-07, | |
| "loss": 1.764, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.949685534591195, | |
| "grad_norm": 1.9396778709562796, | |
| "learning_rate": 3.957946830605779e-07, | |
| "loss": 1.6183, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.9512578616352201, | |
| "grad_norm": 2.036577606226067, | |
| "learning_rate": 3.949104276669506e-07, | |
| "loss": 1.9739, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.9528301886792453, | |
| "grad_norm": 2.1523021934849496, | |
| "learning_rate": 3.940258807182871e-07, | |
| "loss": 1.8051, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9544025157232704, | |
| "grad_norm": 2.128010666946978, | |
| "learning_rate": 3.931410479569977e-07, | |
| "loss": 1.9472, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.9559748427672956, | |
| "grad_norm": 2.101658911780381, | |
| "learning_rate": 3.9225593512734856e-07, | |
| "loss": 1.7936, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.9575471698113207, | |
| "grad_norm": 2.0113616534057583, | |
| "learning_rate": 3.913705479754242e-07, | |
| "loss": 1.9101, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.9591194968553459, | |
| "grad_norm": 2.047513346746328, | |
| "learning_rate": 3.9048489224908957e-07, | |
| "loss": 1.7896, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.960691823899371, | |
| "grad_norm": 2.174472546503524, | |
| "learning_rate": 3.8959897369795354e-07, | |
| "loss": 1.8692, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.9622641509433962, | |
| "grad_norm": 2.1246505746426827, | |
| "learning_rate": 3.8871279807333106e-07, | |
| "loss": 1.6824, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9638364779874213, | |
| "grad_norm": 2.1762242058390773, | |
| "learning_rate": 3.8782637112820597e-07, | |
| "loss": 1.9873, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.9654088050314465, | |
| "grad_norm": 1.9373168302806898, | |
| "learning_rate": 3.8693969861719375e-07, | |
| "loss": 1.936, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9669811320754716, | |
| "grad_norm": 2.255844762606453, | |
| "learning_rate": 3.8605278629650403e-07, | |
| "loss": 1.7877, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.9685534591194969, | |
| "grad_norm": 2.108356173918864, | |
| "learning_rate": 3.851656399239032e-07, | |
| "loss": 1.8975, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.970125786163522, | |
| "grad_norm": 2.2742019781049225, | |
| "learning_rate": 3.8427826525867735e-07, | |
| "loss": 1.6503, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.9716981132075472, | |
| "grad_norm": 2.2147090179932873, | |
| "learning_rate": 3.8339066806159426e-07, | |
| "loss": 1.8588, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9732704402515723, | |
| "grad_norm": 2.2344073830055646, | |
| "learning_rate": 3.825028540948665e-07, | |
| "loss": 2.0376, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.9748427672955975, | |
| "grad_norm": 2.068870035023368, | |
| "learning_rate": 3.8161482912211413e-07, | |
| "loss": 1.8621, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9764150943396226, | |
| "grad_norm": 2.0716828751213687, | |
| "learning_rate": 3.807265989083269e-07, | |
| "loss": 1.9676, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.9779874213836478, | |
| "grad_norm": 2.173084402473959, | |
| "learning_rate": 3.798381692198266e-07, | |
| "loss": 1.5456, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9795597484276729, | |
| "grad_norm": 2.123085754997638, | |
| "learning_rate": 3.7894954582423053e-07, | |
| "loss": 1.693, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.9811320754716981, | |
| "grad_norm": 1.9313674111763137, | |
| "learning_rate": 3.780607344904134e-07, | |
| "loss": 1.8215, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9827044025157232, | |
| "grad_norm": 1.9446815492696663, | |
| "learning_rate": 3.7717174098846976e-07, | |
| "loss": 1.6901, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9842767295597484, | |
| "grad_norm": 2.1738358620885614, | |
| "learning_rate": 3.76282571089677e-07, | |
| "loss": 1.7188, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9858490566037735, | |
| "grad_norm": 1.9579450319413363, | |
| "learning_rate": 3.753932305664576e-07, | |
| "loss": 1.8272, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9874213836477987, | |
| "grad_norm": 2.12572045051787, | |
| "learning_rate": 3.7450372519234174e-07, | |
| "loss": 1.742, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9889937106918238, | |
| "grad_norm": 2.191799672435851, | |
| "learning_rate": 3.736140607419297e-07, | |
| "loss": 1.5517, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9905660377358491, | |
| "grad_norm": 1.8922363202506955, | |
| "learning_rate": 3.7272424299085454e-07, | |
| "loss": 1.8464, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9921383647798742, | |
| "grad_norm": 2.179698661804858, | |
| "learning_rate": 3.7183427771574476e-07, | |
| "loss": 1.6617, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9937106918238994, | |
| "grad_norm": 2.2771710047319074, | |
| "learning_rate": 3.7094417069418617e-07, | |
| "loss": 2.0336, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9952830188679245, | |
| "grad_norm": 2.006071135340339, | |
| "learning_rate": 3.7005392770468494e-07, | |
| "loss": 1.5935, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9968553459119497, | |
| "grad_norm": 2.1112816678317468, | |
| "learning_rate": 3.691635545266301e-07, | |
| "loss": 1.9101, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9984276729559748, | |
| "grad_norm": 2.2992568121548644, | |
| "learning_rate": 3.682730569402557e-07, | |
| "loss": 1.7981, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.0674766445976664, | |
| "learning_rate": 3.6738244072660335e-07, | |
| "loss": 1.8885, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.001572327044025, | |
| "grad_norm": 2.0323458623421025, | |
| "learning_rate": 3.6649171166748505e-07, | |
| "loss": 1.7354, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.0031446540880504, | |
| "grad_norm": 2.3263404881491803, | |
| "learning_rate": 3.656008755454452e-07, | |
| "loss": 1.8682, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.0047169811320755, | |
| "grad_norm": 2.0374703234555405, | |
| "learning_rate": 3.647099381437232e-07, | |
| "loss": 1.8107, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.0062893081761006, | |
| "grad_norm": 2.1124967616004855, | |
| "learning_rate": 3.6381890524621594e-07, | |
| "loss": 1.8595, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0078616352201257, | |
| "grad_norm": 1.9775535226958436, | |
| "learning_rate": 3.629277826374406e-07, | |
| "loss": 1.6904, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.009433962264151, | |
| "grad_norm": 2.0203547455163844, | |
| "learning_rate": 3.6203657610249633e-07, | |
| "loss": 1.825, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.0110062893081762, | |
| "grad_norm": 2.0758681388805025, | |
| "learning_rate": 3.6114529142702736e-07, | |
| "loss": 1.7352, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.0125786163522013, | |
| "grad_norm": 2.0224811786868906, | |
| "learning_rate": 3.602539343971851e-07, | |
| "loss": 1.6538, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.0141509433962264, | |
| "grad_norm": 2.2197437087910217, | |
| "learning_rate": 3.593625107995906e-07, | |
| "loss": 1.83, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.0157232704402517, | |
| "grad_norm": 1.9889586059387878, | |
| "learning_rate": 3.5847102642129743e-07, | |
| "loss": 1.7737, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.0172955974842768, | |
| "grad_norm": 2.19853180662544, | |
| "learning_rate": 3.575794870497533e-07, | |
| "loss": 1.8649, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.0188679245283019, | |
| "grad_norm": 2.0550115124535187, | |
| "learning_rate": 3.566878984727632e-07, | |
| "loss": 1.6925, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.020440251572327, | |
| "grad_norm": 2.2183115138318015, | |
| "learning_rate": 3.557962664784515e-07, | |
| "loss": 1.6351, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.0220125786163523, | |
| "grad_norm": 1.9968891207031567, | |
| "learning_rate": 3.549045968552242e-07, | |
| "loss": 1.8444, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0235849056603774, | |
| "grad_norm": 2.0649732438966435, | |
| "learning_rate": 3.540128953917321e-07, | |
| "loss": 1.7249, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.0251572327044025, | |
| "grad_norm": 2.137990641187568, | |
| "learning_rate": 3.531211678768324e-07, | |
| "loss": 1.8187, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.0267295597484276, | |
| "grad_norm": 2.228866151054972, | |
| "learning_rate": 3.5222942009955113e-07, | |
| "loss": 1.6852, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.028301886792453, | |
| "grad_norm": 2.0107405877299294, | |
| "learning_rate": 3.513376578490464e-07, | |
| "loss": 1.8454, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.029874213836478, | |
| "grad_norm": 2.0486309547186834, | |
| "learning_rate": 3.5044588691457e-07, | |
| "loss": 1.9823, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.0314465408805031, | |
| "grad_norm": 2.1373140135446995, | |
| "learning_rate": 3.4955411308543004e-07, | |
| "loss": 1.5547, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.0330188679245282, | |
| "grad_norm": 1.9722852672562703, | |
| "learning_rate": 3.486623421509536e-07, | |
| "loss": 1.6565, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.0345911949685536, | |
| "grad_norm": 2.071392217463609, | |
| "learning_rate": 3.477705799004489e-07, | |
| "loss": 1.6954, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.0361635220125787, | |
| "grad_norm": 2.1424736101079196, | |
| "learning_rate": 3.4687883212316763e-07, | |
| "loss": 1.6984, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.0377358490566038, | |
| "grad_norm": 2.17329780648581, | |
| "learning_rate": 3.459871046082678e-07, | |
| "loss": 1.733, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0393081761006289, | |
| "grad_norm": 2.0114875931787335, | |
| "learning_rate": 3.4509540314477576e-07, | |
| "loss": 1.7247, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.0408805031446542, | |
| "grad_norm": 2.1612750227336828, | |
| "learning_rate": 3.442037335215486e-07, | |
| "loss": 1.8257, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.0424528301886793, | |
| "grad_norm": 2.09654475278469, | |
| "learning_rate": 3.433121015272368e-07, | |
| "loss": 1.6016, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.0440251572327044, | |
| "grad_norm": 2.112814527327424, | |
| "learning_rate": 3.4242051295024674e-07, | |
| "loss": 1.8498, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.0455974842767295, | |
| "grad_norm": 2.137137873151086, | |
| "learning_rate": 3.4152897357870254e-07, | |
| "loss": 1.9413, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.0471698113207548, | |
| "grad_norm": 1.9781507699983039, | |
| "learning_rate": 3.406374892004093e-07, | |
| "loss": 2.0358, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.04874213836478, | |
| "grad_norm": 2.1445450846607037, | |
| "learning_rate": 3.39746065602815e-07, | |
| "loss": 1.5967, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.050314465408805, | |
| "grad_norm": 2.2106487525905156, | |
| "learning_rate": 3.388547085729726e-07, | |
| "loss": 1.6929, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.0518867924528301, | |
| "grad_norm": 2.1632446260742157, | |
| "learning_rate": 3.379634238975036e-07, | |
| "loss": 1.9754, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.0534591194968554, | |
| "grad_norm": 2.2423152444572385, | |
| "learning_rate": 3.370722173625594e-07, | |
| "loss": 1.8086, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0550314465408805, | |
| "grad_norm": 2.086454515712129, | |
| "learning_rate": 3.36181094753784e-07, | |
| "loss": 1.7976, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.0566037735849056, | |
| "grad_norm": 2.338596181941415, | |
| "learning_rate": 3.3529006185627677e-07, | |
| "loss": 1.8634, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.0581761006289307, | |
| "grad_norm": 2.2541670873895265, | |
| "learning_rate": 3.343991244545549e-07, | |
| "loss": 1.6529, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.059748427672956, | |
| "grad_norm": 2.2022765709519017, | |
| "learning_rate": 3.3350828833251497e-07, | |
| "loss": 1.9213, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.0613207547169812, | |
| "grad_norm": 2.033074601173319, | |
| "learning_rate": 3.326175592733966e-07, | |
| "loss": 1.8121, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.0628930817610063, | |
| "grad_norm": 2.0295160070279077, | |
| "learning_rate": 3.3172694305974435e-07, | |
| "loss": 1.8344, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.0644654088050314, | |
| "grad_norm": 2.3057324232200007, | |
| "learning_rate": 3.308364454733699e-07, | |
| "loss": 1.6693, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.0660377358490567, | |
| "grad_norm": 2.078695505755079, | |
| "learning_rate": 3.29946072295315e-07, | |
| "loss": 1.6955, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.0676100628930818, | |
| "grad_norm": 2.2250857375132016, | |
| "learning_rate": 3.290558293058139e-07, | |
| "loss": 1.775, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.069182389937107, | |
| "grad_norm": 1.9629837170183118, | |
| "learning_rate": 3.2816572228425526e-07, | |
| "loss": 1.5546, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.070754716981132, | |
| "grad_norm": 2.124163961606303, | |
| "learning_rate": 3.272757570091453e-07, | |
| "loss": 1.6283, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.0723270440251573, | |
| "grad_norm": 1.9918668591962903, | |
| "learning_rate": 3.2638593925807033e-07, | |
| "loss": 1.72, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.0738993710691824, | |
| "grad_norm": 2.264977591391123, | |
| "learning_rate": 3.2549627480765834e-07, | |
| "loss": 1.8045, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.0754716981132075, | |
| "grad_norm": 2.02537032266525, | |
| "learning_rate": 3.246067694335424e-07, | |
| "loss": 1.8902, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0770440251572326, | |
| "grad_norm": 2.4603365936273995, | |
| "learning_rate": 3.23717428910323e-07, | |
| "loss": 1.6676, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.078616352201258, | |
| "grad_norm": 2.2248140600403277, | |
| "learning_rate": 3.2282825901153026e-07, | |
| "loss": 1.7778, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.080188679245283, | |
| "grad_norm": 2.115407517557494, | |
| "learning_rate": 3.219392655095866e-07, | |
| "loss": 1.7059, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.0817610062893082, | |
| "grad_norm": 2.1771102044112136, | |
| "learning_rate": 3.2105045417576954e-07, | |
| "loss": 1.7673, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0833333333333333, | |
| "grad_norm": 2.1464674623014672, | |
| "learning_rate": 3.2016183078017346e-07, | |
| "loss": 1.8022, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.0849056603773586, | |
| "grad_norm": 2.0997445957592307, | |
| "learning_rate": 3.192734010916732e-07, | |
| "loss": 1.8693, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0864779874213837, | |
| "grad_norm": 2.094025022111356, | |
| "learning_rate": 3.1838517087788595e-07, | |
| "loss": 1.7098, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.0880503144654088, | |
| "grad_norm": 2.070698100046358, | |
| "learning_rate": 3.1749714590513353e-07, | |
| "loss": 1.8247, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0896226415094339, | |
| "grad_norm": 2.062398740678803, | |
| "learning_rate": 3.166093319384057e-07, | |
| "loss": 1.5966, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.0911949685534592, | |
| "grad_norm": 2.087955408161737, | |
| "learning_rate": 3.157217347413226e-07, | |
| "loss": 1.7022, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0927672955974843, | |
| "grad_norm": 2.1423902860765303, | |
| "learning_rate": 3.1483436007609676e-07, | |
| "loss": 1.847, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.0943396226415094, | |
| "grad_norm": 2.115585692766892, | |
| "learning_rate": 3.13947213703496e-07, | |
| "loss": 1.7229, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0959119496855345, | |
| "grad_norm": 2.064012528421271, | |
| "learning_rate": 3.1306030138280617e-07, | |
| "loss": 1.7403, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.0974842767295598, | |
| "grad_norm": 2.2178418206871275, | |
| "learning_rate": 3.1217362887179405e-07, | |
| "loss": 1.7121, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.099056603773585, | |
| "grad_norm": 2.267368653411109, | |
| "learning_rate": 3.1128720192666896e-07, | |
| "loss": 1.6441, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.10062893081761, | |
| "grad_norm": 2.0531341543445674, | |
| "learning_rate": 3.1040102630204643e-07, | |
| "loss": 1.8517, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1022012578616351, | |
| "grad_norm": 2.090184163075423, | |
| "learning_rate": 3.0951510775091045e-07, | |
| "loss": 1.7339, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.1037735849056605, | |
| "grad_norm": 2.271451855248717, | |
| "learning_rate": 3.086294520245758e-07, | |
| "loss": 1.6841, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.1053459119496856, | |
| "grad_norm": 2.1230777563905647, | |
| "learning_rate": 3.0774406487265135e-07, | |
| "loss": 1.6153, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.1069182389937107, | |
| "grad_norm": 2.129575845582385, | |
| "learning_rate": 3.0685895204300237e-07, | |
| "loss": 1.8306, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.1084905660377358, | |
| "grad_norm": 2.127822835214652, | |
| "learning_rate": 3.0597411928171293e-07, | |
| "loss": 1.8781, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.110062893081761, | |
| "grad_norm": 2.0801378059307445, | |
| "learning_rate": 3.0508957233304925e-07, | |
| "loss": 1.7194, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.1116352201257862, | |
| "grad_norm": 2.0975124821955724, | |
| "learning_rate": 3.042053169394221e-07, | |
| "loss": 1.713, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.1132075471698113, | |
| "grad_norm": 1.9540390287342302, | |
| "learning_rate": 3.033213588413492e-07, | |
| "loss": 1.8067, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.1147798742138364, | |
| "grad_norm": 2.1885065328152775, | |
| "learning_rate": 3.0243770377741847e-07, | |
| "loss": 1.6779, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.1163522012578617, | |
| "grad_norm": 2.016792783037323, | |
| "learning_rate": 3.0155435748425056e-07, | |
| "loss": 1.8584, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.1179245283018868, | |
| "grad_norm": 2.0532056644149304, | |
| "learning_rate": 3.006713256964614e-07, | |
| "loss": 1.7892, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.119496855345912, | |
| "grad_norm": 2.1717833645402522, | |
| "learning_rate": 2.9978861414662555e-07, | |
| "loss": 1.7875, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.121069182389937, | |
| "grad_norm": 2.329982527967294, | |
| "learning_rate": 2.989062285652383e-07, | |
| "loss": 1.716, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.1226415094339623, | |
| "grad_norm": 2.102564151366302, | |
| "learning_rate": 2.9802417468067866e-07, | |
| "loss": 1.725, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.1242138364779874, | |
| "grad_norm": 2.005243042670392, | |
| "learning_rate": 2.9714245821917273e-07, | |
| "loss": 1.694, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.1257861635220126, | |
| "grad_norm": 2.209440331648405, | |
| "learning_rate": 2.9626108490475596e-07, | |
| "loss": 1.7861, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.1273584905660377, | |
| "grad_norm": 2.161682497833542, | |
| "learning_rate": 2.9538006045923564e-07, | |
| "loss": 1.9542, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.128930817610063, | |
| "grad_norm": 2.0410927204511093, | |
| "learning_rate": 2.944993906021547e-07, | |
| "loss": 2.0458, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.130503144654088, | |
| "grad_norm": 2.2980810899615585, | |
| "learning_rate": 2.936190810507544e-07, | |
| "loss": 1.6057, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.1320754716981132, | |
| "grad_norm": 2.1147420056792545, | |
| "learning_rate": 2.92739137519936e-07, | |
| "loss": 1.7409, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1336477987421383, | |
| "grad_norm": 2.2130777267174326, | |
| "learning_rate": 2.9185956572222527e-07, | |
| "loss": 1.7651, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.1352201257861636, | |
| "grad_norm": 2.2140788506158184, | |
| "learning_rate": 2.9098037136773475e-07, | |
| "loss": 1.7143, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.1367924528301887, | |
| "grad_norm": 2.1122237127505117, | |
| "learning_rate": 2.9010156016412616e-07, | |
| "loss": 1.8633, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.1383647798742138, | |
| "grad_norm": 1.8051995194002572, | |
| "learning_rate": 2.8922313781657437e-07, | |
| "loss": 1.7253, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.139937106918239, | |
| "grad_norm": 2.078502586781223, | |
| "learning_rate": 2.8834511002772954e-07, | |
| "loss": 1.7383, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.1415094339622642, | |
| "grad_norm": 2.0343412709026163, | |
| "learning_rate": 2.8746748249768034e-07, | |
| "loss": 1.6715, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.1430817610062893, | |
| "grad_norm": 2.3354289699172415, | |
| "learning_rate": 2.865902609239171e-07, | |
| "loss": 1.5695, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.1446540880503144, | |
| "grad_norm": 2.012287905864566, | |
| "learning_rate": 2.8571345100129475e-07, | |
| "loss": 1.5644, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.1462264150943395, | |
| "grad_norm": 2.1335991982813343, | |
| "learning_rate": 2.848370584219957e-07, | |
| "loss": 1.7191, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.1477987421383649, | |
| "grad_norm": 2.042910279341824, | |
| "learning_rate": 2.839610888754931e-07, | |
| "loss": 1.8231, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.14937106918239, | |
| "grad_norm": 2.104058986286464, | |
| "learning_rate": 2.8308554804851373e-07, | |
| "loss": 1.9903, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.150943396226415, | |
| "grad_norm": 2.2528134865633334, | |
| "learning_rate": 2.8221044162500126e-07, | |
| "loss": 1.618, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.1525157232704402, | |
| "grad_norm": 2.169090932499384, | |
| "learning_rate": 2.81335775286079e-07, | |
| "loss": 1.8708, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.1540880503144655, | |
| "grad_norm": 2.129509499660428, | |
| "learning_rate": 2.804615547100136e-07, | |
| "loss": 1.8978, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.1556603773584906, | |
| "grad_norm": 2.1659278567954, | |
| "learning_rate": 2.795877855721777e-07, | |
| "loss": 1.5827, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.1572327044025157, | |
| "grad_norm": 2.1781926760780324, | |
| "learning_rate": 2.7871447354501304e-07, | |
| "loss": 2.1652, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.1588050314465408, | |
| "grad_norm": 2.281659140467624, | |
| "learning_rate": 2.7784162429799415e-07, | |
| "loss": 1.68, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.1603773584905661, | |
| "grad_norm": 2.379387037873376, | |
| "learning_rate": 2.7696924349759123e-07, | |
| "loss": 1.6087, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.1619496855345912, | |
| "grad_norm": 2.0112044543536616, | |
| "learning_rate": 2.7609733680723295e-07, | |
| "loss": 1.5596, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.1635220125786163, | |
| "grad_norm": 2.053191724834195, | |
| "learning_rate": 2.7522590988727045e-07, | |
| "loss": 1.8064, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1650943396226414, | |
| "grad_norm": 2.0861593681907804, | |
| "learning_rate": 2.743549683949404e-07, | |
| "loss": 1.7197, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 2.139575160657706, | |
| "learning_rate": 2.734845179843275e-07, | |
| "loss": 1.7399, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.1682389937106918, | |
| "grad_norm": 2.020388423393523, | |
| "learning_rate": 2.726145643063289e-07, | |
| "loss": 1.9182, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.169811320754717, | |
| "grad_norm": 2.0021432569973623, | |
| "learning_rate": 2.717451130086171e-07, | |
| "loss": 1.8025, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.171383647798742, | |
| "grad_norm": 2.166154517705087, | |
| "learning_rate": 2.7087616973560256e-07, | |
| "loss": 1.76, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.1729559748427674, | |
| "grad_norm": 2.172540185241632, | |
| "learning_rate": 2.7000774012839826e-07, | |
| "loss": 1.739, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.1745283018867925, | |
| "grad_norm": 2.1123768460041568, | |
| "learning_rate": 2.6913982982478235e-07, | |
| "loss": 1.9205, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.1761006289308176, | |
| "grad_norm": 2.064019777362976, | |
| "learning_rate": 2.6827244445916145e-07, | |
| "loss": 1.6711, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.1776729559748427, | |
| "grad_norm": 2.4454300470632706, | |
| "learning_rate": 2.6740558966253464e-07, | |
| "loss": 1.9951, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.179245283018868, | |
| "grad_norm": 2.202888650923341, | |
| "learning_rate": 2.665392710624566e-07, | |
| "loss": 1.8166, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.179245283018868, | |
| "eval_sat2_MCTS_chains_SFT_val_loss": 1.7027679681777954, | |
| "eval_sat2_MCTS_chains_SFT_val_runtime": 91.7741, | |
| "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.201, | |
| "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.180817610062893, | |
| "grad_norm": 2.1197579207922765, | |
| "learning_rate": 2.656734942830008e-07, | |
| "loss": 1.789, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.1823899371069182, | |
| "grad_norm": 2.0955019518228646, | |
| "learning_rate": 2.648082649447234e-07, | |
| "loss": 1.8041, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1839622641509433, | |
| "grad_norm": 2.286056725260083, | |
| "learning_rate": 2.639435886646267e-07, | |
| "loss": 1.9274, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.1855345911949686, | |
| "grad_norm": 2.078238808064587, | |
| "learning_rate": 2.630794710561223e-07, | |
| "loss": 1.841, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1871069182389937, | |
| "grad_norm": 2.0466496074566414, | |
| "learning_rate": 2.622159177289953e-07, | |
| "loss": 1.8408, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.1886792452830188, | |
| "grad_norm": 2.0829604905191603, | |
| "learning_rate": 2.6135293428936736e-07, | |
| "loss": 1.7203, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.190251572327044, | |
| "grad_norm": 1.9719112678002333, | |
| "learning_rate": 2.604905263396604e-07, | |
| "loss": 1.8256, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.1918238993710693, | |
| "grad_norm": 2.212861952610994, | |
| "learning_rate": 2.596286994785605e-07, | |
| "loss": 1.7089, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1933962264150944, | |
| "grad_norm": 2.1479617774562176, | |
| "learning_rate": 2.5876745930098133e-07, | |
| "loss": 1.788, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.1949685534591195, | |
| "grad_norm": 2.180325040687691, | |
| "learning_rate": 2.5790681139802733e-07, | |
| "loss": 1.8424, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1965408805031448, | |
| "grad_norm": 2.0959805506163347, | |
| "learning_rate": 2.570467613569588e-07, | |
| "loss": 1.9297, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.1981132075471699, | |
| "grad_norm": 2.2360792245611174, | |
| "learning_rate": 2.5618731476115436e-07, | |
| "loss": 1.7221, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.199685534591195, | |
| "grad_norm": 2.1402022569964885, | |
| "learning_rate": 2.553284771900748e-07, | |
| "loss": 1.7152, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.20125786163522, | |
| "grad_norm": 2.347076356695893, | |
| "learning_rate": 2.544702542192276e-07, | |
| "loss": 1.9218, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.2028301886792452, | |
| "grad_norm": 2.1235602631273194, | |
| "learning_rate": 2.5361265142013034e-07, | |
| "loss": 1.7872, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.2044025157232705, | |
| "grad_norm": 2.178942765064468, | |
| "learning_rate": 2.527556743602741e-07, | |
| "loss": 1.7043, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.2059748427672956, | |
| "grad_norm": 2.180716726403436, | |
| "learning_rate": 2.5189932860308796e-07, | |
| "loss": 1.7261, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.2075471698113207, | |
| "grad_norm": 2.1659838220822016, | |
| "learning_rate": 2.510436197079026e-07, | |
| "loss": 1.5518, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.209119496855346, | |
| "grad_norm": 2.0823803015545144, | |
| "learning_rate": 2.501885532299145e-07, | |
| "loss": 1.6306, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.2106918238993711, | |
| "grad_norm": 2.0975682440855237, | |
| "learning_rate": 2.4933413472014904e-07, | |
| "loss": 1.7477, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.2122641509433962, | |
| "grad_norm": 2.3304268703171895, | |
| "learning_rate": 2.4848036972542535e-07, | |
| "loss": 1.9249, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.2138364779874213, | |
| "grad_norm": 2.158526522953037, | |
| "learning_rate": 2.476272637883203e-07, | |
| "loss": 1.9818, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.2154088050314464, | |
| "grad_norm": 2.4996434803308887, | |
| "learning_rate": 2.467748224471316e-07, | |
| "loss": 1.8969, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.2169811320754718, | |
| "grad_norm": 2.124488199922616, | |
| "learning_rate": 2.4592305123584285e-07, | |
| "loss": 1.6033, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.2185534591194969, | |
| "grad_norm": 2.3781806613712124, | |
| "learning_rate": 2.4507195568408727e-07, | |
| "loss": 1.8226, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.220125786163522, | |
| "grad_norm": 2.021004570791569, | |
| "learning_rate": 2.4422154131711143e-07, | |
| "loss": 1.7341, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.2216981132075473, | |
| "grad_norm": 2.1594846096039277, | |
| "learning_rate": 2.4337181365574e-07, | |
| "loss": 1.9486, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.2232704402515724, | |
| "grad_norm": 2.1845057975898987, | |
| "learning_rate": 2.4252277821633946e-07, | |
| "loss": 1.543, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.2248427672955975, | |
| "grad_norm": 2.344507158866424, | |
| "learning_rate": 2.4167444051078245e-07, | |
| "loss": 1.8528, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.2264150943396226, | |
| "grad_norm": 2.0236986373716004, | |
| "learning_rate": 2.40826806046412e-07, | |
| "loss": 1.7242, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2279874213836477, | |
| "grad_norm": 2.078201977490899, | |
| "learning_rate": 2.399798803260058e-07, | |
| "loss": 1.9902, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.229559748427673, | |
| "grad_norm": 2.0183020857677727, | |
| "learning_rate": 2.3913366884774034e-07, | |
| "loss": 1.7074, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.2311320754716981, | |
| "grad_norm": 2.1592329303327453, | |
| "learning_rate": 2.382881771051553e-07, | |
| "loss": 1.8272, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.2327044025157232, | |
| "grad_norm": 1.920219447662245, | |
| "learning_rate": 2.3744341058711808e-07, | |
| "loss": 1.9844, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.2342767295597485, | |
| "grad_norm": 2.210906315130372, | |
| "learning_rate": 2.3659937477778755e-07, | |
| "loss": 1.7823, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.2358490566037736, | |
| "grad_norm": 2.1278759914686547, | |
| "learning_rate": 2.3575607515657943e-07, | |
| "loss": 1.9001, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.2374213836477987, | |
| "grad_norm": 2.1057924683593314, | |
| "learning_rate": 2.3491351719812993e-07, | |
| "loss": 1.6949, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.2389937106918238, | |
| "grad_norm": 2.0673174239605117, | |
| "learning_rate": 2.3407170637226013e-07, | |
| "loss": 1.8988, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.240566037735849, | |
| "grad_norm": 1.9808848675265713, | |
| "learning_rate": 2.332306481439411e-07, | |
| "loss": 2.0551, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.2421383647798743, | |
| "grad_norm": 2.1946700318779704, | |
| "learning_rate": 2.3239034797325837e-07, | |
| "loss": 1.8481, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.2437106918238994, | |
| "grad_norm": 2.145296214864516, | |
| "learning_rate": 2.3155081131537557e-07, | |
| "loss": 1.601, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.2452830188679245, | |
| "grad_norm": 2.21099564914462, | |
| "learning_rate": 2.3071204362050016e-07, | |
| "loss": 1.6916, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.2468553459119498, | |
| "grad_norm": 2.149817455185584, | |
| "learning_rate": 2.298740503338474e-07, | |
| "loss": 1.7328, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.248427672955975, | |
| "grad_norm": 2.083870636996782, | |
| "learning_rate": 2.290368368956051e-07, | |
| "loss": 1.7316, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 2.2190350027051884, | |
| "learning_rate": 2.2820040874089833e-07, | |
| "loss": 1.7335, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.251572327044025, | |
| "grad_norm": 2.0847463644872732, | |
| "learning_rate": 2.2736477129975447e-07, | |
| "loss": 1.862, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.2531446540880502, | |
| "grad_norm": 2.1940464195440397, | |
| "learning_rate": 2.2652992999706714e-07, | |
| "loss": 1.909, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.2547169811320755, | |
| "grad_norm": 2.0762267573536723, | |
| "learning_rate": 2.2569589025256186e-07, | |
| "loss": 1.7823, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.2562893081761006, | |
| "grad_norm": 2.2023094376465124, | |
| "learning_rate": 2.248626574807605e-07, | |
| "loss": 1.7458, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.2578616352201257, | |
| "grad_norm": 2.093984864757531, | |
| "learning_rate": 2.2403023709094586e-07, | |
| "loss": 1.7313, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.259433962264151, | |
| "grad_norm": 2.092496952956971, | |
| "learning_rate": 2.2319863448712701e-07, | |
| "loss": 1.6546, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.2610062893081762, | |
| "grad_norm": 2.000639985772567, | |
| "learning_rate": 2.2236785506800412e-07, | |
| "loss": 1.7073, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.2625786163522013, | |
| "grad_norm": 1.948987350292626, | |
| "learning_rate": 2.21537904226933e-07, | |
| "loss": 2.023, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.2641509433962264, | |
| "grad_norm": 2.112242215195991, | |
| "learning_rate": 2.2070878735189064e-07, | |
| "loss": 1.7933, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.2657232704402515, | |
| "grad_norm": 2.055178325234487, | |
| "learning_rate": 2.1988050982543993e-07, | |
| "loss": 1.8308, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.2672955974842768, | |
| "grad_norm": 2.169844090849057, | |
| "learning_rate": 2.190530770246948e-07, | |
| "loss": 1.7516, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.2688679245283019, | |
| "grad_norm": 2.0219060313319077, | |
| "learning_rate": 2.1822649432128516e-07, | |
| "loss": 1.5981, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.270440251572327, | |
| "grad_norm": 2.201206530110713, | |
| "learning_rate": 2.1740076708132223e-07, | |
| "loss": 1.8104, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.2720125786163523, | |
| "grad_norm": 2.1652601552896447, | |
| "learning_rate": 2.165759006653639e-07, | |
| "loss": 1.7986, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.2735849056603774, | |
| "grad_norm": 2.0415201426832197, | |
| "learning_rate": 2.1575190042837886e-07, | |
| "loss": 1.7244, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2751572327044025, | |
| "grad_norm": 2.1143245858070174, | |
| "learning_rate": 2.1492877171971362e-07, | |
| "loss": 1.4419, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.2767295597484276, | |
| "grad_norm": 2.0485144393529806, | |
| "learning_rate": 2.141065198830563e-07, | |
| "loss": 1.7491, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.2783018867924527, | |
| "grad_norm": 1.8955436328860988, | |
| "learning_rate": 2.1328515025640226e-07, | |
| "loss": 1.8095, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.279874213836478, | |
| "grad_norm": 2.2901583680374036, | |
| "learning_rate": 2.124646681720199e-07, | |
| "loss": 1.6575, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.2814465408805031, | |
| "grad_norm": 2.275548594033002, | |
| "learning_rate": 2.116450789564159e-07, | |
| "loss": 1.6958, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.2830188679245282, | |
| "grad_norm": 2.3449017025523737, | |
| "learning_rate": 2.1082638793030008e-07, | |
| "loss": 1.5706, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.2845911949685536, | |
| "grad_norm": 2.317328281003811, | |
| "learning_rate": 2.100086004085516e-07, | |
| "loss": 1.7318, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.2861635220125787, | |
| "grad_norm": 2.226957187409235, | |
| "learning_rate": 2.0919172170018401e-07, | |
| "loss": 1.7858, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.2877358490566038, | |
| "grad_norm": 2.117343302028277, | |
| "learning_rate": 2.0837575710831098e-07, | |
| "loss": 1.6472, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.2893081761006289, | |
| "grad_norm": 2.2741664788753795, | |
| "learning_rate": 2.0756071193011176e-07, | |
| "loss": 2.1258, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.290880503144654, | |
| "grad_norm": 1.9084045733401307, | |
| "learning_rate": 2.0674659145679692e-07, | |
| "loss": 1.7781, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.2924528301886793, | |
| "grad_norm": 1.995611639444785, | |
| "learning_rate": 2.0593340097357373e-07, | |
| "loss": 1.8206, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2940251572327044, | |
| "grad_norm": 2.244777578569415, | |
| "learning_rate": 2.051211457596122e-07, | |
| "loss": 1.6349, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.2955974842767295, | |
| "grad_norm": 2.167997702023013, | |
| "learning_rate": 2.043098310880107e-07, | |
| "loss": 1.8128, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.2971698113207548, | |
| "grad_norm": 2.0751468176787387, | |
| "learning_rate": 2.034994622257615e-07, | |
| "loss": 1.7242, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.29874213836478, | |
| "grad_norm": 2.0555412382564757, | |
| "learning_rate": 2.0269004443371673e-07, | |
| "loss": 1.6657, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.300314465408805, | |
| "grad_norm": 2.161584510716646, | |
| "learning_rate": 2.018815829665546e-07, | |
| "loss": 1.6663, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.3018867924528301, | |
| "grad_norm": 2.201251049412695, | |
| "learning_rate": 2.0107408307274428e-07, | |
| "loss": 1.6481, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.3034591194968552, | |
| "grad_norm": 2.0149941921725962, | |
| "learning_rate": 2.0026754999451317e-07, | |
| "loss": 1.6969, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.3050314465408805, | |
| "grad_norm": 2.242230337987245, | |
| "learning_rate": 1.9946198896781174e-07, | |
| "loss": 1.9959, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.3066037735849056, | |
| "grad_norm": 2.15895547407779, | |
| "learning_rate": 1.986574052222802e-07, | |
| "loss": 1.7761, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.3081761006289307, | |
| "grad_norm": 2.181310101596831, | |
| "learning_rate": 1.9785380398121416e-07, | |
| "loss": 1.6648, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.309748427672956, | |
| "grad_norm": 1.9416146895135635, | |
| "learning_rate": 1.9705119046153114e-07, | |
| "loss": 1.7318, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.3113207547169812, | |
| "grad_norm": 2.0663038260287467, | |
| "learning_rate": 1.9624956987373606e-07, | |
| "loss": 1.9148, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.3128930817610063, | |
| "grad_norm": 2.293091774940269, | |
| "learning_rate": 1.9544894742188804e-07, | |
| "loss": 1.8369, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.3144654088050314, | |
| "grad_norm": 2.3609086075602206, | |
| "learning_rate": 1.9464932830356648e-07, | |
| "loss": 1.7337, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.3160377358490565, | |
| "grad_norm": 2.060684097800172, | |
| "learning_rate": 1.9385071770983697e-07, | |
| "loss": 1.5396, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.3176100628930818, | |
| "grad_norm": 1.995153844021168, | |
| "learning_rate": 1.93053120825218e-07, | |
| "loss": 1.6491, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.319182389937107, | |
| "grad_norm": 2.305659266839101, | |
| "learning_rate": 1.9225654282764733e-07, | |
| "loss": 1.8602, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.320754716981132, | |
| "grad_norm": 1.9550425262609439, | |
| "learning_rate": 1.9146098888844752e-07, | |
| "loss": 1.7687, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.3223270440251573, | |
| "grad_norm": 2.1752564296945143, | |
| "learning_rate": 1.9066646417229369e-07, | |
| "loss": 1.954, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.3238993710691824, | |
| "grad_norm": 2.2819270079625387, | |
| "learning_rate": 1.8987297383717918e-07, | |
| "loss": 1.6462, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.3254716981132075, | |
| "grad_norm": 2.160953368673478, | |
| "learning_rate": 1.8908052303438188e-07, | |
| "loss": 1.6413, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.3270440251572326, | |
| "grad_norm": 2.0789012158742803, | |
| "learning_rate": 1.882891169084313e-07, | |
| "loss": 1.827, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.3286163522012577, | |
| "grad_norm": 2.0716992608690448, | |
| "learning_rate": 1.8749876059707536e-07, | |
| "loss": 1.7414, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.330188679245283, | |
| "grad_norm": 2.103884856783866, | |
| "learning_rate": 1.867094592312463e-07, | |
| "loss": 2.0534, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.3317610062893082, | |
| "grad_norm": 2.4629953048647324, | |
| "learning_rate": 1.8592121793502755e-07, | |
| "loss": 1.7296, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.9458332362620918, | |
| "learning_rate": 1.8513404182562097e-07, | |
| "loss": 1.8213, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.3349056603773586, | |
| "grad_norm": 2.1510489748789583, | |
| "learning_rate": 1.8434793601331336e-07, | |
| "loss": 1.8064, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.3364779874213837, | |
| "grad_norm": 2.2594694003824567, | |
| "learning_rate": 1.8356290560144285e-07, | |
| "loss": 1.763, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3380503144654088, | |
| "grad_norm": 2.2650014550444606, | |
| "learning_rate": 1.8277895568636646e-07, | |
| "loss": 1.6519, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.3396226415094339, | |
| "grad_norm": 2.279277485826785, | |
| "learning_rate": 1.8199609135742672e-07, | |
| "loss": 1.7316, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.341194968553459, | |
| "grad_norm": 2.113275805679697, | |
| "learning_rate": 1.812143176969185e-07, | |
| "loss": 1.7905, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.3427672955974843, | |
| "grad_norm": 1.9917783424168132, | |
| "learning_rate": 1.8043363978005617e-07, | |
| "loss": 1.7985, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.3443396226415094, | |
| "grad_norm": 1.8965972111176195, | |
| "learning_rate": 1.7965406267494078e-07, | |
| "loss": 1.8407, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.3459119496855345, | |
| "grad_norm": 2.2071694500169174, | |
| "learning_rate": 1.7887559144252658e-07, | |
| "loss": 1.7755, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.3474842767295598, | |
| "grad_norm": 2.2472976512202, | |
| "learning_rate": 1.7809823113658896e-07, | |
| "loss": 1.8221, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.349056603773585, | |
| "grad_norm": 2.0844792244919876, | |
| "learning_rate": 1.7732198680369107e-07, | |
| "loss": 1.8871, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.35062893081761, | |
| "grad_norm": 2.027073391928856, | |
| "learning_rate": 1.765468634831514e-07, | |
| "loss": 1.7576, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.3522012578616351, | |
| "grad_norm": 2.2937069673682906, | |
| "learning_rate": 1.757728662070108e-07, | |
| "loss": 1.6525, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.3537735849056602, | |
| "grad_norm": 2.1305400050893972, | |
| "learning_rate": 1.7500000000000007e-07, | |
| "loss": 1.6292, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.3553459119496856, | |
| "grad_norm": 2.093411160599093, | |
| "learning_rate": 1.7422826987950683e-07, | |
| "loss": 2.0316, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.3569182389937107, | |
| "grad_norm": 2.053081781406045, | |
| "learning_rate": 1.7345768085554372e-07, | |
| "loss": 1.6936, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.3584905660377358, | |
| "grad_norm": 2.1226077695084555, | |
| "learning_rate": 1.726882379307153e-07, | |
| "loss": 1.7328, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.360062893081761, | |
| "grad_norm": 2.162295283463786, | |
| "learning_rate": 1.7191994610018574e-07, | |
| "loss": 1.75, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.3616352201257862, | |
| "grad_norm": 2.2966135587303946, | |
| "learning_rate": 1.711528103516464e-07, | |
| "loss": 1.7858, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.3632075471698113, | |
| "grad_norm": 2.13860142749209, | |
| "learning_rate": 1.703868356652837e-07, | |
| "loss": 1.9188, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.3647798742138364, | |
| "grad_norm": 2.1291702490610054, | |
| "learning_rate": 1.6962202701374592e-07, | |
| "loss": 1.6769, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.3663522012578615, | |
| "grad_norm": 2.0330576038157524, | |
| "learning_rate": 1.6885838936211206e-07, | |
| "loss": 1.7028, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.3679245283018868, | |
| "grad_norm": 2.174657226535088, | |
| "learning_rate": 1.6809592766785903e-07, | |
| "loss": 1.7184, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.369496855345912, | |
| "grad_norm": 2.1336668424018463, | |
| "learning_rate": 1.673346468808292e-07, | |
| "loss": 1.5666, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.371069182389937, | |
| "grad_norm": 2.0664898739681403, | |
| "learning_rate": 1.6657455194319875e-07, | |
| "loss": 1.6633, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.3726415094339623, | |
| "grad_norm": 2.07966395343747, | |
| "learning_rate": 1.6581564778944585e-07, | |
| "loss": 1.9728, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.3742138364779874, | |
| "grad_norm": 2.0802230341676493, | |
| "learning_rate": 1.6505793934631743e-07, | |
| "loss": 1.806, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.3757861635220126, | |
| "grad_norm": 2.0585446511098855, | |
| "learning_rate": 1.6430143153279843e-07, | |
| "loss": 1.7467, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.3773584905660377, | |
| "grad_norm": 2.0670814111420457, | |
| "learning_rate": 1.6354612926007947e-07, | |
| "loss": 1.8074, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.378930817610063, | |
| "grad_norm": 2.2448398015661977, | |
| "learning_rate": 1.6279203743152437e-07, | |
| "loss": 1.792, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.380503144654088, | |
| "grad_norm": 2.1936825808205476, | |
| "learning_rate": 1.620391609426394e-07, | |
| "loss": 1.7086, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.3820754716981132, | |
| "grad_norm": 2.0481925632804896, | |
| "learning_rate": 1.6128750468104068e-07, | |
| "loss": 1.6359, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.3836477987421385, | |
| "grad_norm": 2.307548887007293, | |
| "learning_rate": 1.6053707352642275e-07, | |
| "loss": 1.8802, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.3852201257861636, | |
| "grad_norm": 2.212178445396311, | |
| "learning_rate": 1.5978787235052684e-07, | |
| "loss": 1.6298, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.3867924528301887, | |
| "grad_norm": 2.4275019833470357, | |
| "learning_rate": 1.5903990601710933e-07, | |
| "loss": 1.6078, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.3883647798742138, | |
| "grad_norm": 2.097258459403789, | |
| "learning_rate": 1.5829317938191007e-07, | |
| "loss": 1.7955, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.389937106918239, | |
| "grad_norm": 2.2433716966343074, | |
| "learning_rate": 1.5754769729262068e-07, | |
| "loss": 1.841, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.3915094339622642, | |
| "grad_norm": 1.9611279725874884, | |
| "learning_rate": 1.5680346458885351e-07, | |
| "loss": 1.8903, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.3930817610062893, | |
| "grad_norm": 2.1857460166910703, | |
| "learning_rate": 1.560604861021099e-07, | |
| "loss": 1.8461, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.3946540880503144, | |
| "grad_norm": 1.9802850293847218, | |
| "learning_rate": 1.5531876665574905e-07, | |
| "loss": 1.6594, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.3962264150943398, | |
| "grad_norm": 2.044592124136331, | |
| "learning_rate": 1.5457831106495645e-07, | |
| "loss": 1.8477, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.3977987421383649, | |
| "grad_norm": 2.1730178709565027, | |
| "learning_rate": 1.538391241367128e-07, | |
| "loss": 1.8571, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.39937106918239, | |
| "grad_norm": 2.226125612813974, | |
| "learning_rate": 1.5310121066976246e-07, | |
| "loss": 1.8246, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.400943396226415, | |
| "grad_norm": 2.110513299587775, | |
| "learning_rate": 1.52364575454583e-07, | |
| "loss": 1.5938, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.4025157232704402, | |
| "grad_norm": 2.0945323670932483, | |
| "learning_rate": 1.5162922327335352e-07, | |
| "loss": 1.9624, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.4040880503144655, | |
| "grad_norm": 2.031350721397557, | |
| "learning_rate": 1.5089515889992337e-07, | |
| "loss": 1.5905, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.4056603773584906, | |
| "grad_norm": 2.5190983077756, | |
| "learning_rate": 1.5016238709978235e-07, | |
| "loss": 1.6355, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.4072327044025157, | |
| "grad_norm": 2.0786833230760946, | |
| "learning_rate": 1.4943091263002846e-07, | |
| "loss": 2.4022, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.408805031446541, | |
| "grad_norm": 2.2624991107175076, | |
| "learning_rate": 1.487007402393374e-07, | |
| "loss": 2.0047, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.4103773584905661, | |
| "grad_norm": 2.13658012100152, | |
| "learning_rate": 1.4797187466793216e-07, | |
| "loss": 2.0784, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.4119496855345912, | |
| "grad_norm": 2.154584941501371, | |
| "learning_rate": 1.4724432064755204e-07, | |
| "loss": 1.9006, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.4135220125786163, | |
| "grad_norm": 2.2513090599810224, | |
| "learning_rate": 1.4651808290142143e-07, | |
| "loss": 1.87, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.4150943396226414, | |
| "grad_norm": 2.1884972541614336, | |
| "learning_rate": 1.457931661442199e-07, | |
| "loss": 1.8312, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.4166666666666667, | |
| "grad_norm": 2.165721743117902, | |
| "learning_rate": 1.450695750820513e-07, | |
| "loss": 1.5696, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.4182389937106918, | |
| "grad_norm": 2.0366037954150964, | |
| "learning_rate": 1.4434731441241295e-07, | |
| "loss": 1.864, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.419811320754717, | |
| "grad_norm": 2.0359358262558436, | |
| "learning_rate": 1.4362638882416552e-07, | |
| "loss": 1.9211, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.4213836477987423, | |
| "grad_norm": 2.2277853781005144, | |
| "learning_rate": 1.429068029975025e-07, | |
| "loss": 1.6469, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.4229559748427674, | |
| "grad_norm": 2.1703686899029937, | |
| "learning_rate": 1.421885616039194e-07, | |
| "loss": 1.7961, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.4245283018867925, | |
| "grad_norm": 2.2098194543382097, | |
| "learning_rate": 1.4147166930618412e-07, | |
| "loss": 1.7475, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.4261006289308176, | |
| "grad_norm": 2.0638951952196156, | |
| "learning_rate": 1.4075613075830626e-07, | |
| "loss": 1.5421, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.4276729559748427, | |
| "grad_norm": 2.2638501605807786, | |
| "learning_rate": 1.400419506055069e-07, | |
| "loss": 2.0258, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.429245283018868, | |
| "grad_norm": 2.026762327168469, | |
| "learning_rate": 1.393291334841886e-07, | |
| "loss": 1.7273, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.430817610062893, | |
| "grad_norm": 2.0850924373321655, | |
| "learning_rate": 1.3861768402190533e-07, | |
| "loss": 1.6415, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.4323899371069182, | |
| "grad_norm": 2.105162873756192, | |
| "learning_rate": 1.379076068373319e-07, | |
| "loss": 1.7626, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.4339622641509435, | |
| "grad_norm": 2.2125942477293474, | |
| "learning_rate": 1.3719890654023485e-07, | |
| "loss": 1.6857, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.4355345911949686, | |
| "grad_norm": 2.1153484222332173, | |
| "learning_rate": 1.36491587731442e-07, | |
| "loss": 1.7955, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.4371069182389937, | |
| "grad_norm": 2.2997509014276742, | |
| "learning_rate": 1.3578565500281222e-07, | |
| "loss": 2.0574, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.4386792452830188, | |
| "grad_norm": 2.078107647008167, | |
| "learning_rate": 1.3508111293720675e-07, | |
| "loss": 2.0042, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.440251572327044, | |
| "grad_norm": 2.0399439337966787, | |
| "learning_rate": 1.343779661084584e-07, | |
| "loss": 1.574, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.4418238993710693, | |
| "grad_norm": 2.2889011438788063, | |
| "learning_rate": 1.33676219081342e-07, | |
| "loss": 1.7344, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.4433962264150944, | |
| "grad_norm": 2.333332062321592, | |
| "learning_rate": 1.329758764115452e-07, | |
| "loss": 1.5233, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.4449685534591195, | |
| "grad_norm": 2.1331182619855857, | |
| "learning_rate": 1.322769426456388e-07, | |
| "loss": 1.6755, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.4465408805031448, | |
| "grad_norm": 2.1531518116333745, | |
| "learning_rate": 1.3157942232104702e-07, | |
| "loss": 2.0816, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.4481132075471699, | |
| "grad_norm": 2.088795076446534, | |
| "learning_rate": 1.308833199660178e-07, | |
| "loss": 2.0221, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.449685534591195, | |
| "grad_norm": 2.00003816402544, | |
| "learning_rate": 1.3018864009959402e-07, | |
| "loss": 1.8191, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.45125786163522, | |
| "grad_norm": 2.0551941845228403, | |
| "learning_rate": 1.2949538723158427e-07, | |
| "loss": 1.779, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.4528301886792452, | |
| "grad_norm": 2.2468743977572547, | |
| "learning_rate": 1.288035658625323e-07, | |
| "loss": 1.7847, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.4544025157232705, | |
| "grad_norm": 2.2719489416819254, | |
| "learning_rate": 1.2811318048368927e-07, | |
| "loss": 1.6929, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.4559748427672956, | |
| "grad_norm": 2.0990742193527434, | |
| "learning_rate": 1.2742423557698407e-07, | |
| "loss": 1.6888, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.4575471698113207, | |
| "grad_norm": 2.21584285821969, | |
| "learning_rate": 1.2673673561499367e-07, | |
| "loss": 1.8427, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.459119496855346, | |
| "grad_norm": 2.145727226704997, | |
| "learning_rate": 1.2605068506091503e-07, | |
| "loss": 1.854, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.4606918238993711, | |
| "grad_norm": 2.2012601813217554, | |
| "learning_rate": 1.2536608836853537e-07, | |
| "loss": 1.6569, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.4622641509433962, | |
| "grad_norm": 2.1152395275847233, | |
| "learning_rate": 1.2468294998220374e-07, | |
| "loss": 1.8322, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4638364779874213, | |
| "grad_norm": 2.1134882928957226, | |
| "learning_rate": 1.2400127433680197e-07, | |
| "loss": 1.54, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.4654088050314464, | |
| "grad_norm": 2.058732975760006, | |
| "learning_rate": 1.2332106585771588e-07, | |
| "loss": 1.8464, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.4669811320754718, | |
| "grad_norm": 1.9450661693276792, | |
| "learning_rate": 1.226423289608063e-07, | |
| "loss": 1.6493, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.4685534591194969, | |
| "grad_norm": 2.162745466557572, | |
| "learning_rate": 1.2196506805238097e-07, | |
| "loss": 1.7669, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.470125786163522, | |
| "grad_norm": 1.9865409540246532, | |
| "learning_rate": 1.2128928752916557e-07, | |
| "loss": 1.9032, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.4716981132075473, | |
| "grad_norm": 2.1486068004112098, | |
| "learning_rate": 1.2061499177827517e-07, | |
| "loss": 1.7487, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.4732704402515724, | |
| "grad_norm": 2.1353688348778244, | |
| "learning_rate": 1.199421851771858e-07, | |
| "loss": 1.9648, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.4748427672955975, | |
| "grad_norm": 2.203719714639102, | |
| "learning_rate": 1.1927087209370627e-07, | |
| "loss": 2.2411, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.4764150943396226, | |
| "grad_norm": 2.1007559098440707, | |
| "learning_rate": 1.1860105688594913e-07, | |
| "loss": 1.9012, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.4779874213836477, | |
| "grad_norm": 2.0112830813880724, | |
| "learning_rate": 1.179327439023032e-07, | |
| "loss": 1.7904, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.479559748427673, | |
| "grad_norm": 2.147891306593028, | |
| "learning_rate": 1.1726593748140503e-07, | |
| "loss": 1.849, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.4811320754716981, | |
| "grad_norm": 2.1951970364454167, | |
| "learning_rate": 1.1660064195211026e-07, | |
| "loss": 1.8406, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.4827044025157232, | |
| "grad_norm": 2.251395431799435, | |
| "learning_rate": 1.1593686163346624e-07, | |
| "loss": 1.8115, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.4842767295597485, | |
| "grad_norm": 2.2374065524605915, | |
| "learning_rate": 1.1527460083468404e-07, | |
| "loss": 1.7597, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.4858490566037736, | |
| "grad_norm": 2.2516356519637153, | |
| "learning_rate": 1.1461386385510934e-07, | |
| "loss": 1.7996, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.4874213836477987, | |
| "grad_norm": 1.9259732674286747, | |
| "learning_rate": 1.1395465498419584e-07, | |
| "loss": 1.7016, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.4889937106918238, | |
| "grad_norm": 2.0151236267786943, | |
| "learning_rate": 1.1329697850147684e-07, | |
| "loss": 1.8591, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.490566037735849, | |
| "grad_norm": 2.1157729525808, | |
| "learning_rate": 1.1264083867653721e-07, | |
| "loss": 1.7659, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.4921383647798743, | |
| "grad_norm": 2.2039873137986485, | |
| "learning_rate": 1.1198623976898626e-07, | |
| "loss": 1.8312, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.4937106918238994, | |
| "grad_norm": 2.139918541343564, | |
| "learning_rate": 1.1133318602842961e-07, | |
| "loss": 1.8547, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4952830188679245, | |
| "grad_norm": 2.0927341451744716, | |
| "learning_rate": 1.1068168169444187e-07, | |
| "loss": 1.6786, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.4968553459119498, | |
| "grad_norm": 2.22996580434956, | |
| "learning_rate": 1.1003173099653898e-07, | |
| "loss": 1.9014, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.498427672955975, | |
| "grad_norm": 2.2224019206058707, | |
| "learning_rate": 1.093833381541509e-07, | |
| "loss": 1.9734, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 2.292687450843933, | |
| "learning_rate": 1.087365073765938e-07, | |
| "loss": 1.6376, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.501572327044025, | |
| "grad_norm": 1.933890837573844, | |
| "learning_rate": 1.0809124286304334e-07, | |
| "loss": 1.6966, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.5031446540880502, | |
| "grad_norm": 2.2347334629753557, | |
| "learning_rate": 1.0744754880250704e-07, | |
| "loss": 1.9026, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.5047169811320755, | |
| "grad_norm": 2.0708848593951084, | |
| "learning_rate": 1.0680542937379719e-07, | |
| "loss": 1.7771, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.5062893081761006, | |
| "grad_norm": 2.102938366432825, | |
| "learning_rate": 1.061648887455036e-07, | |
| "loss": 1.7984, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.507861635220126, | |
| "grad_norm": 2.2787001713384467, | |
| "learning_rate": 1.0552593107596671e-07, | |
| "loss": 1.7934, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.509433962264151, | |
| "grad_norm": 2.1532789357045794, | |
| "learning_rate": 1.0488856051325056e-07, | |
| "loss": 1.6814, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.5110062893081762, | |
| "grad_norm": 2.253404213973793, | |
| "learning_rate": 1.0425278119511557e-07, | |
| "loss": 1.5369, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.5125786163522013, | |
| "grad_norm": 2.1276754628326904, | |
| "learning_rate": 1.0361859724899213e-07, | |
| "loss": 1.6983, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.5141509433962264, | |
| "grad_norm": 2.04685261141141, | |
| "learning_rate": 1.0298601279195375e-07, | |
| "loss": 1.9189, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.5157232704402515, | |
| "grad_norm": 2.334536087166455, | |
| "learning_rate": 1.0235503193068961e-07, | |
| "loss": 1.9152, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.5172955974842768, | |
| "grad_norm": 2.1800867522294975, | |
| "learning_rate": 1.0172565876147919e-07, | |
| "loss": 1.8854, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.5188679245283019, | |
| "grad_norm": 2.235966102337266, | |
| "learning_rate": 1.0109789737016459e-07, | |
| "loss": 1.8736, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.5204402515723272, | |
| "grad_norm": 2.3031366168949385, | |
| "learning_rate": 1.0047175183212424e-07, | |
| "loss": 1.8837, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.5220125786163523, | |
| "grad_norm": 1.9731203822876688, | |
| "learning_rate": 9.984722621224678e-08, | |
| "loss": 1.6989, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.5235849056603774, | |
| "grad_norm": 2.0967189530567043, | |
| "learning_rate": 9.922432456490459e-08, | |
| "loss": 1.7385, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.5251572327044025, | |
| "grad_norm": 2.2284300025544, | |
| "learning_rate": 9.86030509339269e-08, | |
| "loss": 1.5122, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.5267295597484276, | |
| "grad_norm": 2.0075774050458017, | |
| "learning_rate": 9.798340935257439e-08, | |
| "loss": 1.7742, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.5283018867924527, | |
| "grad_norm": 2.253336294321935, | |
| "learning_rate": 9.736540384351247e-08, | |
| "loss": 1.8329, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.529874213836478, | |
| "grad_norm": 2.0229026010397795, | |
| "learning_rate": 9.674903841878527e-08, | |
| "loss": 1.8612, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.5314465408805031, | |
| "grad_norm": 2.1682039050875384, | |
| "learning_rate": 9.613431707978969e-08, | |
| "loss": 1.6209, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.5330188679245285, | |
| "grad_norm": 2.17108676879446, | |
| "learning_rate": 9.55212438172494e-08, | |
| "loss": 1.7289, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.5345911949685536, | |
| "grad_norm": 2.0449199036226466, | |
| "learning_rate": 9.49098226111885e-08, | |
| "loss": 1.7313, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.5361635220125787, | |
| "grad_norm": 2.0509938124397658, | |
| "learning_rate": 9.430005743090654e-08, | |
| "loss": 1.859, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.5377358490566038, | |
| "grad_norm": 2.1533522799374016, | |
| "learning_rate": 9.369195223495212e-08, | |
| "loss": 1.5909, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.5393081761006289, | |
| "grad_norm": 2.4410463221304957, | |
| "learning_rate": 9.308551097109723e-08, | |
| "loss": 1.9236, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.540880503144654, | |
| "grad_norm": 2.1289270496387256, | |
| "learning_rate": 9.248073757631187e-08, | |
| "loss": 1.6905, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.5424528301886793, | |
| "grad_norm": 2.160163073483539, | |
| "learning_rate": 9.187763597673842e-08, | |
| "loss": 1.572, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.5440251572327044, | |
| "grad_norm": 2.1864157152356247, | |
| "learning_rate": 9.127621008766583e-08, | |
| "loss": 1.486, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.5455974842767297, | |
| "grad_norm": 2.228493447513863, | |
| "learning_rate": 9.067646381350473e-08, | |
| "loss": 1.8109, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.5471698113207548, | |
| "grad_norm": 2.21672107568335, | |
| "learning_rate": 9.007840104776179e-08, | |
| "loss": 1.7224, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.54874213836478, | |
| "grad_norm": 2.0906671637683383, | |
| "learning_rate": 8.948202567301416e-08, | |
| "loss": 1.7993, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.550314465408805, | |
| "grad_norm": 2.103005760268071, | |
| "learning_rate": 8.888734156088509e-08, | |
| "loss": 1.7734, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.5518867924528301, | |
| "grad_norm": 2.1048717139473534, | |
| "learning_rate": 8.829435257201803e-08, | |
| "loss": 1.6411, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.5534591194968552, | |
| "grad_norm": 2.244151500892785, | |
| "learning_rate": 8.77030625560516e-08, | |
| "loss": 1.9157, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.5550314465408805, | |
| "grad_norm": 2.3422640414896407, | |
| "learning_rate": 8.711347535159517e-08, | |
| "loss": 1.446, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.5566037735849056, | |
| "grad_norm": 2.21179768023497, | |
| "learning_rate": 8.652559478620349e-08, | |
| "loss": 1.7682, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.558176100628931, | |
| "grad_norm": 2.130587628281344, | |
| "learning_rate": 8.593942467635173e-08, | |
| "loss": 1.9265, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.559748427672956, | |
| "grad_norm": 2.2427742208191384, | |
| "learning_rate": 8.535496882741118e-08, | |
| "loss": 1.8189, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.5613207547169812, | |
| "grad_norm": 2.319870729623824, | |
| "learning_rate": 8.47722310336241e-08, | |
| "loss": 1.7268, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.5628930817610063, | |
| "grad_norm": 2.0786710848008214, | |
| "learning_rate": 8.419121507807966e-08, | |
| "loss": 1.6414, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.5644654088050314, | |
| "grad_norm": 2.1766614925100805, | |
| "learning_rate": 8.361192473268831e-08, | |
| "loss": 1.7614, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.5660377358490565, | |
| "grad_norm": 2.0644913614642784, | |
| "learning_rate": 8.30343637581585e-08, | |
| "loss": 1.7658, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.5676100628930818, | |
| "grad_norm": 1.9942721896362847, | |
| "learning_rate": 8.245853590397171e-08, | |
| "loss": 1.5864, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.569182389937107, | |
| "grad_norm": 2.107035516490154, | |
| "learning_rate": 8.188444490835773e-08, | |
| "loss": 1.5109, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.5707547169811322, | |
| "grad_norm": 2.157536925590625, | |
| "learning_rate": 8.131209449827121e-08, | |
| "loss": 1.8098, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.5723270440251573, | |
| "grad_norm": 2.3169442855875535, | |
| "learning_rate": 8.074148838936693e-08, | |
| "loss": 1.718, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5723270440251573, | |
| "eval_sat2_MCTS_chains_SFT_val_loss": 1.7004035711288452, | |
| "eval_sat2_MCTS_chains_SFT_val_runtime": 92.1151, | |
| "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.16, | |
| "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.4, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5738993710691824, | |
| "grad_norm": 1.9653337976940755, | |
| "learning_rate": 8.017263028597577e-08, | |
| "loss": 1.6755, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.5754716981132075, | |
| "grad_norm": 2.259220567901995, | |
| "learning_rate": 7.960552388108074e-08, | |
| "loss": 1.6192, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.5770440251572326, | |
| "grad_norm": 2.172629330750012, | |
| "learning_rate": 7.9040172856293e-08, | |
| "loss": 1.7591, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.5786163522012577, | |
| "grad_norm": 2.235449144986895, | |
| "learning_rate": 7.847658088182764e-08, | |
| "loss": 1.6464, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.580188679245283, | |
| "grad_norm": 2.1331939859889766, | |
| "learning_rate": 7.791475161648044e-08, | |
| "loss": 1.7274, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.5817610062893082, | |
| "grad_norm": 2.11798299275123, | |
| "learning_rate": 7.735468870760373e-08, | |
| "loss": 1.9111, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.5833333333333335, | |
| "grad_norm": 2.1205150272186266, | |
| "learning_rate": 7.679639579108278e-08, | |
| "loss": 1.8506, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.5849056603773586, | |
| "grad_norm": 2.0605259082135428, | |
| "learning_rate": 7.623987649131212e-08, | |
| "loss": 1.5979, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.5864779874213837, | |
| "grad_norm": 2.366698058592538, | |
| "learning_rate": 7.568513442117235e-08, | |
| "loss": 1.6993, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.5880503144654088, | |
| "grad_norm": 2.218013241692686, | |
| "learning_rate": 7.513217318200599e-08, | |
| "loss": 1.7854, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.5896226415094339, | |
| "grad_norm": 1.9909585694013383, | |
| "learning_rate": 7.458099636359496e-08, | |
| "loss": 1.6368, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.591194968553459, | |
| "grad_norm": 2.290605549139274, | |
| "learning_rate": 7.403160754413676e-08, | |
| "loss": 1.7737, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.5927672955974843, | |
| "grad_norm": 2.1367293795429947, | |
| "learning_rate": 7.348401029022108e-08, | |
| "loss": 1.6134, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.5943396226415094, | |
| "grad_norm": 2.01820143740589, | |
| "learning_rate": 7.293820815680712e-08, | |
| "loss": 1.7256, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.5959119496855347, | |
| "grad_norm": 2.276755773858234, | |
| "learning_rate": 7.239420468720059e-08, | |
| "loss": 1.9804, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.5974842767295598, | |
| "grad_norm": 2.330331102976199, | |
| "learning_rate": 7.185200341302975e-08, | |
| "loss": 1.7043, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.599056603773585, | |
| "grad_norm": 2.1276439937862306, | |
| "learning_rate": 7.131160785422365e-08, | |
| "loss": 1.9397, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.60062893081761, | |
| "grad_norm": 2.172507453951336, | |
| "learning_rate": 7.077302151898875e-08, | |
| "loss": 1.9139, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.6022012578616351, | |
| "grad_norm": 2.1478223685971356, | |
| "learning_rate": 7.023624790378576e-08, | |
| "loss": 1.6555, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.6037735849056602, | |
| "grad_norm": 2.3969459707002256, | |
| "learning_rate": 6.97012904933078e-08, | |
| "loss": 1.9195, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.6053459119496856, | |
| "grad_norm": 1.9248844302089616, | |
| "learning_rate": 6.916815276045719e-08, | |
| "loss": 1.8894, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.6069182389937107, | |
| "grad_norm": 2.0212176784028717, | |
| "learning_rate": 6.863683816632293e-08, | |
| "loss": 1.7218, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.608490566037736, | |
| "grad_norm": 2.094474217163886, | |
| "learning_rate": 6.810735016015846e-08, | |
| "loss": 1.6663, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.610062893081761, | |
| "grad_norm": 2.0244431225363697, | |
| "learning_rate": 6.757969217935929e-08, | |
| "loss": 1.6878, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.6116352201257862, | |
| "grad_norm": 2.1281168867505014, | |
| "learning_rate": 6.705386764944006e-08, | |
| "loss": 1.8226, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.6132075471698113, | |
| "grad_norm": 2.0886881066721625, | |
| "learning_rate": 6.652987998401334e-08, | |
| "loss": 1.655, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.6147798742138364, | |
| "grad_norm": 2.0367504844576247, | |
| "learning_rate": 6.60077325847666e-08, | |
| "loss": 1.7722, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.6163522012578615, | |
| "grad_norm": 1.9930526077088684, | |
| "learning_rate": 6.548742884144054e-08, | |
| "loss": 1.7073, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.6179245283018868, | |
| "grad_norm": 2.159190917587176, | |
| "learning_rate": 6.4968972131807e-08, | |
| "loss": 1.8479, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.619496855345912, | |
| "grad_norm": 2.2749098562974903, | |
| "learning_rate": 6.445236582164699e-08, | |
| "loss": 1.9923, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.6210691823899372, | |
| "grad_norm": 2.2025296280904225, | |
| "learning_rate": 6.393761326472898e-08, | |
| "loss": 1.6454, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.6226415094339623, | |
| "grad_norm": 2.1334919488245414, | |
| "learning_rate": 6.342471780278667e-08, | |
| "loss": 1.6965, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.6242138364779874, | |
| "grad_norm": 2.322316370670461, | |
| "learning_rate": 6.291368276549802e-08, | |
| "loss": 1.7228, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.6257861635220126, | |
| "grad_norm": 1.995542145637434, | |
| "learning_rate": 6.240451147046318e-08, | |
| "loss": 1.595, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.6273584905660377, | |
| "grad_norm": 2.196444636496991, | |
| "learning_rate": 6.189720722318278e-08, | |
| "loss": 1.8758, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.6289308176100628, | |
| "grad_norm": 2.0927387390257657, | |
| "learning_rate": 6.139177331703707e-08, | |
| "loss": 2.1127, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.630503144654088, | |
| "grad_norm": 1.9307021857625342, | |
| "learning_rate": 6.088821303326411e-08, | |
| "loss": 1.803, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.6320754716981132, | |
| "grad_norm": 2.082620089850429, | |
| "learning_rate": 6.038652964093827e-08, | |
| "loss": 1.6595, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.6336477987421385, | |
| "grad_norm": 2.067695395804015, | |
| "learning_rate": 5.988672639694953e-08, | |
| "loss": 1.8777, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.6352201257861636, | |
| "grad_norm": 2.042833868758026, | |
| "learning_rate": 5.938880654598219e-08, | |
| "loss": 1.7071, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.6367924528301887, | |
| "grad_norm": 2.117785030809429, | |
| "learning_rate": 5.889277332049334e-08, | |
| "loss": 1.7538, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.6383647798742138, | |
| "grad_norm": 2.0095241417925265, | |
| "learning_rate": 5.839862994069262e-08, | |
| "loss": 1.8899, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.639937106918239, | |
| "grad_norm": 2.2478546507042405, | |
| "learning_rate": 5.79063796145207e-08, | |
| "loss": 1.8472, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.641509433962264, | |
| "grad_norm": 2.305709446534207, | |
| "learning_rate": 5.74160255376288e-08, | |
| "loss": 1.7277, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.6430817610062893, | |
| "grad_norm": 2.18183263977078, | |
| "learning_rate": 5.692757089335781e-08, | |
| "loss": 1.9153, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.6446540880503144, | |
| "grad_norm": 2.0254378536986386, | |
| "learning_rate": 5.644101885271778e-08, | |
| "loss": 1.8602, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.6462264150943398, | |
| "grad_norm": 2.2161096727855085, | |
| "learning_rate": 5.5956372574366835e-08, | |
| "loss": 1.6629, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.6477987421383649, | |
| "grad_norm": 2.301864500140701, | |
| "learning_rate": 5.547363520459137e-08, | |
| "loss": 1.7943, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.64937106918239, | |
| "grad_norm": 2.092542135751507, | |
| "learning_rate": 5.4992809877285235e-08, | |
| "loss": 1.7474, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.650943396226415, | |
| "grad_norm": 2.148759273760875, | |
| "learning_rate": 5.4513899713929394e-08, | |
| "loss": 1.9493, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.6525157232704402, | |
| "grad_norm": 2.103715691929771, | |
| "learning_rate": 5.403690782357175e-08, | |
| "loss": 1.8557, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.6540880503144653, | |
| "grad_norm": 2.099977822828033, | |
| "learning_rate": 5.3561837302806944e-08, | |
| "loss": 1.7166, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.6556603773584906, | |
| "grad_norm": 1.9796989676905823, | |
| "learning_rate": 5.3088691235756094e-08, | |
| "loss": 1.9712, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.6572327044025157, | |
| "grad_norm": 2.119765004058249, | |
| "learning_rate": 5.2617472694047037e-08, | |
| "loss": 1.8249, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.658805031446541, | |
| "grad_norm": 2.0993720478618387, | |
| "learning_rate": 5.2148184736794346e-08, | |
| "loss": 1.8525, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.6603773584905661, | |
| "grad_norm": 2.023540938803579, | |
| "learning_rate": 5.1680830410579055e-08, | |
| "loss": 2.0546, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.6619496855345912, | |
| "grad_norm": 2.0699562130101015, | |
| "learning_rate": 5.121541274942966e-08, | |
| "loss": 1.9134, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.6635220125786163, | |
| "grad_norm": 2.144350196893977, | |
| "learning_rate": 5.07519347748018e-08, | |
| "loss": 1.7238, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.6650943396226414, | |
| "grad_norm": 2.0183983742920075, | |
| "learning_rate": 5.029039949555856e-08, | |
| "loss": 1.8309, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 2.1227831992403043, | |
| "learning_rate": 4.983080990795154e-08, | |
| "loss": 1.7035, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.6682389937106918, | |
| "grad_norm": 2.2858740994961506, | |
| "learning_rate": 4.937316899560099e-08, | |
| "loss": 1.5596, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.669811320754717, | |
| "grad_norm": 2.1280194157468344, | |
| "learning_rate": 4.891747972947634e-08, | |
| "loss": 1.6423, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.6713836477987423, | |
| "grad_norm": 2.2102497033701543, | |
| "learning_rate": 4.846374506787724e-08, | |
| "loss": 1.6832, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.6729559748427674, | |
| "grad_norm": 2.238490260804657, | |
| "learning_rate": 4.8011967956414156e-08, | |
| "loss": 1.6306, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.6745283018867925, | |
| "grad_norm": 2.200290791301945, | |
| "learning_rate": 4.756215132798929e-08, | |
| "loss": 1.778, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.6761006289308176, | |
| "grad_norm": 2.0985204995078024, | |
| "learning_rate": 4.7114298102777545e-08, | |
| "loss": 1.7058, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.6776729559748427, | |
| "grad_norm": 2.087398311229735, | |
| "learning_rate": 4.666841118820755e-08, | |
| "loss": 1.7865, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.6792452830188678, | |
| "grad_norm": 2.1078417553705817, | |
| "learning_rate": 4.622449347894291e-08, | |
| "loss": 1.7158, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.680817610062893, | |
| "grad_norm": 2.116464602679099, | |
| "learning_rate": 4.578254785686302e-08, | |
| "loss": 1.8466, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.6823899371069182, | |
| "grad_norm": 2.089237238472885, | |
| "learning_rate": 4.5342577191044845e-08, | |
| "loss": 1.6295, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6839622641509435, | |
| "grad_norm": 2.0547646711843988, | |
| "learning_rate": 4.4904584337744134e-08, | |
| "loss": 1.7459, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.6855345911949686, | |
| "grad_norm": 2.0054803327980335, | |
| "learning_rate": 4.4468572140376675e-08, | |
| "loss": 1.6197, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.6871069182389937, | |
| "grad_norm": 2.0697707743408293, | |
| "learning_rate": 4.403454342950009e-08, | |
| "loss": 1.829, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.6886792452830188, | |
| "grad_norm": 2.3284003484454416, | |
| "learning_rate": 4.360250102279542e-08, | |
| "loss": 1.8744, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.690251572327044, | |
| "grad_norm": 2.242850396768619, | |
| "learning_rate": 4.317244772504851e-08, | |
| "loss": 1.7455, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.691823899371069, | |
| "grad_norm": 2.1997671073539204, | |
| "learning_rate": 4.274438632813232e-08, | |
| "loss": 2.0059, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.6933962264150944, | |
| "grad_norm": 2.1566093126536643, | |
| "learning_rate": 4.2318319610988444e-08, | |
| "loss": 1.5531, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.6949685534591195, | |
| "grad_norm": 2.13597178016107, | |
| "learning_rate": 4.1894250339609196e-08, | |
| "loss": 1.8328, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.6965408805031448, | |
| "grad_norm": 2.07918842349438, | |
| "learning_rate": 4.1472181267019636e-08, | |
| "loss": 1.7407, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.6981132075471699, | |
| "grad_norm": 2.028781264262438, | |
| "learning_rate": 4.1052115133259726e-08, | |
| "loss": 1.8737, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.699685534591195, | |
| "grad_norm": 2.0355607974120016, | |
| "learning_rate": 4.063405466536631e-08, | |
| "loss": 1.5415, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.70125786163522, | |
| "grad_norm": 1.9226221416432272, | |
| "learning_rate": 4.021800257735578e-08, | |
| "loss": 1.9198, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.7028301886792452, | |
| "grad_norm": 1.8732770572866655, | |
| "learning_rate": 3.9803961570206315e-08, | |
| "loss": 1.8087, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.7044025157232703, | |
| "grad_norm": 2.346364883253458, | |
| "learning_rate": 3.9391934331840104e-08, | |
| "loss": 1.9382, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.7059748427672956, | |
| "grad_norm": 2.1529023946779433, | |
| "learning_rate": 3.898192353710623e-08, | |
| "loss": 1.8482, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.7075471698113207, | |
| "grad_norm": 2.2018108847525646, | |
| "learning_rate": 3.857393184776341e-08, | |
| "loss": 1.672, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.709119496855346, | |
| "grad_norm": 2.2290874113828902, | |
| "learning_rate": 3.8167961912462046e-08, | |
| "loss": 1.9239, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.7106918238993711, | |
| "grad_norm": 2.100849188665365, | |
| "learning_rate": 3.7764016366727704e-08, | |
| "loss": 1.8664, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.7122641509433962, | |
| "grad_norm": 2.1510675871347975, | |
| "learning_rate": 3.73620978329439e-08, | |
| "loss": 1.7952, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.7138364779874213, | |
| "grad_norm": 2.109222961162139, | |
| "learning_rate": 3.6962208920334554e-08, | |
| "loss": 1.7452, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.7154088050314464, | |
| "grad_norm": 2.121301963046306, | |
| "learning_rate": 3.656435222494782e-08, | |
| "loss": 1.8288, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.7169811320754715, | |
| "grad_norm": 2.0337417924569, | |
| "learning_rate": 3.61685303296387e-08, | |
| "loss": 1.8223, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.7185534591194969, | |
| "grad_norm": 2.1339663352283913, | |
| "learning_rate": 3.577474580405245e-08, | |
| "loss": 1.5421, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.720125786163522, | |
| "grad_norm": 2.049959891378422, | |
| "learning_rate": 3.5383001204607826e-08, | |
| "loss": 1.8102, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.7216981132075473, | |
| "grad_norm": 2.1813938906584847, | |
| "learning_rate": 3.499329907448072e-08, | |
| "loss": 2.1207, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.7232704402515724, | |
| "grad_norm": 2.0705000827955558, | |
| "learning_rate": 3.4605641943587113e-08, | |
| "loss": 1.8636, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.7248427672955975, | |
| "grad_norm": 2.238011733888544, | |
| "learning_rate": 3.4220032328567384e-08, | |
| "loss": 1.5974, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.7264150943396226, | |
| "grad_norm": 2.2247891478941857, | |
| "learning_rate": 3.383647273276945e-08, | |
| "loss": 1.8494, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.7279874213836477, | |
| "grad_norm": 2.312396508453329, | |
| "learning_rate": 3.345496564623257e-08, | |
| "loss": 1.885, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.7295597484276728, | |
| "grad_norm": 2.066339746833989, | |
| "learning_rate": 3.3075513545671434e-08, | |
| "loss": 1.7994, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.7311320754716981, | |
| "grad_norm": 2.0879972636931314, | |
| "learning_rate": 3.269811889445988e-08, | |
| "loss": 1.6402, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.7327044025157232, | |
| "grad_norm": 2.175814596182231, | |
| "learning_rate": 3.232278414261481e-08, | |
| "loss": 1.5661, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.7342767295597485, | |
| "grad_norm": 2.1194787082555018, | |
| "learning_rate": 3.194951172678054e-08, | |
| "loss": 1.7645, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.7358490566037736, | |
| "grad_norm": 1.8763626024150262, | |
| "learning_rate": 3.157830407021283e-08, | |
| "loss": 1.596, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.7374213836477987, | |
| "grad_norm": 1.9594370469295614, | |
| "learning_rate": 3.120916358276331e-08, | |
| "loss": 1.6861, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.7389937106918238, | |
| "grad_norm": 2.3713769253326453, | |
| "learning_rate": 3.084209266086331e-08, | |
| "loss": 1.5862, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.740566037735849, | |
| "grad_norm": 2.1938199292645937, | |
| "learning_rate": 3.047709368750924e-08, | |
| "loss": 1.6771, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.742138364779874, | |
| "grad_norm": 2.3215493957832267, | |
| "learning_rate": 3.01141690322463e-08, | |
| "loss": 1.7517, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.7437106918238994, | |
| "grad_norm": 2.221134746509961, | |
| "learning_rate": 2.9753321051153258e-08, | |
| "loss": 1.7712, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.7452830188679245, | |
| "grad_norm": 2.1030740049013517, | |
| "learning_rate": 2.9394552086827434e-08, | |
| "loss": 1.7837, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.7468553459119498, | |
| "grad_norm": 2.178087587225378, | |
| "learning_rate": 2.9037864468369417e-08, | |
| "loss": 1.7186, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.748427672955975, | |
| "grad_norm": 2.1574371301384434, | |
| "learning_rate": 2.8683260511367614e-08, | |
| "loss": 1.5805, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 2.042041045810803, | |
| "learning_rate": 2.8330742517883645e-08, | |
| "loss": 1.7781, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.751572327044025, | |
| "grad_norm": 1.996501467635451, | |
| "learning_rate": 2.7980312776437142e-08, | |
| "loss": 1.8566, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.7531446540880502, | |
| "grad_norm": 1.9640009019290934, | |
| "learning_rate": 2.7631973561990995e-08, | |
| "loss": 2.0415, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.7547169811320755, | |
| "grad_norm": 2.181588054812827, | |
| "learning_rate": 2.7285727135936608e-08, | |
| "loss": 1.6838, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.7562893081761006, | |
| "grad_norm": 2.1470396103954705, | |
| "learning_rate": 2.6941575746079108e-08, | |
| "loss": 1.9552, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.757861635220126, | |
| "grad_norm": 2.064818182873414, | |
| "learning_rate": 2.659952162662269e-08, | |
| "loss": 1.7339, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.759433962264151, | |
| "grad_norm": 2.39097255770087, | |
| "learning_rate": 2.625956699815639e-08, | |
| "loss": 1.6014, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.7610062893081762, | |
| "grad_norm": 2.08931618388101, | |
| "learning_rate": 2.592171406763949e-08, | |
| "loss": 1.6226, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.7625786163522013, | |
| "grad_norm": 2.085612329162341, | |
| "learning_rate": 2.5585965028387198e-08, | |
| "loss": 1.6741, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.7641509433962264, | |
| "grad_norm": 2.0859266300459067, | |
| "learning_rate": 2.5252322060056403e-08, | |
| "loss": 1.8141, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.7657232704402515, | |
| "grad_norm": 2.063805491342782, | |
| "learning_rate": 2.4920787328631565e-08, | |
| "loss": 1.6166, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.7672955974842768, | |
| "grad_norm": 2.1187493282183016, | |
| "learning_rate": 2.459136298641057e-08, | |
| "loss": 1.6022, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.7688679245283019, | |
| "grad_norm": 2.1979153283650414, | |
| "learning_rate": 2.426405117199089e-08, | |
| "loss": 1.6834, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.7704402515723272, | |
| "grad_norm": 2.20082255588697, | |
| "learning_rate": 2.393885401025565e-08, | |
| "loss": 1.9188, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.7720125786163523, | |
| "grad_norm": 2.1540464614977504, | |
| "learning_rate": 2.361577361235962e-08, | |
| "loss": 1.5527, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.7735849056603774, | |
| "grad_norm": 2.0788864004074923, | |
| "learning_rate": 2.3294812075716015e-08, | |
| "loss": 1.9392, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.7751572327044025, | |
| "grad_norm": 2.04615578653692, | |
| "learning_rate": 2.2975971483982428e-08, | |
| "loss": 1.8391, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.7767295597484276, | |
| "grad_norm": 2.034007984738505, | |
| "learning_rate": 2.265925390704726e-08, | |
| "loss": 1.8705, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.7783018867924527, | |
| "grad_norm": 2.146991438580307, | |
| "learning_rate": 2.2344661401016678e-08, | |
| "loss": 1.9585, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.779874213836478, | |
| "grad_norm": 2.279053736154095, | |
| "learning_rate": 2.203219600820112e-08, | |
| "loss": 2.1532, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.7814465408805031, | |
| "grad_norm": 2.0190752770101548, | |
| "learning_rate": 2.1721859757101658e-08, | |
| "loss": 1.6968, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.7830188679245285, | |
| "grad_norm": 2.28515721518105, | |
| "learning_rate": 2.1413654662397408e-08, | |
| "loss": 1.626, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.7845911949685536, | |
| "grad_norm": 2.135849516978255, | |
| "learning_rate": 2.1107582724932088e-08, | |
| "loss": 1.7029, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.7861635220125787, | |
| "grad_norm": 2.418793471259512, | |
| "learning_rate": 2.0803645931701158e-08, | |
| "loss": 1.759, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.7877358490566038, | |
| "grad_norm": 2.322445509938412, | |
| "learning_rate": 2.0501846255838835e-08, | |
| "loss": 1.6907, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.7893081761006289, | |
| "grad_norm": 2.115867407487902, | |
| "learning_rate": 2.0202185656605426e-08, | |
| "loss": 1.8523, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.790880503144654, | |
| "grad_norm": 2.2369615207847096, | |
| "learning_rate": 1.9904666079374393e-08, | |
| "loss": 1.8127, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.7924528301886793, | |
| "grad_norm": 2.0960658556245133, | |
| "learning_rate": 1.9609289455619883e-08, | |
| "loss": 1.5551, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.7940251572327044, | |
| "grad_norm": 2.085123820184512, | |
| "learning_rate": 1.9316057702904277e-08, | |
| "loss": 1.8461, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.7955974842767297, | |
| "grad_norm": 2.082494020984376, | |
| "learning_rate": 1.9024972724865423e-08, | |
| "loss": 1.9352, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.7971698113207548, | |
| "grad_norm": 2.093869902799492, | |
| "learning_rate": 1.8736036411204626e-08, | |
| "loss": 1.7042, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.79874213836478, | |
| "grad_norm": 2.3137806461183845, | |
| "learning_rate": 1.8449250637674162e-08, | |
| "loss": 1.7895, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.800314465408805, | |
| "grad_norm": 2.1172529387064563, | |
| "learning_rate": 1.8164617266065252e-08, | |
| "loss": 1.8503, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.8018867924528301, | |
| "grad_norm": 2.1273968333527282, | |
| "learning_rate": 1.7882138144195685e-08, | |
| "loss": 1.6297, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.8034591194968552, | |
| "grad_norm": 2.268804643140494, | |
| "learning_rate": 1.7601815105898215e-08, | |
| "loss": 1.73, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.8050314465408805, | |
| "grad_norm": 2.113208754031722, | |
| "learning_rate": 1.7323649971008393e-08, | |
| "loss": 1.6516, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.8066037735849056, | |
| "grad_norm": 2.3541202063911695, | |
| "learning_rate": 1.7047644545352903e-08, | |
| "loss": 1.681, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.808176100628931, | |
| "grad_norm": 2.248100742975873, | |
| "learning_rate": 1.6773800620737644e-08, | |
| "loss": 1.8295, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.809748427672956, | |
| "grad_norm": 1.9422747223256194, | |
| "learning_rate": 1.650211997493634e-08, | |
| "loss": 1.6425, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.8113207547169812, | |
| "grad_norm": 2.1573082083805577, | |
| "learning_rate": 1.6232604371678726e-08, | |
| "loss": 1.923, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.8128930817610063, | |
| "grad_norm": 2.1325777796205254, | |
| "learning_rate": 1.5965255560639394e-08, | |
| "loss": 1.7978, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.8144654088050314, | |
| "grad_norm": 2.002442725649601, | |
| "learning_rate": 1.5700075277426262e-08, | |
| "loss": 1.7531, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.8160377358490565, | |
| "grad_norm": 1.9280913979981322, | |
| "learning_rate": 1.543706524356917e-08, | |
| "loss": 1.8257, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.8176100628930818, | |
| "grad_norm": 2.1344422655015496, | |
| "learning_rate": 1.5176227166509058e-08, | |
| "loss": 1.7187, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.819182389937107, | |
| "grad_norm": 2.2004037593468224, | |
| "learning_rate": 1.491756273958673e-08, | |
| "loss": 1.9901, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.8207547169811322, | |
| "grad_norm": 2.0033561732048955, | |
| "learning_rate": 1.466107364203158e-08, | |
| "loss": 1.679, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.8223270440251573, | |
| "grad_norm": 2.060413883738862, | |
| "learning_rate": 1.440676153895114e-08, | |
| "loss": 1.7332, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.8238993710691824, | |
| "grad_norm": 2.2111387133657314, | |
| "learning_rate": 1.4154628081320014e-08, | |
| "loss": 1.7536, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.8254716981132075, | |
| "grad_norm": 2.131048492611576, | |
| "learning_rate": 1.3904674905969066e-08, | |
| "loss": 1.8556, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.8270440251572326, | |
| "grad_norm": 2.0610846483559664, | |
| "learning_rate": 1.3656903635575167e-08, | |
| "loss": 1.8077, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.8286163522012577, | |
| "grad_norm": 2.1736177189318733, | |
| "learning_rate": 1.3411315878650237e-08, | |
| "loss": 1.6993, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.830188679245283, | |
| "grad_norm": 2.2099683077755583, | |
| "learning_rate": 1.3167913229531135e-08, | |
| "loss": 1.7427, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.8317610062893082, | |
| "grad_norm": 2.222331633508216, | |
| "learning_rate": 1.2926697268369101e-08, | |
| "loss": 1.6154, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 2.056907346969074, | |
| "learning_rate": 1.2687669561119568e-08, | |
| "loss": 1.5743, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.8349056603773586, | |
| "grad_norm": 2.2254294416157134, | |
| "learning_rate": 1.245083165953194e-08, | |
| "loss": 1.6526, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.8364779874213837, | |
| "grad_norm": 1.975892358409506, | |
| "learning_rate": 1.2216185101139692e-08, | |
| "loss": 1.6919, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.8380503144654088, | |
| "grad_norm": 1.9887040123280215, | |
| "learning_rate": 1.1983731409250181e-08, | |
| "loss": 1.9421, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.8396226415094339, | |
| "grad_norm": 2.065623115774402, | |
| "learning_rate": 1.1753472092934858e-08, | |
| "loss": 1.664, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.841194968553459, | |
| "grad_norm": 2.0212401073533495, | |
| "learning_rate": 1.1525408647019474e-08, | |
| "loss": 1.9716, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.8427672955974843, | |
| "grad_norm": 2.0410818459050075, | |
| "learning_rate": 1.129954255207441e-08, | |
| "loss": 1.647, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.8443396226415094, | |
| "grad_norm": 2.2637429629509964, | |
| "learning_rate": 1.1075875274404834e-08, | |
| "loss": 1.9561, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.8459119496855347, | |
| "grad_norm": 2.1445378180592356, | |
| "learning_rate": 1.0854408266041543e-08, | |
| "loss": 1.8834, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.8474842767295598, | |
| "grad_norm": 2.171320431386727, | |
| "learning_rate": 1.063514296473132e-08, | |
| "loss": 1.8161, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.849056603773585, | |
| "grad_norm": 2.0483569709125966, | |
| "learning_rate": 1.041808079392753e-08, | |
| "loss": 1.811, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.85062893081761, | |
| "grad_norm": 2.1123320054916697, | |
| "learning_rate": 1.020322316278111e-08, | |
| "loss": 1.8621, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.8522012578616351, | |
| "grad_norm": 1.9907743276040535, | |
| "learning_rate": 9.990571466131276e-09, | |
| "loss": 1.8181, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.8537735849056602, | |
| "grad_norm": 2.2022266952576572, | |
| "learning_rate": 9.780127084496431e-09, | |
| "loss": 1.5795, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.8553459119496856, | |
| "grad_norm": 2.1497279507664535, | |
| "learning_rate": 9.571891384065272e-09, | |
| "loss": 1.6334, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.8569182389937107, | |
| "grad_norm": 2.2214098590343028, | |
| "learning_rate": 9.365865716687965e-09, | |
| "loss": 2.0922, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.858490566037736, | |
| "grad_norm": 2.064033723821905, | |
| "learning_rate": 9.162051419867245e-09, | |
| "loss": 1.6108, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.860062893081761, | |
| "grad_norm": 2.426092174363886, | |
| "learning_rate": 8.960449816749832e-09, | |
| "loss": 1.5644, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.8616352201257862, | |
| "grad_norm": 2.074214131520031, | |
| "learning_rate": 8.761062216117765e-09, | |
| "loss": 1.5808, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.8632075471698113, | |
| "grad_norm": 2.1958655328363146, | |
| "learning_rate": 8.563889912380046e-09, | |
| "loss": 1.8186, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.8647798742138364, | |
| "grad_norm": 2.2651556186496626, | |
| "learning_rate": 8.368934185564013e-09, | |
| "loss": 1.6952, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.8663522012578615, | |
| "grad_norm": 2.1256908151950915, | |
| "learning_rate": 8.176196301307264e-09, | |
| "loss": 1.7424, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.8679245283018868, | |
| "grad_norm": 2.0538768988218536, | |
| "learning_rate": 7.985677510849332e-09, | |
| "loss": 1.923, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.869496855345912, | |
| "grad_norm": 2.061086071767639, | |
| "learning_rate": 7.79737905102349e-09, | |
| "loss": 2.0386, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.8710691823899372, | |
| "grad_norm": 2.170281435880277, | |
| "learning_rate": 7.611302144248788e-09, | |
| "loss": 1.979, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.8726415094339623, | |
| "grad_norm": 2.0702664036289944, | |
| "learning_rate": 7.427447998522241e-09, | |
| "loss": 1.9203, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.8742138364779874, | |
| "grad_norm": 2.186437061351241, | |
| "learning_rate": 7.245817807410742e-09, | |
| "loss": 2.0204, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.8757861635220126, | |
| "grad_norm": 2.403033320304919, | |
| "learning_rate": 7.066412750043532e-09, | |
| "loss": 1.8169, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.8773584905660377, | |
| "grad_norm": 2.113165661681811, | |
| "learning_rate": 6.889233991104421e-09, | |
| "loss": 1.8014, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.8789308176100628, | |
| "grad_norm": 2.1364558866941907, | |
| "learning_rate": 6.714282680824252e-09, | |
| "loss": 1.8172, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.880503144654088, | |
| "grad_norm": 2.085369574271752, | |
| "learning_rate": 6.54155995497348e-09, | |
| "loss": 1.9062, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.8820754716981132, | |
| "grad_norm": 2.158776809363429, | |
| "learning_rate": 6.371066934854713e-09, | |
| "loss": 1.7571, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.8836477987421385, | |
| "grad_norm": 2.1204855812030936, | |
| "learning_rate": 6.202804727295441e-09, | |
| "loss": 1.4898, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.8852201257861636, | |
| "grad_norm": 1.9588917914820057, | |
| "learning_rate": 6.036774424641044e-09, | |
| "loss": 1.9212, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.8867924528301887, | |
| "grad_norm": 2.289926267202458, | |
| "learning_rate": 5.872977104747451e-09, | |
| "loss": 1.8261, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8883647798742138, | |
| "grad_norm": 2.248912074374657, | |
| "learning_rate": 5.711413830974177e-09, | |
| "loss": 1.5361, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.889937106918239, | |
| "grad_norm": 2.1042256295937656, | |
| "learning_rate": 5.5520856521775685e-09, | |
| "loss": 1.8066, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.891509433962264, | |
| "grad_norm": 2.180029267811429, | |
| "learning_rate": 5.3949936027039625e-09, | |
| "loss": 2.1126, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.8930817610062893, | |
| "grad_norm": 2.1625082758181744, | |
| "learning_rate": 5.240138702382729e-09, | |
| "loss": 1.7901, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.8946540880503144, | |
| "grad_norm": 2.1665119765928, | |
| "learning_rate": 5.087521956520058e-09, | |
| "loss": 1.8569, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.8962264150943398, | |
| "grad_norm": 2.0777861736774725, | |
| "learning_rate": 4.937144355891998e-09, | |
| "loss": 1.793, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.8977987421383649, | |
| "grad_norm": 2.066304356371116, | |
| "learning_rate": 4.789006876738438e-09, | |
| "loss": 1.7053, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.89937106918239, | |
| "grad_norm": 2.0957159756988006, | |
| "learning_rate": 4.643110480756423e-09, | |
| "loss": 1.7215, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.900943396226415, | |
| "grad_norm": 2.111256856709086, | |
| "learning_rate": 4.499456115094169e-09, | |
| "loss": 1.8245, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.9025157232704402, | |
| "grad_norm": 1.8782104151978112, | |
| "learning_rate": 4.358044712344688e-09, | |
| "loss": 1.6741, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.9040880503144653, | |
| "grad_norm": 2.141467865299451, | |
| "learning_rate": 4.218877190539927e-09, | |
| "loss": 1.7832, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.9056603773584906, | |
| "grad_norm": 2.0679767608171584, | |
| "learning_rate": 4.081954453144737e-09, | |
| "loss": 1.8032, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.9072327044025157, | |
| "grad_norm": 2.044401061483079, | |
| "learning_rate": 3.947277389051013e-09, | |
| "loss": 1.5991, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.908805031446541, | |
| "grad_norm": 2.3204022598290424, | |
| "learning_rate": 3.814846872571781e-09, | |
| "loss": 2.1206, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.9103773584905661, | |
| "grad_norm": 2.194698485486922, | |
| "learning_rate": 3.68466376343588e-09, | |
| "loss": 1.8517, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.9119496855345912, | |
| "grad_norm": 2.116787406844949, | |
| "learning_rate": 3.556728906781897e-09, | |
| "loss": 1.751, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.9135220125786163, | |
| "grad_norm": 2.105786362411949, | |
| "learning_rate": 3.4310431331531553e-09, | |
| "loss": 1.6647, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.9150943396226414, | |
| "grad_norm": 2.23499471842287, | |
| "learning_rate": 3.307607258491962e-09, | |
| "loss": 1.7705, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.9166666666666665, | |
| "grad_norm": 2.131569173737192, | |
| "learning_rate": 3.18642208413456e-09, | |
| "loss": 1.889, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.9182389937106918, | |
| "grad_norm": 2.0221410280515, | |
| "learning_rate": 3.067488396805684e-09, | |
| "loss": 1.5895, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.919811320754717, | |
| "grad_norm": 2.000235641315321, | |
| "learning_rate": 2.950806968613745e-09, | |
| "loss": 1.8866, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.9213836477987423, | |
| "grad_norm": 2.6217655796510377, | |
| "learning_rate": 2.8363785570455436e-09, | |
| "loss": 1.7802, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.9229559748427674, | |
| "grad_norm": 2.3713251232928996, | |
| "learning_rate": 2.724203904961531e-09, | |
| "loss": 1.7429, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.9245283018867925, | |
| "grad_norm": 2.0162450187498515, | |
| "learning_rate": 2.6142837405909113e-09, | |
| "loss": 1.6852, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.9261006289308176, | |
| "grad_norm": 1.9184165558043198, | |
| "learning_rate": 2.5066187775269034e-09, | |
| "loss": 2.0859, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.9276729559748427, | |
| "grad_norm": 2.0980898585416403, | |
| "learning_rate": 2.401209714722152e-09, | |
| "loss": 1.776, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.9292452830188678, | |
| "grad_norm": 2.102737428792809, | |
| "learning_rate": 2.2980572364841854e-09, | |
| "loss": 1.8705, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.930817610062893, | |
| "grad_norm": 2.235581223829696, | |
| "learning_rate": 2.1971620124709435e-09, | |
| "loss": 1.8633, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.9323899371069182, | |
| "grad_norm": 2.1003684288920006, | |
| "learning_rate": 2.098524697686427e-09, | |
| "loss": 1.8356, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.9339622641509435, | |
| "grad_norm": 2.085896244612752, | |
| "learning_rate": 2.002145932476501e-09, | |
| "loss": 1.6605, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.9355345911949686, | |
| "grad_norm": 2.249282131879236, | |
| "learning_rate": 1.908026342524738e-09, | |
| "loss": 1.572, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.9371069182389937, | |
| "grad_norm": 2.133881200071366, | |
| "learning_rate": 1.8161665388481796e-09, | |
| "loss": 1.7188, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.9386792452830188, | |
| "grad_norm": 2.0523111952597124, | |
| "learning_rate": 1.7265671177936092e-09, | |
| "loss": 2.0153, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.940251572327044, | |
| "grad_norm": 2.095543253062672, | |
| "learning_rate": 1.639228661033587e-09, | |
| "loss": 1.5568, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.941823899371069, | |
| "grad_norm": 2.2647929929993764, | |
| "learning_rate": 1.554151735562642e-09, | |
| "loss": 2.0448, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.9433962264150944, | |
| "grad_norm": 2.1607959179153404, | |
| "learning_rate": 1.47133689369362e-09, | |
| "loss": 1.6183, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.9449685534591195, | |
| "grad_norm": 2.069314519936326, | |
| "learning_rate": 1.3907846730541073e-09, | |
| "loss": 1.7316, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.9465408805031448, | |
| "grad_norm": 2.402914241865446, | |
| "learning_rate": 1.3124955965828966e-09, | |
| "loss": 1.6961, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.9481132075471699, | |
| "grad_norm": 2.418179213383202, | |
| "learning_rate": 1.2364701725266436e-09, | |
| "loss": 1.8602, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.949685534591195, | |
| "grad_norm": 2.0182001003309122, | |
| "learning_rate": 1.162708894436526e-09, | |
| "loss": 1.8426, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.95125786163522, | |
| "grad_norm": 1.9544224426157475, | |
| "learning_rate": 1.0912122411651348e-09, | |
| "loss": 1.912, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.9528301886792452, | |
| "grad_norm": 2.1284678150486975, | |
| "learning_rate": 1.0219806768631712e-09, | |
| "loss": 1.7659, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.9544025157232703, | |
| "grad_norm": 2.103380584288797, | |
| "learning_rate": 9.550146509766489e-10, | |
| "loss": 1.6285, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.9559748427672956, | |
| "grad_norm": 2.318883943366702, | |
| "learning_rate": 8.903145982438242e-10, | |
| "loss": 1.7361, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.9575471698113207, | |
| "grad_norm": 2.0210439137006007, | |
| "learning_rate": 8.278809386924767e-10, | |
| "loss": 1.8383, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.959119496855346, | |
| "grad_norm": 2.122214078228037, | |
| "learning_rate": 7.677140776371494e-10, | |
| "loss": 1.6549, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.9606918238993711, | |
| "grad_norm": 2.148904360940334, | |
| "learning_rate": 7.0981440567639e-10, | |
| "loss": 1.777, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.9622641509433962, | |
| "grad_norm": 2.157751893993133, | |
| "learning_rate": 6.541822986904589e-10, | |
| "loss": 1.7056, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.9638364779874213, | |
| "grad_norm": 1.9449801123342445, | |
| "learning_rate": 6.00818117838725e-10, | |
| "loss": 1.7219, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.9654088050314464, | |
| "grad_norm": 2.054951250857022, | |
| "learning_rate": 5.497222095572962e-10, | |
| "loss": 1.6673, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9654088050314464, | |
| "eval_sat2_MCTS_chains_SFT_val_loss": 1.6998926401138306, | |
| "eval_sat2_MCTS_chains_SFT_val_runtime": 91.7731, | |
| "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.202, | |
| "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9669811320754715, | |
| "grad_norm": 2.05069047537442, | |
| "learning_rate": 5.008949055568812e-10, | |
| "loss": 1.7608, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.9685534591194969, | |
| "grad_norm": 2.154702108749229, | |
| "learning_rate": 4.543365228205753e-10, | |
| "loss": 1.6858, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.970125786163522, | |
| "grad_norm": 2.075168647544607, | |
| "learning_rate": 4.1004736360183976e-10, | |
| "loss": 1.6641, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.9716981132075473, | |
| "grad_norm": 2.2047735011240226, | |
| "learning_rate": 3.6802771542244204e-10, | |
| "loss": 1.7977, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.9732704402515724, | |
| "grad_norm": 2.128359624030826, | |
| "learning_rate": 3.2827785107074623e-10, | |
| "loss": 1.5849, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.9748427672955975, | |
| "grad_norm": 2.1524898340309595, | |
| "learning_rate": 2.907980285997702e-10, | |
| "loss": 1.6826, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.9764150943396226, | |
| "grad_norm": 2.0578150120619005, | |
| "learning_rate": 2.555884913256312e-10, | |
| "loss": 1.9806, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.9779874213836477, | |
| "grad_norm": 2.0645335423876263, | |
| "learning_rate": 2.2264946782599158e-10, | |
| "loss": 1.7219, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.9795597484276728, | |
| "grad_norm": 2.1474076116693337, | |
| "learning_rate": 1.9198117193838791e-10, | |
| "loss": 1.853, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.9811320754716981, | |
| "grad_norm": 2.0521508824628105, | |
| "learning_rate": 1.6358380275906524e-10, | |
| "loss": 1.6023, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.9827044025157232, | |
| "grad_norm": 2.3303244823585625, | |
| "learning_rate": 1.3745754464157823e-10, | |
| "loss": 1.7856, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.9842767295597485, | |
| "grad_norm": 2.112760564566335, | |
| "learning_rate": 1.1360256719554762e-10, | |
| "loss": 2.052, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.9858490566037736, | |
| "grad_norm": 2.0973660824743323, | |
| "learning_rate": 9.201902528561123e-11, | |
| "loss": 1.8666, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.9874213836477987, | |
| "grad_norm": 2.1164187723139043, | |
| "learning_rate": 7.270705903056895e-11, | |
| "loss": 1.7619, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.9889937106918238, | |
| "grad_norm": 2.184592687671629, | |
| "learning_rate": 5.566679380210049e-11, | |
| "loss": 1.8967, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.990566037735849, | |
| "grad_norm": 2.0610164571083756, | |
| "learning_rate": 4.089834022437677e-11, | |
| "loss": 1.8686, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.992138364779874, | |
| "grad_norm": 2.176062181352369, | |
| "learning_rate": 2.8401794173049666e-11, | |
| "loss": 1.5427, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.9937106918238994, | |
| "grad_norm": 2.0675804076152313, | |
| "learning_rate": 1.8177236774707948e-11, | |
| "loss": 1.6378, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.9952830188679245, | |
| "grad_norm": 2.290052278929668, | |
| "learning_rate": 1.022473440637217e-11, | |
| "loss": 1.656, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.9968553459119498, | |
| "grad_norm": 2.287180872342125, | |
| "learning_rate": 4.544338695106064e-12, | |
| "loss": 1.8058, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.998427672955975, | |
| "grad_norm": 2.0048323595450275, | |
| "learning_rate": 1.1360865176279766e-12, | |
| "loss": 1.6803, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.049630174327001, | |
| "learning_rate": 0.0, | |
| "loss": 1.7973, | |
| "step": 1272 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1272, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 214395832958976.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |