| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.2410651716888577, | |
| "eval_steps": 500, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002803083391730904, | |
| "grad_norm": 5.1484708590919785, | |
| "learning_rate": 0.0, | |
| "loss": 0.9179, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005606166783461808, | |
| "grad_norm": 5.487034043781978, | |
| "learning_rate": 5.58659217877095e-08, | |
| "loss": 0.9131, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008409250175192713, | |
| "grad_norm": 4.33614237795095, | |
| "learning_rate": 1.11731843575419e-07, | |
| "loss": 0.7853, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.011212333566923615, | |
| "grad_norm": 4.779222535523081, | |
| "learning_rate": 1.6759776536312851e-07, | |
| "loss": 0.8746, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01401541695865452, | |
| "grad_norm": 4.7983170759971925, | |
| "learning_rate": 2.23463687150838e-07, | |
| "loss": 0.842, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.016818500350385426, | |
| "grad_norm": 5.257374876703264, | |
| "learning_rate": 2.793296089385475e-07, | |
| "loss": 0.8968, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.019621583742116328, | |
| "grad_norm": 4.504136127398602, | |
| "learning_rate": 3.3519553072625703e-07, | |
| "loss": 0.8337, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02242466713384723, | |
| "grad_norm": 4.414835579986107, | |
| "learning_rate": 3.910614525139665e-07, | |
| "loss": 0.8165, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.025227750525578137, | |
| "grad_norm": 4.43151857886484, | |
| "learning_rate": 4.46927374301676e-07, | |
| "loss": 0.8104, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.02803083391730904, | |
| "grad_norm": 4.676366358231245, | |
| "learning_rate": 5.027932960893855e-07, | |
| "loss": 0.864, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.030833917309039945, | |
| "grad_norm": 4.0730644153085525, | |
| "learning_rate": 5.58659217877095e-07, | |
| "loss": 0.7886, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03363700070077085, | |
| "grad_norm": 4.172588296774101, | |
| "learning_rate": 6.145251396648045e-07, | |
| "loss": 0.837, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.036440084092501754, | |
| "grad_norm": 3.9778364933448436, | |
| "learning_rate": 6.703910614525141e-07, | |
| "loss": 0.7436, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.039243167484232656, | |
| "grad_norm": 3.7060565427554772, | |
| "learning_rate": 7.262569832402236e-07, | |
| "loss": 0.7504, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04204625087596356, | |
| "grad_norm": 3.570615569466669, | |
| "learning_rate": 7.82122905027933e-07, | |
| "loss": 0.7675, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04484933426769446, | |
| "grad_norm": 4.036222308910076, | |
| "learning_rate": 8.379888268156425e-07, | |
| "loss": 0.7887, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04765241765942537, | |
| "grad_norm": 4.070011925383957, | |
| "learning_rate": 8.93854748603352e-07, | |
| "loss": 0.816, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.050455501051156273, | |
| "grad_norm": 3.5494513513779853, | |
| "learning_rate": 9.497206703910615e-07, | |
| "loss": 0.7722, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.053258584442887176, | |
| "grad_norm": 3.0005510195801204, | |
| "learning_rate": 1.005586592178771e-06, | |
| "loss": 0.7587, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05606166783461808, | |
| "grad_norm": 3.0859421992565337, | |
| "learning_rate": 1.0614525139664806e-06, | |
| "loss": 0.7771, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05886475122634898, | |
| "grad_norm": 2.6475980984097967, | |
| "learning_rate": 1.11731843575419e-06, | |
| "loss": 0.6854, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.06166783461807989, | |
| "grad_norm": 2.6610671622699016, | |
| "learning_rate": 1.1731843575418997e-06, | |
| "loss": 0.7375, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0644709180098108, | |
| "grad_norm": 2.5679290240076766, | |
| "learning_rate": 1.229050279329609e-06, | |
| "loss": 0.6947, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0672740014015417, | |
| "grad_norm": 2.3180850072986154, | |
| "learning_rate": 1.2849162011173185e-06, | |
| "loss": 0.6874, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0700770847932726, | |
| "grad_norm": 2.0906746546884425, | |
| "learning_rate": 1.3407821229050281e-06, | |
| "loss": 0.7266, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07288016818500351, | |
| "grad_norm": 2.4132207093897358, | |
| "learning_rate": 1.3966480446927375e-06, | |
| "loss": 0.6971, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0756832515767344, | |
| "grad_norm": 2.227410085533061, | |
| "learning_rate": 1.4525139664804472e-06, | |
| "loss": 0.7127, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07848633496846531, | |
| "grad_norm": 2.3134005002437656, | |
| "learning_rate": 1.5083798882681566e-06, | |
| "loss": 0.7078, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08128941836019622, | |
| "grad_norm": 2.2309237756431672, | |
| "learning_rate": 1.564245810055866e-06, | |
| "loss": 0.6541, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08409250175192712, | |
| "grad_norm": 2.6641061491321403, | |
| "learning_rate": 1.6201117318435754e-06, | |
| "loss": 0.6975, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08689558514365803, | |
| "grad_norm": 2.079611257901034, | |
| "learning_rate": 1.675977653631285e-06, | |
| "loss": 0.6206, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08969866853538892, | |
| "grad_norm": 2.2660193487426965, | |
| "learning_rate": 1.7318435754189947e-06, | |
| "loss": 0.6665, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09250175192711983, | |
| "grad_norm": 2.146259839931881, | |
| "learning_rate": 1.787709497206704e-06, | |
| "loss": 0.6787, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.09530483531885074, | |
| "grad_norm": 1.9839911994049741, | |
| "learning_rate": 1.8435754189944135e-06, | |
| "loss": 0.6521, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09810791871058164, | |
| "grad_norm": 1.6089250761086926, | |
| "learning_rate": 1.899441340782123e-06, | |
| "loss": 0.6217, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.10091100210231255, | |
| "grad_norm": 1.5876982534765143, | |
| "learning_rate": 1.9553072625698325e-06, | |
| "loss": 0.5882, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.10371408549404344, | |
| "grad_norm": 1.7222150058922074, | |
| "learning_rate": 2.011173184357542e-06, | |
| "loss": 0.5512, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.10651716888577435, | |
| "grad_norm": 1.8050081053129243, | |
| "learning_rate": 2.067039106145252e-06, | |
| "loss": 0.588, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.10932025227750526, | |
| "grad_norm": 1.6434624778264646, | |
| "learning_rate": 2.1229050279329612e-06, | |
| "loss": 0.5931, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.11212333566923616, | |
| "grad_norm": 1.7526009826479017, | |
| "learning_rate": 2.1787709497206706e-06, | |
| "loss": 0.5785, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11492641906096707, | |
| "grad_norm": 1.6437945906395883, | |
| "learning_rate": 2.23463687150838e-06, | |
| "loss": 0.5689, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11772950245269796, | |
| "grad_norm": 1.5330527531585365, | |
| "learning_rate": 2.2905027932960895e-06, | |
| "loss": 0.5748, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12053258584442887, | |
| "grad_norm": 1.5288757695374733, | |
| "learning_rate": 2.3463687150837993e-06, | |
| "loss": 0.5448, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.12333566923615978, | |
| "grad_norm": 1.6436762237674445, | |
| "learning_rate": 2.4022346368715087e-06, | |
| "loss": 0.5589, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12613875262789068, | |
| "grad_norm": 1.4778751424937657, | |
| "learning_rate": 2.458100558659218e-06, | |
| "loss": 0.5309, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.1289418360196216, | |
| "grad_norm": 1.3893104305753303, | |
| "learning_rate": 2.5139664804469276e-06, | |
| "loss": 0.5186, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1317449194113525, | |
| "grad_norm": 1.4030470862296944, | |
| "learning_rate": 2.569832402234637e-06, | |
| "loss": 0.5142, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1345480028030834, | |
| "grad_norm": 1.365418665825913, | |
| "learning_rate": 2.6256983240223464e-06, | |
| "loss": 0.5474, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1373510861948143, | |
| "grad_norm": 1.322864310730053, | |
| "learning_rate": 2.6815642458100562e-06, | |
| "loss": 0.5433, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.1401541695865452, | |
| "grad_norm": 1.2841278505792295, | |
| "learning_rate": 2.7374301675977656e-06, | |
| "loss": 0.5461, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1429572529782761, | |
| "grad_norm": 1.4398037033168882, | |
| "learning_rate": 2.793296089385475e-06, | |
| "loss": 0.5089, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.14576033637000702, | |
| "grad_norm": 1.2507634146653344, | |
| "learning_rate": 2.8491620111731845e-06, | |
| "loss": 0.5085, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14856341976173792, | |
| "grad_norm": 1.216547503273406, | |
| "learning_rate": 2.9050279329608943e-06, | |
| "loss": 0.5096, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1513665031534688, | |
| "grad_norm": 1.2820613031068602, | |
| "learning_rate": 2.9608938547486037e-06, | |
| "loss": 0.5577, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15416958654519972, | |
| "grad_norm": 1.2887194937768396, | |
| "learning_rate": 3.016759776536313e-06, | |
| "loss": 0.4896, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.15697266993693063, | |
| "grad_norm": 1.3158880492878813, | |
| "learning_rate": 3.0726256983240226e-06, | |
| "loss": 0.5328, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15977575332866154, | |
| "grad_norm": 1.291865092954309, | |
| "learning_rate": 3.128491620111732e-06, | |
| "loss": 0.5188, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.16257883672039244, | |
| "grad_norm": 1.389666654490359, | |
| "learning_rate": 3.1843575418994414e-06, | |
| "loss": 0.5578, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.16538192011212333, | |
| "grad_norm": 1.2544095434531006, | |
| "learning_rate": 3.240223463687151e-06, | |
| "loss": 0.4622, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.16818500350385424, | |
| "grad_norm": 1.2954956535487678, | |
| "learning_rate": 3.2960893854748607e-06, | |
| "loss": 0.5285, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17098808689558515, | |
| "grad_norm": 1.3448826889626828, | |
| "learning_rate": 3.35195530726257e-06, | |
| "loss": 0.4972, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.17379117028731605, | |
| "grad_norm": 1.2888750839140442, | |
| "learning_rate": 3.4078212290502795e-06, | |
| "loss": 0.4565, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.17659425367904696, | |
| "grad_norm": 1.4478169815511575, | |
| "learning_rate": 3.4636871508379893e-06, | |
| "loss": 0.5086, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.17939733707077785, | |
| "grad_norm": 1.2345721438632833, | |
| "learning_rate": 3.5195530726256988e-06, | |
| "loss": 0.4887, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.18220042046250876, | |
| "grad_norm": 1.2750020626675667, | |
| "learning_rate": 3.575418994413408e-06, | |
| "loss": 0.4556, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.18500350385423966, | |
| "grad_norm": 1.1585364434531529, | |
| "learning_rate": 3.6312849162011176e-06, | |
| "loss": 0.4755, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18780658724597057, | |
| "grad_norm": 1.2251136398194762, | |
| "learning_rate": 3.687150837988827e-06, | |
| "loss": 0.5002, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.19060967063770148, | |
| "grad_norm": 1.2785943600022907, | |
| "learning_rate": 3.7430167597765364e-06, | |
| "loss": 0.5142, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.19341275402943237, | |
| "grad_norm": 1.292978777507132, | |
| "learning_rate": 3.798882681564246e-06, | |
| "loss": 0.4883, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.19621583742116327, | |
| "grad_norm": 1.4760688229840058, | |
| "learning_rate": 3.854748603351956e-06, | |
| "loss": 0.4798, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19901892081289418, | |
| "grad_norm": 1.3534620335056344, | |
| "learning_rate": 3.910614525139665e-06, | |
| "loss": 0.478, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.2018220042046251, | |
| "grad_norm": 1.3463500240376554, | |
| "learning_rate": 3.9664804469273745e-06, | |
| "loss": 0.497, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.204625087596356, | |
| "grad_norm": 1.5078028142094007, | |
| "learning_rate": 4.022346368715084e-06, | |
| "loss": 0.4786, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.20742817098808689, | |
| "grad_norm": 1.34495483790764, | |
| "learning_rate": 4.078212290502794e-06, | |
| "loss": 0.4939, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2102312543798178, | |
| "grad_norm": 1.2340956043439963, | |
| "learning_rate": 4.134078212290504e-06, | |
| "loss": 0.4921, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2130343377715487, | |
| "grad_norm": 1.3085115291626992, | |
| "learning_rate": 4.189944134078213e-06, | |
| "loss": 0.4754, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.2158374211632796, | |
| "grad_norm": 1.252728806989629, | |
| "learning_rate": 4.2458100558659224e-06, | |
| "loss": 0.5128, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.21864050455501052, | |
| "grad_norm": 1.4126957206402742, | |
| "learning_rate": 4.301675977653632e-06, | |
| "loss": 0.4885, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2214435879467414, | |
| "grad_norm": 1.3208716039081965, | |
| "learning_rate": 4.357541899441341e-06, | |
| "loss": 0.4786, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.22424667133847231, | |
| "grad_norm": 1.2574451697332716, | |
| "learning_rate": 4.413407821229051e-06, | |
| "loss": 0.474, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22704975473020322, | |
| "grad_norm": 1.287485870277125, | |
| "learning_rate": 4.46927374301676e-06, | |
| "loss": 0.505, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.22985283812193413, | |
| "grad_norm": 1.272270407613237, | |
| "learning_rate": 4.5251396648044695e-06, | |
| "loss": 0.4595, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.23265592151366504, | |
| "grad_norm": 1.469219474277953, | |
| "learning_rate": 4.581005586592179e-06, | |
| "loss": 0.4652, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.23545900490539592, | |
| "grad_norm": 1.2699463875565882, | |
| "learning_rate": 4.636871508379888e-06, | |
| "loss": 0.5229, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.23826208829712683, | |
| "grad_norm": 1.2856158617452633, | |
| "learning_rate": 4.692737430167599e-06, | |
| "loss": 0.4532, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.24106517168885774, | |
| "grad_norm": 1.4817415826333444, | |
| "learning_rate": 4.748603351955308e-06, | |
| "loss": 0.4771, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.24386825508058865, | |
| "grad_norm": 1.305173329779559, | |
| "learning_rate": 4.8044692737430175e-06, | |
| "loss": 0.4517, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.24667133847231956, | |
| "grad_norm": 1.236166182409069, | |
| "learning_rate": 4.860335195530727e-06, | |
| "loss": 0.4381, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.24947442186405044, | |
| "grad_norm": 1.3538165210321431, | |
| "learning_rate": 4.916201117318436e-06, | |
| "loss": 0.4601, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.25227750525578135, | |
| "grad_norm": 1.2959980524853771, | |
| "learning_rate": 4.972067039106146e-06, | |
| "loss": 0.5043, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25508058864751226, | |
| "grad_norm": 1.257731041700467, | |
| "learning_rate": 5.027932960893855e-06, | |
| "loss": 0.5042, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.2578836720392432, | |
| "grad_norm": 1.1829514019122422, | |
| "learning_rate": 5.0837988826815645e-06, | |
| "loss": 0.5011, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2606867554309741, | |
| "grad_norm": 1.2039776317025723, | |
| "learning_rate": 5.139664804469274e-06, | |
| "loss": 0.4459, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.263489838822705, | |
| "grad_norm": 1.1537490486646111, | |
| "learning_rate": 5.195530726256983e-06, | |
| "loss": 0.4135, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2662929222144359, | |
| "grad_norm": 1.1836759088562796, | |
| "learning_rate": 5.251396648044693e-06, | |
| "loss": 0.4286, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2690960056061668, | |
| "grad_norm": 1.1644038720022742, | |
| "learning_rate": 5.307262569832403e-06, | |
| "loss": 0.4511, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.27189908899789766, | |
| "grad_norm": 1.2841070495233056, | |
| "learning_rate": 5.3631284916201125e-06, | |
| "loss": 0.4614, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2747021723896286, | |
| "grad_norm": 1.2674957253762746, | |
| "learning_rate": 5.418994413407822e-06, | |
| "loss": 0.4053, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2775052557813595, | |
| "grad_norm": 1.2564154040578148, | |
| "learning_rate": 5.474860335195531e-06, | |
| "loss": 0.4754, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.2803083391730904, | |
| "grad_norm": 1.2707428474291496, | |
| "learning_rate": 5.530726256983241e-06, | |
| "loss": 0.4543, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2831114225648213, | |
| "grad_norm": 1.2696450733230285, | |
| "learning_rate": 5.58659217877095e-06, | |
| "loss": 0.4711, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2859145059565522, | |
| "grad_norm": 1.1385639533491085, | |
| "learning_rate": 5.6424581005586595e-06, | |
| "loss": 0.5073, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2887175893482831, | |
| "grad_norm": 1.2476843993569997, | |
| "learning_rate": 5.698324022346369e-06, | |
| "loss": 0.4832, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.29152067274001403, | |
| "grad_norm": 1.2700803146631456, | |
| "learning_rate": 5.754189944134078e-06, | |
| "loss": 0.456, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.29432375613174494, | |
| "grad_norm": 1.3063859932211015, | |
| "learning_rate": 5.810055865921789e-06, | |
| "loss": 0.4663, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.29712683952347585, | |
| "grad_norm": 1.37190656711132, | |
| "learning_rate": 5.865921787709497e-06, | |
| "loss": 0.425, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2999299229152067, | |
| "grad_norm": 1.336749076355929, | |
| "learning_rate": 5.9217877094972075e-06, | |
| "loss": 0.4532, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3027330063069376, | |
| "grad_norm": 1.3049835257948252, | |
| "learning_rate": 5.977653631284917e-06, | |
| "loss": 0.4433, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3055360896986685, | |
| "grad_norm": 1.3177076581236578, | |
| "learning_rate": 6.033519553072626e-06, | |
| "loss": 0.4422, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.30833917309039943, | |
| "grad_norm": 1.211428787666853, | |
| "learning_rate": 6.089385474860336e-06, | |
| "loss": 0.4736, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.31114225648213034, | |
| "grad_norm": 1.246339941396602, | |
| "learning_rate": 6.145251396648045e-06, | |
| "loss": 0.4684, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.31394533987386125, | |
| "grad_norm": 1.139096368470202, | |
| "learning_rate": 6.2011173184357546e-06, | |
| "loss": 0.4711, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.31674842326559216, | |
| "grad_norm": 1.1649095863200642, | |
| "learning_rate": 6.256983240223464e-06, | |
| "loss": 0.4552, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.31955150665732307, | |
| "grad_norm": 1.2679588271735107, | |
| "learning_rate": 6.312849162011173e-06, | |
| "loss": 0.4623, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.322354590049054, | |
| "grad_norm": 1.2436538258573628, | |
| "learning_rate": 6.368715083798883e-06, | |
| "loss": 0.45, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3251576734407849, | |
| "grad_norm": 1.2362511627285122, | |
| "learning_rate": 6.424581005586593e-06, | |
| "loss": 0.4871, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.32796075683251574, | |
| "grad_norm": 1.2278455511243713, | |
| "learning_rate": 6.480446927374302e-06, | |
| "loss": 0.4304, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.33076384022424665, | |
| "grad_norm": 1.2708930790868107, | |
| "learning_rate": 6.536312849162012e-06, | |
| "loss": 0.4403, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.33356692361597756, | |
| "grad_norm": 1.1581710061976354, | |
| "learning_rate": 6.592178770949721e-06, | |
| "loss": 0.4266, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.33637000700770847, | |
| "grad_norm": 1.2609788947789218, | |
| "learning_rate": 6.648044692737431e-06, | |
| "loss": 0.4579, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3391730903994394, | |
| "grad_norm": 1.30396849435344, | |
| "learning_rate": 6.70391061452514e-06, | |
| "loss": 0.4427, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3419761737911703, | |
| "grad_norm": 1.2740701750338232, | |
| "learning_rate": 6.7597765363128496e-06, | |
| "loss": 0.4267, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3447792571829012, | |
| "grad_norm": 1.3545521723600704, | |
| "learning_rate": 6.815642458100559e-06, | |
| "loss": 0.4572, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3475823405746321, | |
| "grad_norm": 1.2832927748990008, | |
| "learning_rate": 6.871508379888268e-06, | |
| "loss": 0.4088, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.350385423966363, | |
| "grad_norm": 1.184187983962104, | |
| "learning_rate": 6.927374301675979e-06, | |
| "loss": 0.4178, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.35318850735809393, | |
| "grad_norm": 1.2692730688245826, | |
| "learning_rate": 6.983240223463687e-06, | |
| "loss": 0.4196, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3559915907498248, | |
| "grad_norm": 1.2286033209087615, | |
| "learning_rate": 7.0391061452513975e-06, | |
| "loss": 0.4623, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3587946741415557, | |
| "grad_norm": 1.3466288536167503, | |
| "learning_rate": 7.094972067039106e-06, | |
| "loss": 0.4304, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3615977575332866, | |
| "grad_norm": 1.1856599904519147, | |
| "learning_rate": 7.150837988826816e-06, | |
| "loss": 0.4577, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3644008409250175, | |
| "grad_norm": 1.2878071137137472, | |
| "learning_rate": 7.206703910614526e-06, | |
| "loss": 0.4512, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3672039243167484, | |
| "grad_norm": 1.2539611050742803, | |
| "learning_rate": 7.262569832402235e-06, | |
| "loss": 0.4492, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.37000700770847933, | |
| "grad_norm": 1.284633807763949, | |
| "learning_rate": 7.318435754189945e-06, | |
| "loss": 0.4469, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.37281009110021024, | |
| "grad_norm": 1.2173213618653451, | |
| "learning_rate": 7.374301675977654e-06, | |
| "loss": 0.4217, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.37561317449194115, | |
| "grad_norm": 1.6356085966747518, | |
| "learning_rate": 7.430167597765364e-06, | |
| "loss": 0.4674, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.37841625788367206, | |
| "grad_norm": 1.3110035885253777, | |
| "learning_rate": 7.486033519553073e-06, | |
| "loss": 0.4216, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.38121934127540297, | |
| "grad_norm": 1.182519047664272, | |
| "learning_rate": 7.541899441340783e-06, | |
| "loss": 0.4399, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3840224246671338, | |
| "grad_norm": 1.2628129755669273, | |
| "learning_rate": 7.597765363128492e-06, | |
| "loss": 0.4736, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.38682550805886473, | |
| "grad_norm": 1.156134702750007, | |
| "learning_rate": 7.653631284916202e-06, | |
| "loss": 0.4186, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.38962859145059564, | |
| "grad_norm": 1.2403816950592557, | |
| "learning_rate": 7.709497206703911e-06, | |
| "loss": 0.4262, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.39243167484232655, | |
| "grad_norm": 1.2297089863632304, | |
| "learning_rate": 7.76536312849162e-06, | |
| "loss": 0.4526, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.39523475823405746, | |
| "grad_norm": 1.3400924871254594, | |
| "learning_rate": 7.82122905027933e-06, | |
| "loss": 0.4632, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.39803784162578837, | |
| "grad_norm": 1.2753065276019078, | |
| "learning_rate": 7.87709497206704e-06, | |
| "loss": 0.46, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4008409250175193, | |
| "grad_norm": 1.3918096859402436, | |
| "learning_rate": 7.932960893854749e-06, | |
| "loss": 0.4265, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.4036440084092502, | |
| "grad_norm": 1.4385006652657821, | |
| "learning_rate": 7.988826815642458e-06, | |
| "loss": 0.4462, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.4064470918009811, | |
| "grad_norm": 1.571552869991663, | |
| "learning_rate": 8.044692737430168e-06, | |
| "loss": 0.4369, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.409250175192712, | |
| "grad_norm": 1.337744524737072, | |
| "learning_rate": 8.100558659217877e-06, | |
| "loss": 0.4211, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.41205325858444286, | |
| "grad_norm": 1.2554961285373831, | |
| "learning_rate": 8.156424581005588e-06, | |
| "loss": 0.4646, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.41485634197617377, | |
| "grad_norm": 1.208695228744723, | |
| "learning_rate": 8.212290502793296e-06, | |
| "loss": 0.4418, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4176594253679047, | |
| "grad_norm": 1.1581362053521904, | |
| "learning_rate": 8.268156424581007e-06, | |
| "loss": 0.4424, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4204625087596356, | |
| "grad_norm": 1.3537182059958488, | |
| "learning_rate": 8.324022346368715e-06, | |
| "loss": 0.4466, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4232655921513665, | |
| "grad_norm": 1.3804359968061342, | |
| "learning_rate": 8.379888268156426e-06, | |
| "loss": 0.42, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.4260686755430974, | |
| "grad_norm": 1.3492166236416887, | |
| "learning_rate": 8.435754189944135e-06, | |
| "loss": 0.3851, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4288717589348283, | |
| "grad_norm": 1.2139526698153973, | |
| "learning_rate": 8.491620111731845e-06, | |
| "loss": 0.4643, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4316748423265592, | |
| "grad_norm": 1.2990736104285787, | |
| "learning_rate": 8.547486033519554e-06, | |
| "loss": 0.4666, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.43447792571829014, | |
| "grad_norm": 1.29053798784143, | |
| "learning_rate": 8.603351955307264e-06, | |
| "loss": 0.4384, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.43728100911002105, | |
| "grad_norm": 1.4367357751592493, | |
| "learning_rate": 8.659217877094973e-06, | |
| "loss": 0.4653, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4400840925017519, | |
| "grad_norm": 1.2956449867477386, | |
| "learning_rate": 8.715083798882683e-06, | |
| "loss": 0.4511, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4428871758934828, | |
| "grad_norm": 1.4106445195453023, | |
| "learning_rate": 8.770949720670392e-06, | |
| "loss": 0.4456, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4456902592852137, | |
| "grad_norm": 1.2719950562427567, | |
| "learning_rate": 8.826815642458101e-06, | |
| "loss": 0.4707, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.44849334267694463, | |
| "grad_norm": 1.3074151462469454, | |
| "learning_rate": 8.88268156424581e-06, | |
| "loss": 0.413, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.45129642606867554, | |
| "grad_norm": 1.2729626005251127, | |
| "learning_rate": 8.93854748603352e-06, | |
| "loss": 0.4564, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.45409950946040645, | |
| "grad_norm": 1.3933224721084836, | |
| "learning_rate": 8.99441340782123e-06, | |
| "loss": 0.4752, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.45690259285213736, | |
| "grad_norm": 1.2274005354049757, | |
| "learning_rate": 9.050279329608939e-06, | |
| "loss": 0.4077, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.45970567624386827, | |
| "grad_norm": 1.304403404718221, | |
| "learning_rate": 9.106145251396648e-06, | |
| "loss": 0.4032, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4625087596355992, | |
| "grad_norm": 1.2077276736873666, | |
| "learning_rate": 9.162011173184358e-06, | |
| "loss": 0.4521, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4653118430273301, | |
| "grad_norm": 1.314785322595634, | |
| "learning_rate": 9.217877094972067e-06, | |
| "loss": 0.4561, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.46811492641906094, | |
| "grad_norm": 1.324805699258867, | |
| "learning_rate": 9.273743016759777e-06, | |
| "loss": 0.442, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.47091800981079185, | |
| "grad_norm": 1.2628735158829716, | |
| "learning_rate": 9.329608938547486e-06, | |
| "loss": 0.4493, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.47372109320252276, | |
| "grad_norm": 1.3524729567879255, | |
| "learning_rate": 9.385474860335197e-06, | |
| "loss": 0.412, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.47652417659425367, | |
| "grad_norm": 1.3838707187120078, | |
| "learning_rate": 9.441340782122905e-06, | |
| "loss": 0.4248, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4793272599859846, | |
| "grad_norm": 1.3477705759051088, | |
| "learning_rate": 9.497206703910616e-06, | |
| "loss": 0.4362, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4821303433777155, | |
| "grad_norm": 1.4029804576470706, | |
| "learning_rate": 9.553072625698325e-06, | |
| "loss": 0.4229, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4849334267694464, | |
| "grad_norm": 1.3212879811094376, | |
| "learning_rate": 9.608938547486035e-06, | |
| "loss": 0.4462, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.4877365101611773, | |
| "grad_norm": 1.2418714279374736, | |
| "learning_rate": 9.664804469273744e-06, | |
| "loss": 0.4393, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4905395935529082, | |
| "grad_norm": 1.308356659629243, | |
| "learning_rate": 9.720670391061454e-06, | |
| "loss": 0.4212, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4933426769446391, | |
| "grad_norm": 1.3384997992620349, | |
| "learning_rate": 9.776536312849163e-06, | |
| "loss": 0.425, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.49614576033637003, | |
| "grad_norm": 1.2644912624597537, | |
| "learning_rate": 9.832402234636873e-06, | |
| "loss": 0.4269, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4989488437281009, | |
| "grad_norm": 1.127396335911068, | |
| "learning_rate": 9.888268156424582e-06, | |
| "loss": 0.4054, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5017519271198319, | |
| "grad_norm": 1.2928897405382775, | |
| "learning_rate": 9.944134078212291e-06, | |
| "loss": 0.4246, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.5045550105115627, | |
| "grad_norm": 1.2201307841604, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4229, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5073580939032937, | |
| "grad_norm": 1.1930036804674071, | |
| "learning_rate": 9.999990433600054e-06, | |
| "loss": 0.4228, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.5101611772950245, | |
| "grad_norm": 1.2128301840951208, | |
| "learning_rate": 9.999961734436818e-06, | |
| "loss": 0.421, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5129642606867554, | |
| "grad_norm": 1.3718335494726508, | |
| "learning_rate": 9.999913902620112e-06, | |
| "loss": 0.416, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5157673440784863, | |
| "grad_norm": 1.2739769316629392, | |
| "learning_rate": 9.99984693833297e-06, | |
| "loss": 0.4411, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5185704274702172, | |
| "grad_norm": 1.2217498022218247, | |
| "learning_rate": 9.999760841831632e-06, | |
| "loss": 0.3921, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5213735108619482, | |
| "grad_norm": 1.2368078852394706, | |
| "learning_rate": 9.999655613445552e-06, | |
| "loss": 0.4485, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.524176594253679, | |
| "grad_norm": 1.2783184977008666, | |
| "learning_rate": 9.999531253577393e-06, | |
| "loss": 0.4306, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.52697967764541, | |
| "grad_norm": 1.2187408632625338, | |
| "learning_rate": 9.999387762703025e-06, | |
| "loss": 0.447, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5297827610371408, | |
| "grad_norm": 1.2385500306724004, | |
| "learning_rate": 9.999225141371528e-06, | |
| "loss": 0.4776, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5325858444288718, | |
| "grad_norm": 1.176251336630363, | |
| "learning_rate": 9.999043390205176e-06, | |
| "loss": 0.3769, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5353889278206027, | |
| "grad_norm": 1.205975361951228, | |
| "learning_rate": 9.998842509899456e-06, | |
| "loss": 0.3858, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5381920112123336, | |
| "grad_norm": 1.2260710319775825, | |
| "learning_rate": 9.998622501223045e-06, | |
| "loss": 0.4159, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5409950946040645, | |
| "grad_norm": 1.2624425805658182, | |
| "learning_rate": 9.998383365017821e-06, | |
| "loss": 0.4543, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5437981779957953, | |
| "grad_norm": 1.3494484879919946, | |
| "learning_rate": 9.998125102198855e-06, | |
| "loss": 0.4532, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5466012613875263, | |
| "grad_norm": 1.2926626394342269, | |
| "learning_rate": 9.9978477137544e-06, | |
| "loss": 0.4229, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5494043447792571, | |
| "grad_norm": 1.2615225655334812, | |
| "learning_rate": 9.997551200745905e-06, | |
| "loss": 0.4234, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5522074281709881, | |
| "grad_norm": 1.2447669303378754, | |
| "learning_rate": 9.997235564307992e-06, | |
| "loss": 0.4381, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.555010511562719, | |
| "grad_norm": 1.3385879305082795, | |
| "learning_rate": 9.996900805648462e-06, | |
| "loss": 0.4225, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5578135949544499, | |
| "grad_norm": 1.2403486069231195, | |
| "learning_rate": 9.996546926048291e-06, | |
| "loss": 0.4336, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5606166783461808, | |
| "grad_norm": 1.161836362730849, | |
| "learning_rate": 9.99617392686162e-06, | |
| "loss": 0.4083, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5634197617379118, | |
| "grad_norm": 1.178827297998461, | |
| "learning_rate": 9.995781809515754e-06, | |
| "loss": 0.4065, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5662228451296426, | |
| "grad_norm": 1.2949773092417358, | |
| "learning_rate": 9.995370575511151e-06, | |
| "loss": 0.4337, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5690259285213735, | |
| "grad_norm": 1.2049998818098415, | |
| "learning_rate": 9.994940226421423e-06, | |
| "loss": 0.4351, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5718290119131044, | |
| "grad_norm": 1.2038562515827964, | |
| "learning_rate": 9.994490763893328e-06, | |
| "loss": 0.4238, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5746320953048353, | |
| "grad_norm": 1.2682914962745881, | |
| "learning_rate": 9.994022189646762e-06, | |
| "loss": 0.4974, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5774351786965662, | |
| "grad_norm": 1.247037559522988, | |
| "learning_rate": 9.99353450547475e-06, | |
| "loss": 0.4922, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5802382620882971, | |
| "grad_norm": 1.3261046076147458, | |
| "learning_rate": 9.993027713243444e-06, | |
| "loss": 0.4865, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5830413454800281, | |
| "grad_norm": 1.2671942425231029, | |
| "learning_rate": 9.992501814892118e-06, | |
| "loss": 0.4399, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5858444288717589, | |
| "grad_norm": 1.2209344442747003, | |
| "learning_rate": 9.991956812433152e-06, | |
| "loss": 0.4551, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5886475122634899, | |
| "grad_norm": 1.3204049859067966, | |
| "learning_rate": 9.991392707952032e-06, | |
| "loss": 0.4535, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5914505956552207, | |
| "grad_norm": 1.2810704874390295, | |
| "learning_rate": 9.990809503607337e-06, | |
| "loss": 0.4057, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5942536790469517, | |
| "grad_norm": 1.2683455733657367, | |
| "learning_rate": 9.99020720163073e-06, | |
| "loss": 0.4438, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5970567624386826, | |
| "grad_norm": 1.1041168309285287, | |
| "learning_rate": 9.989585804326963e-06, | |
| "loss": 0.3951, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5998598458304134, | |
| "grad_norm": 1.238260130865137, | |
| "learning_rate": 9.988945314073842e-06, | |
| "loss": 0.43, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.6026629292221444, | |
| "grad_norm": 1.2534944948678515, | |
| "learning_rate": 9.988285733322246e-06, | |
| "loss": 0.4358, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.6054660126138752, | |
| "grad_norm": 1.377817255639513, | |
| "learning_rate": 9.9876070645961e-06, | |
| "loss": 0.4243, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6082690960056062, | |
| "grad_norm": 1.2458026957407808, | |
| "learning_rate": 9.986909310492369e-06, | |
| "loss": 0.4377, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.611072179397337, | |
| "grad_norm": 1.2147593299228276, | |
| "learning_rate": 9.98619247368105e-06, | |
| "loss": 0.4578, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.613875262789068, | |
| "grad_norm": 1.3231829202411483, | |
| "learning_rate": 9.985456556905168e-06, | |
| "loss": 0.4343, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6166783461807989, | |
| "grad_norm": 1.249603899966305, | |
| "learning_rate": 9.984701562980745e-06, | |
| "loss": 0.4208, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6194814295725298, | |
| "grad_norm": 1.302692808670413, | |
| "learning_rate": 9.983927494796817e-06, | |
| "loss": 0.4467, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6222845129642607, | |
| "grad_norm": 1.2344333360492339, | |
| "learning_rate": 9.983134355315397e-06, | |
| "loss": 0.427, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6250875963559915, | |
| "grad_norm": 1.2271410804803238, | |
| "learning_rate": 9.982322147571486e-06, | |
| "loss": 0.4295, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.6278906797477225, | |
| "grad_norm": 1.2807750184640823, | |
| "learning_rate": 9.98149087467304e-06, | |
| "loss": 0.4019, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6306937631394534, | |
| "grad_norm": 1.323850652586938, | |
| "learning_rate": 9.980640539800978e-06, | |
| "loss": 0.4625, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6334968465311843, | |
| "grad_norm": 1.27630164124042, | |
| "learning_rate": 9.979771146209159e-06, | |
| "loss": 0.4068, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6362999299229152, | |
| "grad_norm": 1.1999461485138891, | |
| "learning_rate": 9.978882697224365e-06, | |
| "loss": 0.4323, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6391030133146461, | |
| "grad_norm": 1.2581968225422646, | |
| "learning_rate": 9.977975196246302e-06, | |
| "loss": 0.4339, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.641906096706377, | |
| "grad_norm": 1.240275234197154, | |
| "learning_rate": 9.977048646747578e-06, | |
| "loss": 0.4244, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.644709180098108, | |
| "grad_norm": 1.245467039884696, | |
| "learning_rate": 9.976103052273689e-06, | |
| "loss": 0.4152, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6475122634898388, | |
| "grad_norm": 1.214344966764467, | |
| "learning_rate": 9.97513841644301e-06, | |
| "loss": 0.4601, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6503153468815698, | |
| "grad_norm": 1.4007471905080375, | |
| "learning_rate": 9.974154742946775e-06, | |
| "loss": 0.4322, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6531184302733006, | |
| "grad_norm": 1.2215958249221774, | |
| "learning_rate": 9.973152035549072e-06, | |
| "loss": 0.4433, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6559215136650315, | |
| "grad_norm": 2.120995206727059, | |
| "learning_rate": 9.972130298086821e-06, | |
| "loss": 0.4457, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6587245970567625, | |
| "grad_norm": 1.253021146434877, | |
| "learning_rate": 9.97108953446976e-06, | |
| "loss": 0.4384, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6615276804484933, | |
| "grad_norm": 1.154866193185819, | |
| "learning_rate": 9.970029748680437e-06, | |
| "loss": 0.4186, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6643307638402243, | |
| "grad_norm": 1.1532100653062711, | |
| "learning_rate": 9.96895094477418e-06, | |
| "loss": 0.4031, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6671338472319551, | |
| "grad_norm": 1.1162555329667243, | |
| "learning_rate": 9.967853126879103e-06, | |
| "loss": 0.4323, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6699369306236861, | |
| "grad_norm": 1.1151043685218318, | |
| "learning_rate": 9.96673629919607e-06, | |
| "loss": 0.4191, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6727400140154169, | |
| "grad_norm": 1.1399063080426746, | |
| "learning_rate": 9.965600465998686e-06, | |
| "loss": 0.3987, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6755430974071479, | |
| "grad_norm": 1.1987807131271386, | |
| "learning_rate": 9.964445631633289e-06, | |
| "loss": 0.4545, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6783461807988788, | |
| "grad_norm": 1.1401905995341726, | |
| "learning_rate": 9.963271800518921e-06, | |
| "loss": 0.4524, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6811492641906096, | |
| "grad_norm": 1.1958925095826711, | |
| "learning_rate": 9.962078977147315e-06, | |
| "loss": 0.4049, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6839523475823406, | |
| "grad_norm": 1.1843051651809535, | |
| "learning_rate": 9.960867166082884e-06, | |
| "loss": 0.4432, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6867554309740714, | |
| "grad_norm": 1.207650238930726, | |
| "learning_rate": 9.959636371962693e-06, | |
| "loss": 0.4027, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6895585143658024, | |
| "grad_norm": 1.2451548104875187, | |
| "learning_rate": 9.95838659949645e-06, | |
| "loss": 0.394, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6923615977575333, | |
| "grad_norm": 1.2128418404191421, | |
| "learning_rate": 9.957117853466488e-06, | |
| "loss": 0.4291, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6951646811492642, | |
| "grad_norm": 1.2652572678307166, | |
| "learning_rate": 9.955830138727736e-06, | |
| "loss": 0.3884, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6979677645409951, | |
| "grad_norm": 1.2253466216228863, | |
| "learning_rate": 9.95452346020771e-06, | |
| "loss": 0.4227, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.700770847932726, | |
| "grad_norm": 1.2241766479031995, | |
| "learning_rate": 9.9531978229065e-06, | |
| "loss": 0.4035, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7035739313244569, | |
| "grad_norm": 1.2815556845680005, | |
| "learning_rate": 9.951853231896732e-06, | |
| "loss": 0.4127, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.7063770147161879, | |
| "grad_norm": 1.2098755581989178, | |
| "learning_rate": 9.950489692323564e-06, | |
| "loss": 0.3664, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.7091800981079187, | |
| "grad_norm": 1.240560733421699, | |
| "learning_rate": 9.949107209404664e-06, | |
| "loss": 0.4006, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.7119831814996496, | |
| "grad_norm": 1.2504378152370468, | |
| "learning_rate": 9.947705788430185e-06, | |
| "loss": 0.4322, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7147862648913805, | |
| "grad_norm": 1.1186927421611175, | |
| "learning_rate": 9.946285434762748e-06, | |
| "loss": 0.4353, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.7175893482831114, | |
| "grad_norm": 1.1453594474291244, | |
| "learning_rate": 9.944846153837423e-06, | |
| "loss": 0.4236, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7203924316748423, | |
| "grad_norm": 1.2847311022382035, | |
| "learning_rate": 9.943387951161702e-06, | |
| "loss": 0.4614, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7231955150665732, | |
| "grad_norm": 1.2992529016358338, | |
| "learning_rate": 9.941910832315488e-06, | |
| "loss": 0.4381, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7259985984583042, | |
| "grad_norm": 1.2578873200013914, | |
| "learning_rate": 9.940414802951065e-06, | |
| "loss": 0.419, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.728801681850035, | |
| "grad_norm": 1.2120714209492573, | |
| "learning_rate": 9.938899868793074e-06, | |
| "loss": 0.4052, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.731604765241766, | |
| "grad_norm": 1.4598068796114356, | |
| "learning_rate": 9.937366035638507e-06, | |
| "loss": 0.4364, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7344078486334968, | |
| "grad_norm": 1.1979088951162828, | |
| "learning_rate": 9.935813309356666e-06, | |
| "loss": 0.424, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7372109320252278, | |
| "grad_norm": 1.2090949870085903, | |
| "learning_rate": 9.934241695889152e-06, | |
| "loss": 0.3708, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7400140154169587, | |
| "grad_norm": 1.1497339936210509, | |
| "learning_rate": 9.93265120124984e-06, | |
| "loss": 0.3913, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7428170988086895, | |
| "grad_norm": 1.1332107950494912, | |
| "learning_rate": 9.931041831524849e-06, | |
| "loss": 0.4215, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7456201822004205, | |
| "grad_norm": 1.3248126918741736, | |
| "learning_rate": 9.92941359287253e-06, | |
| "loss": 0.4317, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7484232655921513, | |
| "grad_norm": 1.13149487465254, | |
| "learning_rate": 9.927766491523438e-06, | |
| "loss": 0.4085, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7512263489838823, | |
| "grad_norm": 1.2498640918111141, | |
| "learning_rate": 9.926100533780304e-06, | |
| "loss": 0.43, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7540294323756132, | |
| "grad_norm": 1.1043646249350867, | |
| "learning_rate": 9.924415726018015e-06, | |
| "loss": 0.4296, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7568325157673441, | |
| "grad_norm": 1.1939471676982463, | |
| "learning_rate": 9.92271207468359e-06, | |
| "loss": 0.4259, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.759635599159075, | |
| "grad_norm": 1.1734698198260922, | |
| "learning_rate": 9.92098958629615e-06, | |
| "loss": 0.4315, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7624386825508059, | |
| "grad_norm": 1.2366506225023914, | |
| "learning_rate": 9.919248267446904e-06, | |
| "loss": 0.444, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7652417659425368, | |
| "grad_norm": 1.2935143937218294, | |
| "learning_rate": 9.91748812479911e-06, | |
| "loss": 0.4387, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7680448493342676, | |
| "grad_norm": 1.2165170235545248, | |
| "learning_rate": 9.915709165088063e-06, | |
| "loss": 0.4302, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7708479327259986, | |
| "grad_norm": 1.2403333327332409, | |
| "learning_rate": 9.913911395121055e-06, | |
| "loss": 0.3891, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7736510161177295, | |
| "grad_norm": 1.1409335992116196, | |
| "learning_rate": 9.912094821777362e-06, | |
| "loss": 0.4127, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7764540995094604, | |
| "grad_norm": 1.246259341579308, | |
| "learning_rate": 9.910259452008213e-06, | |
| "loss": 0.4433, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7792571829011913, | |
| "grad_norm": 1.2152200486896416, | |
| "learning_rate": 9.908405292836758e-06, | |
| "loss": 0.4377, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7820602662929222, | |
| "grad_norm": 1.2701928834270326, | |
| "learning_rate": 9.906532351358047e-06, | |
| "loss": 0.4105, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7848633496846531, | |
| "grad_norm": 1.2002485609561173, | |
| "learning_rate": 9.904640634739007e-06, | |
| "loss": 0.4232, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7876664330763841, | |
| "grad_norm": 1.220915641122119, | |
| "learning_rate": 9.902730150218403e-06, | |
| "loss": 0.3833, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7904695164681149, | |
| "grad_norm": 1.2221671775995764, | |
| "learning_rate": 9.900800905106817e-06, | |
| "loss": 0.4414, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7932725998598459, | |
| "grad_norm": 1.112496291445257, | |
| "learning_rate": 9.898852906786624e-06, | |
| "loss": 0.4461, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7960756832515767, | |
| "grad_norm": 1.146299506919545, | |
| "learning_rate": 9.896886162711955e-06, | |
| "loss": 0.3982, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7988787666433076, | |
| "grad_norm": 1.2228381900025063, | |
| "learning_rate": 9.894900680408674e-06, | |
| "loss": 0.4267, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.8016818500350386, | |
| "grad_norm": 1.1804828603616793, | |
| "learning_rate": 9.892896467474348e-06, | |
| "loss": 0.397, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.8044849334267694, | |
| "grad_norm": 1.1779671579043933, | |
| "learning_rate": 9.890873531578218e-06, | |
| "loss": 0.4178, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.8072880168185004, | |
| "grad_norm": 1.1973271401486334, | |
| "learning_rate": 9.888831880461171e-06, | |
| "loss": 0.431, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8100911002102312, | |
| "grad_norm": 1.3283745000265912, | |
| "learning_rate": 9.886771521935706e-06, | |
| "loss": 0.4171, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.8128941836019622, | |
| "grad_norm": 1.2927994872402913, | |
| "learning_rate": 9.88469246388591e-06, | |
| "loss": 0.4244, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.815697266993693, | |
| "grad_norm": 1.1224149905731677, | |
| "learning_rate": 9.88259471426742e-06, | |
| "loss": 0.4008, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.818500350385424, | |
| "grad_norm": 1.1423126396133614, | |
| "learning_rate": 9.880478281107404e-06, | |
| "loss": 0.4376, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8213034337771549, | |
| "grad_norm": 1.1440128493566712, | |
| "learning_rate": 9.87834317250452e-06, | |
| "loss": 0.392, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.8241065171688857, | |
| "grad_norm": 1.2339673417138244, | |
| "learning_rate": 9.876189396628889e-06, | |
| "loss": 0.4144, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8269096005606167, | |
| "grad_norm": 1.2058763608514713, | |
| "learning_rate": 9.874016961722062e-06, | |
| "loss": 0.4253, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8297126839523475, | |
| "grad_norm": 1.2173187014135698, | |
| "learning_rate": 9.871825876096992e-06, | |
| "loss": 0.4006, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8325157673440785, | |
| "grad_norm": 1.168628149123796, | |
| "learning_rate": 9.869616148138002e-06, | |
| "loss": 0.4251, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8353188507358094, | |
| "grad_norm": 1.1695554670785075, | |
| "learning_rate": 9.867387786300743e-06, | |
| "loss": 0.4097, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.8381219341275403, | |
| "grad_norm": 1.172817055064454, | |
| "learning_rate": 9.865140799112183e-06, | |
| "loss": 0.4432, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8409250175192712, | |
| "grad_norm": 1.2450897072979896, | |
| "learning_rate": 9.862875195170547e-06, | |
| "loss": 0.392, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8437281009110021, | |
| "grad_norm": 1.1423510742995624, | |
| "learning_rate": 9.860590983145307e-06, | |
| "loss": 0.4269, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.846531184302733, | |
| "grad_norm": 1.321059116128163, | |
| "learning_rate": 9.858288171777137e-06, | |
| "loss": 0.4188, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.849334267694464, | |
| "grad_norm": 1.1798784843294252, | |
| "learning_rate": 9.855966769877882e-06, | |
| "loss": 0.3991, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8521373510861948, | |
| "grad_norm": 1.1931905188257432, | |
| "learning_rate": 9.853626786330529e-06, | |
| "loss": 0.4248, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8549404344779257, | |
| "grad_norm": 1.1213243707873113, | |
| "learning_rate": 9.851268230089159e-06, | |
| "loss": 0.3863, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8577435178696566, | |
| "grad_norm": 1.1340621760749867, | |
| "learning_rate": 9.848891110178936e-06, | |
| "loss": 0.3895, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8605466012613875, | |
| "grad_norm": 1.2154043545797661, | |
| "learning_rate": 9.846495435696044e-06, | |
| "loss": 0.4013, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8633496846531185, | |
| "grad_norm": 1.2117311685748091, | |
| "learning_rate": 9.844081215807684e-06, | |
| "loss": 0.3986, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8661527680448493, | |
| "grad_norm": 1.1801740574734845, | |
| "learning_rate": 9.841648459752006e-06, | |
| "loss": 0.4101, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8689558514365803, | |
| "grad_norm": 1.190858126233445, | |
| "learning_rate": 9.839197176838102e-06, | |
| "loss": 0.4294, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8717589348283111, | |
| "grad_norm": 1.1329469986887595, | |
| "learning_rate": 9.836727376445949e-06, | |
| "loss": 0.4173, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8745620182200421, | |
| "grad_norm": 1.363901133414959, | |
| "learning_rate": 9.834239068026388e-06, | |
| "loss": 0.417, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.877365101611773, | |
| "grad_norm": 1.2669242824719738, | |
| "learning_rate": 9.831732261101081e-06, | |
| "loss": 0.4015, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8801681850035038, | |
| "grad_norm": 1.2125343853948405, | |
| "learning_rate": 9.829206965262477e-06, | |
| "loss": 0.4076, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8829712683952348, | |
| "grad_norm": 1.3037486777718863, | |
| "learning_rate": 9.826663190173766e-06, | |
| "loss": 0.4103, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8857743517869656, | |
| "grad_norm": 1.1678797734363202, | |
| "learning_rate": 9.824100945568862e-06, | |
| "loss": 0.4303, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8885774351786966, | |
| "grad_norm": 1.2814171712048255, | |
| "learning_rate": 9.821520241252345e-06, | |
| "loss": 0.4444, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8913805185704274, | |
| "grad_norm": 1.2662469748298908, | |
| "learning_rate": 9.818921087099435e-06, | |
| "loss": 0.418, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8941836019621584, | |
| "grad_norm": 1.1798395604362133, | |
| "learning_rate": 9.81630349305595e-06, | |
| "loss": 0.4006, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8969866853538893, | |
| "grad_norm": 1.2210641823585986, | |
| "learning_rate": 9.813667469138273e-06, | |
| "loss": 0.4121, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8997897687456202, | |
| "grad_norm": 1.113750506404835, | |
| "learning_rate": 9.811013025433306e-06, | |
| "loss": 0.4001, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.9025928521373511, | |
| "grad_norm": 1.1820454191767555, | |
| "learning_rate": 9.80834017209844e-06, | |
| "loss": 0.4076, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.905395935529082, | |
| "grad_norm": 1.3666718348985063, | |
| "learning_rate": 9.805648919361505e-06, | |
| "loss": 0.4264, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.9081990189208129, | |
| "grad_norm": 1.1007502397580635, | |
| "learning_rate": 9.802939277520742e-06, | |
| "loss": 0.3926, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.9110021023125437, | |
| "grad_norm": 1.146317357704069, | |
| "learning_rate": 9.800211256944758e-06, | |
| "loss": 0.383, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.9138051857042747, | |
| "grad_norm": 1.1540030060149493, | |
| "learning_rate": 9.797464868072489e-06, | |
| "loss": 0.4081, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.9166082690960056, | |
| "grad_norm": 1.1968849969085296, | |
| "learning_rate": 9.794700121413153e-06, | |
| "loss": 0.4259, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.9194113524877365, | |
| "grad_norm": 1.1565356272261325, | |
| "learning_rate": 9.791917027546223e-06, | |
| "loss": 0.421, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9222144358794674, | |
| "grad_norm": 1.205469308148384, | |
| "learning_rate": 9.789115597121372e-06, | |
| "loss": 0.3445, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9250175192711984, | |
| "grad_norm": 1.3194233731376825, | |
| "learning_rate": 9.786295840858444e-06, | |
| "loss": 0.4042, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.9278206026629292, | |
| "grad_norm": 1.2302518477644826, | |
| "learning_rate": 9.783457769547403e-06, | |
| "loss": 0.4077, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.9306236860546602, | |
| "grad_norm": 1.173610556015113, | |
| "learning_rate": 9.7806013940483e-06, | |
| "loss": 0.438, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.933426769446391, | |
| "grad_norm": 1.179100651678326, | |
| "learning_rate": 9.777726725291227e-06, | |
| "loss": 0.3853, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.9362298528381219, | |
| "grad_norm": 1.2097175846660944, | |
| "learning_rate": 9.774833774276278e-06, | |
| "loss": 0.4083, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9390329362298528, | |
| "grad_norm": 1.1234197630642826, | |
| "learning_rate": 9.7719225520735e-06, | |
| "loss": 0.3823, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.9418360196215837, | |
| "grad_norm": 1.2464354979721357, | |
| "learning_rate": 9.768993069822862e-06, | |
| "loss": 0.4106, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9446391030133147, | |
| "grad_norm": 1.2436160510868548, | |
| "learning_rate": 9.766045338734204e-06, | |
| "loss": 0.3906, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.9474421864050455, | |
| "grad_norm": 1.123928535467421, | |
| "learning_rate": 9.763079370087196e-06, | |
| "loss": 0.3872, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9502452697967765, | |
| "grad_norm": 1.1822751620037166, | |
| "learning_rate": 9.760095175231292e-06, | |
| "loss": 0.4356, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9530483531885073, | |
| "grad_norm": 1.1621700619231359, | |
| "learning_rate": 9.757092765585695e-06, | |
| "loss": 0.4097, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9558514365802383, | |
| "grad_norm": 1.1746658349586656, | |
| "learning_rate": 9.754072152639305e-06, | |
| "loss": 0.4427, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9586545199719692, | |
| "grad_norm": 1.1182360916017116, | |
| "learning_rate": 9.75103334795068e-06, | |
| "loss": 0.4284, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9614576033637001, | |
| "grad_norm": 1.1591583547321715, | |
| "learning_rate": 9.747976363147985e-06, | |
| "loss": 0.4247, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.964260686755431, | |
| "grad_norm": 1.1567249929917516, | |
| "learning_rate": 9.744901209928959e-06, | |
| "loss": 0.3987, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9670637701471618, | |
| "grad_norm": 1.1069495365564859, | |
| "learning_rate": 9.741807900060858e-06, | |
| "loss": 0.4308, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9698668535388928, | |
| "grad_norm": 1.1817032261442333, | |
| "learning_rate": 9.73869644538042e-06, | |
| "loss": 0.4497, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9726699369306236, | |
| "grad_norm": 1.05271516179772, | |
| "learning_rate": 9.73556685779381e-06, | |
| "loss": 0.4119, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9754730203223546, | |
| "grad_norm": 1.170513661266846, | |
| "learning_rate": 9.732419149276586e-06, | |
| "loss": 0.3896, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9782761037140855, | |
| "grad_norm": 1.1360548736118075, | |
| "learning_rate": 9.729253331873643e-06, | |
| "loss": 0.3971, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9810791871058164, | |
| "grad_norm": 1.1978579692167597, | |
| "learning_rate": 9.726069417699167e-06, | |
| "loss": 0.4166, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9838822704975473, | |
| "grad_norm": 1.1945154693243045, | |
| "learning_rate": 9.722867418936601e-06, | |
| "loss": 0.4263, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9866853538892782, | |
| "grad_norm": 1.166230468871644, | |
| "learning_rate": 9.719647347838584e-06, | |
| "loss": 0.4042, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9894884372810091, | |
| "grad_norm": 1.1826135025862843, | |
| "learning_rate": 9.71640921672691e-06, | |
| "loss": 0.3896, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9922915206727401, | |
| "grad_norm": 1.2382476108408331, | |
| "learning_rate": 9.713153037992484e-06, | |
| "loss": 0.385, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9950946040644709, | |
| "grad_norm": 1.1847256862767788, | |
| "learning_rate": 9.709878824095266e-06, | |
| "loss": 0.4272, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9978976874562018, | |
| "grad_norm": 1.2348908655062902, | |
| "learning_rate": 9.706586587564236e-06, | |
| "loss": 0.3994, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.2348908655062902, | |
| "learning_rate": 9.703276340997331e-06, | |
| "loss": 0.3106, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.002803083391731, | |
| "grad_norm": 1.0847506359320522, | |
| "learning_rate": 9.699948097061412e-06, | |
| "loss": 0.3172, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.0056061667834617, | |
| "grad_norm": 1.0314207594332343, | |
| "learning_rate": 9.6966018684922e-06, | |
| "loss": 0.2897, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.0084092501751927, | |
| "grad_norm": 1.0439078471345375, | |
| "learning_rate": 9.693237668094242e-06, | |
| "loss": 0.328, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0112123335669236, | |
| "grad_norm": 1.0093501975936998, | |
| "learning_rate": 9.689855508740852e-06, | |
| "loss": 0.3314, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.0140154169586546, | |
| "grad_norm": 1.0454941837116838, | |
| "learning_rate": 9.686455403374062e-06, | |
| "loss": 0.2894, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0168185003503853, | |
| "grad_norm": 1.0985918166666147, | |
| "learning_rate": 9.683037365004584e-06, | |
| "loss": 0.349, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.0196215837421163, | |
| "grad_norm": 1.0912615951275002, | |
| "learning_rate": 9.679601406711746e-06, | |
| "loss": 0.3347, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0224246671338473, | |
| "grad_norm": 1.1344040429952662, | |
| "learning_rate": 9.676147541643449e-06, | |
| "loss": 0.2837, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.0252277505255782, | |
| "grad_norm": 1.0053485977042733, | |
| "learning_rate": 9.67267578301611e-06, | |
| "loss": 0.3058, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.028030833917309, | |
| "grad_norm": 1.110591276900221, | |
| "learning_rate": 9.669186144114627e-06, | |
| "loss": 0.3311, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.03083391730904, | |
| "grad_norm": 1.0505594139502015, | |
| "learning_rate": 9.66567863829231e-06, | |
| "loss": 0.2907, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.033637000700771, | |
| "grad_norm": 1.085730742136853, | |
| "learning_rate": 9.662153278970842e-06, | |
| "loss": 0.3222, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.0364400840925017, | |
| "grad_norm": 1.1000592830799627, | |
| "learning_rate": 9.65861007964022e-06, | |
| "loss": 0.36, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0392431674842326, | |
| "grad_norm": 1.0403078396095022, | |
| "learning_rate": 9.655049053858709e-06, | |
| "loss": 0.3166, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0420462508759636, | |
| "grad_norm": 1.0898946838202692, | |
| "learning_rate": 9.65147021525279e-06, | |
| "loss": 0.3318, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0448493342676946, | |
| "grad_norm": 1.1636912765172764, | |
| "learning_rate": 9.647873577517102e-06, | |
| "loss": 0.3112, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.0476524176594253, | |
| "grad_norm": 1.062876460848521, | |
| "learning_rate": 9.644259154414396e-06, | |
| "loss": 0.3213, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.0504555010511563, | |
| "grad_norm": 1.0411461676692622, | |
| "learning_rate": 9.640626959775475e-06, | |
| "loss": 0.2833, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.0532585844428872, | |
| "grad_norm": 1.0698719260700786, | |
| "learning_rate": 9.636977007499153e-06, | |
| "loss": 0.3183, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.056061667834618, | |
| "grad_norm": 1.1063438881099272, | |
| "learning_rate": 9.633309311552192e-06, | |
| "loss": 0.3185, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.058864751226349, | |
| "grad_norm": 1.1755296275706542, | |
| "learning_rate": 9.62962388596925e-06, | |
| "loss": 0.3356, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.06166783461808, | |
| "grad_norm": 1.1170189445294432, | |
| "learning_rate": 9.625920744852829e-06, | |
| "loss": 0.3183, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.0644709180098109, | |
| "grad_norm": 1.097121892348547, | |
| "learning_rate": 9.622199902373218e-06, | |
| "loss": 0.3228, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0672740014015416, | |
| "grad_norm": 1.0270976338759783, | |
| "learning_rate": 9.618461372768446e-06, | |
| "loss": 0.3238, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.0700770847932726, | |
| "grad_norm": 1.014617517007521, | |
| "learning_rate": 9.614705170344221e-06, | |
| "loss": 0.2889, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0728801681850035, | |
| "grad_norm": 1.0832491882178725, | |
| "learning_rate": 9.610931309473875e-06, | |
| "loss": 0.3448, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.0756832515767345, | |
| "grad_norm": 1.085160625021815, | |
| "learning_rate": 9.607139804598316e-06, | |
| "loss": 0.3467, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0784863349684652, | |
| "grad_norm": 1.084827294674734, | |
| "learning_rate": 9.603330670225964e-06, | |
| "loss": 0.3281, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0812894183601962, | |
| "grad_norm": 1.0773604121220748, | |
| "learning_rate": 9.599503920932698e-06, | |
| "loss": 0.3532, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0840925017519272, | |
| "grad_norm": 1.0235123673940172, | |
| "learning_rate": 9.595659571361805e-06, | |
| "loss": 0.3166, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.0868955851436581, | |
| "grad_norm": 0.9963295027443626, | |
| "learning_rate": 9.591797636223921e-06, | |
| "loss": 0.3195, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0896986685353889, | |
| "grad_norm": 1.1501677549952232, | |
| "learning_rate": 9.587918130296969e-06, | |
| "loss": 0.3155, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.0925017519271198, | |
| "grad_norm": 1.003967215467938, | |
| "learning_rate": 9.584021068426114e-06, | |
| "loss": 0.3063, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0953048353188508, | |
| "grad_norm": 1.0108615715138858, | |
| "learning_rate": 9.580106465523695e-06, | |
| "loss": 0.3574, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0981079187105816, | |
| "grad_norm": 1.1165766187456356, | |
| "learning_rate": 9.576174336569177e-06, | |
| "loss": 0.2864, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.1009110021023125, | |
| "grad_norm": 1.0543544935190767, | |
| "learning_rate": 9.572224696609085e-06, | |
| "loss": 0.3239, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.1037140854940435, | |
| "grad_norm": 1.0377856653383537, | |
| "learning_rate": 9.568257560756955e-06, | |
| "loss": 0.3123, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.1065171688857744, | |
| "grad_norm": 1.1139234753576157, | |
| "learning_rate": 9.564272944193269e-06, | |
| "loss": 0.3428, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.1093202522775052, | |
| "grad_norm": 1.0936556362287269, | |
| "learning_rate": 9.560270862165401e-06, | |
| "loss": 0.2733, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.1121233356692362, | |
| "grad_norm": 1.0370010859408514, | |
| "learning_rate": 9.556251329987561e-06, | |
| "loss": 0.2821, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.1149264190609671, | |
| "grad_norm": 1.0311513839370219, | |
| "learning_rate": 9.552214363040725e-06, | |
| "loss": 0.3423, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.1177295024526979, | |
| "grad_norm": 1.10210044846964, | |
| "learning_rate": 9.548159976772593e-06, | |
| "loss": 0.3303, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.1205325858444288, | |
| "grad_norm": 0.9858827916119725, | |
| "learning_rate": 9.544088186697515e-06, | |
| "loss": 0.2932, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1233356692361598, | |
| "grad_norm": 0.9983897052826234, | |
| "learning_rate": 9.539999008396442e-06, | |
| "loss": 0.3458, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.1261387526278908, | |
| "grad_norm": 1.02060751773833, | |
| "learning_rate": 9.535892457516858e-06, | |
| "loss": 0.3236, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1289418360196215, | |
| "grad_norm": 1.030336385659217, | |
| "learning_rate": 9.531768549772728e-06, | |
| "loss": 0.3017, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.1317449194113525, | |
| "grad_norm": 1.0518307790625796, | |
| "learning_rate": 9.527627300944434e-06, | |
| "loss": 0.3415, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1345480028030834, | |
| "grad_norm": 1.0220350871337325, | |
| "learning_rate": 9.523468726878707e-06, | |
| "loss": 0.3283, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.1373510861948142, | |
| "grad_norm": 1.0047110923356777, | |
| "learning_rate": 9.519292843488584e-06, | |
| "loss": 0.3306, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1401541695865451, | |
| "grad_norm": 1.08438304004638, | |
| "learning_rate": 9.515099666753333e-06, | |
| "loss": 0.3116, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.142957252978276, | |
| "grad_norm": 1.0793229294918072, | |
| "learning_rate": 9.510889212718398e-06, | |
| "loss": 0.3662, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.145760336370007, | |
| "grad_norm": 1.1062791611794396, | |
| "learning_rate": 9.506661497495332e-06, | |
| "loss": 0.3316, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.148563419761738, | |
| "grad_norm": 1.0827647053862033, | |
| "learning_rate": 9.502416537261739e-06, | |
| "loss": 0.2952, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1513665031534688, | |
| "grad_norm": 1.0678606153312582, | |
| "learning_rate": 9.498154348261217e-06, | |
| "loss": 0.3075, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.1541695865451997, | |
| "grad_norm": 1.0418705421995285, | |
| "learning_rate": 9.493874946803287e-06, | |
| "loss": 0.3327, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1569726699369307, | |
| "grad_norm": 1.0096111225194304, | |
| "learning_rate": 9.489578349263336e-06, | |
| "loss": 0.2977, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.1597757533286615, | |
| "grad_norm": 1.1086300275277539, | |
| "learning_rate": 9.485264572082551e-06, | |
| "loss": 0.31, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1625788367203924, | |
| "grad_norm": 1.0836113044122648, | |
| "learning_rate": 9.480933631767858e-06, | |
| "loss": 0.2845, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.1653819201121234, | |
| "grad_norm": 1.075806150693619, | |
| "learning_rate": 9.476585544891862e-06, | |
| "loss": 0.2996, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1681850035038543, | |
| "grad_norm": 1.0465845198833996, | |
| "learning_rate": 9.472220328092778e-06, | |
| "loss": 0.3168, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.170988086895585, | |
| "grad_norm": 1.0492451545412518, | |
| "learning_rate": 9.467837998074369e-06, | |
| "loss": 0.3059, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.173791170287316, | |
| "grad_norm": 1.0230461022692288, | |
| "learning_rate": 9.463438571605885e-06, | |
| "loss": 0.3287, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.176594253679047, | |
| "grad_norm": 1.005464439961268, | |
| "learning_rate": 9.459022065521994e-06, | |
| "loss": 0.3508, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1793973370707778, | |
| "grad_norm": 1.0919205018252067, | |
| "learning_rate": 9.45458849672272e-06, | |
| "loss": 0.3391, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.1822004204625087, | |
| "grad_norm": 1.092549869041747, | |
| "learning_rate": 9.450137882173385e-06, | |
| "loss": 0.3305, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1850035038542397, | |
| "grad_norm": 1.05626041378706, | |
| "learning_rate": 9.445670238904528e-06, | |
| "loss": 0.3215, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.1878065872459707, | |
| "grad_norm": 1.0293502687515852, | |
| "learning_rate": 9.441185584011854e-06, | |
| "loss": 0.2953, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1906096706377014, | |
| "grad_norm": 0.9785312880056695, | |
| "learning_rate": 9.436683934656165e-06, | |
| "loss": 0.3092, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.1934127540294324, | |
| "grad_norm": 1.0204104347912326, | |
| "learning_rate": 9.432165308063293e-06, | |
| "loss": 0.3354, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.1962158374211633, | |
| "grad_norm": 1.1553343174766495, | |
| "learning_rate": 9.427629721524034e-06, | |
| "loss": 0.3125, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.199018920812894, | |
| "grad_norm": 1.0599997718566063, | |
| "learning_rate": 9.423077192394081e-06, | |
| "loss": 0.3172, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.201822004204625, | |
| "grad_norm": 1.162318840029123, | |
| "learning_rate": 9.418507738093958e-06, | |
| "loss": 0.3392, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.204625087596356, | |
| "grad_norm": 1.0011365892808832, | |
| "learning_rate": 9.413921376108958e-06, | |
| "loss": 0.3132, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.207428170988087, | |
| "grad_norm": 1.0369608568465343, | |
| "learning_rate": 9.409318123989073e-06, | |
| "loss": 0.341, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.2102312543798177, | |
| "grad_norm": 1.1433348864427362, | |
| "learning_rate": 9.404697999348917e-06, | |
| "loss": 0.301, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.2130343377715487, | |
| "grad_norm": 0.9794101295514953, | |
| "learning_rate": 9.40006101986768e-06, | |
| "loss": 0.3232, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.2158374211632796, | |
| "grad_norm": 1.0367960310279924, | |
| "learning_rate": 9.395407203289036e-06, | |
| "loss": 0.3038, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.2186405045550106, | |
| "grad_norm": 1.0586375505573105, | |
| "learning_rate": 9.3907365674211e-06, | |
| "loss": 0.3347, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.2214435879467413, | |
| "grad_norm": 1.0401963116022124, | |
| "learning_rate": 9.386049130136335e-06, | |
| "loss": 0.3284, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.2242466713384723, | |
| "grad_norm": 1.0717767652915415, | |
| "learning_rate": 9.381344909371504e-06, | |
| "loss": 0.3243, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.2270497547302033, | |
| "grad_norm": 1.1136261306345807, | |
| "learning_rate": 9.376623923127588e-06, | |
| "loss": 0.2979, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2298528381219342, | |
| "grad_norm": 0.9752498937994857, | |
| "learning_rate": 9.371886189469724e-06, | |
| "loss": 0.3265, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.232655921513665, | |
| "grad_norm": 0.9663441642697678, | |
| "learning_rate": 9.367131726527137e-06, | |
| "loss": 0.3241, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.235459004905396, | |
| "grad_norm": 1.0172564072998984, | |
| "learning_rate": 9.362360552493062e-06, | |
| "loss": 0.2972, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.238262088297127, | |
| "grad_norm": 1.0437288823429967, | |
| "learning_rate": 9.35757268562468e-06, | |
| "loss": 0.289, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2410651716888577, | |
| "grad_norm": 1.0425647440619517, | |
| "learning_rate": 9.352768144243059e-06, | |
| "loss": 0.3415, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.2438682550805886, | |
| "grad_norm": 1.0884794197548682, | |
| "learning_rate": 9.347946946733055e-06, | |
| "loss": 0.3181, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2466713384723196, | |
| "grad_norm": 1.1105563286976543, | |
| "learning_rate": 9.343109111543275e-06, | |
| "loss": 0.3373, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.2494744218640506, | |
| "grad_norm": 1.0409391306978304, | |
| "learning_rate": 9.338254657185988e-06, | |
| "loss": 0.285, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.2522775052557813, | |
| "grad_norm": 1.1418504158321083, | |
| "learning_rate": 9.333383602237047e-06, | |
| "loss": 0.3006, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.2550805886475123, | |
| "grad_norm": 1.1141933886489612, | |
| "learning_rate": 9.328495965335844e-06, | |
| "loss": 0.3074, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2578836720392432, | |
| "grad_norm": 1.0126163466912967, | |
| "learning_rate": 9.323591765185208e-06, | |
| "loss": 0.3305, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.260686755430974, | |
| "grad_norm": 0.99727536700764, | |
| "learning_rate": 9.31867102055136e-06, | |
| "loss": 0.29, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.263489838822705, | |
| "grad_norm": 0.9539744388123551, | |
| "learning_rate": 9.31373375026382e-06, | |
| "loss": 0.3111, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.266292922214436, | |
| "grad_norm": 1.0227036740085016, | |
| "learning_rate": 9.308779973215355e-06, | |
| "loss": 0.2659, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.2690960056061669, | |
| "grad_norm": 1.0842800400398311, | |
| "learning_rate": 9.303809708361884e-06, | |
| "loss": 0.3205, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.2718990889978976, | |
| "grad_norm": 1.1550945457289452, | |
| "learning_rate": 9.298822974722425e-06, | |
| "loss": 0.3131, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.2747021723896286, | |
| "grad_norm": 1.1106936037552206, | |
| "learning_rate": 9.293819791379016e-06, | |
| "loss": 0.3464, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.2775052557813595, | |
| "grad_norm": 1.0204999168425828, | |
| "learning_rate": 9.288800177476636e-06, | |
| "loss": 0.3334, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.2803083391730903, | |
| "grad_norm": 1.101739084994357, | |
| "learning_rate": 9.28376415222314e-06, | |
| "loss": 0.3527, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.2831114225648212, | |
| "grad_norm": 1.0132658206492835, | |
| "learning_rate": 9.278711734889178e-06, | |
| "loss": 0.3507, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.2859145059565522, | |
| "grad_norm": 1.093935580030477, | |
| "learning_rate": 9.27364294480813e-06, | |
| "loss": 0.3125, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.2887175893482832, | |
| "grad_norm": 1.0840732446328496, | |
| "learning_rate": 9.268557801376027e-06, | |
| "loss": 0.2964, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2915206727400141, | |
| "grad_norm": 1.0652339440301186, | |
| "learning_rate": 9.263456324051472e-06, | |
| "loss": 0.3346, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.2943237561317449, | |
| "grad_norm": 0.9905423959308148, | |
| "learning_rate": 9.258338532355575e-06, | |
| "loss": 0.2979, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2971268395234758, | |
| "grad_norm": 1.113098382378706, | |
| "learning_rate": 9.253204445871874e-06, | |
| "loss": 0.3269, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2999299229152066, | |
| "grad_norm": 1.1279986985216184, | |
| "learning_rate": 9.248054084246259e-06, | |
| "loss": 0.3321, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.3027330063069376, | |
| "grad_norm": 1.1171578114970255, | |
| "learning_rate": 9.242887467186896e-06, | |
| "loss": 0.3099, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.3055360896986685, | |
| "grad_norm": 1.0217011590704772, | |
| "learning_rate": 9.237704614464157e-06, | |
| "loss": 0.3251, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.3083391730903995, | |
| "grad_norm": 1.0450128549258906, | |
| "learning_rate": 9.232505545910536e-06, | |
| "loss": 0.3789, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.3111422564821305, | |
| "grad_norm": 1.0600119003347885, | |
| "learning_rate": 9.227290281420583e-06, | |
| "loss": 0.3291, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.3139453398738612, | |
| "grad_norm": 1.0842489244628608, | |
| "learning_rate": 9.22205884095082e-06, | |
| "loss": 0.303, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.3167484232655922, | |
| "grad_norm": 1.0435075367939863, | |
| "learning_rate": 9.216811244519667e-06, | |
| "loss": 0.3043, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.3195515066573231, | |
| "grad_norm": 1.022280858797138, | |
| "learning_rate": 9.211547512207366e-06, | |
| "loss": 0.3199, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.3223545900490539, | |
| "grad_norm": 1.0884951175650506, | |
| "learning_rate": 9.206267664155906e-06, | |
| "loss": 0.3003, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.3251576734407848, | |
| "grad_norm": 1.0283609768763116, | |
| "learning_rate": 9.200971720568942e-06, | |
| "loss": 0.304, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.3279607568325158, | |
| "grad_norm": 1.0934855137470945, | |
| "learning_rate": 9.19565970171172e-06, | |
| "loss": 0.3331, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3307638402242468, | |
| "grad_norm": 1.03357671176792, | |
| "learning_rate": 9.190331627910998e-06, | |
| "loss": 0.2869, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.3335669236159775, | |
| "grad_norm": 0.9869700427404623, | |
| "learning_rate": 9.184987519554969e-06, | |
| "loss": 0.2753, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.3363700070077085, | |
| "grad_norm": 1.0051093138314826, | |
| "learning_rate": 9.179627397093184e-06, | |
| "loss": 0.3028, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.3391730903994394, | |
| "grad_norm": 1.119221633996362, | |
| "learning_rate": 9.174251281036478e-06, | |
| "loss": 0.2821, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.3419761737911702, | |
| "grad_norm": 1.0416388270107602, | |
| "learning_rate": 9.168859191956876e-06, | |
| "loss": 0.3033, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.3447792571829011, | |
| "grad_norm": 1.014104502114144, | |
| "learning_rate": 9.163451150487531e-06, | |
| "loss": 0.3162, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.347582340574632, | |
| "grad_norm": 0.9819678478497201, | |
| "learning_rate": 9.15802717732264e-06, | |
| "loss": 0.2956, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.350385423966363, | |
| "grad_norm": 0.940075199188654, | |
| "learning_rate": 9.152587293217362e-06, | |
| "loss": 0.3095, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.353188507358094, | |
| "grad_norm": 1.046977799888376, | |
| "learning_rate": 9.147131518987738e-06, | |
| "loss": 0.3041, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.3559915907498248, | |
| "grad_norm": 1.0739950959035311, | |
| "learning_rate": 9.141659875510615e-06, | |
| "loss": 0.3161, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.3587946741415557, | |
| "grad_norm": 1.055258970335336, | |
| "learning_rate": 9.136172383723568e-06, | |
| "loss": 0.2877, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.3615977575332865, | |
| "grad_norm": 0.9942925273922855, | |
| "learning_rate": 9.130669064624811e-06, | |
| "loss": 0.3394, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.3644008409250175, | |
| "grad_norm": 1.0792689049266269, | |
| "learning_rate": 9.125149939273123e-06, | |
| "loss": 0.3165, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.3672039243167484, | |
| "grad_norm": 1.0022733710364253, | |
| "learning_rate": 9.119615028787771e-06, | |
| "loss": 0.2988, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.3700070077084794, | |
| "grad_norm": 1.0365258649982823, | |
| "learning_rate": 9.114064354348422e-06, | |
| "loss": 0.3183, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.3728100911002104, | |
| "grad_norm": 1.040841698853146, | |
| "learning_rate": 9.108497937195064e-06, | |
| "loss": 0.2916, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.375613174491941, | |
| "grad_norm": 0.9702615599424862, | |
| "learning_rate": 9.102915798627925e-06, | |
| "loss": 0.3183, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.378416257883672, | |
| "grad_norm": 1.0254553655980456, | |
| "learning_rate": 9.097317960007395e-06, | |
| "loss": 0.3106, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.381219341275403, | |
| "grad_norm": 1.0330801056438677, | |
| "learning_rate": 9.091704442753938e-06, | |
| "loss": 0.3493, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.3840224246671338, | |
| "grad_norm": 1.0482605604178792, | |
| "learning_rate": 9.086075268348014e-06, | |
| "loss": 0.3222, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.3868255080588647, | |
| "grad_norm": 1.0689177080576502, | |
| "learning_rate": 9.080430458329996e-06, | |
| "loss": 0.3252, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.3896285914505957, | |
| "grad_norm": 0.9952213910620371, | |
| "learning_rate": 9.07477003430009e-06, | |
| "loss": 0.3777, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.3924316748423267, | |
| "grad_norm": 0.9969778598795931, | |
| "learning_rate": 9.06909401791825e-06, | |
| "loss": 0.3064, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.3952347582340574, | |
| "grad_norm": 1.0324696911274618, | |
| "learning_rate": 9.063402430904087e-06, | |
| "loss": 0.3355, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.3980378416257884, | |
| "grad_norm": 1.0311228329364959, | |
| "learning_rate": 9.057695295036806e-06, | |
| "loss": 0.3329, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.4008409250175193, | |
| "grad_norm": 0.9956688549063683, | |
| "learning_rate": 9.0519726321551e-06, | |
| "loss": 0.3102, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.40364400840925, | |
| "grad_norm": 0.9432882129064981, | |
| "learning_rate": 9.046234464157087e-06, | |
| "loss": 0.3194, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.406447091800981, | |
| "grad_norm": 1.0009435461744933, | |
| "learning_rate": 9.040480813000205e-06, | |
| "loss": 0.3212, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.409250175192712, | |
| "grad_norm": 1.0819896746344966, | |
| "learning_rate": 9.03471170070115e-06, | |
| "loss": 0.3188, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.412053258584443, | |
| "grad_norm": 1.0309234137171452, | |
| "learning_rate": 9.028927149335773e-06, | |
| "loss": 0.3242, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.4148563419761737, | |
| "grad_norm": 1.0458803781264536, | |
| "learning_rate": 9.02312718103901e-06, | |
| "loss": 0.2826, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.4176594253679047, | |
| "grad_norm": 1.042415071678378, | |
| "learning_rate": 9.017311818004785e-06, | |
| "loss": 0.3319, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.4204625087596356, | |
| "grad_norm": 0.9484013403771449, | |
| "learning_rate": 9.011481082485933e-06, | |
| "loss": 0.3451, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.4232655921513664, | |
| "grad_norm": 1.1681194445835863, | |
| "learning_rate": 9.005634996794117e-06, | |
| "loss": 0.3313, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.4260686755430974, | |
| "grad_norm": 1.055505140987747, | |
| "learning_rate": 8.99977358329973e-06, | |
| "loss": 0.3162, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.4288717589348283, | |
| "grad_norm": 0.9831333063132164, | |
| "learning_rate": 8.993896864431825e-06, | |
| "loss": 0.3121, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.4316748423265593, | |
| "grad_norm": 0.9791973011046631, | |
| "learning_rate": 8.988004862678019e-06, | |
| "loss": 0.3051, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.4344779257182902, | |
| "grad_norm": 1.026083867106151, | |
| "learning_rate": 8.98209760058441e-06, | |
| "loss": 0.3508, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.437281009110021, | |
| "grad_norm": 1.0447255347040039, | |
| "learning_rate": 8.97617510075549e-06, | |
| "loss": 0.2768, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.440084092501752, | |
| "grad_norm": 1.1380333354753407, | |
| "learning_rate": 8.970237385854059e-06, | |
| "loss": 0.3334, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4428871758934827, | |
| "grad_norm": 0.9696366640107756, | |
| "learning_rate": 8.96428447860114e-06, | |
| "loss": 0.3231, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.4456902592852137, | |
| "grad_norm": 1.0822835386163632, | |
| "learning_rate": 8.95831640177589e-06, | |
| "loss": 0.3453, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.4484933426769446, | |
| "grad_norm": 1.1510291788031735, | |
| "learning_rate": 8.952333178215515e-06, | |
| "loss": 0.3118, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.4512964260686756, | |
| "grad_norm": 1.0632519616648684, | |
| "learning_rate": 8.946334830815176e-06, | |
| "loss": 0.2929, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.4540995094604066, | |
| "grad_norm": 1.536963560887113, | |
| "learning_rate": 8.940321382527908e-06, | |
| "loss": 0.329, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.4569025928521373, | |
| "grad_norm": 1.1543457182131098, | |
| "learning_rate": 8.934292856364535e-06, | |
| "loss": 0.3333, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.4597056762438683, | |
| "grad_norm": 1.1422839399927136, | |
| "learning_rate": 8.928249275393572e-06, | |
| "loss": 0.2989, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.4625087596355992, | |
| "grad_norm": 1.0323164597196008, | |
| "learning_rate": 8.922190662741146e-06, | |
| "loss": 0.3427, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.46531184302733, | |
| "grad_norm": 1.0171844433621025, | |
| "learning_rate": 8.916117041590899e-06, | |
| "loss": 0.3306, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.468114926419061, | |
| "grad_norm": 1.005207029365479, | |
| "learning_rate": 8.910028435183906e-06, | |
| "loss": 0.3017, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.470918009810792, | |
| "grad_norm": 0.9931572125901683, | |
| "learning_rate": 8.903924866818589e-06, | |
| "loss": 0.3412, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.4737210932025229, | |
| "grad_norm": 1.0116486286896589, | |
| "learning_rate": 8.897806359850614e-06, | |
| "loss": 0.2799, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.4765241765942536, | |
| "grad_norm": 0.9010403515246571, | |
| "learning_rate": 8.891672937692818e-06, | |
| "loss": 0.3329, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.4793272599859846, | |
| "grad_norm": 0.983591867787472, | |
| "learning_rate": 8.885524623815107e-06, | |
| "loss": 0.3559, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.4821303433777155, | |
| "grad_norm": 1.0389716365696682, | |
| "learning_rate": 8.879361441744374e-06, | |
| "loss": 0.3558, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.4849334267694463, | |
| "grad_norm": 0.9273702187776793, | |
| "learning_rate": 8.873183415064401e-06, | |
| "loss": 0.2942, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.4877365101611773, | |
| "grad_norm": 0.9669391636702603, | |
| "learning_rate": 8.866990567415785e-06, | |
| "loss": 0.3199, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.4905395935529082, | |
| "grad_norm": 1.0603730492574703, | |
| "learning_rate": 8.860782922495821e-06, | |
| "loss": 0.3397, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.4933426769446392, | |
| "grad_norm": 1.0224313519746386, | |
| "learning_rate": 8.854560504058442e-06, | |
| "loss": 0.3142, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.4961457603363701, | |
| "grad_norm": 1.0076135234872883, | |
| "learning_rate": 8.8483233359141e-06, | |
| "loss": 0.3121, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.4989488437281009, | |
| "grad_norm": 1.067686299191742, | |
| "learning_rate": 8.842071441929695e-06, | |
| "loss": 0.3051, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.5017519271198319, | |
| "grad_norm": 0.9900504355007723, | |
| "learning_rate": 8.835804846028473e-06, | |
| "loss": 0.2995, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.5045550105115626, | |
| "grad_norm": 1.0611202706352416, | |
| "learning_rate": 8.82952357218994e-06, | |
| "loss": 0.3311, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.5073580939032936, | |
| "grad_norm": 1.0223872953740087, | |
| "learning_rate": 8.823227644449767e-06, | |
| "loss": 0.3298, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.5101611772950245, | |
| "grad_norm": 1.004431585067902, | |
| "learning_rate": 8.8169170868997e-06, | |
| "loss": 0.3317, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.5129642606867555, | |
| "grad_norm": 1.0372525705982982, | |
| "learning_rate": 8.810591923687468e-06, | |
| "loss": 0.3099, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5157673440784865, | |
| "grad_norm": 0.9763097184040664, | |
| "learning_rate": 8.804252179016681e-06, | |
| "loss": 0.3518, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.5185704274702172, | |
| "grad_norm": 1.0786399759038623, | |
| "learning_rate": 8.797897877146757e-06, | |
| "loss": 0.351, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.5213735108619482, | |
| "grad_norm": 1.0216449815318813, | |
| "learning_rate": 8.791529042392813e-06, | |
| "loss": 0.2949, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.524176594253679, | |
| "grad_norm": 1.005020537619243, | |
| "learning_rate": 8.785145699125577e-06, | |
| "loss": 0.292, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.5269796776454099, | |
| "grad_norm": 1.0117733349440563, | |
| "learning_rate": 8.778747871771293e-06, | |
| "loss": 0.3386, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.5297827610371408, | |
| "grad_norm": 1.0619820560833055, | |
| "learning_rate": 8.772335584811631e-06, | |
| "loss": 0.2836, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.5325858444288718, | |
| "grad_norm": 0.9828529966945531, | |
| "learning_rate": 8.765908862783595e-06, | |
| "loss": 0.2974, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.5353889278206028, | |
| "grad_norm": 1.0825846295263688, | |
| "learning_rate": 8.75946773027942e-06, | |
| "loss": 0.2855, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5381920112123337, | |
| "grad_norm": 1.0026209804243462, | |
| "learning_rate": 8.753012211946485e-06, | |
| "loss": 0.29, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.5409950946040645, | |
| "grad_norm": 0.9734512557172048, | |
| "learning_rate": 8.74654233248722e-06, | |
| "loss": 0.2806, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5437981779957952, | |
| "grad_norm": 1.0209335742663634, | |
| "learning_rate": 8.740058116659007e-06, | |
| "loss": 0.332, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.5466012613875262, | |
| "grad_norm": 0.9987360165038864, | |
| "learning_rate": 8.733559589274086e-06, | |
| "loss": 0.2941, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5494043447792571, | |
| "grad_norm": 0.9709216484754912, | |
| "learning_rate": 8.72704677519946e-06, | |
| "loss": 0.3052, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.5522074281709881, | |
| "grad_norm": 1.0291970398046555, | |
| "learning_rate": 8.720519699356804e-06, | |
| "loss": 0.3479, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.555010511562719, | |
| "grad_norm": 1.042970879067335, | |
| "learning_rate": 8.713978386722367e-06, | |
| "loss": 0.3302, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.55781359495445, | |
| "grad_norm": 0.9212891106054633, | |
| "learning_rate": 8.707422862326872e-06, | |
| "loss": 0.3301, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.5606166783461808, | |
| "grad_norm": 1.0666204145131457, | |
| "learning_rate": 8.700853151255427e-06, | |
| "loss": 0.3177, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.5634197617379118, | |
| "grad_norm": 1.0299268466077998, | |
| "learning_rate": 8.694269278647425e-06, | |
| "loss": 0.2912, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.5662228451296425, | |
| "grad_norm": 0.9977307082011863, | |
| "learning_rate": 8.687671269696451e-06, | |
| "loss": 0.3274, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.5690259285213735, | |
| "grad_norm": 0.9997169102518809, | |
| "learning_rate": 8.681059149650181e-06, | |
| "loss": 0.2897, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.5718290119131044, | |
| "grad_norm": 1.0531067882949778, | |
| "learning_rate": 8.674432943810287e-06, | |
| "loss": 0.3507, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.5746320953048354, | |
| "grad_norm": 0.9875951790749613, | |
| "learning_rate": 8.667792677532346e-06, | |
| "loss": 0.3176, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.5774351786965664, | |
| "grad_norm": 0.988294038093233, | |
| "learning_rate": 8.661138376225735e-06, | |
| "loss": 0.3233, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.580238262088297, | |
| "grad_norm": 0.9840482743038865, | |
| "learning_rate": 8.654470065353535e-06, | |
| "loss": 0.327, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.583041345480028, | |
| "grad_norm": 0.9856704483396864, | |
| "learning_rate": 8.647787770432439e-06, | |
| "loss": 0.3056, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.5858444288717588, | |
| "grad_norm": 0.9806977120576214, | |
| "learning_rate": 8.641091517032648e-06, | |
| "loss": 0.3596, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.5886475122634898, | |
| "grad_norm": 0.9912904769787096, | |
| "learning_rate": 8.634381330777778e-06, | |
| "loss": 0.3086, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.5914505956552207, | |
| "grad_norm": 1.0134362756226047, | |
| "learning_rate": 8.627657237344762e-06, | |
| "loss": 0.3402, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.5942536790469517, | |
| "grad_norm": 1.0485977509584994, | |
| "learning_rate": 8.620919262463743e-06, | |
| "loss": 0.313, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.5970567624386827, | |
| "grad_norm": 0.9780179966189363, | |
| "learning_rate": 8.614167431917986e-06, | |
| "loss": 0.3101, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5998598458304134, | |
| "grad_norm": 1.0499324110603412, | |
| "learning_rate": 8.607401771543778e-06, | |
| "loss": 0.3315, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.6026629292221444, | |
| "grad_norm": 1.0940218868255043, | |
| "learning_rate": 8.600622307230323e-06, | |
| "loss": 0.323, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.6054660126138751, | |
| "grad_norm": 1.0193645559843643, | |
| "learning_rate": 8.593829064919648e-06, | |
| "loss": 0.3376, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.608269096005606, | |
| "grad_norm": 1.0962814559008822, | |
| "learning_rate": 8.5870220706065e-06, | |
| "loss": 0.3172, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.611072179397337, | |
| "grad_norm": 1.0355134477786425, | |
| "learning_rate": 8.580201350338255e-06, | |
| "loss": 0.3445, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.613875262789068, | |
| "grad_norm": 0.9832928764016196, | |
| "learning_rate": 8.573366930214807e-06, | |
| "loss": 0.3377, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.616678346180799, | |
| "grad_norm": 1.0432431672185147, | |
| "learning_rate": 8.566518836388472e-06, | |
| "loss": 0.3156, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.61948142957253, | |
| "grad_norm": 1.0386229114434877, | |
| "learning_rate": 8.559657095063893e-06, | |
| "loss": 0.3585, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.6222845129642607, | |
| "grad_norm": 1.0190174075590552, | |
| "learning_rate": 8.552781732497937e-06, | |
| "loss": 0.3226, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.6250875963559914, | |
| "grad_norm": 1.0755188869356076, | |
| "learning_rate": 8.545892774999589e-06, | |
| "loss": 0.2947, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6278906797477224, | |
| "grad_norm": 0.9724040098316253, | |
| "learning_rate": 8.538990248929858e-06, | |
| "loss": 0.3343, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.6306937631394534, | |
| "grad_norm": 1.0380136461497245, | |
| "learning_rate": 8.532074180701674e-06, | |
| "loss": 0.3153, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.6334968465311843, | |
| "grad_norm": 1.0437703219167505, | |
| "learning_rate": 8.525144596779788e-06, | |
| "loss": 0.3115, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.6362999299229153, | |
| "grad_norm": 1.073505032497063, | |
| "learning_rate": 8.518201523680668e-06, | |
| "loss": 0.3228, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6391030133146463, | |
| "grad_norm": 0.9898757612005716, | |
| "learning_rate": 8.5112449879724e-06, | |
| "loss": 0.3132, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.641906096706377, | |
| "grad_norm": 1.0294276811837257, | |
| "learning_rate": 8.504275016274584e-06, | |
| "loss": 0.2908, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.644709180098108, | |
| "grad_norm": 1.0331323793330456, | |
| "learning_rate": 8.497291635258235e-06, | |
| "loss": 0.2939, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.6475122634898387, | |
| "grad_norm": 1.0336815270279005, | |
| "learning_rate": 8.490294871645681e-06, | |
| "loss": 0.3085, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.6503153468815697, | |
| "grad_norm": 0.9500201552994334, | |
| "learning_rate": 8.483284752210457e-06, | |
| "loss": 0.3207, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.6531184302733006, | |
| "grad_norm": 1.0189139734647936, | |
| "learning_rate": 8.476261303777205e-06, | |
| "loss": 0.3321, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.6559215136650316, | |
| "grad_norm": 1.0321224570797916, | |
| "learning_rate": 8.46922455322157e-06, | |
| "loss": 0.3169, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.6587245970567626, | |
| "grad_norm": 1.0676739360063043, | |
| "learning_rate": 8.462174527470102e-06, | |
| "loss": 0.3405, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.6615276804484933, | |
| "grad_norm": 1.0114685422562408, | |
| "learning_rate": 8.455111253500147e-06, | |
| "loss": 0.279, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.6643307638402243, | |
| "grad_norm": 1.0101245131587206, | |
| "learning_rate": 8.448034758339747e-06, | |
| "loss": 0.2895, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.667133847231955, | |
| "grad_norm": 1.0295531144079708, | |
| "learning_rate": 8.440945069067535e-06, | |
| "loss": 0.2911, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.669936930623686, | |
| "grad_norm": 0.9981860968875388, | |
| "learning_rate": 8.433842212812632e-06, | |
| "loss": 0.2941, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.672740014015417, | |
| "grad_norm": 0.9972808864654994, | |
| "learning_rate": 8.426726216754543e-06, | |
| "loss": 0.3283, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.675543097407148, | |
| "grad_norm": 0.9204669787570922, | |
| "learning_rate": 8.419597108123054e-06, | |
| "loss": 0.3421, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.6783461807988789, | |
| "grad_norm": 1.0403976854928259, | |
| "learning_rate": 8.412454914198127e-06, | |
| "loss": 0.3166, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.6811492641906096, | |
| "grad_norm": 1.0572732959169833, | |
| "learning_rate": 8.405299662309794e-06, | |
| "loss": 0.3598, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.6839523475823406, | |
| "grad_norm": 1.129066341505925, | |
| "learning_rate": 8.39813137983806e-06, | |
| "loss": 0.3221, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.6867554309740713, | |
| "grad_norm": 1.0295275170206426, | |
| "learning_rate": 8.390950094212783e-06, | |
| "loss": 0.2815, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.6895585143658023, | |
| "grad_norm": 0.9730574912477955, | |
| "learning_rate": 8.383755832913583e-06, | |
| "loss": 0.3538, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.6923615977575333, | |
| "grad_norm": 1.1385237539297977, | |
| "learning_rate": 8.376548623469737e-06, | |
| "loss": 0.3224, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.6951646811492642, | |
| "grad_norm": 1.0094609622481272, | |
| "learning_rate": 8.369328493460061e-06, | |
| "loss": 0.3015, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.6979677645409952, | |
| "grad_norm": 1.0913134241157278, | |
| "learning_rate": 8.362095470512815e-06, | |
| "loss": 0.279, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.7007708479327261, | |
| "grad_norm": 1.0647490413873508, | |
| "learning_rate": 8.354849582305596e-06, | |
| "loss": 0.3016, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.703573931324457, | |
| "grad_norm": 1.0816023958456211, | |
| "learning_rate": 8.347590856565231e-06, | |
| "loss": 0.328, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.7063770147161879, | |
| "grad_norm": 0.9665218301407825, | |
| "learning_rate": 8.340319321067668e-06, | |
| "loss": 0.3222, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.7091800981079186, | |
| "grad_norm": 1.114093897425612, | |
| "learning_rate": 8.333035003637873e-06, | |
| "loss": 0.308, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.7119831814996496, | |
| "grad_norm": 0.9870441819524826, | |
| "learning_rate": 8.325737932149726e-06, | |
| "loss": 0.2778, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.7147862648913805, | |
| "grad_norm": 0.9972031988711308, | |
| "learning_rate": 8.318428134525906e-06, | |
| "loss": 0.3503, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.7175893482831115, | |
| "grad_norm": 1.0611971970199314, | |
| "learning_rate": 8.311105638737796e-06, | |
| "loss": 0.2857, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.7203924316748425, | |
| "grad_norm": 1.0049712797899566, | |
| "learning_rate": 8.303770472805361e-06, | |
| "loss": 0.3047, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.7231955150665732, | |
| "grad_norm": 1.077820808212372, | |
| "learning_rate": 8.296422664797055e-06, | |
| "loss": 0.3514, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.7259985984583042, | |
| "grad_norm": 0.9534620185925327, | |
| "learning_rate": 8.289062242829707e-06, | |
| "loss": 0.3175, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.728801681850035, | |
| "grad_norm": 0.9533962250197514, | |
| "learning_rate": 8.281689235068411e-06, | |
| "loss": 0.3304, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.7316047652417659, | |
| "grad_norm": 1.0377944574846643, | |
| "learning_rate": 8.274303669726427e-06, | |
| "loss": 0.3108, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.7344078486334968, | |
| "grad_norm": 0.9776215264248671, | |
| "learning_rate": 8.26690557506506e-06, | |
| "loss": 0.3226, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.7372109320252278, | |
| "grad_norm": 1.071996115988915, | |
| "learning_rate": 8.259494979393563e-06, | |
| "loss": 0.2754, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.7400140154169588, | |
| "grad_norm": 0.9706195771601769, | |
| "learning_rate": 8.252071911069029e-06, | |
| "loss": 0.3221, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.7428170988086895, | |
| "grad_norm": 0.9372850256828986, | |
| "learning_rate": 8.244636398496268e-06, | |
| "loss": 0.2968, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.7456201822004205, | |
| "grad_norm": 0.9645042926256575, | |
| "learning_rate": 8.237188470127721e-06, | |
| "loss": 0.322, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.7484232655921512, | |
| "grad_norm": 0.9650005316732679, | |
| "learning_rate": 8.229728154463331e-06, | |
| "loss": 0.2816, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.7512263489838822, | |
| "grad_norm": 0.9863893918718459, | |
| "learning_rate": 8.22225548005044e-06, | |
| "loss": 0.3215, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.7540294323756132, | |
| "grad_norm": 1.0018570360888792, | |
| "learning_rate": 8.214770475483686e-06, | |
| "loss": 0.339, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.7568325157673441, | |
| "grad_norm": 1.1193797750866719, | |
| "learning_rate": 8.207273169404892e-06, | |
| "loss": 0.3577, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.759635599159075, | |
| "grad_norm": 1.0543465412125153, | |
| "learning_rate": 8.199763590502945e-06, | |
| "loss": 0.2906, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.762438682550806, | |
| "grad_norm": 1.0456949346053654, | |
| "learning_rate": 8.1922417675137e-06, | |
| "loss": 0.3132, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.7652417659425368, | |
| "grad_norm": 1.0462724551977218, | |
| "learning_rate": 8.184707729219865e-06, | |
| "loss": 0.3245, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.7680448493342675, | |
| "grad_norm": 0.9823174814664697, | |
| "learning_rate": 8.177161504450887e-06, | |
| "loss": 0.3309, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.7708479327259985, | |
| "grad_norm": 0.9957506589063193, | |
| "learning_rate": 8.169603122082852e-06, | |
| "loss": 0.3393, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.7736510161177295, | |
| "grad_norm": 1.0179763156590598, | |
| "learning_rate": 8.16203261103836e-06, | |
| "loss": 0.2781, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.7764540995094604, | |
| "grad_norm": 1.0073877575165777, | |
| "learning_rate": 8.154450000286425e-06, | |
| "loss": 0.32, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.7792571829011914, | |
| "grad_norm": 1.1450794851221155, | |
| "learning_rate": 8.146855318842366e-06, | |
| "loss": 0.3267, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.7820602662929224, | |
| "grad_norm": 1.0133968691268989, | |
| "learning_rate": 8.139248595767682e-06, | |
| "loss": 0.3325, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.784863349684653, | |
| "grad_norm": 1.0972907692412197, | |
| "learning_rate": 8.131629860169959e-06, | |
| "loss": 0.324, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.787666433076384, | |
| "grad_norm": 1.0747756547403522, | |
| "learning_rate": 8.123999141202743e-06, | |
| "loss": 0.3291, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.7904695164681148, | |
| "grad_norm": 1.0470139173415747, | |
| "learning_rate": 8.11635646806544e-06, | |
| "loss": 0.3523, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.7932725998598458, | |
| "grad_norm": 0.9883826668641853, | |
| "learning_rate": 8.108701870003195e-06, | |
| "loss": 0.3225, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7960756832515767, | |
| "grad_norm": 0.9807749134553183, | |
| "learning_rate": 8.101035376306789e-06, | |
| "loss": 0.3078, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.7988787666433077, | |
| "grad_norm": 0.9590271955969843, | |
| "learning_rate": 8.093357016312518e-06, | |
| "loss": 0.3199, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.8016818500350387, | |
| "grad_norm": 0.999075545826115, | |
| "learning_rate": 8.085666819402089e-06, | |
| "loss": 0.2991, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.8044849334267694, | |
| "grad_norm": 0.9834688572176502, | |
| "learning_rate": 8.077964815002497e-06, | |
| "loss": 0.2773, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.8072880168185004, | |
| "grad_norm": 1.003972639907366, | |
| "learning_rate": 8.070251032585932e-06, | |
| "loss": 0.2935, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.8100911002102311, | |
| "grad_norm": 1.0037639414072954, | |
| "learning_rate": 8.062525501669638e-06, | |
| "loss": 0.3337, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.812894183601962, | |
| "grad_norm": 1.0504008521656023, | |
| "learning_rate": 8.054788251815824e-06, | |
| "loss": 0.3075, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.815697266993693, | |
| "grad_norm": 1.041018797822688, | |
| "learning_rate": 8.047039312631542e-06, | |
| "loss": 0.2941, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.818500350385424, | |
| "grad_norm": 1.1476887413595336, | |
| "learning_rate": 8.039278713768572e-06, | |
| "loss": 0.3008, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.821303433777155, | |
| "grad_norm": 1.063731096461173, | |
| "learning_rate": 8.031506484923312e-06, | |
| "loss": 0.2836, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.8241065171688857, | |
| "grad_norm": 1.0609057745391275, | |
| "learning_rate": 8.02372265583666e-06, | |
| "loss": 0.3083, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.8269096005606167, | |
| "grad_norm": 0.9729626141695646, | |
| "learning_rate": 8.015927256293906e-06, | |
| "loss": 0.3062, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.8297126839523474, | |
| "grad_norm": 1.0277640358114108, | |
| "learning_rate": 8.008120316124612e-06, | |
| "loss": 0.3389, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.8325157673440784, | |
| "grad_norm": 1.0700790517166436, | |
| "learning_rate": 8.000301865202507e-06, | |
| "loss": 0.2827, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.8353188507358094, | |
| "grad_norm": 1.0204749289182065, | |
| "learning_rate": 7.992471933445359e-06, | |
| "loss": 0.3574, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.8381219341275403, | |
| "grad_norm": 1.054119271327418, | |
| "learning_rate": 7.984630550814872e-06, | |
| "loss": 0.3235, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.8409250175192713, | |
| "grad_norm": 1.0276559132927785, | |
| "learning_rate": 7.976777747316568e-06, | |
| "loss": 0.3191, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.8437281009110023, | |
| "grad_norm": 0.9749404229198887, | |
| "learning_rate": 7.96891355299967e-06, | |
| "loss": 0.2777, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.846531184302733, | |
| "grad_norm": 0.9135511168776641, | |
| "learning_rate": 7.961037997956988e-06, | |
| "loss": 0.2897, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.849334267694464, | |
| "grad_norm": 1.030736095815461, | |
| "learning_rate": 7.953151112324807e-06, | |
| "loss": 0.3229, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.8521373510861947, | |
| "grad_norm": 1.0021336354889787, | |
| "learning_rate": 7.94525292628277e-06, | |
| "loss": 0.2957, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.8549404344779257, | |
| "grad_norm": 1.111103040839117, | |
| "learning_rate": 7.937343470053753e-06, | |
| "loss": 0.327, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.8577435178696566, | |
| "grad_norm": 1.0988776635638167, | |
| "learning_rate": 7.929422773903772e-06, | |
| "loss": 0.3038, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.8605466012613876, | |
| "grad_norm": 1.0809251731993994, | |
| "learning_rate": 7.921490868141843e-06, | |
| "loss": 0.3069, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.8633496846531186, | |
| "grad_norm": 1.05344570430685, | |
| "learning_rate": 7.91354778311988e-06, | |
| "loss": 0.3092, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.8661527680448493, | |
| "grad_norm": 1.070191527052215, | |
| "learning_rate": 7.90559354923257e-06, | |
| "loss": 0.3209, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.8689558514365803, | |
| "grad_norm": 1.085172191078759, | |
| "learning_rate": 7.897628196917273e-06, | |
| "loss": 0.298, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.871758934828311, | |
| "grad_norm": 1.0239732236288808, | |
| "learning_rate": 7.889651756653882e-06, | |
| "loss": 0.3235, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.874562018220042, | |
| "grad_norm": 1.0072086598087635, | |
| "learning_rate": 7.881664258964726e-06, | |
| "loss": 0.285, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.877365101611773, | |
| "grad_norm": 1.0171350132611843, | |
| "learning_rate": 7.873665734414445e-06, | |
| "loss": 0.3341, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.880168185003504, | |
| "grad_norm": 1.0486133267457405, | |
| "learning_rate": 7.865656213609869e-06, | |
| "loss": 0.3324, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.8829712683952349, | |
| "grad_norm": 1.0417101669974131, | |
| "learning_rate": 7.857635727199915e-06, | |
| "loss": 0.3424, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.8857743517869656, | |
| "grad_norm": 0.9617447446447893, | |
| "learning_rate": 7.849604305875452e-06, | |
| "loss": 0.2945, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.8885774351786966, | |
| "grad_norm": 1.001044918507061, | |
| "learning_rate": 7.841561980369197e-06, | |
| "loss": 0.294, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.8913805185704273, | |
| "grad_norm": 0.938846630312967, | |
| "learning_rate": 7.83350878145559e-06, | |
| "loss": 0.3342, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.8941836019621583, | |
| "grad_norm": 1.0509429693043588, | |
| "learning_rate": 7.825444739950678e-06, | |
| "loss": 0.3195, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.8969866853538893, | |
| "grad_norm": 1.0549270517058966, | |
| "learning_rate": 7.817369886712003e-06, | |
| "loss": 0.3101, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.8997897687456202, | |
| "grad_norm": 0.9941886709286132, | |
| "learning_rate": 7.809284252638474e-06, | |
| "loss": 0.3119, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.9025928521373512, | |
| "grad_norm": 0.9572303093383921, | |
| "learning_rate": 7.801187868670253e-06, | |
| "loss": 0.3086, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.9053959355290822, | |
| "grad_norm": 1.0565296838152023, | |
| "learning_rate": 7.79308076578864e-06, | |
| "loss": 0.3451, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.908199018920813, | |
| "grad_norm": 1.0167744263598684, | |
| "learning_rate": 7.784962975015953e-06, | |
| "loss": 0.3098, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.9110021023125436, | |
| "grad_norm": 0.9452488464744048, | |
| "learning_rate": 7.776834527415403e-06, | |
| "loss": 0.2989, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.9138051857042746, | |
| "grad_norm": 0.9649112314730967, | |
| "learning_rate": 7.76869545409098e-06, | |
| "loss": 0.3242, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.9166082690960056, | |
| "grad_norm": 0.9854323873721721, | |
| "learning_rate": 7.76054578618734e-06, | |
| "loss": 0.3344, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.9194113524877365, | |
| "grad_norm": 1.0405989073796909, | |
| "learning_rate": 7.752385554889674e-06, | |
| "loss": 0.297, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.9222144358794675, | |
| "grad_norm": 0.9433706915766583, | |
| "learning_rate": 7.744214791423597e-06, | |
| "loss": 0.3116, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.9250175192711985, | |
| "grad_norm": 1.0542882712602182, | |
| "learning_rate": 7.736033527055025e-06, | |
| "loss": 0.2795, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.9278206026629292, | |
| "grad_norm": 1.1667611822036257, | |
| "learning_rate": 7.727841793090058e-06, | |
| "loss": 0.3235, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.9306236860546602, | |
| "grad_norm": 1.0443348900152258, | |
| "learning_rate": 7.719639620874857e-06, | |
| "loss": 0.3042, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.933426769446391, | |
| "grad_norm": 1.0629009170054755, | |
| "learning_rate": 7.711427041795525e-06, | |
| "loss": 0.3191, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.9362298528381219, | |
| "grad_norm": 0.995609193672333, | |
| "learning_rate": 7.703204087277989e-06, | |
| "loss": 0.2706, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.9390329362298528, | |
| "grad_norm": 1.0613346871930387, | |
| "learning_rate": 7.694970788787877e-06, | |
| "loss": 0.3379, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.9418360196215838, | |
| "grad_norm": 1.038889453929205, | |
| "learning_rate": 7.686727177830403e-06, | |
| "loss": 0.3378, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.9446391030133148, | |
| "grad_norm": 0.9158531584661774, | |
| "learning_rate": 7.678473285950233e-06, | |
| "loss": 0.2839, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.9474421864050455, | |
| "grad_norm": 0.9600167344012924, | |
| "learning_rate": 7.670209144731382e-06, | |
| "loss": 0.3405, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.9502452697967765, | |
| "grad_norm": 1.0169027673837827, | |
| "learning_rate": 7.661934785797083e-06, | |
| "loss": 0.3189, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.9530483531885072, | |
| "grad_norm": 0.9857662223953864, | |
| "learning_rate": 7.653650240809667e-06, | |
| "loss": 0.3097, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.9558514365802382, | |
| "grad_norm": 1.0340909796071234, | |
| "learning_rate": 7.645355541470441e-06, | |
| "loss": 0.3514, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.9586545199719692, | |
| "grad_norm": 1.0367612546167349, | |
| "learning_rate": 7.63705071951957e-06, | |
| "loss": 0.2808, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.9614576033637001, | |
| "grad_norm": 0.9660509007381327, | |
| "learning_rate": 7.628735806735954e-06, | |
| "loss": 0.3482, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.964260686755431, | |
| "grad_norm": 0.969366457212378, | |
| "learning_rate": 7.620410834937102e-06, | |
| "loss": 0.2941, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.9670637701471618, | |
| "grad_norm": 0.9720365665382235, | |
| "learning_rate": 7.612075835979023e-06, | |
| "loss": 0.3183, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.9698668535388928, | |
| "grad_norm": 0.9372831657252071, | |
| "learning_rate": 7.603730841756087e-06, | |
| "loss": 0.323, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.9726699369306235, | |
| "grad_norm": 0.9036831971891457, | |
| "learning_rate": 7.595375884200917e-06, | |
| "loss": 0.3093, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.9754730203223545, | |
| "grad_norm": 0.9862325513159128, | |
| "learning_rate": 7.587010995284259e-06, | |
| "loss": 0.3688, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.9782761037140855, | |
| "grad_norm": 1.1290794165906493, | |
| "learning_rate": 7.57863620701486e-06, | |
| "loss": 0.3258, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.9810791871058164, | |
| "grad_norm": 1.011312295887435, | |
| "learning_rate": 7.57025155143935e-06, | |
| "loss": 0.2734, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.9838822704975474, | |
| "grad_norm": 0.9830584089932598, | |
| "learning_rate": 7.56185706064212e-06, | |
| "loss": 0.3022, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.9866853538892784, | |
| "grad_norm": 1.0572513469714837, | |
| "learning_rate": 7.553452766745186e-06, | |
| "loss": 0.3281, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.989488437281009, | |
| "grad_norm": 0.9856430690146805, | |
| "learning_rate": 7.54503870190809e-06, | |
| "loss": 0.3216, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.99229152067274, | |
| "grad_norm": 0.9753729640372196, | |
| "learning_rate": 7.536614898327752e-06, | |
| "loss": 0.311, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.9950946040644708, | |
| "grad_norm": 0.9864433750205578, | |
| "learning_rate": 7.52818138823836e-06, | |
| "loss": 0.2953, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.9978976874562018, | |
| "grad_norm": 0.9892478387895355, | |
| "learning_rate": 7.519738203911251e-06, | |
| "loss": 0.3078, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9438255779603106, | |
| "learning_rate": 7.511285377654771e-06, | |
| "loss": 0.2418, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.002803083391731, | |
| "grad_norm": 0.9080470878038334, | |
| "learning_rate": 7.502822941814171e-06, | |
| "loss": 0.2548, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.005606166783462, | |
| "grad_norm": 0.8826594015336416, | |
| "learning_rate": 7.494350928771466e-06, | |
| "loss": 0.2594, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.008409250175193, | |
| "grad_norm": 0.8386141382563697, | |
| "learning_rate": 7.485869370945325e-06, | |
| "loss": 0.2423, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.0112123335669234, | |
| "grad_norm": 0.8223334458252292, | |
| "learning_rate": 7.477378300790935e-06, | |
| "loss": 0.2366, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.0140154169586544, | |
| "grad_norm": 0.8888082287599987, | |
| "learning_rate": 7.468877750799887e-06, | |
| "loss": 0.2125, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.0168185003503853, | |
| "grad_norm": 0.9620219450586115, | |
| "learning_rate": 7.460367753500045e-06, | |
| "loss": 0.2459, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.0196215837421163, | |
| "grad_norm": 0.8418731161458017, | |
| "learning_rate": 7.451848341455424e-06, | |
| "loss": 0.2294, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.0224246671338473, | |
| "grad_norm": 0.9676558507429851, | |
| "learning_rate": 7.443319547266064e-06, | |
| "loss": 0.204, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.0252277505255782, | |
| "grad_norm": 0.8911468048586156, | |
| "learning_rate": 7.43478140356791e-06, | |
| "loss": 0.2358, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.028030833917309, | |
| "grad_norm": 0.9789863373908476, | |
| "learning_rate": 7.426233943032679e-06, | |
| "loss": 0.2105, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.0308339173090397, | |
| "grad_norm": 0.8331751325732193, | |
| "learning_rate": 7.417677198367742e-06, | |
| "loss": 0.2348, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.0336370007007707, | |
| "grad_norm": 0.9120461125338731, | |
| "learning_rate": 7.409111202315996e-06, | |
| "loss": 0.2534, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.0364400840925017, | |
| "grad_norm": 0.902479135637207, | |
| "learning_rate": 7.400535987655739e-06, | |
| "loss": 0.2057, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.0392431674842326, | |
| "grad_norm": 0.8439282822189133, | |
| "learning_rate": 7.391951587200543e-06, | |
| "loss": 0.2225, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.0420462508759636, | |
| "grad_norm": 0.8248802433546494, | |
| "learning_rate": 7.3833580337991316e-06, | |
| "loss": 0.2535, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.0448493342676946, | |
| "grad_norm": 0.8873168428680299, | |
| "learning_rate": 7.374755360335253e-06, | |
| "loss": 0.1747, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.0476524176594255, | |
| "grad_norm": 0.83012996391051, | |
| "learning_rate": 7.366143599727551e-06, | |
| "loss": 0.2454, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.0504555010511565, | |
| "grad_norm": 0.8914474878489079, | |
| "learning_rate": 7.3575227849294475e-06, | |
| "loss": 0.2396, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.053258584442887, | |
| "grad_norm": 0.9122725647435613, | |
| "learning_rate": 7.3488929489290066e-06, | |
| "loss": 0.1932, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.056061667834618, | |
| "grad_norm": 0.8529844434099163, | |
| "learning_rate": 7.340254124748811e-06, | |
| "loss": 0.2123, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.058864751226349, | |
| "grad_norm": 0.872863409999642, | |
| "learning_rate": 7.331606345445841e-06, | |
| "loss": 0.2603, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.06166783461808, | |
| "grad_norm": 0.8300659979722327, | |
| "learning_rate": 7.322949644111343e-06, | |
| "loss": 0.2199, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.064470918009811, | |
| "grad_norm": 0.8094207514208382, | |
| "learning_rate": 7.314284053870704e-06, | |
| "loss": 0.2157, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.067274001401542, | |
| "grad_norm": 0.8362024824106278, | |
| "learning_rate": 7.305609607883325e-06, | |
| "loss": 0.2148, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.070077084793273, | |
| "grad_norm": 0.8690662905370502, | |
| "learning_rate": 7.296926339342493e-06, | |
| "loss": 0.236, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.0728801681850033, | |
| "grad_norm": 0.8328393205731416, | |
| "learning_rate": 7.288234281475255e-06, | |
| "loss": 0.2276, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.0756832515767343, | |
| "grad_norm": 0.875039815996908, | |
| "learning_rate": 7.279533467542295e-06, | |
| "loss": 0.2098, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.0784863349684652, | |
| "grad_norm": 0.8813695483618433, | |
| "learning_rate": 7.270823930837796e-06, | |
| "loss": 0.2298, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.081289418360196, | |
| "grad_norm": 0.8829770875487482, | |
| "learning_rate": 7.262105704689325e-06, | |
| "loss": 0.2129, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.084092501751927, | |
| "grad_norm": 0.9148251721146092, | |
| "learning_rate": 7.253378822457696e-06, | |
| "loss": 0.2545, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.086895585143658, | |
| "grad_norm": 0.8517069377242505, | |
| "learning_rate": 7.244643317536847e-06, | |
| "loss": 0.2087, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.089698668535389, | |
| "grad_norm": 0.8335500923445399, | |
| "learning_rate": 7.235899223353713e-06, | |
| "loss": 0.209, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.0925017519271196, | |
| "grad_norm": 0.8362768129075926, | |
| "learning_rate": 7.2271465733680935e-06, | |
| "loss": 0.2061, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.0953048353188506, | |
| "grad_norm": 0.9051045608267754, | |
| "learning_rate": 7.21838540107253e-06, | |
| "loss": 0.2238, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.0981079187105816, | |
| "grad_norm": 0.9254101156511656, | |
| "learning_rate": 7.20961573999217e-06, | |
| "loss": 0.1949, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.1009110021023125, | |
| "grad_norm": 0.8494352768539863, | |
| "learning_rate": 7.200837623684654e-06, | |
| "loss": 0.2459, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.1037140854940435, | |
| "grad_norm": 0.8708647921141418, | |
| "learning_rate": 7.192051085739966e-06, | |
| "loss": 0.2273, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.1065171688857744, | |
| "grad_norm": 0.8469502145093527, | |
| "learning_rate": 7.183256159780321e-06, | |
| "loss": 0.2094, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.1093202522775054, | |
| "grad_norm": 0.9201483952213855, | |
| "learning_rate": 7.17445287946003e-06, | |
| "loss": 0.205, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.112123335669236, | |
| "grad_norm": 0.877283394608266, | |
| "learning_rate": 7.1656412784653765e-06, | |
| "loss": 0.2104, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.114926419060967, | |
| "grad_norm": 0.85682961264913, | |
| "learning_rate": 7.1568213905144765e-06, | |
| "loss": 0.2305, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.117729502452698, | |
| "grad_norm": 0.9068964187625099, | |
| "learning_rate": 7.147993249357163e-06, | |
| "loss": 0.229, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.120532585844429, | |
| "grad_norm": 0.845779591253865, | |
| "learning_rate": 7.139156888774845e-06, | |
| "loss": 0.2203, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.12333566923616, | |
| "grad_norm": 0.8107378653739076, | |
| "learning_rate": 7.130312342580389e-06, | |
| "loss": 0.2773, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.1261387526278908, | |
| "grad_norm": 0.8869784736417072, | |
| "learning_rate": 7.121459644617979e-06, | |
| "loss": 0.2175, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.1289418360196217, | |
| "grad_norm": 0.9196118134762095, | |
| "learning_rate": 7.112598828762998e-06, | |
| "loss": 0.1877, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.1317449194113527, | |
| "grad_norm": 0.8425074607390686, | |
| "learning_rate": 7.103729928921886e-06, | |
| "loss": 0.2362, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.134548002803083, | |
| "grad_norm": 0.8493908080559867, | |
| "learning_rate": 7.094852979032021e-06, | |
| "loss": 0.2223, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.137351086194814, | |
| "grad_norm": 0.8539519141110158, | |
| "learning_rate": 7.085968013061585e-06, | |
| "loss": 0.1856, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.140154169586545, | |
| "grad_norm": 0.8822840272790886, | |
| "learning_rate": 7.0770750650094335e-06, | |
| "loss": 0.2206, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.142957252978276, | |
| "grad_norm": 0.8973299598197616, | |
| "learning_rate": 7.068174168904965e-06, | |
| "loss": 0.2333, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.145760336370007, | |
| "grad_norm": 0.8417160109366958, | |
| "learning_rate": 7.059265358807991e-06, | |
| "loss": 0.2471, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.148563419761738, | |
| "grad_norm": 0.8376755979996072, | |
| "learning_rate": 7.050348668808609e-06, | |
| "loss": 0.2082, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.151366503153469, | |
| "grad_norm": 0.8551391805726851, | |
| "learning_rate": 7.041424133027068e-06, | |
| "loss": 0.2247, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.1541695865451995, | |
| "grad_norm": 0.8439640386477707, | |
| "learning_rate": 7.032491785613641e-06, | |
| "loss": 0.2429, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.1569726699369305, | |
| "grad_norm": 0.9282329146542126, | |
| "learning_rate": 7.023551660748489e-06, | |
| "loss": 0.2247, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.1597757533286615, | |
| "grad_norm": 0.8759587860371314, | |
| "learning_rate": 7.014603792641536e-06, | |
| "loss": 0.224, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.1625788367203924, | |
| "grad_norm": 0.8890636750475975, | |
| "learning_rate": 7.005648215532338e-06, | |
| "loss": 0.2169, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.1653819201121234, | |
| "grad_norm": 0.8072442845315246, | |
| "learning_rate": 6.996684963689946e-06, | |
| "loss": 0.2254, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.1681850035038543, | |
| "grad_norm": 0.8187290548361686, | |
| "learning_rate": 6.987714071412781e-06, | |
| "loss": 0.1894, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.1709880868955853, | |
| "grad_norm": 0.8450241179024212, | |
| "learning_rate": 6.9787355730285e-06, | |
| "loss": 0.202, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.1737911702873163, | |
| "grad_norm": 0.8925844522225541, | |
| "learning_rate": 6.969749502893868e-06, | |
| "loss": 0.23, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.176594253679047, | |
| "grad_norm": 0.8790931855716, | |
| "learning_rate": 6.9607558953946175e-06, | |
| "loss": 0.2066, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.1793973370707778, | |
| "grad_norm": 0.9381548374096674, | |
| "learning_rate": 6.9517547849453315e-06, | |
| "loss": 0.2706, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.1822004204625087, | |
| "grad_norm": 0.9238601216907635, | |
| "learning_rate": 6.942746205989295e-06, | |
| "loss": 0.2297, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.1850035038542397, | |
| "grad_norm": 0.9866325447543289, | |
| "learning_rate": 6.933730192998378e-06, | |
| "loss": 0.189, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.1878065872459707, | |
| "grad_norm": 0.8449034073236847, | |
| "learning_rate": 6.924706780472894e-06, | |
| "loss": 0.2045, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.1906096706377016, | |
| "grad_norm": 0.8553219392885109, | |
| "learning_rate": 6.915676002941473e-06, | |
| "loss": 0.2285, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.193412754029432, | |
| "grad_norm": 0.8794945951009563, | |
| "learning_rate": 6.906637894960928e-06, | |
| "loss": 0.2156, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.196215837421163, | |
| "grad_norm": 0.8987598950398725, | |
| "learning_rate": 6.897592491116117e-06, | |
| "loss": 0.1859, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.199018920812894, | |
| "grad_norm": 0.8509492945576145, | |
| "learning_rate": 6.888539826019824e-06, | |
| "loss": 0.2158, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.201822004204625, | |
| "grad_norm": 0.882168503694161, | |
| "learning_rate": 6.879479934312616e-06, | |
| "loss": 0.2092, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.204625087596356, | |
| "grad_norm": 0.8634636045570749, | |
| "learning_rate": 6.8704128506627086e-06, | |
| "loss": 0.2266, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.207428170988087, | |
| "grad_norm": 0.8518036135167025, | |
| "learning_rate": 6.861338609765842e-06, | |
| "loss": 0.2571, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.210231254379818, | |
| "grad_norm": 0.8405617797518206, | |
| "learning_rate": 6.852257246345145e-06, | |
| "loss": 0.2266, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.213034337771549, | |
| "grad_norm": 0.901508884663155, | |
| "learning_rate": 6.843168795150998e-06, | |
| "loss": 0.2163, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.2158374211632794, | |
| "grad_norm": 0.820196270139301, | |
| "learning_rate": 6.834073290960905e-06, | |
| "loss": 0.2296, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.2186405045550104, | |
| "grad_norm": 0.852940857104472, | |
| "learning_rate": 6.82497076857936e-06, | |
| "loss": 0.1952, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.2214435879467413, | |
| "grad_norm": 0.8318702724851098, | |
| "learning_rate": 6.815861262837706e-06, | |
| "loss": 0.212, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.2242466713384723, | |
| "grad_norm": 1.0371558711867102, | |
| "learning_rate": 6.80674480859402e-06, | |
| "loss": 0.2053, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.2270497547302033, | |
| "grad_norm": 0.9082510123437368, | |
| "learning_rate": 6.797621440732955e-06, | |
| "loss": 0.2283, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.2298528381219342, | |
| "grad_norm": 0.8122480768933811, | |
| "learning_rate": 6.788491194165629e-06, | |
| "loss": 0.2047, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.232655921513665, | |
| "grad_norm": 0.8349666180090791, | |
| "learning_rate": 6.779354103829474e-06, | |
| "loss": 0.2559, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.2354590049053957, | |
| "grad_norm": 0.8941808733581829, | |
| "learning_rate": 6.770210204688119e-06, | |
| "loss": 0.1728, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.2382620882971267, | |
| "grad_norm": 0.8346775956103791, | |
| "learning_rate": 6.761059531731241e-06, | |
| "loss": 0.2162, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.2410651716888577, | |
| "grad_norm": 0.8488565835499525, | |
| "learning_rate": 6.751902119974437e-06, | |
| "loss": 0.2154, | |
| "step": 800 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1785, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 996094226137088.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
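
The JSON above is the `trainer_state.json` that the Hugging Face Trainer writes into each `checkpoint-*` directory; its `log_history` array holds one entry per logged step (here `logging_steps` is 1) with `step`, `epoch`, `loss`, `learning_rate`, and `grad_norm`. As a minimal sketch only, and not part of the original checkpoint file, the snippet below shows one way to load such a file and plot the training loss and learning-rate schedule over steps; the filename `trainer_state.json` and the use of matplotlib are assumptions, not something stated in the log itself.

```python
# Minimal sketch (assumed helper, not part of the original checkpoint):
# load a Hugging Face Trainer state file and plot loss and learning rate vs. step.
import json

import matplotlib.pyplot as plt

# Assumes the JSON above was saved as "trainer_state.json".
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that contain a training loss (evaluation entries, if any, lack it).
history = [h for h in state["log_history"] if "loss" in h]
steps = [h["step"] for h in history]
losses = [h["loss"] for h in history]
lrs = [h["learning_rate"] for h in history]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

Reading the log this way makes the overall trend easy to see: the loss falls from roughly 0.9 at the first steps to about 0.2-0.35 by step 800 (around epoch 2.24 of the configured 5 epochs / 1785 max steps), while the learning rate, after its warmup visible at the start of the file, is decaying again by the later steps shown here.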