{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1626,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0024622960911049553,
      "grad_norm": 1.5234375,
      "learning_rate": 4.0816326530612243e-07,
      "loss": 1.3865270614624023,
      "step": 2
    },
    {
      "epoch": 0.0049245921822099106,
      "grad_norm": 36.75,
      "learning_rate": 1.2244897959183673e-06,
      "loss": 1.8756635189056396,
      "step": 4
    },
    {
      "epoch": 0.007386888273314866,
      "grad_norm": 3.625,
      "learning_rate": 2.0408163265306125e-06,
      "loss": 1.1310276985168457,
      "step": 6
    },
    {
      "epoch": 0.009849184364419821,
      "grad_norm": 6.09375,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 1.8238341808319092,
      "step": 8
    },
    {
      "epoch": 0.012311480455524777,
      "grad_norm": 12.0,
      "learning_rate": 3.6734693877551024e-06,
      "loss": 2.2014291286468506,
      "step": 10
    },
    {
      "epoch": 0.014773776546629732,
      "grad_norm": 19.5,
      "learning_rate": 4.489795918367348e-06,
      "loss": 2.4339303970336914,
      "step": 12
    },
    {
      "epoch": 0.017236072637734686,
      "grad_norm": 9.1875,
      "learning_rate": 5.306122448979593e-06,
      "loss": 1.3835787773132324,
      "step": 14
    },
    {
      "epoch": 0.019698368728839642,
      "grad_norm": 3.453125,
      "learning_rate": 6.122448979591837e-06,
      "loss": 1.1793060302734375,
      "step": 16
    },
    {
      "epoch": 0.0221606648199446,
      "grad_norm": 2.21875,
      "learning_rate": 6.938775510204082e-06,
      "loss": 1.173147439956665,
      "step": 18
    },
    {
      "epoch": 0.024622960911049555,
      "grad_norm": 12.875,
      "learning_rate": 7.755102040816327e-06,
      "loss": 2.2560791969299316,
      "step": 20
    },
    {
      "epoch": 0.02708525700215451,
      "grad_norm": 3.859375,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.753507137298584,
      "step": 22
    },
    {
      "epoch": 0.029547553093259463,
      "grad_norm": 11.125,
      "learning_rate": 9.387755102040818e-06,
      "loss": 2.109658718109131,
      "step": 24
    },
    {
      "epoch": 0.03200984918436442,
      "grad_norm": 4.375,
      "learning_rate": 1.0204081632653063e-05,
      "loss": 1.7001088857650757,
      "step": 26
    },
    {
      "epoch": 0.03447214527546937,
      "grad_norm": 7.53125,
      "learning_rate": 1.1020408163265306e-05,
      "loss": 2.2228636741638184,
      "step": 28
    },
    {
      "epoch": 0.03693444136657433,
      "grad_norm": 2.09375,
      "learning_rate": 1.1836734693877552e-05,
      "loss": 1.233575463294983,
      "step": 30
    },
    {
      "epoch": 0.039396737457679284,
      "grad_norm": 5.03125,
      "learning_rate": 1.2653061224489798e-05,
      "loss": 1.834639549255371,
      "step": 32
    },
    {
      "epoch": 0.041859033548784244,
      "grad_norm": 3.796875,
      "learning_rate": 1.3469387755102042e-05,
      "loss": 1.8060579299926758,
      "step": 34
    },
    {
      "epoch": 0.0443213296398892,
      "grad_norm": 2.625,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 1.4287090301513672,
      "step": 36
    },
    {
      "epoch": 0.04678362573099415,
      "grad_norm": 11.375,
      "learning_rate": 1.510204081632653e-05,
      "loss": 2.1703319549560547,
      "step": 38
    },
    {
      "epoch": 0.04924592182209911,
      "grad_norm": 40.75,
      "learning_rate": 1.5918367346938776e-05,
      "loss": 2.1797375679016113,
      "step": 40
    },
    {
      "epoch": 0.05170821791320406,
      "grad_norm": 15.8125,
      "learning_rate": 1.673469387755102e-05,
      "loss": 1.9881037473678589,
      "step": 42
    },
    {
      "epoch": 0.05417051400430902,
      "grad_norm": 9.375,
      "learning_rate": 1.7551020408163266e-05,
      "loss": 1.735787034034729,
      "step": 44
    },
    {
      "epoch": 0.056632810095413974,
      "grad_norm": 8.125,
      "learning_rate": 1.836734693877551e-05,
      "loss": 1.9953798055648804,
      "step": 46
    },
    {
      "epoch": 0.05909510618651893,
      "grad_norm": 4.40625,
      "learning_rate": 1.9183673469387756e-05,
      "loss": 1.1727348566055298,
      "step": 48
    },
    {
      "epoch": 0.061557402277623886,
      "grad_norm": 3.4375,
      "learning_rate": 2e-05,
      "loss": 1.6915946006774902,
      "step": 50
    },
    {
      "epoch": 0.06401969836872884,
      "grad_norm": 5.96875,
      "learning_rate": 1.9999936502625722e-05,
      "loss": 2.3282856941223145,
      "step": 52
    },
    {
      "epoch": 0.0664819944598338,
      "grad_norm": 5.90625,
      "learning_rate": 1.9999746011510863e-05,
      "loss": 1.9712034463882446,
      "step": 54
    },
    {
      "epoch": 0.06894429055093874,
      "grad_norm": 18.375,
      "learning_rate": 1.9999428529679345e-05,
      "loss": 1.5145387649536133,
      "step": 56
    },
    {
      "epoch": 0.0714065866420437,
      "grad_norm": 3.84375,
      "learning_rate": 1.9998984062170987e-05,
      "loss": 1.1939287185668945,
      "step": 58
    },
    {
      "epoch": 0.07386888273314866,
      "grad_norm": 5.34375,
      "learning_rate": 1.9998412616041416e-05,
      "loss": 1.7602123022079468,
      "step": 60
    },
    {
      "epoch": 0.07633117882425362,
      "grad_norm": 4.71875,
      "learning_rate": 1.9997714200361962e-05,
      "loss": 1.446789264678955,
      "step": 62
    },
    {
      "epoch": 0.07879347491535857,
      "grad_norm": 3.296875,
      "learning_rate": 1.999688882621952e-05,
      "loss": 1.6264426708221436,
      "step": 64
    },
    {
      "epoch": 0.08125577100646353,
      "grad_norm": 2.84375,
      "learning_rate": 1.9995936506716357e-05,
      "loss": 1.63454008102417,
      "step": 66
    },
    {
      "epoch": 0.08371806709756849,
      "grad_norm": 4.0625,
      "learning_rate": 1.9994857256969928e-05,
      "loss": 1.8928616046905518,
      "step": 68
    },
    {
      "epoch": 0.08618036318867343,
      "grad_norm": 4.15625,
      "learning_rate": 1.999365109411261e-05,
      "loss": 1.7232308387756348,
      "step": 70
    },
    {
      "epoch": 0.0886426592797784,
      "grad_norm": 3.5,
      "learning_rate": 1.9992318037291443e-05,
      "loss": 1.5345882177352905,
      "step": 72
    },
    {
      "epoch": 0.09110495537088335,
      "grad_norm": 2.390625,
      "learning_rate": 1.9990858107667836e-05,
      "loss": 1.5957210063934326,
      "step": 74
    },
    {
      "epoch": 0.0935672514619883,
      "grad_norm": 4.9375,
      "learning_rate": 1.9989271328417207e-05,
      "loss": 1.5378596782684326,
      "step": 76
    },
    {
      "epoch": 0.09602954755309326,
      "grad_norm": 4.625,
      "learning_rate": 1.998755772472864e-05,
      "loss": 1.7094926834106445,
      "step": 78
    },
    {
      "epoch": 0.09849184364419822,
      "grad_norm": 4.78125,
      "learning_rate": 1.9985717323804467e-05,
      "loss": 1.6278411149978638,
      "step": 80
    },
    {
      "epoch": 0.10095413973530316,
      "grad_norm": 2.859375,
      "learning_rate": 1.998375015485984e-05,
      "loss": 1.1961259841918945,
      "step": 82
    },
    {
      "epoch": 0.10341643582640812,
      "grad_norm": 6.15625,
      "learning_rate": 1.9981656249122285e-05,
      "loss": 1.1318538188934326,
      "step": 84
    },
    {
      "epoch": 0.10587873191751308,
      "grad_norm": 7.0625,
      "learning_rate": 1.997943563983117e-05,
      "loss": 1.7807825803756714,
      "step": 86
    },
    {
      "epoch": 0.10834102800861804,
      "grad_norm": 5.1875,
      "learning_rate": 1.9977088362237217e-05,
      "loss": 1.4653401374816895,
      "step": 88
    },
    {
      "epoch": 0.11080332409972299,
      "grad_norm": 1.8671875,
      "learning_rate": 1.9974614453601913e-05,
      "loss": 1.245106816291809,
      "step": 90
    },
    {
      "epoch": 0.11326562019082795,
      "grad_norm": 13.625,
      "learning_rate": 1.997201395319694e-05,
      "loss": 1.646073818206787,
      "step": 92
    },
    {
      "epoch": 0.11572791628193291,
      "grad_norm": 4.28125,
      "learning_rate": 1.996928690230353e-05,
      "loss": 1.5019184350967407,
      "step": 94
    },
    {
      "epoch": 0.11819021237303785,
      "grad_norm": 4.375,
      "learning_rate": 1.996643334421182e-05,
      "loss": 1.4860734939575195,
      "step": 96
    },
    {
      "epoch": 0.12065250846414281,
      "grad_norm": 2.8125,
      "learning_rate": 1.9963453324220185e-05,
      "loss": 1.1848664283752441,
      "step": 98
    },
    {
      "epoch": 0.12311480455524777,
      "grad_norm": 3.015625,
      "learning_rate": 1.9960346889634478e-05,
      "loss": 1.2456748485565186,
      "step": 100
    },
    {
      "epoch": 0.12557710064635272,
      "grad_norm": 1.765625,
      "learning_rate": 1.9957114089767306e-05,
      "loss": 1.163445234298706,
      "step": 102
    },
    {
      "epoch": 0.12803939673745768,
      "grad_norm": 2.546875,
      "learning_rate": 1.9953754975937246e-05,
      "loss": 1.5070371627807617,
      "step": 104
    },
    {
      "epoch": 0.13050169282856264,
      "grad_norm": 2.171875,
      "learning_rate": 1.9950269601468033e-05,
      "loss": 1.0462160110473633,
      "step": 106
    },
    {
      "epoch": 0.1329639889196676,
      "grad_norm": 2.75,
      "learning_rate": 1.9946658021687694e-05,
      "loss": 1.46537184715271,
      "step": 108
    },
    {
      "epoch": 0.13542628501077256,
      "grad_norm": 3.109375,
      "learning_rate": 1.994292029392768e-05,
      "loss": 1.5482763051986694,
      "step": 110
    },
    {
      "epoch": 0.1378885811018775,
      "grad_norm": 1.4921875,
      "learning_rate": 1.993905647752198e-05,
      "loss": 1.0207593441009521,
      "step": 112
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 6.46875,
      "learning_rate": 1.9935066633806133e-05,
      "loss": 1.77092444896698,
      "step": 114
    },
    {
      "epoch": 0.1428131732840874,
      "grad_norm": 4.21875,
      "learning_rate": 1.9930950826116288e-05,
      "loss": 1.4896173477172852,
      "step": 116
    },
    {
      "epoch": 0.14527546937519237,
      "grad_norm": 1.6796875,
      "learning_rate": 1.9926709119788197e-05,
      "loss": 1.1458995342254639,
      "step": 118
    },
    {
      "epoch": 0.14773776546629733,
      "grad_norm": 1.8203125,
      "learning_rate": 1.9922341582156156e-05,
      "loss": 1.0295559167861938,
      "step": 120
    },
    {
      "epoch": 0.1502000615574023,
      "grad_norm": 3.984375,
      "learning_rate": 1.9917848282551965e-05,
      "loss": 1.4944086074829102,
      "step": 122
    },
    {
      "epoch": 0.15266235764850725,
      "grad_norm": 2.78125,
      "learning_rate": 1.9913229292303806e-05,
      "loss": 1.5551412105560303,
      "step": 124
    },
    {
      "epoch": 0.15512465373961218,
      "grad_norm": 23.5,
      "learning_rate": 1.990848468473511e-05,
      "loss": 1.9140477180480957,
      "step": 126
    },
    {
      "epoch": 0.15758694983071714,
      "grad_norm": 10.4375,
      "learning_rate": 1.9903614535163417e-05,
      "loss": 1.4774185419082642,
      "step": 128
    },
    {
      "epoch": 0.1600492459218221,
      "grad_norm": 1.6484375,
      "learning_rate": 1.989861892089914e-05,
      "loss": 1.1932008266448975,
      "step": 130
    },
    {
      "epoch": 0.16251154201292706,
      "grad_norm": 1.0625,
      "learning_rate": 1.9893497921244394e-05,
      "loss": 1.253312349319458,
      "step": 132
    },
    {
      "epoch": 0.16497383810403202,
      "grad_norm": 2.8125,
      "learning_rate": 1.9888251617491674e-05,
      "loss": 1.0982537269592285,
      "step": 134
    },
    {
      "epoch": 0.16743613419513698,
      "grad_norm": 3.5,
      "learning_rate": 1.9882880092922612e-05,
      "loss": 1.5139843225479126,
      "step": 136
    },
    {
      "epoch": 0.1698984302862419,
      "grad_norm": 3.109375,
      "learning_rate": 1.9877383432806633e-05,
      "loss": 1.542289137840271,
      "step": 138
    },
    {
      "epoch": 0.17236072637734687,
      "grad_norm": 3.234375,
      "learning_rate": 1.9871761724399617e-05,
      "loss": 1.432151436805725,
      "step": 140
    },
    {
      "epoch": 0.17482302246845183,
      "grad_norm": 2.84375,
      "learning_rate": 1.986601505694248e-05,
      "loss": 1.500737190246582,
      "step": 142
    },
    {
      "epoch": 0.1772853185595568,
      "grad_norm": 4.65625,
      "learning_rate": 1.986014352165981e-05,
      "loss": 1.461523413658142,
      "step": 144
    },
    {
      "epoch": 0.17974761465066175,
      "grad_norm": 2.859375,
      "learning_rate": 1.985414721175837e-05,
      "loss": 1.5014877319335938,
      "step": 146
    },
    {
      "epoch": 0.1822099107417667,
      "grad_norm": 3.3125,
      "learning_rate": 1.9848026222425636e-05,
      "loss": 1.4726862907409668,
      "step": 148
    },
    {
      "epoch": 0.18467220683287167,
      "grad_norm": 4.625,
      "learning_rate": 1.9841780650828308e-05,
      "loss": 1.543365716934204,
      "step": 150
    },
    {
      "epoch": 0.1871345029239766,
      "grad_norm": 16.5,
      "learning_rate": 1.9835410596110723e-05,
      "loss": 0.5347945094108582,
      "step": 152
    },
    {
      "epoch": 0.18959679901508156,
      "grad_norm": 4.71875,
      "learning_rate": 1.982891615939333e-05,
      "loss": 1.6506417989730835,
      "step": 154
    },
    {
      "epoch": 0.19205909510618652,
      "grad_norm": 20.875,
      "learning_rate": 1.982229744377104e-05,
      "loss": 0.903097927570343,
      "step": 156
    },
    {
      "epoch": 0.19452139119729148,
      "grad_norm": 4.71875,
      "learning_rate": 1.9815554554311623e-05,
      "loss": 1.4461334943771362,
      "step": 158
    },
    {
      "epoch": 0.19698368728839644,
      "grad_norm": 7.78125,
      "learning_rate": 1.9808687598054023e-05,
      "loss": 1.1890747547149658,
      "step": 160
    },
    {
      "epoch": 0.1994459833795014,
      "grad_norm": 5.15625,
      "learning_rate": 1.980169668400666e-05,
      "loss": 1.4282548427581787,
      "step": 162
    },
    {
      "epoch": 0.20190827947060633,
      "grad_norm": 3.359375,
      "learning_rate": 1.9794581923145708e-05,
      "loss": 1.2562037706375122,
      "step": 164
    },
    {
      "epoch": 0.2043705755617113,
      "grad_norm": 3.296875,
      "learning_rate": 1.9787343428413327e-05,
      "loss": 1.4614920616149902,
      "step": 166
    },
    {
      "epoch": 0.20683287165281625,
      "grad_norm": 3.484375,
      "learning_rate": 1.9779981314715866e-05,
      "loss": 1.3043287992477417,
      "step": 168
    },
    {
      "epoch": 0.2092951677439212,
      "grad_norm": 3.203125,
      "learning_rate": 1.9772495698922047e-05,
      "loss": 1.17995285987854,
      "step": 170
    },
    {
      "epoch": 0.21175746383502617,
      "grad_norm": 6.03125,
      "learning_rate": 1.9764886699861104e-05,
      "loss": 2.0112454891204834,
      "step": 172
    },
    {
      "epoch": 0.21421975992613113,
      "grad_norm": 5.6875,
      "learning_rate": 1.9757154438320914e-05,
      "loss": 1.485538363456726,
      "step": 174
    },
    {
      "epoch": 0.21668205601723609,
      "grad_norm": 3.78125,
      "learning_rate": 1.974929903704604e-05,
      "loss": 1.445993423461914,
      "step": 176
    },
    {
      "epoch": 0.21914435210834102,
      "grad_norm": 3.59375,
      "learning_rate": 1.9741320620735832e-05,
      "loss": 1.4375782012939453,
      "step": 178
    },
    {
      "epoch": 0.22160664819944598,
      "grad_norm": 5.875,
      "learning_rate": 1.9733219316042404e-05,
      "loss": 1.8119451999664307,
      "step": 180
    },
    {
      "epoch": 0.22406894429055094,
      "grad_norm": 6.4375,
      "learning_rate": 1.9724995251568648e-05,
      "loss": 1.9366390705108643,
      "step": 182
    },
    {
      "epoch": 0.2265312403816559,
      "grad_norm": 5.0,
      "learning_rate": 1.97166485578662e-05,
      "loss": 1.4353549480438232,
      "step": 184
    },
    {
      "epoch": 0.22899353647276086,
      "grad_norm": 3.859375,
      "learning_rate": 1.9708179367433333e-05,
      "loss": 1.4814636707305908,
      "step": 186
    },
    {
      "epoch": 0.23145583256386582,
      "grad_norm": 2.8125,
      "learning_rate": 1.969958781471289e-05,
      "loss": 1.3983485698699951,
      "step": 188
    },
    {
      "epoch": 0.23391812865497075,
      "grad_norm": 6.4375,
      "learning_rate": 1.9690874036090126e-05,
      "loss": 1.8465726375579834,
      "step": 190
    },
    {
      "epoch": 0.2363804247460757,
      "grad_norm": 2.875,
      "learning_rate": 1.9682038169890563e-05,
      "loss": 1.4366203546524048,
      "step": 192
    },
    {
      "epoch": 0.23884272083718067,
      "grad_norm": 4.96875,
      "learning_rate": 1.9673080356377778e-05,
      "loss": 1.397793173789978,
      "step": 194
    },
    {
      "epoch": 0.24130501692828563,
      "grad_norm": 9.1875,
      "learning_rate": 1.9664000737751176e-05,
      "loss": 0.40697720646858215,
      "step": 196
    },
    {
      "epoch": 0.24376731301939059,
      "grad_norm": 9.9375,
      "learning_rate": 1.9654799458143744e-05,
      "loss": 0.7866343259811401,
      "step": 198
    },
    {
      "epoch": 0.24622960911049555,
      "grad_norm": 3.34375,
      "learning_rate": 1.9645476663619748e-05,
      "loss": 1.4268109798431396,
      "step": 200
    },
    {
      "epoch": 0.2486919052016005,
      "grad_norm": 9.3125,
      "learning_rate": 1.9636032502172445e-05,
      "loss": 1.2419297695159912,
      "step": 202
    },
    {
      "epoch": 0.25115420129270544,
      "grad_norm": 5.65625,
      "learning_rate": 1.962646712372169e-05,
      "loss": 1.7364747524261475,
      "step": 204
    },
    {
      "epoch": 0.2536164973838104,
      "grad_norm": 4.28125,
      "learning_rate": 1.9616780680111587e-05,
      "loss": 1.3980765342712402,
      "step": 206
    },
    {
      "epoch": 0.25607879347491536,
      "grad_norm": 12.1875,
      "learning_rate": 1.9606973325108077e-05,
      "loss": 1.4629418849945068,
      "step": 208
    },
    {
      "epoch": 0.2585410895660203,
      "grad_norm": 5.8125,
      "learning_rate": 1.9597045214396472e-05,
      "loss": 1.361374855041504,
      "step": 210
    },
    {
      "epoch": 0.2610033856571253,
      "grad_norm": 3.90625,
      "learning_rate": 1.958699650557902e-05,
      "loss": 1.4552102088928223,
      "step": 212
    },
    {
      "epoch": 0.2634656817482302,
      "grad_norm": 3.703125,
      "learning_rate": 1.9576827358172377e-05,
      "loss": 1.4295791387557983,
      "step": 214
    },
    {
      "epoch": 0.2659279778393352,
      "grad_norm": 7.21875,
      "learning_rate": 1.956653793360508e-05,
      "loss": 1.4938560724258423,
      "step": 216
    },
    {
      "epoch": 0.2683902739304401,
      "grad_norm": 10.875,
      "learning_rate": 1.955612839521499e-05,
      "loss": 1.405943512916565,
      "step": 218
    },
    {
      "epoch": 0.2708525700215451,
      "grad_norm": 7.09375,
      "learning_rate": 1.95455989082467e-05,
      "loss": 1.8168143033981323,
      "step": 220
    },
    {
      "epoch": 0.27331486611265005,
      "grad_norm": 4.8125,
      "learning_rate": 1.9534949639848894e-05,
      "loss": 1.880413293838501,
      "step": 222
    },
    {
      "epoch": 0.275777162203755,
      "grad_norm": 6.3125,
      "learning_rate": 1.9524180759071724e-05,
      "loss": 1.4368586540222168,
      "step": 224
    },
    {
      "epoch": 0.27823945829485996,
      "grad_norm": 7.59375,
      "learning_rate": 1.9513292436864107e-05,
      "loss": 1.4332315921783447,
      "step": 226
    },
    {
      "epoch": 0.2807017543859649,
      "grad_norm": 4.375,
      "learning_rate": 1.9502284846071003e-05,
      "loss": 1.4779151678085327,
      "step": 228
    },
    {
      "epoch": 0.2831640504770699,
      "grad_norm": 8.75,
      "learning_rate": 1.9491158161430703e-05,
      "loss": 0.5792175531387329,
      "step": 230
    },
    {
      "epoch": 0.2856263465681748,
      "grad_norm": 1.3828125,
      "learning_rate": 1.9479912559572e-05,
      "loss": 1.0462322235107422,
      "step": 232
    },
    {
      "epoch": 0.2880886426592798,
      "grad_norm": 7.375,
      "learning_rate": 1.946854821901146e-05,
      "loss": 1.3507080078125,
      "step": 234
    },
    {
      "epoch": 0.29055093875038474,
      "grad_norm": 4.1875,
      "learning_rate": 1.945706532015052e-05,
      "loss": 1.4383283853530884,
      "step": 236
    },
    {
      "epoch": 0.29301323484148967,
      "grad_norm": 5.96875,
      "learning_rate": 1.9445464045272668e-05,
      "loss": 0.7620460987091064,
      "step": 238
    },
    {
      "epoch": 0.29547553093259465,
      "grad_norm": 5.03125,
      "learning_rate": 1.9433744578540525e-05,
      "loss": 1.3795279264450073,
      "step": 240
    },
    {
      "epoch": 0.2979378270236996,
      "grad_norm": 3.1875,
      "learning_rate": 1.942190710599293e-05,
      "loss": 1.4460288286209106,
      "step": 242
    },
    {
      "epoch": 0.3004001231148046,
      "grad_norm": 3.359375,
      "learning_rate": 1.940995181554199e-05,
      "loss": 1.4355218410491943,
      "step": 244
    },
    {
      "epoch": 0.3028624192059095,
      "grad_norm": 5.34375,
      "learning_rate": 1.93978788969701e-05,
      "loss": 1.339043140411377,
      "step": 246
    },
    {
      "epoch": 0.3053247152970145,
      "grad_norm": 3.03125,
      "learning_rate": 1.9385688541926903e-05,
      "loss": 1.4305763244628906,
      "step": 248
    },
    {
      "epoch": 0.3077870113881194,
      "grad_norm": 3.6875,
      "learning_rate": 1.9373380943926295e-05,
      "loss": 1.7878942489624023,
      "step": 250
    },
    {
      "epoch": 0.31024930747922436,
      "grad_norm": 5.96875,
      "learning_rate": 1.9360956298343313e-05,
      "loss": 1.680354356765747,
      "step": 252
    },
    {
      "epoch": 0.31271160357032934,
      "grad_norm": 5.90625,
      "learning_rate": 1.934841480241105e-05,
      "loss": 1.5553169250488281,
      "step": 254
    },
    {
      "epoch": 0.3151738996614343,
      "grad_norm": 3.640625,
      "learning_rate": 1.9335756655217513e-05,
      "loss": 1.4183763265609741,
      "step": 256
    },
    {
      "epoch": 0.31763619575253926,
      "grad_norm": 2.890625,
      "learning_rate": 1.9322982057702492e-05,
      "loss": 1.391609787940979,
      "step": 258
    },
    {
      "epoch": 0.3200984918436442,
      "grad_norm": 3.59375,
      "learning_rate": 1.931009121265433e-05,
      "loss": 1.4094479084014893,
      "step": 260
    },
    {
      "epoch": 0.3225607879347491,
      "grad_norm": 3.21875,
      "learning_rate": 1.9297084324706734e-05,
      "loss": 1.4225077629089355,
      "step": 262
    },
    {
      "epoch": 0.3250230840258541,
      "grad_norm": 6.40625,
      "learning_rate": 1.9283961600335503e-05,
      "loss": 1.468010663986206,
      "step": 264
    },
    {
      "epoch": 0.32748538011695905,
      "grad_norm": 5.3125,
      "learning_rate": 1.927072324785529e-05,
      "loss": 1.7119166851043701,
      "step": 266
    },
    {
      "epoch": 0.32994767620806403,
      "grad_norm": 1.609375,
      "learning_rate": 1.9257369477416224e-05,
      "loss": 1.0271199941635132,
      "step": 268
    },
    {
      "epoch": 0.33240997229916897,
      "grad_norm": 8.5,
      "learning_rate": 1.9243900501000666e-05,
      "loss": 1.992653727531433,
      "step": 270
    },
    {
      "epoch": 0.33487226839027395,
      "grad_norm": 2.46875,
      "learning_rate": 1.9230316532419776e-05,
      "loss": 1.1357910633087158,
      "step": 272
    },
    {
      "epoch": 0.3373345644813789,
      "grad_norm": 4.875,
      "learning_rate": 1.9216617787310126e-05,
      "loss": 1.4825578927993774,
      "step": 274
    },
    {
      "epoch": 0.3397968605724838,
      "grad_norm": 1.6328125,
      "learning_rate": 1.920280448313031e-05,
      "loss": 1.0347270965576172,
      "step": 276
    },
    {
      "epoch": 0.3422591566635888,
      "grad_norm": 12.625,
      "learning_rate": 1.918887683915746e-05,
      "loss": 1.3586125373840332,
      "step": 278
    },
    {
      "epoch": 0.34472145275469374,
      "grad_norm": 4.15625,
      "learning_rate": 1.9174835076483786e-05,
      "loss": 1.4484443664550781,
      "step": 280
    },
    {
      "epoch": 0.3471837488457987,
      "grad_norm": 21.0,
      "learning_rate": 1.916067941801305e-05,
      "loss": 1.623072624206543,
      "step": 282
    },
    {
      "epoch": 0.34964604493690365,
      "grad_norm": 2.25,
      "learning_rate": 1.914641008845704e-05,
      "loss": 1.2479501962661743,
      "step": 284
    },
    {
      "epoch": 0.35210834102800864,
      "grad_norm": 2.1875,
      "learning_rate": 1.9132027314331992e-05,
      "loss": 1.23157799243927,
      "step": 286
    },
    {
      "epoch": 0.3545706371191136,
      "grad_norm": 3.9375,
      "learning_rate": 1.9117531323955004e-05,
      "loss": 1.4075965881347656,
      "step": 288
    },
    {
      "epoch": 0.3570329332102185,
      "grad_norm": 4.59375,
      "learning_rate": 1.910292234744042e-05,
      "loss": 1.6323527097702026,
      "step": 290
    },
    {
      "epoch": 0.3594952293013235,
      "grad_norm": 6.15625,
      "learning_rate": 1.9088200616696135e-05,
      "loss": 1.7271039485931396,
      "step": 292
    },
    {
      "epoch": 0.3619575253924284,
      "grad_norm": 9.625,
      "learning_rate": 1.9073366365419974e-05,
      "loss": 1.7908841371536255,
      "step": 294
    },
    {
      "epoch": 0.3644198214835334,
      "grad_norm": 5.75,
      "learning_rate": 1.9058419829095926e-05,
      "loss": 1.6885616779327393,
      "step": 296
    },
    {
      "epoch": 0.36688211757463834,
      "grad_norm": 4.34375,
      "learning_rate": 1.9043361244990458e-05,
      "loss": 1.6981712579727173,
      "step": 298
    },
    {
      "epoch": 0.36934441366574333,
      "grad_norm": 9.1875,
      "learning_rate": 1.9028190852148695e-05,
      "loss": 1.8226585388183594,
      "step": 300
    },
    {
      "epoch": 0.37180670975684826,
      "grad_norm": 4.5,
      "learning_rate": 1.9012908891390674e-05,
      "loss": 1.448561191558838,
      "step": 302
    },
    {
      "epoch": 0.3742690058479532,
      "grad_norm": 1.8671875,
      "learning_rate": 1.8997515605307484e-05,
      "loss": 1.1009801626205444,
      "step": 304
    },
    {
      "epoch": 0.3767313019390582,
      "grad_norm": 3.53125,
      "learning_rate": 1.898201123825744e-05,
      "loss": 1.4048492908477783,
      "step": 306
    },
    {
      "epoch": 0.3791935980301631,
      "grad_norm": 3.0625,
      "learning_rate": 1.8966396036362197e-05,
      "loss": 1.317664384841919,
      "step": 308
    },
    {
      "epoch": 0.3816558941212681,
      "grad_norm": 7.125,
      "learning_rate": 1.8950670247502823e-05,
      "loss": 1.1580454111099243,
      "step": 310
    },
    {
      "epoch": 0.38411819021237303,
      "grad_norm": 2.71875,
      "learning_rate": 1.8934834121315904e-05,
      "loss": 1.486496925354004,
      "step": 312
    },
    {
      "epoch": 0.38658048630347797,
      "grad_norm": 2.578125,
      "learning_rate": 1.8918887909189537e-05,
      "loss": 1.1772874593734741,
      "step": 314
    },
    {
      "epoch": 0.38904278239458295,
      "grad_norm": 1.75,
      "learning_rate": 1.8902831864259384e-05,
      "loss": 1.039048671722412,
      "step": 316
    },
    {
      "epoch": 0.3915050784856879,
      "grad_norm": 2.921875,
      "learning_rate": 1.8886666241404614e-05,
      "loss": 1.3585329055786133,
      "step": 318
    },
    {
      "epoch": 0.3939673745767929,
      "grad_norm": 2.46875,
      "learning_rate": 1.887039129724387e-05,
      "loss": 1.1052215099334717,
      "step": 320
    },
    {
      "epoch": 0.3964296706678978,
      "grad_norm": 3.296875,
      "learning_rate": 1.8854007290131223e-05,
      "loss": 1.4763174057006836,
      "step": 322
    },
    {
      "epoch": 0.3988919667590028,
      "grad_norm": 6.34375,
      "learning_rate": 1.8837514480152016e-05,
      "loss": 1.395377278327942,
      "step": 324
    },
    {
      "epoch": 0.4013542628501077,
      "grad_norm": 1.90625,
      "learning_rate": 1.882091312911879e-05,
      "loss": 1.043440580368042,
      "step": 326
    },
    {
      "epoch": 0.40381655894121266,
      "grad_norm": 4.5625,
      "learning_rate": 1.880420350056709e-05,
      "loss": 1.8225022554397583,
      "step": 328
    },
    {
      "epoch": 0.40627885503231764,
      "grad_norm": 1.3828125,
      "learning_rate": 1.87873858597513e-05,
      "loss": 1.035279393196106,
      "step": 330
    },
    {
      "epoch": 0.4087411511234226,
      "grad_norm": 4.46875,
      "learning_rate": 1.877046047364044e-05,
      "loss": 1.4025003910064697,
      "step": 332
    },
    {
      "epoch": 0.41120344721452756,
      "grad_norm": 5.3125,
      "learning_rate": 1.875342761091389e-05,
      "loss": 1.5152015686035156,
      "step": 334
    },
    {
      "epoch": 0.4136657433056325,
      "grad_norm": 4.4375,
      "learning_rate": 1.8736287541957172e-05,
      "loss": 1.0334498882293701,
      "step": 336
    },
    {
      "epoch": 0.4161280393967375,
      "grad_norm": 9.25,
      "learning_rate": 1.8719040538857625e-05,
      "loss": 1.5699793100357056,
      "step": 338
    },
    {
      "epoch": 0.4185903354878424,
      "grad_norm": 1.1875,
      "learning_rate": 1.8701686875400104e-05,
      "loss": 1.0974748134613037,
      "step": 340
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 4.25,
      "learning_rate": 1.8684226827062632e-05,
      "loss": 1.4441235065460205,
      "step": 342
    },
    {
      "epoch": 0.42351492767005233,
      "grad_norm": 7.5,
      "learning_rate": 1.8666660671012002e-05,
      "loss": 1.0178951025009155,
      "step": 344
    },
    {
      "epoch": 0.42597722376115726,
      "grad_norm": 6.96875,
      "learning_rate": 1.8648988686099416e-05,
      "loss": 1.7429275512695312,
      "step": 346
    },
    {
      "epoch": 0.42843951985226225,
      "grad_norm": 5.5625,
      "learning_rate": 1.863121115285604e-05,
      "loss": 1.3890095949172974,
      "step": 348
    },
    {
      "epoch": 0.4309018159433672,
      "grad_norm": 107.0,
      "learning_rate": 1.8613328353488533e-05,
      "loss": 1.671781301498413,
      "step": 350
    },
    {
      "epoch": 0.43336411203447217,
      "grad_norm": 6.4375,
      "learning_rate": 1.8595340571874607e-05,
      "loss": 0.9639192223548889,
      "step": 352
    },
    {
      "epoch": 0.4358264081255771,
      "grad_norm": 4.34375,
      "learning_rate": 1.8577248093558486e-05,
      "loss": 1.3523774147033691,
      "step": 354
    },
    {
      "epoch": 0.43828870421668203,
      "grad_norm": 6.53125,
      "learning_rate": 1.855905120574638e-05,
      "loss": 1.4467836618423462,
      "step": 356
    },
    {
      "epoch": 0.440751000307787,
      "grad_norm": 6.125,
      "learning_rate": 1.854075019730194e-05,
      "loss": 1.521872878074646,
      "step": 358
    },
    {
      "epoch": 0.44321329639889195,
      "grad_norm": 3.890625,
      "learning_rate": 1.8522345358741662e-05,
      "loss": 0.7035669088363647,
      "step": 360
    },
    {
      "epoch": 0.44567559248999694,
      "grad_norm": 6.1875,
      "learning_rate": 1.8503836982230284e-05,
      "loss": 1.9208122491836548,
      "step": 362
    },
    {
      "epoch": 0.4481378885811019,
      "grad_norm": 3.328125,
      "learning_rate": 1.848522536157612e-05,
      "loss": 1.4902818202972412,
      "step": 364
    },
    {
      "epoch": 0.45060018467220686,
      "grad_norm": 5.625,
      "learning_rate": 1.8466510792226447e-05,
      "loss": 1.7599055767059326,
      "step": 366
    },
    {
      "epoch": 0.4530624807633118,
      "grad_norm": 9.875,
      "learning_rate": 1.8447693571262757e-05,
      "loss": 1.6332001686096191,
      "step": 368
    },
    {
      "epoch": 0.4555247768544167,
      "grad_norm": 2.953125,
      "learning_rate": 1.842877399739608e-05,
      "loss": 1.3132367134094238,
      "step": 370
    },
    {
      "epoch": 0.4579870729455217,
      "grad_norm": 3.09375,
      "learning_rate": 1.840975237096224e-05,
      "loss": 1.3803317546844482,
      "step": 372
    },
    {
      "epoch": 0.46044936903662664,
      "grad_norm": 4.15625,
      "learning_rate": 1.8390628993917062e-05,
      "loss": 1.3456385135650635,
      "step": 374
    },
    {
      "epoch": 0.46291166512773163,
      "grad_norm": 7.3125,
      "learning_rate": 1.8371404169831613e-05,
      "loss": 0.39371660351753235,
      "step": 376
    },
    {
      "epoch": 0.46537396121883656,
      "grad_norm": 3.453125,
      "learning_rate": 1.8352078203887346e-05,
      "loss": 1.3137223720550537,
      "step": 378
    },
    {
      "epoch": 0.4678362573099415,
      "grad_norm": 6.5,
      "learning_rate": 1.8332651402871286e-05,
      "loss": 0.324982613325119,
      "step": 380
    },
    {
      "epoch": 0.4702985534010465,
      "grad_norm": 7.8125,
      "learning_rate": 1.8313124075171153e-05,
      "loss": 1.7339143753051758,
      "step": 382
    },
    {
      "epoch": 0.4727608494921514,
      "grad_norm": 3.546875,
      "learning_rate": 1.8293496530770448e-05,
      "loss": 1.3264766931533813,
      "step": 384
    },
    {
      "epoch": 0.4752231455832564,
      "grad_norm": 5.28125,
      "learning_rate": 1.827376908124356e-05,
      "loss": 1.732757568359375,
      "step": 386
    },
    {
      "epoch": 0.47768544167436133,
      "grad_norm": 17.625,
      "learning_rate": 1.8253942039750795e-05,
      "loss": 1.7728583812713623,
      "step": 388
    },
    {
      "epoch": 0.4801477377654663,
      "grad_norm": 2.046875,
      "learning_rate": 1.8234015721033428e-05,
      "loss": 1.1088775396347046,
      "step": 390
    },
    {
      "epoch": 0.48261003385657125,
      "grad_norm": 7.75,
      "learning_rate": 1.8213990441408687e-05,
      "loss": 1.7161972522735596,
      "step": 392
    },
    {
      "epoch": 0.4850723299476762,
      "grad_norm": 2.71875,
      "learning_rate": 1.819386651876474e-05,
      "loss": 1.3242639303207397,
      "step": 394
    },
    {
      "epoch": 0.48753462603878117,
      "grad_norm": 3.4375,
      "learning_rate": 1.8173644272555645e-05,
      "loss": 1.387306571006775,
      "step": 396
    },
    {
      "epoch": 0.4899969221298861,
      "grad_norm": 11.875,
      "learning_rate": 1.815332402379629e-05,
      "loss": 0.28826314210891724,
      "step": 398
    },
    {
      "epoch": 0.4924592182209911,
      "grad_norm": 2.953125,
      "learning_rate": 1.8132906095057287e-05,
      "loss": 1.4168837070465088,
      "step": 400
    },
    {
      "epoch": 0.494921514312096,
      "grad_norm": 7.15625,
      "learning_rate": 1.8112390810459842e-05,
      "loss": 1.8249226808547974,
      "step": 402
    },
    {
      "epoch": 0.497383810403201,
      "grad_norm": 3.328125,
      "learning_rate": 1.8091778495670645e-05,
      "loss": 1.3672676086425781,
      "step": 404
    },
    {
      "epoch": 0.49984610649430594,
      "grad_norm": 1.8359375,
      "learning_rate": 1.8071069477896655e-05,
      "loss": 1.1166040897369385,
      "step": 406
    },
    {
      "epoch": 0.5023084025854109,
      "grad_norm": 3.609375,
      "learning_rate": 1.805026408587994e-05,
      "loss": 1.401571273803711,
      "step": 408
    },
    {
      "epoch": 0.5047706986765158,
      "grad_norm": 1.03125,
      "learning_rate": 1.8029362649892436e-05,
      "loss": 1.0254689455032349,
      "step": 410
    },
    {
      "epoch": 0.5072329947676208,
      "grad_norm": 8.3125,
      "learning_rate": 1.8008365501730716e-05,
      "loss": 1.4256839752197266,
      "step": 412
    },
    {
      "epoch": 0.5096952908587258,
      "grad_norm": 2.234375,
      "learning_rate": 1.7987272974710733e-05,
      "loss": 1.2576653957366943,
      "step": 414
    },
    {
      "epoch": 0.5121575869498307,
      "grad_norm": 6.65625,
      "learning_rate": 1.7966085403662502e-05,
      "loss": 1.847425937652588,
      "step": 416
    },
    {
      "epoch": 0.5146198830409356,
      "grad_norm": 1.9609375,
      "learning_rate": 1.79448031249248e-05,
      "loss": 1.2791142463684082,
      "step": 418
    },
    {
      "epoch": 0.5170821791320406,
      "grad_norm": 3.453125,
      "learning_rate": 1.7923426476339843e-05,
      "loss": 1.4304306507110596,
      "step": 420
    },
    {
      "epoch": 0.5195444752231456,
      "grad_norm": 2.765625,
      "learning_rate": 1.7901955797247894e-05,
      "loss": 1.354073405265808,
      "step": 422
    },
    {
      "epoch": 0.5220067713142506,
      "grad_norm": 5.46875,
      "learning_rate": 1.7880391428481877e-05,
      "loss": 1.1258585453033447,
      "step": 424
    },
    {
      "epoch": 0.5244690674053555,
      "grad_norm": 7.96875,
      "learning_rate": 1.7858733712362006e-05,
      "loss": 1.2407653331756592,
      "step": 426
    },
    {
      "epoch": 0.5269313634964604,
      "grad_norm": 3.953125,
      "learning_rate": 1.7836982992690298e-05,
      "loss": 1.3420263528823853,
      "step": 428
    },
    {
      "epoch": 0.5293936595875655,
      "grad_norm": 1.6640625,
      "learning_rate": 1.781513961474515e-05,
      "loss": 1.070509672164917,
      "step": 430
    },
    {
      "epoch": 0.5318559556786704,
      "grad_norm": 5.34375,
      "learning_rate": 1.7793203925275857e-05,
      "loss": 1.4249287843704224,
      "step": 432
    },
    {
      "epoch": 0.5343182517697753,
      "grad_norm": 3.453125,
      "learning_rate": 1.777117627249708e-05,
      "loss": 1.3717284202575684,
      "step": 434
    },
    {
      "epoch": 0.5367805478608803,
      "grad_norm": 3.28125,
      "learning_rate": 1.774905700608335e-05,
      "loss": 1.177480697631836,
      "step": 436
    },
    {
      "epoch": 0.5392428439519852,
      "grad_norm": 1.6015625,
      "learning_rate": 1.7726846477163506e-05,
      "loss": 1.0270402431488037,
      "step": 438
    },
    {
      "epoch": 0.5417051400430902,
      "grad_norm": 3.859375,
      "learning_rate": 1.7704545038315108e-05,
      "loss": 1.0033745765686035,
      "step": 440
    },
    {
      "epoch": 0.5441674361341952,
      "grad_norm": 51.25,
      "learning_rate": 1.7682153043558865e-05,
      "loss": 1.7934285402297974,
      "step": 442
    },
    {
      "epoch": 0.5466297322253001,
      "grad_norm": 4.6875,
      "learning_rate": 1.765967084835299e-05,
      "loss": 1.5169916152954102,
      "step": 444
    },
    {
      "epoch": 0.549092028316405,
      "grad_norm": 2.15625,
      "learning_rate": 1.763709880958758e-05,
      "loss": 1.102067470550537,
      "step": 446
    },
    {
      "epoch": 0.55155432440751,
      "grad_norm": 5.9375,
      "learning_rate": 1.7614437285578927e-05,
      "loss": 1.742466926574707,
      "step": 448
    },
    {
      "epoch": 0.554016620498615,
      "grad_norm": 2.484375,
      "learning_rate": 1.7591686636063855e-05,
      "loss": 0.9622822403907776,
      "step": 450
    },
    {
      "epoch": 0.5564789165897199,
      "grad_norm": 3.671875,
      "learning_rate": 1.756884722219398e-05,
      "loss": 1.3980923891067505,
      "step": 452
    },
    {
      "epoch": 0.5589412126808249,
      "grad_norm": 3.4375,
      "learning_rate": 1.754591940653002e-05,
      "loss": 1.2967207431793213,
      "step": 454
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 2.0625,
      "learning_rate": 1.7522903553035983e-05,
      "loss": 1.026415228843689,
      "step": 456
    },
    {
      "epoch": 0.5638658048630347,
      "grad_norm": 8.4375,
      "learning_rate": 1.749980002707344e-05,
      "loss": 1.6526079177856445,
      "step": 458
    },
    {
      "epoch": 0.5663281009541398,
      "grad_norm": 1.453125,
      "learning_rate": 1.747660919539571e-05,
      "loss": 1.0682464838027954,
      "step": 460
    },
    {
      "epoch": 0.5687903970452447,
      "grad_norm": 1.4296875,
      "learning_rate": 1.745333142614201e-05,
      "loss": 1.2323286533355713,
      "step": 462
    },
    {
      "epoch": 0.5712526931363496,
      "grad_norm": 8.6875,
      "learning_rate": 1.742996708883165e-05,
      "loss": 1.657741665840149,
      "step": 464
    },
    {
      "epoch": 0.5737149892274546,
      "grad_norm": 5.6875,
      "learning_rate": 1.740651655435815e-05,
      "loss": 1.5120787620544434,
      "step": 466
    },
    {
      "epoch": 0.5761772853185596,
      "grad_norm": 3.421875,
      "learning_rate": 1.7382980194983354e-05,
      "loss": 1.3939659595489502,
      "step": 468
    },
    {
      "epoch": 0.5786395814096645,
      "grad_norm": 5.75,
      "learning_rate": 1.735935838433151e-05,
      "loss": 1.6433215141296387,
      "step": 470
    },
    {
      "epoch": 0.5811018775007695,
      "grad_norm": 2.21875,
      "learning_rate": 1.7335651497383357e-05,
      "loss": 1.078176498413086,
      "step": 472
    },
    {
      "epoch": 0.5835641735918744,
      "grad_norm": 3.03125,
      "learning_rate": 1.731185991047017e-05,
      "loss": 1.3398302793502808,
      "step": 474
    },
    {
      "epoch": 0.5860264696829793,
      "grad_norm": 3.015625,
      "learning_rate": 1.7287984001267765e-05,
      "loss": 1.344508171081543,
      "step": 476
    },
    {
      "epoch": 0.5884887657740844,
      "grad_norm": 3.921875,
      "learning_rate": 1.7264024148790538e-05,
      "loss": 1.453425407409668,
      "step": 478
    },
    {
      "epoch": 0.5909510618651893,
      "grad_norm": 1.234375,
      "learning_rate": 1.7239980733385408e-05,
      "loss": 0.9735173583030701,
      "step": 480
    },
    {
      "epoch": 0.5934133579562942,
      "grad_norm": 3.03125,
      "learning_rate": 1.721585413672582e-05,
      "loss": 1.3980371952056885,
      "step": 482
    },
    {
      "epoch": 0.5958756540473992,
      "grad_norm": 3.6875,
      "learning_rate": 1.7191644741805648e-05,
      "loss": 1.3482059240341187,
      "step": 484
    },
    {
      "epoch": 0.5983379501385041,
      "grad_norm": 3.203125,
      "learning_rate": 1.716735293293316e-05,
      "loss": 1.404923439025879,
      "step": 486
    },
    {
      "epoch": 0.6008002462296091,
      "grad_norm": 5.59375,
      "learning_rate": 1.7142979095724865e-05,
      "loss": 1.5890945196151733,
      "step": 488
    },
    {
      "epoch": 0.6032625423207141,
      "grad_norm": 10.25,
      "learning_rate": 1.7118523617099435e-05,
      "loss": 1.7281887531280518,
      "step": 490
    },
    {
      "epoch": 0.605724838411819,
      "grad_norm": 3.3125,
      "learning_rate": 1.7093986885271532e-05,
      "loss": 1.4024686813354492,
      "step": 492
    },
    {
      "epoch": 0.6081871345029239,
      "grad_norm": 1.171875,
      "learning_rate": 1.7069369289745673e-05,
      "loss": 1.1231578588485718,
      "step": 494
    },
    {
      "epoch": 0.610649430594029,
      "grad_norm": 5.40625,
      "learning_rate": 1.704467122131003e-05,
      "loss": 1.6918822526931763,
      "step": 496
    },
    {
      "epoch": 0.6131117266851339,
      "grad_norm": 5.6875,
      "learning_rate": 1.7019893072030222e-05,
      "loss": 1.7565666437149048,
      "step": 498
    },
    {
      "epoch": 0.6155740227762388,
      "grad_norm": 6.0,
      "learning_rate": 1.6995035235243098e-05,
      "loss": 1.582336187362671,
      "step": 500
    },
    {
      "epoch": 0.6180363188673438,
      "grad_norm": 2.515625,
      "learning_rate": 1.6970098105550514e-05,
      "loss": 1.2266004085540771,
      "step": 502
    },
    {
      "epoch": 0.6204986149584487,
      "grad_norm": 3.140625,
      "learning_rate": 1.694508207881302e-05,
      "loss": 1.3281134366989136,
      "step": 504
    },
    {
      "epoch": 0.6229609110495538,
      "grad_norm": 2.6875,
      "learning_rate": 1.691998755214363e-05,
      "loss": 1.2356681823730469,
      "step": 506
    },
    {
      "epoch": 0.6254232071406587,
      "grad_norm": 3.40625,
      "learning_rate": 1.689481492390148e-05,
      "loss": 1.0685112476348877,
      "step": 508
    },
    {
      "epoch": 0.6278855032317636,
      "grad_norm": 3.203125,
      "learning_rate": 1.686956459368551e-05,
      "loss": 1.0986112356185913,
      "step": 510
    },
    {
      "epoch": 0.6303477993228686,
      "grad_norm": 1.8671875,
      "learning_rate": 1.6844236962328154e-05,
      "loss": 1.1448196172714233,
      "step": 512
    },
    {
      "epoch": 0.6328100954139735,
      "grad_norm": 4.0625,
      "learning_rate": 1.681883243188892e-05,
      "loss": 1.5838472843170166,
      "step": 514
    },
    {
      "epoch": 0.6352723915050785,
      "grad_norm": 5.0,
      "learning_rate": 1.6793351405648053e-05,
      "loss": 1.0939499139785767,
      "step": 516
    },
    {
      "epoch": 0.6377346875961835,
      "grad_norm": 2.078125,
      "learning_rate": 1.6767794288100123e-05,
      "loss": 0.9746682643890381,
      "step": 518
    },
    {
      "epoch": 0.6401969836872884,
      "grad_norm": 7.53125,
      "learning_rate": 1.6742161484947596e-05,
      "loss": 0.9929898977279663,
      "step": 520
    },
    {
      "epoch": 0.6426592797783933,
      "grad_norm": 7.28125,
      "learning_rate": 1.6716453403094394e-05,
      "loss": 1.6372830867767334,
      "step": 522
    },
    {
      "epoch": 0.6451215758694983,
      "grad_norm": 18.875,
      "learning_rate": 1.6690670450639435e-05,
      "loss": 0.2726695239543915,
      "step": 524
    },
    {
      "epoch": 0.6475838719606033,
      "grad_norm": 4.1875,
      "learning_rate": 1.6664813036870174e-05,
      "loss": 1.3791524171829224,
      "step": 526
    },
    {
      "epoch": 0.6500461680517082,
      "grad_norm": 20.5,
      "learning_rate": 1.6638881572256078e-05,
      "loss": 1.7047182321548462,
      "step": 528
    },
    {
      "epoch": 0.6525084641428132,
      "grad_norm": 5.5,
      "learning_rate": 1.6612876468442118e-05,
      "loss": 1.8910508155822754,
      "step": 530
    },
    {
      "epoch": 0.6549707602339181,
      "grad_norm": 6.0,
      "learning_rate": 1.6586798138242258e-05,
      "loss": 1.3536272048950195,
      "step": 532
    },
    {
      "epoch": 0.6574330563250231,
      "grad_norm": 5.9375,
      "learning_rate": 1.6560646995632865e-05,
      "loss": 1.404782772064209,
      "step": 534
    },
    {
      "epoch": 0.6598953524161281,
      "grad_norm": 3.296875,
      "learning_rate": 1.6534423455746157e-05,
      "loss": 1.3882639408111572,
      "step": 536
    },
    {
      "epoch": 0.662357648507233,
      "grad_norm": 4.65625,
      "learning_rate": 1.6508127934863633e-05,
      "loss": 1.3433642387390137,
      "step": 538
    },
    {
      "epoch": 0.6648199445983379,
      "grad_norm": 7.5625,
      "learning_rate": 1.6481760850409406e-05,
      "loss": 1.7808656692504883,
      "step": 540
    },
    {
      "epoch": 0.6672822406894429,
      "grad_norm": 5.1875,
      "learning_rate": 1.645532262094364e-05,
      "loss": 1.405790090560913,
      "step": 542
    },
    {
      "epoch": 0.6697445367805479,
      "grad_norm": 8.0625,
      "learning_rate": 1.6428813666155878e-05,
      "loss": 1.3506624698638916,
      "step": 544
    },
    {
      "epoch": 0.6722068328716528,
      "grad_norm": 4.125,
      "learning_rate": 1.6402234406858375e-05,
      "loss": 1.3872720003128052,
      "step": 546
    },
    {
      "epoch": 0.6746691289627578,
      "grad_norm": 2.234375,
      "learning_rate": 1.6375585264979423e-05,
      "loss": 1.1865075826644897,
      "step": 548
    },
    {
      "epoch": 0.6771314250538627,
      "grad_norm": 4.625,
      "learning_rate": 1.6348866663556645e-05,
      "loss": 1.4540220499038696,
      "step": 550
    },
    {
      "epoch": 0.6795937211449676,
      "grad_norm": 8.3125,
      "learning_rate": 1.6322079026730317e-05,
      "loss": 1.0791795253753662,
      "step": 552
    },
    {
      "epoch": 0.6820560172360727,
      "grad_norm": 2.359375,
      "learning_rate": 1.6295222779736586e-05,
      "loss": 1.1618213653564453,
      "step": 554
    },
    {
      "epoch": 0.6845183133271776,
      "grad_norm": 5.78125,
      "learning_rate": 1.626829834890074e-05,
      "loss": 1.6633763313293457,
      "step": 556
    },
    {
      "epoch": 0.6869806094182825,
      "grad_norm": 8.375,
      "learning_rate": 1.624130616163044e-05,
      "loss": 1.7596007585525513,
      "step": 558
    },
    {
      "epoch": 0.6894429055093875,
      "grad_norm": 1.5625,
      "learning_rate": 1.6214246646408946e-05,
      "loss": 1.0530022382736206,
      "step": 560
    },
    {
      "epoch": 0.6919052016004925,
      "grad_norm": 3.375,
      "learning_rate": 1.61871202327883e-05,
      "loss": 1.3792953491210938,
      "step": 562
    },
    {
      "epoch": 0.6943674976915974,
      "grad_norm": 3.640625,
      "learning_rate": 1.6159927351382512e-05,
      "loss": 1.3962174654006958,
      "step": 564
    },
    {
      "epoch": 0.6968297937827024,
      "grad_norm": 3.59375,
      "learning_rate": 1.6132668433860723e-05,
      "loss": 1.3606011867523193,
      "step": 566
    },
    {
      "epoch": 0.6992920898738073,
      "grad_norm": 3.859375,
      "learning_rate": 1.6105343912940355e-05,
      "loss": 1.3807508945465088,
      "step": 568
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 2.78125,
      "learning_rate": 1.6077954222380235e-05,
      "loss": 1.3539392948150635,
      "step": 570
    },
    {
      "epoch": 0.7042166820560173,
      "grad_norm": 5.125,
      "learning_rate": 1.6050499796973733e-05,
      "loss": 1.3989124298095703,
      "step": 572
    },
    {
      "epoch": 0.7066789781471222,
      "grad_norm": 3.875,
      "learning_rate": 1.6022981072541823e-05,
      "loss": 1.3723649978637695,
      "step": 574
    },
    {
      "epoch": 0.7091412742382271,
      "grad_norm": 7.65625,
      "learning_rate": 1.599539848592619e-05,
      "loss": 1.3160829544067383,
      "step": 576
    },
    {
      "epoch": 0.7116035703293321,
      "grad_norm": 3.171875,
      "learning_rate": 1.5967752474982296e-05,
      "loss": 1.1242200136184692,
      "step": 578
    },
    {
      "epoch": 0.714065866420437,
      "grad_norm": 2.4375,
      "learning_rate": 1.5940043478572413e-05,
      "loss": 1.0219156742095947,
      "step": 580
    },
    {
      "epoch": 0.716528162511542,
      "grad_norm": 1.7578125,
      "learning_rate": 1.591227193655867e-05,
      "loss": 0.9959127902984619,
      "step": 582
    },
    {
      "epoch": 0.718990458602647,
      "grad_norm": 5.15625,
      "learning_rate": 1.5884438289796067e-05,
      "loss": 1.828487753868103,
      "step": 584
    },
    {
      "epoch": 0.7214527546937519,
      "grad_norm": 3.828125,
      "learning_rate": 1.5856542980125477e-05,
      "loss": 1.4034947156906128,
      "step": 586
    },
    {
      "epoch": 0.7239150507848569,
      "grad_norm": 13.75,
      "learning_rate": 1.5828586450366626e-05,
      "loss": 1.3598823547363281,
      "step": 588
    },
    {
      "epoch": 0.7263773468759618,
      "grad_norm": 3.453125,
      "learning_rate": 1.5800569144311078e-05,
      "loss": 1.3686673641204834,
      "step": 590
    },
    {
      "epoch": 0.7288396429670668,
      "grad_norm": 4.21875,
      "learning_rate": 1.5772491506715174e-05,
      "loss": 1.3769757747650146,
      "step": 592
    },
    {
      "epoch": 0.7313019390581718,
      "grad_norm": 9.5625,
      "learning_rate": 1.5744353983292975e-05,
      "loss": 0.6412605047225952,
      "step": 594
    },
    {
      "epoch": 0.7337642351492767,
      "grad_norm": 3.921875,
      "learning_rate": 1.5716157020709196e-05,
      "loss": 1.3520535230636597,
      "step": 596
    },
    {
      "epoch": 0.7362265312403816,
      "grad_norm": 3.03125,
      "learning_rate": 1.5687901066572116e-05,
      "loss": 1.0531518459320068,
      "step": 598
    },
    {
      "epoch": 0.7386888273314867,
      "grad_norm": 3.375,
      "learning_rate": 1.565958656942645e-05,
      "loss": 1.3364739418029785,
      "step": 600
    },
    {
      "epoch": 0.7411511234225916,
      "grad_norm": 1.4140625,
      "learning_rate": 1.563121397874626e-05,
      "loss": 1.0583405494689941,
      "step": 602
    },
    {
      "epoch": 0.7436134195136965,
      "grad_norm": 6.875,
      "learning_rate": 1.5602783744927794e-05,
      "loss": 1.699558138847351,
      "step": 604
    },
    {
      "epoch": 0.7460757156048015,
      "grad_norm": 5.5625,
      "learning_rate": 1.557429631928235e-05,
      "loss": 1.6496608257293701,
      "step": 606
    },
    {
      "epoch": 0.7485380116959064,
      "grad_norm": 4.6875,
      "learning_rate": 1.5545752154029118e-05,
      "loss": 1.3926259279251099,
      "step": 608
    },
    {
      "epoch": 0.7510003077870114,
      "grad_norm": 1.796875,
      "learning_rate": 1.5517151702287977e-05,
      "loss": 1.0908641815185547,
      "step": 610
    },
    {
      "epoch": 0.7534626038781164,
      "grad_norm": 2.28125,
      "learning_rate": 1.548849541807233e-05,
      "loss": 1.1665232181549072,
      "step": 612
    },
    {
      "epoch": 0.7559248999692213,
      "grad_norm": 4.375,
      "learning_rate": 1.5459783756281872e-05,
      "loss": 1.5498981475830078,
      "step": 614
    },
    {
      "epoch": 0.7583871960603262,
      "grad_norm": 3.796875,
      "learning_rate": 1.543101717269539e-05,
      "loss": 1.3930026292800903,
      "step": 616
    },
    {
      "epoch": 0.7608494921514312,
      "grad_norm": 18.75,
      "learning_rate": 1.5402196123963514e-05,
      "loss": 1.3921393156051636,
      "step": 618
    },
    {
      "epoch": 0.7633117882425362,
      "grad_norm": 4.4375,
      "learning_rate": 1.537332106760147e-05,
      "loss": 1.5707228183746338,
      "step": 620
    },
    {
      "epoch": 0.7657740843336411,
      "grad_norm": 5.125,
      "learning_rate": 1.5344392461981835e-05,
      "loss": 1.1423331499099731,
      "step": 622
    },
    {
      "epoch": 0.7682363804247461,
      "grad_norm": 3.328125,
      "learning_rate": 1.5315410766327224e-05,
      "loss": 1.3473308086395264,
      "step": 624
    },
    {
      "epoch": 0.770698676515851,
      "grad_norm": 6.65625,
      "learning_rate": 1.5286376440703034e-05,
      "loss": 1.5814166069030762,
      "step": 626
    },
    {
      "epoch": 0.7731609726069559,
      "grad_norm": 6.625,
      "learning_rate": 1.5257289946010123e-05,
      "loss": 1.664976954460144,
      "step": 628
    },
    {
      "epoch": 0.775623268698061,
      "grad_norm": 3.984375,
      "learning_rate": 1.5228151743977502e-05,
      "loss": 1.0675089359283447,
      "step": 630
    },
    {
      "epoch": 0.7780855647891659,
      "grad_norm": 6.34375,
      "learning_rate": 1.5198962297155002e-05,
      "loss": 1.5473486185073853,
      "step": 632
    },
    {
      "epoch": 0.7805478608802708,
      "grad_norm": 3.296875,
      "learning_rate": 1.5169722068905927e-05,
      "loss": 1.4237251281738281,
      "step": 634
    },
    {
      "epoch": 0.7830101569713758,
      "grad_norm": 1.5859375,
      "learning_rate": 1.514043152339971e-05,
      "loss": 1.1319770812988281,
      "step": 636
    },
    {
      "epoch": 0.7854724530624808,
      "grad_norm": 5.90625,
      "learning_rate": 1.5111091125604538e-05,
      "loss": 1.7654370069503784,
      "step": 638
    },
    {
      "epoch": 0.7879347491535857,
      "grad_norm": 4.46875,
      "learning_rate": 1.5081701341279957e-05,
      "loss": 1.407934546470642,
      "step": 640
    },
    {
      "epoch": 0.7903970452446907,
      "grad_norm": 4.09375,
      "learning_rate": 1.5052262636969506e-05,
      "loss": 1.3491337299346924,
      "step": 642
    },
    {
      "epoch": 0.7928593413357956,
      "grad_norm": 1.796875,
      "learning_rate": 1.502277547999329e-05,
      "loss": 1.125083565711975,
      "step": 644
    },
    {
      "epoch": 0.7953216374269005,
      "grad_norm": 6.0,
      "learning_rate": 1.4993240338440571e-05,
      "loss": 1.3817883729934692,
      "step": 646
    },
    {
      "epoch": 0.7977839335180056,
      "grad_norm": 3.46875,
      "learning_rate": 1.4963657681162328e-05,
      "loss": 1.3694324493408203,
      "step": 648
    },
    {
      "epoch": 0.8002462296091105,
      "grad_norm": 7.71875,
      "learning_rate": 1.4934027977763838e-05,
      "loss": 1.451867699623108,
      "step": 650
    },
    {
      "epoch": 0.8027085257002154,
      "grad_norm": 3.59375,
      "learning_rate": 1.4904351698597181e-05,
      "loss": 1.386351466178894,
      "step": 652
    },
    {
      "epoch": 0.8051708217913204,
      "grad_norm": 1.6796875,
      "learning_rate": 1.4874629314753812e-05,
      "loss": 1.0673191547393799,
      "step": 654
    },
    {
      "epoch": 0.8076331178824253,
      "grad_norm": 4.28125,
      "learning_rate": 1.4844861298057068e-05,
      "loss": 1.4586551189422607,
      "step": 656
    },
    {
      "epoch": 0.8100954139735304,
      "grad_norm": 1.6015625,
      "learning_rate": 1.4815048121054667e-05,
      "loss": 1.042107105255127,
      "step": 658
    },
    {
      "epoch": 0.8125577100646353,
      "grad_norm": 4.1875,
      "learning_rate": 1.4785190257011231e-05,
      "loss": 1.6682562828063965,
      "step": 660
    },
    {
      "epoch": 0.8150200061557402,
      "grad_norm": 7.21875,
      "learning_rate": 1.4755288179900741e-05,
      "loss": 1.720628261566162,
      "step": 662
    },
    {
      "epoch": 0.8174823022468451,
      "grad_norm": 7.15625,
      "learning_rate": 1.4725342364399055e-05,
      "loss": 1.3896342515945435,
      "step": 664
    },
    {
      "epoch": 0.8199445983379502,
      "grad_norm": 4.40625,
      "learning_rate": 1.4695353285876328e-05,
      "loss": 1.3969242572784424,
      "step": 666
    },
    {
      "epoch": 0.8224068944290551,
      "grad_norm": 4.4375,
      "learning_rate": 1.46653214203895e-05,
      "loss": 1.686731219291687,
      "step": 668
    },
    {
      "epoch": 0.8248691905201601,
      "grad_norm": 6.28125,
      "learning_rate": 1.463524724467472e-05,
      "loss": 1.7890194654464722,
      "step": 670
    },
    {
      "epoch": 0.827331486611265,
      "grad_norm": 3.0625,
      "learning_rate": 1.4605131236139789e-05,
      "loss": 1.3969485759735107,
      "step": 672
    },
    {
      "epoch": 0.8297937827023699,
      "grad_norm": 1.9765625,
      "learning_rate": 1.4574973872856566e-05,
      "loss": 1.009456992149353,
      "step": 674
    },
    {
      "epoch": 0.832256078793475,
      "grad_norm": 5.75,
      "learning_rate": 1.4544775633553409e-05,
      "loss": 1.0795286893844604,
      "step": 676
    },
    {
      "epoch": 0.8347183748845799,
      "grad_norm": 3.765625,
      "learning_rate": 1.4514536997607533e-05,
      "loss": 1.291078805923462,
      "step": 678
    },
    {
      "epoch": 0.8371806709756848,
      "grad_norm": 3.640625,
      "learning_rate": 1.4484258445037437e-05,
      "loss": 1.2912898063659668,
      "step": 680
    },
    {
      "epoch": 0.8396429670667898,
      "grad_norm": 5.3125,
      "learning_rate": 1.4453940456495268e-05,
      "loss": 1.5154544115066528,
      "step": 682
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 11.6875,
      "learning_rate": 1.4423583513259196e-05,
      "loss": 1.7723913192749023,
      "step": 684
    },
    {
      "epoch": 0.8445675592489997,
      "grad_norm": 3.890625,
      "learning_rate": 1.4393188097225764e-05,
      "loss": 1.4048473834991455,
      "step": 686
    },
    {
      "epoch": 0.8470298553401047,
      "grad_norm": 4.65625,
      "learning_rate": 1.4362754690902242e-05,
      "loss": 1.736893653869629,
      "step": 688
    },
    {
      "epoch": 0.8494921514312096,
      "grad_norm": 2.625,
      "learning_rate": 1.4332283777398992e-05,
      "loss": 1.4180538654327393,
      "step": 690
    },
    {
      "epoch": 0.8519544475223145,
      "grad_norm": 4.875,
      "learning_rate": 1.4301775840421756e-05,
      "loss": 1.700308084487915,
      "step": 692
    },
    {
      "epoch": 0.8544167436134195,
      "grad_norm": 2.859375,
      "learning_rate": 1.4271231364264008e-05,
      "loss": 1.2139472961425781,
      "step": 694
    },
    {
      "epoch": 0.8568790397045245,
      "grad_norm": 8.25,
      "learning_rate": 1.424065083379926e-05,
      "loss": 1.690704584121704,
      "step": 696
    },
    {
      "epoch": 0.8593413357956294,
      "grad_norm": 5.625,
      "learning_rate": 1.421003473447335e-05,
      "loss": 1.757250189781189,
      "step": 698
    },
    {
      "epoch": 0.8618036318867344,
      "grad_norm": 9.25,
      "learning_rate": 1.4179383552296768e-05,
      "loss": 1.7566320896148682,
      "step": 700
    },
    {
      "epoch": 0.8642659279778393,
      "grad_norm": 6.0625,
      "learning_rate": 1.4148697773836908e-05,
      "loss": 1.804456353187561,
      "step": 702
    },
| { |
| "epoch": 0.8667282240689443, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.4117977886210352e-05, |
| "loss": 1.6510390043258667, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8691905201600493, |
| "grad_norm": 5.40625, |
| "learning_rate": 1.4087224377075162e-05, |
| "loss": 1.194544792175293, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8716528162511542, |
| "grad_norm": 5.75, |
| "learning_rate": 1.4056437734623103e-05, |
| "loss": 1.3318874835968018, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8741151123422591, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.4025618447571914e-05, |
| "loss": 1.4258933067321777, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8765774084333641, |
| "grad_norm": 1.8671875, |
| "learning_rate": 1.3994767005157543e-05, |
| "loss": 1.1039819717407227, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8790397045244691, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.3963883897126395e-05, |
| "loss": 1.3149468898773193, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.881502000615574, |
| "grad_norm": 7.15625, |
| "learning_rate": 1.393296961372753e-05, |
| "loss": 1.3563876152038574, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.883964296706679, |
| "grad_norm": 2.578125, |
| "learning_rate": 1.390202464570491e-05, |
| "loss": 1.226351022720337, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8864265927977839, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.3871049484289586e-05, |
| "loss": 1.6103639602661133, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.3840044621191907e-05, |
| "loss": 1.40117347240448, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8913511849799939, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.380901054859373e-05, |
| "loss": 1.0493632555007935, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8938134810710988, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.3777947759140581e-05, |
| "loss": 1.497347354888916, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8962757771622037, |
| "grad_norm": 1.546875, |
| "learning_rate": 1.3746856745933861e-05, |
| "loss": 1.1111018657684326, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8987380732533087, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.3715738002522983e-05, |
| "loss": 1.1223242282867432, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9012003693444137, |
| "grad_norm": 7.78125, |
| "learning_rate": 1.3684592022897577e-05, |
| "loss": 1.526750087738037, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.9036626654355187, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.3653419301479625e-05, |
| "loss": 1.1531429290771484, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.9061249615266236, |
| "grad_norm": 6.84375, |
| "learning_rate": 1.3622220333115618e-05, |
| "loss": 1.627996563911438, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.9085872576177285, |
| "grad_norm": 5.0, |
| "learning_rate": 1.3590995613068695e-05, |
| "loss": 1.3804816007614136, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.9110495537088334, |
| "grad_norm": 3.890625, |
| "learning_rate": 1.3559745637010796e-05, |
| "loss": 1.3431119918823242, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9135118497999385, |
| "grad_norm": 7.21875, |
| "learning_rate": 1.3528470901014768e-05, |
| "loss": 1.7569446563720703, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.9159741458910434, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.3497171901546527e-05, |
| "loss": 1.4046237468719482, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9184364419821484, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.3465849135457133e-05, |
| "loss": 1.1801738739013672, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.9208987380732533, |
| "grad_norm": 3.625, |
| "learning_rate": 1.3434503099974943e-05, |
| "loss": 1.414689540863037, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.9233610341643582, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.3403134292697688e-05, |
| "loss": 1.3589739799499512, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9258233302554633, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.3371743211584602e-05, |
| "loss": 1.2147026062011719, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.9282856263465682, |
| "grad_norm": 14.0, |
| "learning_rate": 1.3340330354948483e-05, |
| "loss": 0.6764575242996216, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.9307479224376731, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.330889622144781e-05, |
| "loss": 1.1622259616851807, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.9332102185287781, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.3277441310078824e-05, |
| "loss": 1.3609400987625122, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.3245966120167592e-05, |
| "loss": 1.149078130722046, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.938134810710988, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.3214471151362092e-05, |
| "loss": 1.119340419769287, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.940597106802093, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.3182956903624278e-05, |
| "loss": 1.0370396375656128, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.9430594028931979, |
| "grad_norm": 2.453125, |
| "learning_rate": 1.3151423877222147e-05, |
| "loss": 1.1257320642471313, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.9455216989843028, |
| "grad_norm": 3.34375, |
| "learning_rate": 1.3119872572721794e-05, |
| "loss": 1.3441581726074219, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.9479839950754079, |
| "grad_norm": 5.75, |
| "learning_rate": 1.3088303490979471e-05, |
| "loss": 1.3604907989501953, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9504462911665128, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.3056717133133621e-05, |
| "loss": 1.6805719137191772, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.9529085872576177, |
| "grad_norm": 5.625, |
| "learning_rate": 1.3025114000596943e-05, |
| "loss": 1.780057430267334, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.9553708833487227, |
| "grad_norm": 3.28125, |
| "learning_rate": 1.2993494595048422e-05, |
| "loss": 1.401186466217041, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.9578331794398276, |
| "grad_norm": 8.875, |
| "learning_rate": 1.2961859418425365e-05, |
| "loss": 1.7668989896774292, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.9602954755309326, |
| "grad_norm": 5.6875, |
| "learning_rate": 1.2930208972915437e-05, |
| "loss": 1.4184396266937256, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9627577716220376, |
| "grad_norm": 3.75, |
| "learning_rate": 1.2898543760948673e-05, |
| "loss": 1.4058780670166016, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.9652200677131425, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.2866864285189543e-05, |
| "loss": 1.0642720460891724, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9676823638042474, |
| "grad_norm": 5.65625, |
| "learning_rate": 1.2835171048528916e-05, |
| "loss": 1.7296231985092163, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9701446598953524, |
| "grad_norm": 4.96875, |
| "learning_rate": 1.2803464554076128e-05, |
| "loss": 1.4836857318878174, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9726069559864574, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.2771745305150965e-05, |
| "loss": 1.7830345630645752, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9750692520775623, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.2740013805275672e-05, |
| "loss": 1.3922364711761475, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.9775315481686673, |
| "grad_norm": 7.34375, |
| "learning_rate": 1.2708270558166995e-05, |
| "loss": 1.0389618873596191, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.9799938442597722, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.2676516067728135e-05, |
| "loss": 1.5342938899993896, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.9824561403508771, |
| "grad_norm": 5.03125, |
| "learning_rate": 1.264475083804078e-05, |
| "loss": 1.7565385103225708, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9849184364419822, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.2612975373357113e-05, |
| "loss": 1.398611068725586, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9873807325330871, |
| "grad_norm": 5.78125, |
| "learning_rate": 1.2581190178091764e-05, |
| "loss": 1.4105567932128906, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.989843028624192, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.2549395756813852e-05, |
| "loss": 1.1484860181808472, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.992305324715297, |
| "grad_norm": 6.875, |
| "learning_rate": 1.251759261423894e-05, |
| "loss": 0.9945257902145386, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.994767620806402, |
| "grad_norm": 12.5625, |
| "learning_rate": 1.2485781255221037e-05, |
| "loss": 1.5860981941223145, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.997229916897507, |
| "grad_norm": 13.4375, |
| "learning_rate": 1.2453962184744595e-05, |
| "loss": 1.3061414957046509, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9996922129886119, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.2422135907916459e-05, |
| "loss": 1.0748600959777832, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.0012311480455525, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.239030292995789e-05, |
| "loss": 1.1813337802886963, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.0036934441366574, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.2358463756196515e-05, |
| "loss": 1.3365702629089355, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.0061557402277623, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.2326618892058316e-05, |
| "loss": 1.269797444343567, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.0086180363188673, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.2294768843059611e-05, |
| "loss": 1.130170226097107, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.0110803324099722, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.2262914114799011e-05, |
| "loss": 1.5535081624984741, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.0135426285010773, |
| "grad_norm": 9.0625, |
| "learning_rate": 1.2231055212949427e-05, |
| "loss": 1.7664412260055542, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.0160049245921823, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.219919264325001e-05, |
| "loss": 1.4970834255218506, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.0184672206832872, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.2167326911498137e-05, |
| "loss": 1.1276826858520508, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.0209295167743921, |
| "grad_norm": 2.328125, |
| "learning_rate": 1.2135458523541384e-05, |
| "loss": 1.0198701620101929, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.023391812865497, |
| "grad_norm": 6.0, |
| "learning_rate": 1.2103587985269483e-05, |
| "loss": 1.1860932111740112, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.025854108956602, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.207171580260632e-05, |
| "loss": 1.4768877029418945, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.028316405047707, |
| "grad_norm": 8.625, |
| "learning_rate": 1.2039842481501865e-05, |
| "loss": 1.481208086013794, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.0307787011388119, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.200796852792417e-05, |
| "loss": 1.473567008972168, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.0332409972299168, |
| "grad_norm": 10.5625, |
| "learning_rate": 1.1976094447851323e-05, |
| "loss": 1.5777289867401123, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.035703293321022, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.1944220747263425e-05, |
| "loss": 1.3818743228912354, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.0381655894121269, |
| "grad_norm": 3.625, |
| "learning_rate": 1.1912347932134552e-05, |
| "loss": 1.2724220752716064, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.0406278855032318, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.1880476508424717e-05, |
| "loss": 1.3566083908081055, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.0430901815943368, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.1848606982071851e-05, |
| "loss": 1.2785669565200806, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.0455524776854417, |
| "grad_norm": 5.375, |
| "learning_rate": 1.1816739858983767e-05, |
| "loss": 1.5428179502487183, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.0480147737765466, |
| "grad_norm": 5.53125, |
| "learning_rate": 1.178487564503012e-05, |
| "loss": 1.7369728088378906, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.0504770698676515, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.1753014846034398e-05, |
| "loss": 1.6508008241653442, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.0529393659587565, |
| "grad_norm": 5.125, |
| "learning_rate": 1.1721157967765869e-05, |
| "loss": 1.4951319694519043, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.0554016620498614, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.1689305515931556e-05, |
| "loss": 1.5579488277435303, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.0578639581409663, |
| "grad_norm": 9.0625, |
| "learning_rate": 1.1657457996168233e-05, |
| "loss": 1.2465214729309082, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.0603262542320715, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.1625615914034363e-05, |
| "loss": 1.1531850099563599, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.0627885503231764, |
| "grad_norm": 7.625, |
| "learning_rate": 1.1593779775002104e-05, |
| "loss": 1.6242802143096924, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.0652508464142814, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.1561950084449258e-05, |
| "loss": 1.7797261476516724, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.0677131425053863, |
| "grad_norm": 7.625, |
| "learning_rate": 1.153012734765127e-05, |
| "loss": 1.0688107013702393, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.0701754385964912, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.1498312069773205e-05, |
| "loss": 0.603493332862854, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.0726377346875962, |
| "grad_norm": 7.59375, |
| "learning_rate": 1.1466504755861708e-05, |
| "loss": 1.2946546077728271, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.075100030778701, |
| "grad_norm": 4.0, |
| "learning_rate": 1.143470591083701e-05, |
| "loss": 1.3011809587478638, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.077562326869806, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.1402916039484898e-05, |
| "loss": 1.3322241306304932, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.080024622960911, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.1371135646448716e-05, |
| "loss": 1.3409028053283691, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.082486919052016, |
| "grad_norm": 4.9375, |
| "learning_rate": 1.1339365236221344e-05, |
| "loss": 1.5541951656341553, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.084949215143121, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.1307605313137185e-05, |
| "loss": 1.6270629167556763, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.087411511234226, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.127585638136417e-05, |
| "loss": 1.40193510055542, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.089873807325331, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.1244118944895751e-05, |
| "loss": 1.3631030321121216, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.0923361034164358, |
| "grad_norm": 11.4375, |
| "learning_rate": 1.1212393507542898e-05, |
| "loss": 1.293651819229126, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0947983995075408, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.1180680572926107e-05, |
| "loss": 1.4282387495040894, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0972606955986457, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.1148980644467393e-05, |
| "loss": 1.5414776802062988, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.0997229916897506, |
| "grad_norm": 2.0625, |
| "learning_rate": 1.1117294225382316e-05, |
| "loss": 1.2819738388061523, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.1021852877808556, |
| "grad_norm": 3.625, |
| "learning_rate": 1.1085621818671974e-05, |
| "loss": 1.116639256477356, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.1046475838719605, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.1053963927115037e-05, |
| "loss": 1.3504618406295776, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.1071098799630656, |
| "grad_norm": 4.375, |
| "learning_rate": 1.102232105325975e-05, |
| "loss": 1.4307514429092407, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.1095721760541706, |
| "grad_norm": 2.5, |
| "learning_rate": 1.0990693699415962e-05, |
| "loss": 1.2542567253112793, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.1120344721452755, |
| "grad_norm": 7.71875, |
| "learning_rate": 1.0959082367647155e-05, |
| "loss": 1.3272080421447754, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.1144967682363804, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.0927487559762478e-05, |
| "loss": 1.344172477722168, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.1169590643274854, |
| "grad_norm": 7.59375, |
| "learning_rate": 1.0895909777308757e-05, |
| "loss": 1.2731947898864746, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.1194213604185903, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.0864349521562563e-05, |
| "loss": 1.2336888313293457, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.1218836565096952, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.0832807293522239e-05, |
| "loss": 1.125575304031372, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.1243459526008002, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.080128359389995e-05, |
| "loss": 1.1796314716339111, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.1268082486919053, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.0769778923113736e-05, |
| "loss": 1.1832040548324585, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.1292705447830103, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.0738293781279561e-05, |
| "loss": 1.1113415956497192, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.1317328408741152, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.0706828668203384e-05, |
| "loss": 1.1446493864059448, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.1341951369652201, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.067538408337323e-05, |
| "loss": 1.3466662168502808, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.136657433056325, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.064396052595123e-05, |
| "loss": 1.1979475021362305, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.13911972914743, |
| "grad_norm": 6.0, |
| "learning_rate": 1.0612558494765735e-05, |
| "loss": 1.2253812551498413, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.141582025238535, |
| "grad_norm": 5.59375, |
| "learning_rate": 1.0581178488303379e-05, |
| "loss": 1.512798547744751, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.1440443213296398, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.0549821004701163e-05, |
| "loss": 1.214385986328125, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.1465066174207448, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.0518486541738552e-05, |
| "loss": 1.0102102756500244, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.1489689135118497, |
| "grad_norm": 8.4375, |
| "learning_rate": 1.0487175596829584e-05, |
| "loss": 1.2178149223327637, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.1514312096029546, |
| "grad_norm": 5.59375, |
| "learning_rate": 1.0455888667014956e-05, |
| "loss": 1.3471554517745972, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.1538935056940598, |
| "grad_norm": 32.5, |
| "learning_rate": 1.0424626248954135e-05, |
| "loss": 1.5330407619476318, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.1563558017851647, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.0393388838917489e-05, |
| "loss": 1.6406910419464111, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.1588180978762697, |
| "grad_norm": 1.703125, |
| "learning_rate": 1.0362176932778399e-05, |
| "loss": 1.2105987071990967, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.1612803939673746, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.0330991026005384e-05, |
| "loss": 1.194588303565979, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.1637426900584795, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.0299831613654243e-05, |
| "loss": 1.1566952466964722, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.1662049861495845, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.026869919036019e-05, |
| "loss": 1.2074699401855469, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.1686672822406894, |
| "grad_norm": 11.375, |
| "learning_rate": 1.0237594250330013e-05, |
| "loss": 1.3596782684326172, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.1711295783317943, |
| "grad_norm": 5.84375, |
| "learning_rate": 1.020651728733422e-05, |
| "loss": 1.3205690383911133, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.1735918744228995, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.0175468794699193e-05, |
| "loss": 1.337862253189087, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.1760541705140044, |
| "grad_norm": 3.875, |
| "learning_rate": 1.014444926529937e-05, |
| "loss": 1.3420543670654297, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.1785164666051093, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.0113459191549423e-05, |
| "loss": 1.3313000202178955, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.1809787626962143, |
| "grad_norm": 5.03125, |
| "learning_rate": 1.008249906539643e-05, |
| "loss": 1.4042177200317383, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.1834410587873192, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.0051569378312066e-05, |
| "loss": 1.3378522396087646, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.1859033548784241, |
| "grad_norm": 10.9375, |
| "learning_rate": 1.0020670621284814e-05, |
| "loss": 0.8419127464294434, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.188365650969529, |
| "grad_norm": 3.796875, |
| "learning_rate": 9.989803284812156e-06, |
| "loss": 0.8327467441558838, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.190827947060634, |
| "grad_norm": 18.0, |
| "learning_rate": 9.958967858892796e-06, |
| "loss": 1.1072711944580078, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.193290243151739, |
| "grad_norm": 11.375, |
| "learning_rate": 9.928164833018884e-06, |
| "loss": 1.109494686126709, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.1957525392428439, |
| "grad_norm": 7.15625, |
| "learning_rate": 9.897394696168232e-06, |
| "loss": 1.2777066230773926, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.1982148353339488, |
| "grad_norm": 4.03125, |
| "learning_rate": 9.866657936796567e-06, |
| "loss": 1.089713454246521, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.200677131425054, |
| "grad_norm": 4.875, |
| "learning_rate": 9.835955042829762e-06, |
| "loss": 1.1587715148925781, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.2031394275161589, |
| "grad_norm": 5.3125, |
| "learning_rate": 9.805286501656111e-06, |
| "loss": 1.300113558769226, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.2056017236072638, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.774652800118567e-06, |
| "loss": 1.2401779890060425, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.2080640196983687, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.74405442450704e-06, |
| "loss": 1.2466282844543457, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.713491860550646e-06, |
| "loss": 1.485695242881775, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.2129886118805786, |
| "grad_norm": 4.9375, |
| "learning_rate": 9.682965593410037e-06, |
| "loss": 1.6573221683502197, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.2154509079716835, |
| "grad_norm": 3.734375, |
| "learning_rate": 9.652476107669662e-06, |
| "loss": 1.3761565685272217, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.2179132040627885, |
| "grad_norm": 4.09375, |
| "learning_rate": 9.622023887330094e-06, |
| "loss": 1.3099732398986816, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.2203755001538936, |
| "grad_norm": 10.625, |
| "learning_rate": 9.591609415800338e-06, |
| "loss": 1.5944232940673828, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.2228377962449986, |
| "grad_norm": 7.375, |
| "learning_rate": 9.561233175890165e-06, |
| "loss": 1.7219964265823364, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.2253000923361035, |
| "grad_norm": 2.78125, |
| "learning_rate": 9.530895649802445e-06, |
| "loss": 1.623438835144043, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.2277623884272084, |
| "grad_norm": 3.359375, |
| "learning_rate": 9.50059731912549e-06, |
| "loss": 1.3701614141464233, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.2302246845183133, |
| "grad_norm": 2.8125, |
| "learning_rate": 9.470338664825408e-06, |
| "loss": 1.2980146408081055, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.2326869806094183, |
| "grad_norm": 5.71875, |
| "learning_rate": 9.44012016723848e-06, |
| "loss": 1.5235289335250854, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.2351492767005232, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.409942306063513e-06, |
| "loss": 1.6062097549438477, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.2376115727916281, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.379805560354246e-06, |
| "loss": 1.3337829113006592, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.240073868882733, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.349710408511734e-06, |
| "loss": 0.7538601160049438, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.242536164973838, |
| "grad_norm": 27.25, |
| "learning_rate": 9.319657328276757e-06, |
| "loss": 0.47900092601776123, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.244998461064943, |
| "grad_norm": 4.75, |
| "learning_rate": 9.289646796722234e-06, |
| "loss": 1.0039315223693848, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.247460757156048, |
| "grad_norm": 6.375, |
| "learning_rate": 9.259679290245658e-06, |
| "loss": 1.2915596961975098, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.249923053247153, |
| "grad_norm": 4.0, |
| "learning_rate": 9.229755284561518e-06, |
| "loss": 1.336082935333252, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.252385349338258, |
| "grad_norm": 4.90625, |
| "learning_rate": 9.19987525469376e-06, |
| "loss": 1.416182279586792, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.254847645429363, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.170039674968254e-06, |
| "loss": 1.378662109375, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.2573099415204678, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.140249019005236e-06, |
| "loss": 1.3030860424041748, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.2597722376115728, |
| "grad_norm": 4.0625, |
| "learning_rate": 9.110503759711811e-06, |
| "loss": 1.3451809883117676, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.2622345337026777, |
| "grad_norm": 5.75, |
| "learning_rate": 9.080804369274451e-06, |
| "loss": 1.3729634284973145, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.2646968297937828, |
| "grad_norm": 3.65625, |
| "learning_rate": 9.051151319151479e-06, |
| "loss": 1.3505221605300903, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.2671591258848878, |
| "grad_norm": 5.1875, |
| "learning_rate": 9.021545080065603e-06, |
| "loss": 1.3553135395050049, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.2696214219759927, |
| "grad_norm": 6.09375, |
| "learning_rate": 8.991986121996432e-06, |
| "loss": 1.4693278074264526, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.2720837180670976, |
| "grad_norm": 7.09375, |
| "learning_rate": 8.962474914173022e-06, |
| "loss": 1.8386784791946411, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.2745460141582026, |
| "grad_norm": 5.3125, |
| "learning_rate": 8.933011925066431e-06, |
| "loss": 1.5184224843978882, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.2770083102493075, |
| "grad_norm": 14.0, |
| "learning_rate": 8.903597622382263e-06, |
| "loss": 1.3686227798461914, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.2794706063404124, |
| "grad_norm": 5.15625, |
| "learning_rate": 8.87423247305327e-06, |
| "loss": 1.3770601749420166, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.2819329024315174, |
| "grad_norm": 8.75, |
| "learning_rate": 8.84491694323192e-06, |
| "loss": 0.8821253776550293, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.2843951985226223, |
| "grad_norm": 2.84375, |
| "learning_rate": 8.815651498283002e-06, |
| "loss": 0.7115093469619751, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.2868574946137272, |
| "grad_norm": 3.0625, |
| "learning_rate": 8.786436602776248e-06, |
| "loss": 1.1449503898620605, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.2893197907048322, |
| "grad_norm": 3.359375, |
| "learning_rate": 8.757272720478942e-06, |
| "loss": 1.3050785064697266, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.291782086795937, |
| "grad_norm": 4.78125, |
| "learning_rate": 8.728160314348575e-06, |
| "loss": 1.0610979795455933, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.2942443828870422, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.699099846525486e-06, |
| "loss": 0.9030791521072388, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.2967066789781472, |
| "grad_norm": 4.15625, |
| "learning_rate": 8.670091778325521e-06, |
| "loss": 1.3431543111801147, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.299168975069252, |
| "grad_norm": 2.90625, |
| "learning_rate": 8.641136570232724e-06, |
| "loss": 1.3691339492797852, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.301631271160357, |
| "grad_norm": 2.78125, |
| "learning_rate": 8.612234681892017e-06, |
| "loss": 1.3442999124526978, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.304093567251462, |
| "grad_norm": 4.0625, |
| "learning_rate": 8.583386572101902e-06, |
| "loss": 1.3384771347045898, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.306555863342567, |
| "grad_norm": 6.6875, |
| "learning_rate": 8.554592698807185e-06, |
| "loss": 1.4566752910614014, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.3090181594336718, |
| "grad_norm": 6.09375, |
| "learning_rate": 8.525853519091708e-06, |
| "loss": 1.7774509191513062, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.311480455524777, |
| "grad_norm": 5.5625, |
| "learning_rate": 8.497169489171077e-06, |
| "loss": 1.4398928880691528, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.313942751615882, |
| "grad_norm": 4.5, |
| "learning_rate": 8.468541064385447e-06, |
| "loss": 1.4056460857391357, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.3164050477069869, |
| "grad_norm": 4.78125, |
| "learning_rate": 8.439968699192262e-06, |
| "loss": 1.2474167346954346, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.3188673437980918, |
| "grad_norm": 2.65625, |
| "learning_rate": 8.411452847159063e-06, |
| "loss": 1.4466845989227295, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.3213296398891967, |
| "grad_norm": 2.875, |
| "learning_rate": 8.382993960956287e-06, |
| "loss": 1.3356812000274658, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.3237919359803016, |
| "grad_norm": 3.578125, |
| "learning_rate": 8.35459249235007e-06, |
| "loss": 1.3684732913970947, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.3262542320714066, |
| "grad_norm": 13.3125, |
| "learning_rate": 8.32624889219508e-06, |
| "loss": 1.5551846027374268, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.3287165281625115, |
| "grad_norm": 1.7734375, |
| "learning_rate": 8.297963610427366e-06, |
| "loss": 1.287471055984497, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.3311788242536164, |
| "grad_norm": 8.375, |
| "learning_rate": 8.269737096057207e-06, |
| "loss": 1.3594995737075806, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.3336411203447214, |
| "grad_norm": 4.125, |
| "learning_rate": 8.24156979716199e-06, |
| "loss": 1.451033592224121, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.3361034164358263, |
| "grad_norm": 4.625, |
| "learning_rate": 8.213462160879098e-06, |
| "loss": 1.272244930267334, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.3385657125269312, |
| "grad_norm": 2.0625, |
| "learning_rate": 8.185414633398805e-06, |
| "loss": 1.1681973934173584, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.3410280086180364, |
| "grad_norm": 4.0625, |
| "learning_rate": 8.157427659957198e-06, |
| "loss": 1.1624126434326172, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.3434903047091413, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.12950168482911e-06, |
| "loss": 1.3475921154022217, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.3459526008002463, |
| "grad_norm": 13.6875, |
| "learning_rate": 8.101637151321057e-06, |
| "loss": 1.4795109033584595, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.3484148968913512, |
| "grad_norm": 2.078125, |
| "learning_rate": 8.07383450176423e-06, |
| "loss": 1.3539352416992188, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.3508771929824561, |
| "grad_norm": 2.984375, |
| "learning_rate": 8.046094177507436e-06, |
| "loss": 1.0916264057159424, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.353339489073561, |
| "grad_norm": 3.078125, |
| "learning_rate": 8.018416618910105e-06, |
| "loss": 1.337206482887268, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.355801785164666, |
| "grad_norm": 6.78125, |
| "learning_rate": 7.99080226533532e-06, |
| "loss": 1.5372506380081177, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.3582640812557711, |
| "grad_norm": 8.875, |
| "learning_rate": 7.963251555142813e-06, |
| "loss": 1.4474639892578125, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.360726377346876, |
| "grad_norm": 6.90625, |
| "learning_rate": 7.935764925682028e-06, |
| "loss": 1.782578468322754, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.363188673437981, |
| "grad_norm": 10.25, |
| "learning_rate": 7.908342813285159e-06, |
| "loss": 1.6106759309768677, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.365650969529086, |
| "grad_norm": 10.125, |
| "learning_rate": 7.880985653260244e-06, |
| "loss": 1.5926954746246338, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.3681132656201909, |
| "grad_norm": 7.375, |
| "learning_rate": 7.853693879884239e-06, |
| "loss": 1.7612438201904297, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.3705755617112958, |
| "grad_norm": 3.3125, |
| "learning_rate": 7.826467926396125e-06, |
| "loss": 1.5579084157943726, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.3730378578024007, |
| "grad_norm": 1.5703125, |
| "learning_rate": 7.799308224990049e-06, |
| "loss": 1.1745721101760864, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.3755001538935057, |
| "grad_norm": 3.59375, |
| "learning_rate": 7.772215206808441e-06, |
| "loss": 1.1942408084869385, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.3779624499846106, |
| "grad_norm": 3.890625, |
| "learning_rate": 7.745189301935184e-06, |
| "loss": 1.2781388759613037, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.3804247460757155, |
| "grad_norm": 10.5, |
| "learning_rate": 7.71823093938877e-06, |
| "loss": 1.2326617240905762, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.3828870421668205, |
| "grad_norm": 3.4375, |
| "learning_rate": 7.691340547115508e-06, |
| "loss": 1.1817359924316406, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.3853493382579254, |
| "grad_norm": 1.65625, |
| "learning_rate": 7.664518551982729e-06, |
| "loss": 1.280542016029358, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.3878116343490305, |
| "grad_norm": 1.4140625, |
| "learning_rate": 7.637765379771997e-06, |
| "loss": 1.0744314193725586, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.3902739304401355, |
| "grad_norm": 3.484375, |
| "learning_rate": 7.61108145517236e-06, |
| "loss": 1.1780340671539307, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.3927362265312404, |
| "grad_norm": 2.65625, |
| "learning_rate": 7.5844672017736e-06, |
| "loss": 1.1386570930480957, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.3951985226223453, |
| "grad_norm": 4.21875, |
| "learning_rate": 7.557923042059525e-06, |
| "loss": 1.2564072608947754, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.3976608187134503, |
| "grad_norm": 8.5625, |
| "learning_rate": 7.531449397401243e-06, |
| "loss": 1.358655333518982, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.4001231148045552, |
| "grad_norm": 12.125, |
| "learning_rate": 7.505046688050486e-06, |
| "loss": 1.1821155548095703, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.4025854108956601, |
| "grad_norm": 4.65625, |
| "learning_rate": 7.4787153331329356e-06, |
| "loss": 1.3920905590057373, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.4050477069867653, |
| "grad_norm": 2.703125, |
| "learning_rate": 7.452455750641563e-06, |
| "loss": 1.3678568601608276, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.4075100030778702, |
| "grad_norm": 3.71875, |
| "learning_rate": 7.4262683574300046e-06, |
| "loss": 1.2067809104919434, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.4099722991689752, |
| "grad_norm": 10.25, |
| "learning_rate": 7.4001535692059335e-06, |
| "loss": 1.400128722190857, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.41243459526008, |
| "grad_norm": 3.140625, |
| "learning_rate": 7.374111800524476e-06, |
| "loss": 1.1754021644592285, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.414896891351185, |
| "grad_norm": 5.3125, |
| "learning_rate": 7.34814346478161e-06, |
| "loss": 1.3996424674987793, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.41735918744229, |
| "grad_norm": 1.78125, |
| "learning_rate": 7.322248974207624e-06, |
| "loss": 1.1624915599822998, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.4198214835333949, |
| "grad_norm": 7.1875, |
| "learning_rate": 7.296428739860557e-06, |
| "loss": 1.2524189949035645, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.4222837796244998, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.270683171619675e-06, |
| "loss": 1.1983616352081299, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.4247460757156047, |
| "grad_norm": 5.15625, |
| "learning_rate": 7.2450126781789795e-06, |
| "loss": 1.263120412826538, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.4272083718067097, |
| "grad_norm": 2.8125, |
| "learning_rate": 7.219417667040702e-06, |
| "loss": 1.5528199672698975, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.4296706678978146, |
| "grad_norm": 6.375, |
| "learning_rate": 7.193898544508842e-06, |
| "loss": 1.5049046277999878, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.4321329639889195, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.168455715682716e-06, |
| "loss": 1.2450196743011475, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.4345952600800247, |
| "grad_norm": 5.75, |
| "learning_rate": 7.143089584450531e-06, |
| "loss": 1.0869059562683105, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.4370575561711296, |
| "grad_norm": 3.734375, |
| "learning_rate": 7.117800553482971e-06, |
| "loss": 1.3680589199066162, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.4395198522622346, |
| "grad_norm": 13.8125, |
| "learning_rate": 7.092589024226804e-06, |
| "loss": 1.4548523426055908, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.4419821483533395, |
| "grad_norm": 6.15625, |
| "learning_rate": 7.067455396898504e-06, |
| "loss": 1.0294753313064575, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 14.375, |
| "learning_rate": 7.042400070477908e-06, |
| "loss": 1.1527860164642334, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.4469067405355494, |
| "grad_norm": 5.0625, |
| "learning_rate": 7.0174234427018736e-06, |
| "loss": 1.667987585067749, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.4493690366266543, |
| "grad_norm": 9.75, |
| "learning_rate": 6.992525910057972e-06, |
| "loss": 1.6407973766326904, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.4518313327177594, |
| "grad_norm": 9.125, |
| "learning_rate": 6.967707867778193e-06, |
| "loss": 1.551527500152588, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.4542936288088644, |
| "grad_norm": 3.21875, |
| "learning_rate": 6.9429697098326634e-06, |
| "loss": 1.400420069694519, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.4567559248999693, |
| "grad_norm": 3.28125, |
| "learning_rate": 6.918311828923403e-06, |
| "loss": 1.3203402757644653, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.4592182209910742, |
| "grad_norm": 3.4375, |
| "learning_rate": 6.893734616478087e-06, |
| "loss": 1.2934377193450928, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.4616805170821792, |
| "grad_norm": 7.84375, |
| "learning_rate": 6.869238462643825e-06, |
| "loss": 0.8468174934387207, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.464142813173284, |
| "grad_norm": 3.78125, |
| "learning_rate": 6.844823756280985e-06, |
| "loss": 0.7017765641212463, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.466605109264389, |
| "grad_norm": 6.75, |
| "learning_rate": 6.8204908849569996e-06, |
| "loss": 0.8379335999488831, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.469067405355494, |
| "grad_norm": 4.78125, |
| "learning_rate": 6.79624023494023e-06, |
| "loss": 0.8475155234336853, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.471529701446599, |
| "grad_norm": 3.625, |
| "learning_rate": 6.772072191193826e-06, |
| "loss": 1.5360143184661865, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.4739919975377038, |
| "grad_norm": 12.375, |
| "learning_rate": 6.747987137369616e-06, |
| "loss": 1.451025366783142, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.4764542936288088, |
| "grad_norm": 6.125, |
| "learning_rate": 6.72398545580202e-06, |
| "loss": 1.6992993354797363, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.4789165897199137, |
| "grad_norm": 2.859375, |
| "learning_rate": 6.700067527501979e-06, |
| "loss": 1.4374724626541138, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.4813788858110188, |
| "grad_norm": 4.34375, |
| "learning_rate": 6.676233732150905e-06, |
| "loss": 1.423210859298706, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.4838411819021238, |
| "grad_norm": 6.375, |
| "learning_rate": 6.652484448094654e-06, |
| "loss": 1.3673293590545654, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.4863034779932287, |
| "grad_norm": 3.171875, |
| "learning_rate": 6.628820052337515e-06, |
| "loss": 1.3383548259735107, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.4887657740843336, |
| "grad_norm": 2.71875, |
| "learning_rate": 6.605240920536241e-06, |
| "loss": 0.7290570139884949, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.4912280701754386, |
| "grad_norm": 3.828125, |
| "learning_rate": 6.581747426994074e-06, |
| "loss": 0.8285163044929504, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.4936903662665435, |
| "grad_norm": 4.8125, |
| "learning_rate": 6.558339944654797e-06, |
| "loss": 1.524817705154419, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.4961526623576484, |
| "grad_norm": 4.09375, |
| "learning_rate": 6.5350188450968275e-06, |
| "loss": 1.5156073570251465, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.4986149584487536, |
| "grad_norm": 1.96875, |
| "learning_rate": 6.511784498527316e-06, |
| "loss": 1.266753911972046, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.5010772545398585, |
| "grad_norm": 4.28125, |
| "learning_rate": 6.488637273776258e-06, |
| "loss": 1.234669804573059, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.5035395506309635, |
| "grad_norm": 3.296875, |
| "learning_rate": 6.465577538290656e-06, |
| "loss": 1.1362870931625366, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.5060018467220684, |
| "grad_norm": 5.78125, |
| "learning_rate": 6.4426056581286736e-06, |
| "loss": 1.2194573879241943, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.5084641428131733, |
| "grad_norm": 2.484375, |
| "learning_rate": 6.419721997953825e-06, |
| "loss": 1.3203624486923218, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.5109264389042782, |
| "grad_norm": 8.875, |
| "learning_rate": 6.396926921029197e-06, |
| "loss": 1.4041712284088135, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.5133887349953832, |
| "grad_norm": 3.015625, |
| "learning_rate": 6.374220789211669e-06, |
| "loss": 1.6859148740768433, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.515851031086488, |
| "grad_norm": 3.03125, |
| "learning_rate": 6.351603962946182e-06, |
| "loss": 1.2609457969665527, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.518313327177593, |
| "grad_norm": 4.0625, |
| "learning_rate": 6.329076801260007e-06, |
| "loss": 1.3652920722961426, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.520775623268698, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.306639661757047e-06, |
| "loss": 1.1765468120574951, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.523237919359803, |
| "grad_norm": 12.0625, |
| "learning_rate": 6.2842929006121645e-06, |
| "loss": 1.2304123640060425, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.5257002154509078, |
| "grad_norm": 4.03125, |
| "learning_rate": 6.262036872565519e-06, |
| "loss": 1.1622458696365356, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.5281625115420128, |
| "grad_norm": 1.765625, |
| "learning_rate": 6.239871930916952e-06, |
| "loss": 1.1903202533721924, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.530624807633118, |
| "grad_norm": 4.59375, |
| "learning_rate": 6.21779842752036e-06, |
| "loss": 1.1756622791290283, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.5330871037242229, |
| "grad_norm": 3.09375, |
| "learning_rate": 6.195816712778119e-06, |
| "loss": 1.361944556236267, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.5355493998153278, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.1739271356355205e-06, |
| "loss": 1.207919955253601, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.5380116959064327, |
| "grad_norm": 2.96875, |
| "learning_rate": 6.152130043575235e-06, |
| "loss": 1.128209114074707, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.5404739919975377, |
| "grad_norm": 4.9375, |
| "learning_rate": 6.130425782611788e-06, |
| "loss": 0.9894086122512817, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.5429362880886428, |
| "grad_norm": 5.4375, |
| "learning_rate": 6.1088146972860796e-06, |
| "loss": 1.4114530086517334, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.5453985841797477, |
| "grad_norm": 3.28125, |
| "learning_rate": 6.0872971306598985e-06, |
| "loss": 1.6339147090911865, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.5478608802708527, |
| "grad_norm": 1.34375, |
| "learning_rate": 6.065873424310493e-06, |
| "loss": 1.2093985080718994, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.5503231763619576, |
| "grad_norm": 4.1875, |
| "learning_rate": 6.044543918325134e-06, |
| "loss": 1.422555923461914, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.5527854724530625, |
| "grad_norm": 1.6015625, |
| "learning_rate": 6.0233089512957335e-06, |
| "loss": 1.3422693014144897, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.5552477685441675, |
| "grad_norm": 6.75, |
| "learning_rate": 6.002168860313449e-06, |
| "loss": 1.1010103225708008, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.5577100646352724, |
| "grad_norm": 4.28125, |
| "learning_rate": 5.9811239809633504e-06, |
| "loss": 1.3068557977676392, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.5601723607263773, |
| "grad_norm": 3.875, |
| "learning_rate": 5.960174647319083e-06, |
| "loss": 1.1887340545654297, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.5626346568174823, |
| "grad_norm": 6.21875, |
| "learning_rate": 5.939321191937567e-06, |
| "loss": 1.1840931177139282, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.5650969529085872, |
| "grad_norm": 2.609375, |
| "learning_rate": 5.918563945853714e-06, |
| "loss": 1.3886611461639404, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.5675592489996921, |
| "grad_norm": 1.703125, |
| "learning_rate": 5.8979032385751845e-06, |
| "loss": 1.1980421543121338, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.570021545090797, |
| "grad_norm": 5.4375, |
| "learning_rate": 5.877339398077142e-06, |
| "loss": 1.4251586198806763, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.572483841181902, |
| "grad_norm": 8.9375, |
| "learning_rate": 5.8568727507970566e-06, |
| "loss": 1.4789252281188965, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.574946137273007, |
| "grad_norm": 3.453125, |
| "learning_rate": 5.836503621629518e-06, |
| "loss": 1.3751678466796875, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.577408433364112, |
| "grad_norm": 5.09375, |
| "learning_rate": 5.8162323339210795e-06, |
| "loss": 1.5434916019439697, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.579870729455217, |
| "grad_norm": 1.0546875, |
| "learning_rate": 5.796059209465128e-06, |
| "loss": 1.2941160202026367, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.582333025546322, |
| "grad_norm": 3.40625, |
| "learning_rate": 5.775984568496774e-06, |
| "loss": 1.2361758947372437, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.5847953216374269, |
| "grad_norm": 4.125, |
| "learning_rate": 5.756008729687764e-06, |
| "loss": 1.2213199138641357, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.587257617728532, |
| "grad_norm": 5.15625, |
| "learning_rate": 5.7361320101414264e-06, |
| "loss": 1.370686411857605, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.589719913819637, |
| "grad_norm": 2.328125, |
| "learning_rate": 5.716354725387634e-06, |
| "loss": 1.160779595375061, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.5921822099107419, |
| "grad_norm": 3.625, |
| "learning_rate": 5.696677189377804e-06, |
| "loss": 1.149789810180664, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.5946445060018468, |
| "grad_norm": 3.96875, |
| "learning_rate": 5.677099714479901e-06, |
| "loss": 1.3322994709014893, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.5971068020929517, |
| "grad_norm": 3.640625, |
| "learning_rate": 5.657622611473487e-06, |
| "loss": 1.3151819705963135, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.5995690981840567, |
| "grad_norm": 3.78125, |
| "learning_rate": 5.638246189544789e-06, |
| "loss": 1.4213796854019165, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.6020313942751616, |
| "grad_norm": 4.75, |
| "learning_rate": 5.618970756281786e-06, |
| "loss": 1.6766854524612427, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.6044936903662665, |
| "grad_norm": 8.0625, |
| "learning_rate": 5.5997966176693255e-06, |
| "loss": 1.551700472831726, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.6069559864573715, |
| "grad_norm": 2.46875, |
| "learning_rate": 5.580724078084273e-06, |
| "loss": 1.2433726787567139, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.6094182825484764, |
| "grad_norm": 10.25, |
| "learning_rate": 5.561753440290676e-06, |
| "loss": 1.3765232563018799, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.6118805786395813, |
| "grad_norm": 4.3125, |
| "learning_rate": 5.542885005434956e-06, |
| "loss": 1.6626167297363281, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.6143428747306863, |
| "grad_norm": 11.0625, |
| "learning_rate": 5.524119073041125e-06, |
| "loss": 1.5003547668457031, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.6168051708217912, |
| "grad_norm": 4.125, |
| "learning_rate": 5.505455941006048e-06, |
| "loss": 1.4539849758148193, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.6192674669128961, |
| "grad_norm": 3.234375, |
| "learning_rate": 5.486895905594696e-06, |
| "loss": 1.255268931388855, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.621729763004001, |
| "grad_norm": 3.234375, |
| "learning_rate": 5.468439261435443e-06, |
| "loss": 1.2248173952102661, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.6241920590951062, |
| "grad_norm": 1.5625, |
| "learning_rate": 5.450086301515402e-06, |
| "loss": 1.1668376922607422, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.6266543551862112, |
| "grad_norm": 1.3125, |
| "learning_rate": 5.4318373171757635e-06, |
| "loss": 0.9886284470558167, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.629116651277316, |
| "grad_norm": 5.0, |
| "learning_rate": 5.413692598107173e-06, |
| "loss": 1.1245368719100952, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 4.65625, |
| "learning_rate": 5.395652432345137e-06, |
| "loss": 1.3283562660217285, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.6340412434595262, |
| "grad_norm": 3.625, |
| "learning_rate": 5.377717106265447e-06, |
| "loss": 1.361234426498413, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.636503539550631, |
| "grad_norm": 1.625, |
| "learning_rate": 5.3598869045796256e-06, |
| "loss": 1.0329114198684692, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.638965835641736, |
| "grad_norm": 4.28125, |
| "learning_rate": 5.342162110330427e-06, |
| "loss": 0.9817519187927246, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.641428131732841, |
| "grad_norm": 7.1875, |
| "learning_rate": 5.3245430048873205e-06, |
| "loss": 1.1899058818817139, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.643890427823946, |
| "grad_norm": 18.125, |
| "learning_rate": 5.307029867942037e-06, |
| "loss": 0.9700236320495605, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.6463527239150508, |
| "grad_norm": 3.28125, |
| "learning_rate": 5.289622977504136e-06, |
| "loss": 0.7763628959655762, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.6488150200061558, |
| "grad_norm": 7.28125, |
| "learning_rate": 5.272322609896572e-06, |
| "loss": 1.5835676193237305, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.6512773160972607, |
| "grad_norm": 6.625, |
| "learning_rate": 5.2551290397513266e-06, |
| "loss": 1.6835378408432007, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.6537396121883656, |
| "grad_norm": 10.875, |
| "learning_rate": 5.2380425400050375e-06, |
| "loss": 1.568629503250122, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.6562019082794706, |
| "grad_norm": 3.21875, |
| "learning_rate": 5.221063381894673e-06, |
| "loss": 1.3448878526687622, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.6586642043705755, |
| "grad_norm": 4.75, |
| "learning_rate": 5.204191834953222e-06, |
| "loss": 1.3649985790252686, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.6611265004616804, |
| "grad_norm": 5.125, |
| "learning_rate": 5.187428167005419e-06, |
| "loss": 1.326650619506836, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.6635887965527854, |
| "grad_norm": 7.0, |
| "learning_rate": 5.1707726441634875e-06, |
| "loss": 1.4459569454193115, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.6660510926438903, |
| "grad_norm": 7.65625, |
| "learning_rate": 5.1542255308229185e-06, |
| "loss": 1.614980936050415, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.6685133887349952, |
| "grad_norm": 5.4375, |
| "learning_rate": 5.137787089658273e-06, |
| "loss": 1.3426003456115723, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.6709756848261004, |
| "grad_norm": 5.03125, |
| "learning_rate": 5.121457581619018e-06, |
| "loss": 1.3568965196609497, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.6734379809172053, |
| "grad_norm": 2.859375, |
| "learning_rate": 5.105237265925373e-06, |
| "loss": 1.208372712135315, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.6759002770083102, |
| "grad_norm": 4.59375, |
| "learning_rate": 5.089126400064199e-06, |
| "loss": 1.2874377965927124, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.6783625730994152, |
| "grad_norm": 1.796875, |
| "learning_rate": 5.0731252397849195e-06, |
| "loss": 1.2037644386291504, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.6808248691905203, |
| "grad_norm": 2.09375, |
| "learning_rate": 5.057234039095447e-06, |
| "loss": 1.1050446033477783, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.6832871652816253, |
| "grad_norm": 4.0, |
| "learning_rate": 5.041453050258165e-06, |
| "loss": 1.3572784662246704, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.6857494613727302, |
| "grad_norm": 6.5625, |
| "learning_rate": 5.025782523785911e-06, |
| "loss": 1.7393821477890015, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.6882117574638351, |
| "grad_norm": 7.9375, |
| "learning_rate": 5.010222708438004e-06, |
| "loss": 1.312801480293274, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.69067405355494, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.9947738512163e-06, |
| "loss": 1.1735351085662842, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.693136349646045, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.979436197361265e-06, |
| "loss": 1.368802547454834, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.69559864573715, |
| "grad_norm": 3.953125, |
| "learning_rate": 4.964209990348089e-06, |
| "loss": 1.3448070287704468, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.6980609418282548, |
| "grad_norm": 4.0, |
| "learning_rate": 4.94909547188281e-06, |
| "loss": 1.2951633930206299, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.7005232379193598, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.934092881898489e-06, |
| "loss": 1.3092372417449951, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.7029855340104647, |
| "grad_norm": 5.875, |
| "learning_rate": 4.919202458551394e-06, |
| "loss": 1.4099408388137817, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.7054478301015696, |
| "grad_norm": 4.34375, |
| "learning_rate": 4.9044244382172215e-06, |
| "loss": 1.3373868465423584, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.7079101261926746, |
| "grad_norm": 4.625, |
| "learning_rate": 4.88975905548734e-06, |
| "loss": 1.3168833255767822, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.7103724222837795, |
| "grad_norm": 1.96875, |
| "learning_rate": 4.8752065431650775e-06, |
| "loss": 1.1487715244293213, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.7128347183748844, |
| "grad_norm": 1.984375, |
| "learning_rate": 4.8607671322620134e-06, |
| "loss": 1.083390712738037, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.7152970144659896, |
| "grad_norm": 2.015625, |
| "learning_rate": 4.846441051994317e-06, |
| "loss": 0.9462494850158691, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.7177593105570945, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.832228529779107e-06, |
| "loss": 1.4706915616989136, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.7202216066481995, |
| "grad_norm": 3.828125, |
| "learning_rate": 4.818129791230845e-06, |
| "loss": 1.5781259536743164, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.7226839027393044, |
| "grad_norm": 3.421875, |
| "learning_rate": 4.804145060157752e-06, |
| "loss": 1.3088247776031494, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.7251461988304093, |
| "grad_norm": 3.359375, |
| "learning_rate": 4.790274558558255e-06, |
| "loss": 1.305666446685791, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.7276084949215145, |
| "grad_norm": 3.125, |
| "learning_rate": 4.776518506617457e-06, |
| "loss": 1.3846698999404907, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.7300707910126194, |
| "grad_norm": 8.0, |
| "learning_rate": 4.762877122703658e-06, |
| "loss": 0.9111043214797974, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.7325330871037243, |
| "grad_norm": 4.0625, |
| "learning_rate": 4.749350623364867e-06, |
| "loss": 0.9622360467910767, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.7349953831948293, |
| "grad_norm": 1.6640625, |
| "learning_rate": 4.735939223325387e-06, |
| "loss": 1.1692111492156982, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.7374576792859342, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.722643135482389e-06, |
| "loss": 1.1715750694274902, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.7399199753770391, |
| "grad_norm": 1.53125, |
| "learning_rate": 4.709462570902536e-06, |
| "loss": 1.1869937181472778, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.742382271468144, |
| "grad_norm": 6.3125, |
| "learning_rate": 4.696397738818644e-06, |
| "loss": 1.3076727390289307, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.744844567559249, |
| "grad_norm": 6.46875, |
| "learning_rate": 4.683448846626342e-06, |
| "loss": 1.78236722946167, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.747306863650354, |
| "grad_norm": 5.84375, |
| "learning_rate": 4.670616099880796e-06, |
| "loss": 1.399848222732544, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.7497691597414589, |
| "grad_norm": 1.5234375, |
| "learning_rate": 4.657899702293436e-06, |
| "loss": 1.1672091484069824, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.7522314558325638, |
| "grad_norm": 1.9921875, |
| "learning_rate": 4.645299855728726e-06, |
| "loss": 1.084723949432373, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.7546937519236687, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.63281676020096e-06, |
| "loss": 1.277264952659607, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.7571560480147737, |
| "grad_norm": 3.15625, |
| "learning_rate": 4.620450613871082e-06, |
| "loss": 1.5163521766662598, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.7596183441058786, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.608201613043551e-06, |
| "loss": 1.3597209453582764, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.7620806401969837, |
| "grad_norm": 4.71875, |
| "learning_rate": 4.596069952163215e-06, |
| "loss": 1.3845343589782715, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.7645429362880887, |
| "grad_norm": 8.3125, |
| "learning_rate": 4.584055823812224e-06, |
| "loss": 1.3936517238616943, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.7670052323791936, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.572159418706983e-06, |
| "loss": 1.2084264755249023, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.7694675284702985, |
| "grad_norm": 4.25, |
| "learning_rate": 4.560380925695109e-06, |
| "loss": 1.3428120613098145, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.7719298245614035, |
| "grad_norm": 6.8125, |
| "learning_rate": 4.54872053175245e-06, |
| "loss": 1.7809275388717651, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.7743921206525086, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.537178421980104e-06, |
| "loss": 1.2580034732818604, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.7768544167436136, |
| "grad_norm": 14.4375, |
| "learning_rate": 4.52575477960149e-06, |
| "loss": 1.1773604154586792, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.7793167128347185, |
| "grad_norm": 4.59375, |
| "learning_rate": 4.514449785959429e-06, |
| "loss": 1.5239715576171875, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.7817790089258234, |
| "grad_norm": 3.125, |
| "learning_rate": 4.503263620513274e-06, |
| "loss": 1.2753288745880127, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.7842413050169283, |
| "grad_norm": 4.71875, |
| "learning_rate": 4.49219646083606e-06, |
| "loss": 1.2915542125701904, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.7867036011080333, |
| "grad_norm": 5.0625, |
| "learning_rate": 4.481248482611682e-06, |
| "loss": 1.6656956672668457, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.7891658971991382, |
| "grad_norm": 4.65625, |
| "learning_rate": 4.470419859632109e-06, |
| "loss": 1.3530993461608887, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.7916281932902431, |
| "grad_norm": 3.125, |
| "learning_rate": 4.459710763794619e-06, |
| "loss": 1.230569839477539, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.794090489381348, |
| "grad_norm": 8.5625, |
| "learning_rate": 4.449121365099082e-06, |
| "loss": 1.2140610218048096, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.796552785472453, |
| "grad_norm": 3.8125, |
| "learning_rate": 4.4386518316452475e-06, |
| "loss": 1.3054462671279907, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.799015081563558, |
| "grad_norm": 6.59375, |
| "learning_rate": 4.428302329630089e-06, |
| "loss": 1.515989065170288, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.8014773776546629, |
| "grad_norm": 3.796875, |
| "learning_rate": 4.418073023345158e-06, |
| "loss": 1.2513904571533203, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.8039396737457678, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.407964075173976e-06, |
| "loss": 1.2142295837402344, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.8064019698368727, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.397975645589459e-06, |
| "loss": 1.1632449626922607, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.8088642659279779, |
| "grad_norm": 2.28125, |
| "learning_rate": 4.38810789315137e-06, |
| "loss": 1.2213386297225952, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.8113265620190828, |
| "grad_norm": 5.5625, |
| "learning_rate": 4.378360974503803e-06, |
| "loss": 1.3299362659454346, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.8137888581101878, |
| "grad_norm": 19.125, |
| "learning_rate": 4.368735044372691e-06, |
| "loss": 1.8193198442459106, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.8162511542012927, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.359230255563357e-06, |
| "loss": 1.4013632535934448, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.8187134502923976, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.349846758958085e-06, |
| "loss": 1.3816094398498535, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.8211757463835028, |
| "grad_norm": 5.40625, |
| "learning_rate": 4.340584703513722e-06, |
| "loss": 1.48891019821167, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.8236380424746077, |
| "grad_norm": 6.875, |
| "learning_rate": 4.33144423625932e-06, |
| "loss": 1.8420138359069824, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.8261003385657126, |
| "grad_norm": 5.625, |
| "learning_rate": 4.322425502293797e-06, |
| "loss": 1.484515905380249, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.8285626346568176, |
| "grad_norm": 4.0, |
| "learning_rate": 4.313528644783633e-06, |
| "loss": 1.1373395919799805, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.8310249307479225, |
| "grad_norm": 2.5625, |
| "learning_rate": 4.304753804960603e-06, |
| "loss": 1.0549803972244263, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.8334872268390274, |
| "grad_norm": 3.125, |
| "learning_rate": 4.2961011221195255e-06, |
| "loss": 1.1374645233154297, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.8359495229301324, |
| "grad_norm": 3.15625, |
| "learning_rate": 4.287570733616063e-06, |
| "loss": 1.2891483306884766, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.8384118190212373, |
| "grad_norm": 6.15625, |
| "learning_rate": 4.279162774864535e-06, |
| "loss": 1.3952784538269043, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.8408741151123422, |
| "grad_norm": 11.4375, |
| "learning_rate": 4.270877379335764e-06, |
| "loss": 1.6006450653076172, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.8433364112034472, |
| "grad_norm": 3.375, |
| "learning_rate": 4.2627146785549675e-06, |
| "loss": 1.6013039350509644, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.845798707294552, |
| "grad_norm": 5.65625, |
| "learning_rate": 4.254674802099661e-06, |
| "loss": 1.509192943572998, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.848261003385657, |
| "grad_norm": 5.1875, |
| "learning_rate": 4.2467578775976064e-06, |
| "loss": 1.611980676651001, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.850723299476762, |
| "grad_norm": 4.125, |
| "learning_rate": 4.238964030724785e-06, |
| "loss": 1.4414465427398682, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.8531855955678669, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.231293385203395e-06, |
| "loss": 1.5326135158538818, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.855647891658972, |
| "grad_norm": 7.09375, |
| "learning_rate": 4.2237460627999035e-06, |
| "loss": 1.3705086708068848, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.858110187750077, |
| "grad_norm": 5.5, |
| "learning_rate": 4.216322183323097e-06, |
| "loss": 1.7913298606872559, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.860572483841182, |
| "grad_norm": 6.28125, |
| "learning_rate": 4.2090218646221884e-06, |
| "loss": 1.5537046194076538, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.8630347799322868, |
| "grad_norm": 6.03125, |
| "learning_rate": 4.201845222584946e-06, |
| "loss": 1.7360601425170898, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.8654970760233918, |
| "grad_norm": 6.34375, |
| "learning_rate": 4.194792371135853e-06, |
| "loss": 1.8205009698867798, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.867959372114497, |
| "grad_norm": 3.375, |
| "learning_rate": 4.187863422234293e-06, |
| "loss": 1.408042073249817, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.8704216682056019, |
| "grad_norm": 5.625, |
| "learning_rate": 4.181058485872784e-06, |
| "loss": 1.096937656402588, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.8728839642967068, |
| "grad_norm": 6.09375, |
| "learning_rate": 4.174377670075222e-06, |
| "loss": 1.3984037637710571, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.8753462603878117, |
| "grad_norm": 2.15625, |
| "learning_rate": 4.167821080895174e-06, |
| "loss": 1.3008735179901123, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.8778085564789166, |
| "grad_norm": 3.296875, |
| "learning_rate": 4.161388822414189e-06, |
| "loss": 1.1213737726211548, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.8802708525700216, |
| "grad_norm": 6.40625, |
| "learning_rate": 4.155080996740145e-06, |
| "loss": 1.3446485996246338, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.8827331486611265, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.148897704005638e-06, |
| "loss": 1.3206844329833984, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.8851954447522314, |
| "grad_norm": 48.0, |
| "learning_rate": 4.14283904236638e-06, |
| "loss": 1.3920776844024658, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.8876577408433364, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.136905107999645e-06, |
| "loss": 1.4610090255737305, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.8901200369344413, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.13109599510275e-06, |
| "loss": 1.2146025896072388, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.8925823330255462, |
| "grad_norm": 3.296875, |
| "learning_rate": 4.125411795891547e-06, |
| "loss": 1.1985912322998047, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.8950446291166512, |
| "grad_norm": 2.4375, |
| "learning_rate": 4.119852600598966e-06, |
| "loss": 1.32261323928833, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.897506925207756, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.114418497473584e-06, |
| "loss": 1.1342700719833374, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.899969221298861, |
| "grad_norm": 5.71875, |
| "learning_rate": 4.109109572778222e-06, |
| "loss": 1.235834002494812, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.9024315173899662, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.103925910788572e-06, |
| "loss": 1.3796794414520264, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.9048938134810711, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.0988675937918686e-06, |
| "loss": 1.2857390642166138, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.907356109572176, |
| "grad_norm": 4.5, |
| "learning_rate": 4.093934702085574e-06, |
| "loss": 1.4970194101333618, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.909818405663281, |
| "grad_norm": 6.25, |
| "learning_rate": 4.089127313976101e-06, |
| "loss": 1.31523597240448, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.912280701754386, |
| "grad_norm": 6.5, |
| "learning_rate": 4.084445505777584e-06, |
| "loss": 1.5725702047348022, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.914742997845491, |
| "grad_norm": 4.75, |
| "learning_rate": 4.079889351810655e-06, |
| "loss": 1.5622414350509644, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.917205293936596, |
| "grad_norm": 9.125, |
| "learning_rate": 4.0754589244012665e-06, |
| "loss": 1.2499128580093384, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.919667590027701, |
| "grad_norm": 5.34375, |
| "learning_rate": 4.071154293879545e-06, |
| "loss": 1.224461555480957, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.9221298861188059, |
| "grad_norm": 7.53125, |
| "learning_rate": 4.066975528578675e-06, |
| "loss": 1.4134670495986938, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.9245921822099108, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.062922694833813e-06, |
| "loss": 1.2926013469696045, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.9270544783010157, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.058995856981032e-06, |
| "loss": 0.9741660356521606, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.9295167743921207, |
| "grad_norm": 4.46875, |
| "learning_rate": 4.055195077356308e-06, |
| "loss": 0.7483295798301697, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.9319790704832256, |
| "grad_norm": 4.46875, |
| "learning_rate": 4.051520416294521e-06, |
| "loss": 1.2966933250427246, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.9344413665743305, |
| "grad_norm": 9.4375, |
| "learning_rate": 4.0479719321285045e-06, |
| "loss": 1.2867720127105713, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.9369036626654355, |
| "grad_norm": 2.4375, |
| "learning_rate": 4.044549681188113e-06, |
| "loss": 1.154860258102417, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.9393659587565404, |
| "grad_norm": 2.09375, |
| "learning_rate": 4.041253717799337e-06, |
| "loss": 1.0206176042556763, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.9418282548476453, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.038084094283428e-06, |
| "loss": 1.0539655685424805, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.9442905509387503, |
| "grad_norm": 5.78125, |
| "learning_rate": 4.035040860956082e-06, |
| "loss": 1.2525365352630615, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.9467528470298552, |
| "grad_norm": 6.0625, |
| "learning_rate": 4.032124066126629e-06, |
| "loss": 1.2998080253601074, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.9492151431209603, |
| "grad_norm": 3.453125, |
| "learning_rate": 4.029333756097271e-06, |
| "loss": 1.5448267459869385, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.9516774392120653, |
| "grad_norm": 4.40625, |
| "learning_rate": 4.026669975162351e-06, |
| "loss": 1.6457065343856812, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.9541397353031702, |
| "grad_norm": 6.4375, |
| "learning_rate": 4.02413276560764e-06, |
| "loss": 1.6280792951583862, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.9566020313942751, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.021722167709676e-06, |
| "loss": 1.5384184122085571, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.95906432748538, |
| "grad_norm": 4.5, |
| "learning_rate": 4.019438219735116e-06, |
| "loss": 1.6012859344482422, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.9615266235764852, |
| "grad_norm": 6.4375, |
| "learning_rate": 4.017280957940137e-06, |
| "loss": 1.3362534046173096, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.9639889196675901, |
| "grad_norm": 2.15625, |
| "learning_rate": 4.015250416569853e-06, |
| "loss": 1.2762130498886108, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.966451215758695, |
| "grad_norm": 5.25, |
| "learning_rate": 4.013346627857777e-06, |
| "loss": 1.3821439743041992, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.9689135118498, |
| "grad_norm": 3.984375, |
| "learning_rate": 4.0115696220253025e-06, |
| "loss": 1.5566853284835815, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.971375807940905, |
| "grad_norm": 11.1875, |
| "learning_rate": 4.009919427281232e-06, |
| "loss": 1.609104037284851, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.9738381040320099, |
| "grad_norm": 7.1875, |
| "learning_rate": 4.0083960698213234e-06, |
| "loss": 1.6049237251281738, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.9763004001231148, |
| "grad_norm": 2.359375, |
| "learning_rate": 4.006999573827876e-06, |
| "loss": 1.2179689407348633, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.9787626962142197, |
| "grad_norm": 6.4375, |
| "learning_rate": 4.005729961469349e-06, |
| "loss": 1.2181804180145264, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.9812249923053247, |
| "grad_norm": 9.375, |
| "learning_rate": 4.0045872529000035e-06, |
| "loss": 1.6380505561828613, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.9836872883964296, |
| "grad_norm": 6.9375, |
| "learning_rate": 4.003571466259587e-06, |
| "loss": 1.5696303844451904, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.9861495844875345, |
| "grad_norm": 9.4375, |
| "learning_rate": 4.002682617673048e-06, |
| "loss": 1.3733805418014526, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.9886118805786395, |
| "grad_norm": 2.90625, |
| "learning_rate": 4.001920721250273e-06, |
| "loss": 1.317124843597412, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.9910741766697444, |
| "grad_norm": 11.1875, |
| "learning_rate": 4.001285789085867e-06, |
| "loss": 1.012315034866333, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.9935364727608493, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.000777831258963e-06, |
| "loss": 1.1209490299224854, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.9959987688519545, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.000396855833057e-06, |
| "loss": 1.491336464881897, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.9984610649430594, |
| "grad_norm": 1.546875, |
| "learning_rate": 4.000142868855884e-06, |
| "loss": 1.23062264919281, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.0000158743593194e-06, |
| "loss": 1.0397253036499023, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1626, |
| "total_flos": 2.5753569883429274e+18, |
| "train_loss": 1.3656025735654513, |
| "train_runtime": 15098.7141, |
| "train_samples_per_second": 1.721, |
| "train_steps_per_second": 0.108 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1626, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.5753569883429274e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
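For quick inspection of the state file above, here is a minimal sketch (plain Python with matplotlib; the filename `trainer_state.json` and the plotting choices are illustrative assumptions, not part of the dump) that loads the file and plots the logged training loss against the learning-rate schedule:

```python
# Minimal sketch: read the Trainer state dump above and plot the
# per-step loss and learning-rate schedule. Assumes the JSON has been
# saved locally as "trainer_state.json" (hypothetical path).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step entries; the final summary entry carries
# "train_loss"/"train_runtime" but no "loss" or "learning_rate" key.
logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

The filter on the `"loss"` key is what drops the closing summary record (the one with `train_loss` and `train_runtime`), so the two curves cover exactly the `logging_steps: 2` entries from step 2 through `max_steps` 1626.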