| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.262511373976342, |
| "eval_steps": 500, |
| "global_step": 2775, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 2.3685307115973546, |
| "learning_rate": 5e-06, |
| "loss": 0.0587, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 3.207290006513166, |
| "learning_rate": 4.999999897855645e-06, |
| "loss": 0.0976, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 3.061584755625611, |
| "learning_rate": 4.9999995914225884e-06, |
| "loss": 0.1138, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 2.4708172493174265, |
| "learning_rate": 4.999999080700855e-06, |
| "loss": 0.102, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 2.7122863978048204, |
| "learning_rate": 4.999998365690486e-06, |
| "loss": 0.0899, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 2.1348308028500367, |
| "learning_rate": 4.999997446391542e-06, |
| "loss": 0.0589, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 1.9525029408374595, |
| "learning_rate": 4.999996322804095e-06, |
| "loss": 0.0692, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 2.4972521600201087, |
| "learning_rate": 4.999994994928239e-06, |
| "loss": 0.094, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 1.3057783939017902, |
| "learning_rate": 4.999993462764082e-06, |
| "loss": 0.0401, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 1.8178622655461494, |
| "learning_rate": 4.999991726311749e-06, |
| "loss": 0.0508, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 1.8904298363447831, |
| "learning_rate": 4.999989785571382e-06, |
| "loss": 0.0466, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 2.397431505721498, |
| "learning_rate": 4.999987640543139e-06, |
| "loss": 0.0684, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 2.121710266227225, |
| "learning_rate": 4.999985291227196e-06, |
| "loss": 0.0729, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 2.9696000985831614, |
| "learning_rate": 4.999982737623746e-06, |
| "loss": 0.0922, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 2.270433126704546, |
| "learning_rate": 4.999979979732995e-06, |
| "loss": 0.0946, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 1.9380248124362378, |
| "learning_rate": 4.999977017555171e-06, |
| "loss": 0.0578, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 2.6281882171357958, |
| "learning_rate": 4.999973851090514e-06, |
| "loss": 0.1147, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 2.40029765076707, |
| "learning_rate": 4.999970480339284e-06, |
| "loss": 0.0906, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 2.889640814144301, |
| "learning_rate": 4.9999669053017564e-06, |
| "loss": 0.0792, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 2.3110994220860883, |
| "learning_rate": 4.9999631259782235e-06, |
| "loss": 0.0751, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 2.6890244705482806, |
| "learning_rate": 4.999959142368993e-06, |
| "loss": 0.0966, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 2.2488041264680563, |
| "learning_rate": 4.999954954474391e-06, |
| "loss": 0.0714, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 2.0642223983397883, |
| "learning_rate": 4.9999505622947594e-06, |
| "loss": 0.0881, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 2.384727655713489, |
| "learning_rate": 4.999945965830458e-06, |
| "loss": 0.0992, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 2.2739375250381504, |
| "learning_rate": 4.999941165081863e-06, |
| "loss": 0.0831, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 1.6418905911049972, |
| "learning_rate": 4.999936160049364e-06, |
| "loss": 0.0662, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 2.029045596294324, |
| "learning_rate": 4.999930950733373e-06, |
| "loss": 0.097, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 2.2833378337725287, |
| "learning_rate": 4.999925537134312e-06, |
| "loss": 0.0823, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 2.611896749496796, |
| "learning_rate": 4.9999199192526286e-06, |
| "loss": 0.1115, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 2.4812612616344865, |
| "learning_rate": 4.9999140970887775e-06, |
| "loss": 0.0854, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 2.0837983680092904, |
| "learning_rate": 4.999908070643236e-06, |
| "loss": 0.0837, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 2.0812008840647827, |
| "learning_rate": 4.999901839916495e-06, |
| "loss": 0.064, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 1.5275195881020318, |
| "learning_rate": 4.999895404909067e-06, |
| "loss": 0.0582, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 2.703502541064391, |
| "learning_rate": 4.999888765621476e-06, |
| "loss": 0.1102, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 1.7231856796809104, |
| "learning_rate": 4.999881922054264e-06, |
| "loss": 0.0571, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 1.6472076658400754, |
| "learning_rate": 4.999874874207991e-06, |
| "loss": 0.0536, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 2.902300005488672, |
| "learning_rate": 4.999867622083232e-06, |
| "loss": 0.1302, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 1.9543380822482044, |
| "learning_rate": 4.99986016568058e-06, |
| "loss": 0.0983, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 1.814859572890468, |
| "learning_rate": 4.999852505000646e-06, |
| "loss": 0.0717, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 1.882630749677819, |
| "learning_rate": 4.999844640044053e-06, |
| "loss": 0.07, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.4063115131397823, |
| "learning_rate": 4.999836570811445e-06, |
| "loss": 0.0873, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 2.9701013712692035, |
| "learning_rate": 4.999828297303483e-06, |
| "loss": 0.0957, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 2.207833234895104, |
| "learning_rate": 4.9998198195208405e-06, |
| "loss": 0.0879, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 2.168760551509319, |
| "learning_rate": 4.999811137464212e-06, |
| "loss": 0.0967, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 2.12859962179133, |
| "learning_rate": 4.999802251134307e-06, |
| "loss": 0.1028, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 1.8067595132130894, |
| "learning_rate": 4.99979316053185e-06, |
| "loss": 0.0778, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 3.8815722657740594, |
| "learning_rate": 4.999783865657585e-06, |
| "loss": 0.1812, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 4.142186542548352, |
| "learning_rate": 4.999774366512272e-06, |
| "loss": 0.1981, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 2.4946427215064015, |
| "learning_rate": 4.9997646630966865e-06, |
| "loss": 0.0866, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 2.219814267860857, |
| "learning_rate": 4.999754755411621e-06, |
| "loss": 0.0767, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 1.7512451842619647, |
| "learning_rate": 4.9997446434578865e-06, |
| "loss": 0.0709, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 1.9267762038567948, |
| "learning_rate": 4.999734327236307e-06, |
| "loss": 0.0791, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 1.3192434416131813, |
| "learning_rate": 4.999723806747728e-06, |
| "loss": 0.0611, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 2.0553891309583787, |
| "learning_rate": 4.99971308199301e-06, |
| "loss": 0.0708, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 1.6809260342794263, |
| "learning_rate": 4.999702152973025e-06, |
| "loss": 0.0662, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 2.0087287549898716, |
| "learning_rate": 4.9996910196886694e-06, |
| "loss": 0.0795, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 1.3268510730840513, |
| "learning_rate": 4.999679682140852e-06, |
| "loss": 0.0422, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 2.646053521216802, |
| "learning_rate": 4.999668140330499e-06, |
| "loss": 0.1284, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 1.5857988579934552, |
| "learning_rate": 4.999656394258555e-06, |
| "loss": 0.0647, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 1.756551616255058, |
| "learning_rate": 4.999644443925978e-06, |
| "loss": 0.078, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 2.2102751228780546, |
| "learning_rate": 4.999632289333746e-06, |
| "loss": 0.0785, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 2.338156657994438, |
| "learning_rate": 4.999619930482852e-06, |
| "loss": 0.0835, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 2.0921557148636616, |
| "learning_rate": 4.999607367374304e-06, |
| "loss": 0.0974, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 1.7535396635399074, |
| "learning_rate": 4.999594600009131e-06, |
| "loss": 0.0605, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 2.2055708873696585, |
| "learning_rate": 4.999581628388375e-06, |
| "loss": 0.0946, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 2.5001955714674216, |
| "learning_rate": 4.999568452513097e-06, |
| "loss": 0.1549, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 2.417716838936908, |
| "learning_rate": 4.9995550723843726e-06, |
| "loss": 0.0953, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 1.9976883408624455, |
| "learning_rate": 4.999541488003295e-06, |
| "loss": 0.0772, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 1.9326277047503455, |
| "learning_rate": 4.999527699370975e-06, |
| "loss": 0.0764, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 2.0337761312716527, |
| "learning_rate": 4.99951370648854e-06, |
| "loss": 0.0659, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 1.895878774895592, |
| "learning_rate": 4.999499509357132e-06, |
| "loss": 0.0815, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.0909717848011313, |
| "learning_rate": 4.999485107977912e-06, |
| "loss": 0.084, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 1.5271836426577585, |
| "learning_rate": 4.999470502352057e-06, |
| "loss": 0.0645, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 2.4817155636981223, |
| "learning_rate": 4.999455692480759e-06, |
| "loss": 0.1008, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 1.6027477251164817, |
| "learning_rate": 4.999440678365229e-06, |
| "loss": 0.0722, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 2.164861284274037, |
| "learning_rate": 4.999425460006695e-06, |
| "loss": 0.0876, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 1.8147143711706584, |
| "learning_rate": 4.9994100374063995e-06, |
| "loss": 0.0739, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 2.379478288499757, |
| "learning_rate": 4.9993944105656035e-06, |
| "loss": 0.1158, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 1.7238147576191318, |
| "learning_rate": 4.999378579485582e-06, |
| "loss": 0.0749, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 2.1444185576728323, |
| "learning_rate": 4.999362544167632e-06, |
| "loss": 0.0937, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 1.18142283635082, |
| "learning_rate": 4.99934630461306e-06, |
| "loss": 0.0569, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 2.3599788407160456, |
| "learning_rate": 4.999329860823197e-06, |
| "loss": 0.0848, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 1.851574920799011, |
| "learning_rate": 4.999313212799383e-06, |
| "loss": 0.0882, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 2.144291660745484, |
| "learning_rate": 4.99929636054298e-06, |
| "loss": 0.0881, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 2.083071837291781, |
| "learning_rate": 4.999279304055366e-06, |
| "loss": 0.1109, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 2.245491182317419, |
| "learning_rate": 4.999262043337933e-06, |
| "loss": 0.0933, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 2.076902724310137, |
| "learning_rate": 4.999244578392094e-06, |
| "loss": 0.1004, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 2.213157445111281, |
| "learning_rate": 4.9992269092192736e-06, |
| "loss": 0.1048, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 1.8088256581500983, |
| "learning_rate": 4.9992090358209166e-06, |
| "loss": 0.0803, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 1.6952266837081935, |
| "learning_rate": 4.9991909581984835e-06, |
| "loss": 0.0707, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 1.2806634047624867, |
| "learning_rate": 4.999172676353451e-06, |
| "loss": 0.0405, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 1.537222164184117, |
| "learning_rate": 4.999154190287314e-06, |
| "loss": 0.0678, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 2.152654560935853, |
| "learning_rate": 4.999135500001583e-06, |
| "loss": 0.1323, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 1.7293087783358614, |
| "learning_rate": 4.9991166054977844e-06, |
| "loss": 0.0851, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 2.689089264886033, |
| "learning_rate": 4.999097506777463e-06, |
| "loss": 0.1018, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 1.8242860351920025, |
| "learning_rate": 4.999078203842179e-06, |
| "loss": 0.1063, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 1.5249963877818449, |
| "learning_rate": 4.999058696693511e-06, |
| "loss": 0.0593, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 1.668772591755926, |
| "learning_rate": 4.99903898533305e-06, |
| "loss": 0.0709, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 1.8521288885149407, |
| "learning_rate": 4.99901906976241e-06, |
| "loss": 0.0842, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 2.106435857041323, |
| "learning_rate": 4.998998949983217e-06, |
| "loss": 0.0921, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 2.104450695294598, |
| "learning_rate": 4.998978625997115e-06, |
| "loss": 0.1082, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 2.1381043167125466, |
| "learning_rate": 4.998958097805765e-06, |
| "loss": 0.0966, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 1.6962878781771613, |
| "learning_rate": 4.9989373654108445e-06, |
| "loss": 0.0721, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 26.768545049591438, |
| "learning_rate": 4.9989164288140465e-06, |
| "loss": 0.362, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 2.63813062408578, |
| "learning_rate": 4.998895288017085e-06, |
| "loss": 0.1373, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 1.828826426920959, |
| "learning_rate": 4.998873943021684e-06, |
| "loss": 0.0743, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 1.524672393516503, |
| "learning_rate": 4.998852393829589e-06, |
| "loss": 0.0693, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 3.0873114713096683, |
| "learning_rate": 4.9988306404425625e-06, |
| "loss": 0.1492, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 1.7541988764209069, |
| "learning_rate": 4.99880868286238e-06, |
| "loss": 0.0941, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 2.3475973125438103, |
| "learning_rate": 4.998786521090836e-06, |
| "loss": 0.0925, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 2.1297159392440452, |
| "learning_rate": 4.9987641551297426e-06, |
| "loss": 0.1209, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 1.8188477873711246, |
| "learning_rate": 4.998741584980926e-06, |
| "loss": 0.1191, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 2.0744703068317474, |
| "learning_rate": 4.9987188106462314e-06, |
| "loss": 0.0958, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 1.67585557445257, |
| "learning_rate": 4.99869583212752e-06, |
| "loss": 0.0759, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 2.9423649270306456, |
| "learning_rate": 4.9986726494266694e-06, |
| "loss": 0.1628, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 1.9805897541793653, |
| "learning_rate": 4.998649262545574e-06, |
| "loss": 0.0865, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 1.862673950464683, |
| "learning_rate": 4.998625671486144e-06, |
| "loss": 0.0841, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 1.6852737490573195, |
| "learning_rate": 4.998601876250308e-06, |
| "loss": 0.0801, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 1.8645780399689873, |
| "learning_rate": 4.998577876840011e-06, |
| "loss": 0.0822, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 1.7705796593126653, |
| "learning_rate": 4.9985536732572124e-06, |
| "loss": 0.0836, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 1.4380115814084553, |
| "learning_rate": 4.998529265503891e-06, |
| "loss": 0.0714, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 1.841019746353449, |
| "learning_rate": 4.9985046535820416e-06, |
| "loss": 0.0925, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 2.13633472088372, |
| "learning_rate": 4.998479837493675e-06, |
| "loss": 0.1098, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 1.6795956051728682, |
| "learning_rate": 4.9984548172408195e-06, |
| "loss": 0.0623, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 7.146738489798405, |
| "learning_rate": 4.998429592825519e-06, |
| "loss": 0.1803, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 2.17497011974541, |
| "learning_rate": 4.998404164249835e-06, |
| "loss": 0.1209, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 1.9663385354035616, |
| "learning_rate": 4.998378531515845e-06, |
| "loss": 0.0704, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 2.398444068788508, |
| "learning_rate": 4.998352694625645e-06, |
| "loss": 0.0819, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 1.5854929257305652, |
| "learning_rate": 4.998326653581343e-06, |
| "loss": 0.0775, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 1.8831317521751245, |
| "learning_rate": 4.998300408385072e-06, |
| "loss": 0.0895, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 2.624836374744882, |
| "learning_rate": 4.998273959038972e-06, |
| "loss": 0.1398, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 1.8281764860819427, |
| "learning_rate": 4.998247305545207e-06, |
| "loss": 0.0979, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 1.4175605750366638, |
| "learning_rate": 4.998220447905953e-06, |
| "loss": 0.0674, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 2.0007328792439307, |
| "learning_rate": 4.998193386123408e-06, |
| "loss": 0.1082, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 2.2534593276871355, |
| "learning_rate": 4.99816612019978e-06, |
| "loss": 0.1165, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 7.223128092677242, |
| "learning_rate": 4.998138650137298e-06, |
| "loss": 0.1547, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 2.0541187438324178, |
| "learning_rate": 4.998110975938208e-06, |
| "loss": 0.1153, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 2.900003934434033, |
| "learning_rate": 4.998083097604769e-06, |
| "loss": 0.1227, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 2.9930382656276655, |
| "learning_rate": 4.998055015139261e-06, |
| "loss": 0.0671, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 1.8183166737473904, |
| "learning_rate": 4.998026728543979e-06, |
| "loss": 0.0879, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 1.750231162848612, |
| "learning_rate": 4.997998237821233e-06, |
| "loss": 0.0973, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 1.531092755332603, |
| "learning_rate": 4.997969542973352e-06, |
| "loss": 0.0755, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 2.106588666489457, |
| "learning_rate": 4.997940644002681e-06, |
| "loss": 0.1014, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 2.4260145417995513, |
| "learning_rate": 4.997911540911581e-06, |
| "loss": 0.0992, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 1.9957158387709846, |
| "learning_rate": 4.99788223370243e-06, |
| "loss": 0.1074, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 2.7359115449729385, |
| "learning_rate": 4.9978527223776245e-06, |
| "loss": 0.1298, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 1.4774963397056595, |
| "learning_rate": 4.9978230069395735e-06, |
| "loss": 0.0725, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 2.4431671333335188, |
| "learning_rate": 4.9977930873907065e-06, |
| "loss": 0.0983, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 1.9906443670591782, |
| "learning_rate": 4.997762963733468e-06, |
| "loss": 0.1039, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 2.0201798980001517, |
| "learning_rate": 4.997732635970321e-06, |
| "loss": 0.085, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.7461931203369137, |
| "learning_rate": 4.9977021041037425e-06, |
| "loss": 0.0884, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 2.339191302020108, |
| "learning_rate": 4.9976713681362265e-06, |
| "loss": 0.1159, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 2.314166753359135, |
| "learning_rate": 4.997640428070286e-06, |
| "loss": 0.1338, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 1.5963391451568967, |
| "learning_rate": 4.99760928390845e-06, |
| "loss": 0.0575, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 1.7788915412646347, |
| "learning_rate": 4.997577935653262e-06, |
| "loss": 0.08, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 1.5840889143049688, |
| "learning_rate": 4.9975463833072835e-06, |
| "loss": 0.0709, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 2.1242834812157962, |
| "learning_rate": 4.997514626873093e-06, |
| "loss": 0.1078, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 1.7256733994251798, |
| "learning_rate": 4.997482666353287e-06, |
| "loss": 0.0678, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 2.2088750555704073, |
| "learning_rate": 4.997450501750476e-06, |
| "loss": 0.0981, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 1.817598507902073, |
| "learning_rate": 4.997418133067288e-06, |
| "loss": 0.0829, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 1.9174894618752205, |
| "learning_rate": 4.997385560306368e-06, |
| "loss": 0.0922, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 1.7975593397664607, |
| "learning_rate": 4.997352783470379e-06, |
| "loss": 0.093, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 2.1789877377155147, |
| "learning_rate": 4.997319802561997e-06, |
| "loss": 0.1044, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 1.5046722090412417, |
| "learning_rate": 4.9972866175839196e-06, |
| "loss": 0.0806, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 1.828261506678391, |
| "learning_rate": 4.9972532285388575e-06, |
| "loss": 0.1018, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 1.853289616987827, |
| "learning_rate": 4.997219635429538e-06, |
| "loss": 0.1177, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 1.9172069323651033, |
| "learning_rate": 4.997185838258709e-06, |
| "loss": 0.0817, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 1.6956924002006215, |
| "learning_rate": 4.997151837029129e-06, |
| "loss": 0.0679, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 1.8575330553269362, |
| "learning_rate": 4.997117631743579e-06, |
| "loss": 0.0855, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 1.7266908578071283, |
| "learning_rate": 4.997083222404852e-06, |
| "loss": 0.0625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 1.6397125044179104, |
| "learning_rate": 4.997048609015762e-06, |
| "loss": 0.0751, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 1.5340896344557344, |
| "learning_rate": 4.997013791579136e-06, |
| "loss": 0.0786, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 1.9189331650587453, |
| "learning_rate": 4.996978770097819e-06, |
| "loss": 0.0953, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 1.7773721601434869, |
| "learning_rate": 4.996943544574673e-06, |
| "loss": 0.083, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 1.7663708027835396, |
| "learning_rate": 4.996908115012576e-06, |
| "loss": 0.0711, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 2.0988130747441462, |
| "learning_rate": 4.996872481414425e-06, |
| "loss": 0.1068, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 3.491649419917669, |
| "learning_rate": 4.9968366437831305e-06, |
| "loss": 0.1596, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 0.9772529604089312, |
| "learning_rate": 4.99680060212162e-06, |
| "loss": 0.0469, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 1.411497576217555, |
| "learning_rate": 4.996764356432841e-06, |
| "loss": 0.0799, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 1.9634897057091474, |
| "learning_rate": 4.996727906719754e-06, |
| "loss": 0.0818, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 1.8622777856402457, |
| "learning_rate": 4.9966912529853365e-06, |
| "loss": 0.0654, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 1.6338074095796988, |
| "learning_rate": 4.996654395232585e-06, |
| "loss": 0.0744, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 1.534919993971643, |
| "learning_rate": 4.996617333464512e-06, |
| "loss": 0.0639, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 1.7391379315757225, |
| "learning_rate": 4.996580067684145e-06, |
| "loss": 0.0715, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 1.7215093643580193, |
| "learning_rate": 4.996542597894528e-06, |
| "loss": 0.1192, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 2.041088124472192, |
| "learning_rate": 4.996504924098726e-06, |
| "loss": 0.1078, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 1.7083926900772908, |
| "learning_rate": 4.9964670462998145e-06, |
| "loss": 0.0922, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 1.9950587953196364, |
| "learning_rate": 4.99642896450089e-06, |
| "loss": 0.125, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 2.2702904646099022, |
| "learning_rate": 4.9963906787050656e-06, |
| "loss": 0.1318, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 1.5062676480402928, |
| "learning_rate": 4.996352188915467e-06, |
| "loss": 0.0621, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 2.6764229211241153, |
| "learning_rate": 4.996313495135242e-06, |
| "loss": 0.1112, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 2.276483991348045, |
| "learning_rate": 4.9962745973675505e-06, |
| "loss": 0.1219, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 1.4375762261827663, |
| "learning_rate": 4.996235495615572e-06, |
| "loss": 0.0641, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 2.3164336329931094, |
| "learning_rate": 4.996196189882503e-06, |
| "loss": 0.1176, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 2.225732764096407, |
| "learning_rate": 4.996156680171552e-06, |
| "loss": 0.1096, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 1.8464739663611849, |
| "learning_rate": 4.996116966485951e-06, |
| "loss": 0.0817, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 1.9290667932284378, |
| "learning_rate": 4.996077048828944e-06, |
| "loss": 0.1106, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 1.6322378586848272, |
| "learning_rate": 4.996036927203793e-06, |
| "loss": 0.0972, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 2.2100804969645416, |
| "learning_rate": 4.995996601613775e-06, |
| "loss": 0.0944, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 1.5641835045850314, |
| "learning_rate": 4.9959560720621875e-06, |
| "loss": 0.0896, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 2.2116837789953117, |
| "learning_rate": 4.995915338552341e-06, |
| "loss": 0.1331, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 1.8792253280188753, |
| "learning_rate": 4.995874401087565e-06, |
| "loss": 0.0967, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 2.167978668790899, |
| "learning_rate": 4.9958332596712035e-06, |
| "loss": 0.1141, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 1.8621318139110883, |
| "learning_rate": 4.99579191430662e-06, |
| "loss": 0.0972, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 1.8429430162012657, |
| "learning_rate": 4.995750364997192e-06, |
| "loss": 0.0967, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 1.5424629326591568, |
| "learning_rate": 4.995708611746314e-06, |
| "loss": 0.0814, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 2.0700985381007904, |
| "learning_rate": 4.995666654557399e-06, |
| "loss": 0.1038, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 1.8765344045928045, |
| "learning_rate": 4.995624493433876e-06, |
| "loss": 0.1075, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 1.8732891178471252, |
| "learning_rate": 4.995582128379189e-06, |
| "loss": 0.1001, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 2.1418545940903373, |
| "learning_rate": 4.9955395593968e-06, |
| "loss": 0.1463, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 1.905821465202796, |
| "learning_rate": 4.99549678649019e-06, |
| "loss": 0.0848, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 1.7581366634538098, |
| "learning_rate": 4.99545380966285e-06, |
| "loss": 0.0976, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 2.133882292644339, |
| "learning_rate": 4.995410628918294e-06, |
| "loss": 0.1036, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 1.6491455235555508, |
| "learning_rate": 4.995367244260052e-06, |
| "loss": 0.1, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 1.372315749578445, |
| "learning_rate": 4.995323655691667e-06, |
| "loss": 0.0543, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 2.2929084487384297, |
| "learning_rate": 4.995279863216702e-06, |
| "loss": 0.1005, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 1.8371182479654964, |
| "learning_rate": 4.995235866838735e-06, |
| "loss": 0.096, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 1.4189314035725125, |
| "learning_rate": 4.995191666561361e-06, |
| "loss": 0.0707, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 1.4036483642687965, |
| "learning_rate": 4.995147262388192e-06, |
| "loss": 0.0689, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 1.7382878807357938, |
| "learning_rate": 4.995102654322858e-06, |
| "loss": 0.0829, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 1.3102015447280675, |
| "learning_rate": 4.995057842369002e-06, |
| "loss": 0.0548, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 1.8490525072637034, |
| "learning_rate": 4.995012826530287e-06, |
| "loss": 0.1044, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 2.802543488000276, |
| "learning_rate": 4.99496760681039e-06, |
| "loss": 0.1393, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 2.4234245545914295, |
| "learning_rate": 4.994922183213009e-06, |
| "loss": 0.1325, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 1.1495372549504432, |
| "learning_rate": 4.9948765557418535e-06, |
| "loss": 0.0585, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 2.1666263724534267, |
| "learning_rate": 4.994830724400653e-06, |
| "loss": 0.1063, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 1.7066677970234532, |
| "learning_rate": 4.994784689193151e-06, |
| "loss": 0.1002, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 1.5304723941528642, |
| "learning_rate": 4.994738450123111e-06, |
| "loss": 0.0825, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 2.1125485884299486, |
| "learning_rate": 4.994692007194312e-06, |
| "loss": 0.1089, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 1.4297773182355138, |
| "learning_rate": 4.994645360410547e-06, |
| "loss": 0.0855, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 1.741498602747005, |
| "learning_rate": 4.99459850977563e-06, |
| "loss": 0.0884, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 1.6875366585424447, |
| "learning_rate": 4.994551455293388e-06, |
| "loss": 0.068, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 2.03347527932056, |
| "learning_rate": 4.9945041969676654e-06, |
| "loss": 0.0997, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 1.5553350034126536, |
| "learning_rate": 4.994456734802325e-06, |
| "loss": 0.0709, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 1.354348073951093, |
| "learning_rate": 4.994409068801247e-06, |
| "loss": 0.0858, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 1.6048007960766557, |
| "learning_rate": 4.994361198968323e-06, |
| "loss": 0.0891, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 2.3380973830643663, |
| "learning_rate": 4.994313125307466e-06, |
| "loss": 0.116, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 1.68606521406513, |
| "learning_rate": 4.994264847822605e-06, |
| "loss": 0.09, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 2.0274881934833715, |
| "learning_rate": 4.994216366517684e-06, |
| "loss": 0.0856, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 1.9224041067300894, |
| "learning_rate": 4.994167681396667e-06, |
| "loss": 0.1032, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 2.213562554498921, |
| "learning_rate": 4.994118792463529e-06, |
| "loss": 0.1125, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 2.396477374166045, |
| "learning_rate": 4.994069699722267e-06, |
| "loss": 0.16, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 1.6621616457271884, |
| "learning_rate": 4.994020403176893e-06, |
| "loss": 0.1088, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 2.0137991000965862, |
| "learning_rate": 4.9939709028314345e-06, |
| "loss": 0.1203, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 1.731498246221376, |
| "learning_rate": 4.993921198689935e-06, |
| "loss": 0.0779, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 1.53319841517271, |
| "learning_rate": 4.993871290756459e-06, |
| "loss": 0.0859, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 1.5738861001818754, |
| "learning_rate": 4.9938211790350835e-06, |
| "loss": 0.0822, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 1.795556137822037, |
| "learning_rate": 4.993770863529902e-06, |
| "loss": 0.1082, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 1.753136266606954, |
| "learning_rate": 4.993720344245029e-06, |
| "loss": 0.0826, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 1.724266476242851, |
| "learning_rate": 4.99366962118459e-06, |
| "loss": 0.0851, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 1.8081901179247517, |
| "learning_rate": 4.99361869435273e-06, |
| "loss": 0.0965, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 2.064401083784083, |
| "learning_rate": 4.993567563753613e-06, |
| "loss": 0.0881, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 1.6354098857617054, |
| "learning_rate": 4.993516229391414e-06, |
| "loss": 0.0933, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 1.2711881947711132, |
| "learning_rate": 4.993464691270331e-06, |
| "loss": 0.0595, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 1.5847340722430843, |
| "learning_rate": 4.993412949394572e-06, |
| "loss": 0.0812, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 1.5774467606957123, |
| "learning_rate": 4.993361003768369e-06, |
| "loss": 0.081, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 1.3573852133613107, |
| "learning_rate": 4.993308854395963e-06, |
| "loss": 0.0812, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 1.5273272920136396, |
| "learning_rate": 4.993256501281618e-06, |
| "loss": 0.0634, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 1.8382646613112785, |
| "learning_rate": 4.993203944429611e-06, |
| "loss": 0.1145, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 1.5747608705636602, |
| "learning_rate": 4.993151183844236e-06, |
| "loss": 0.0801, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 1.7065433305132354, |
| "learning_rate": 4.9930982195298065e-06, |
| "loss": 0.0742, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 1.709109441111134, |
| "learning_rate": 4.9930450514906484e-06, |
| "loss": 0.1028, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 1.6959707782927067, |
| "learning_rate": 4.9929916797311075e-06, |
| "loss": 0.0791, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 2.374639715905283, |
| "learning_rate": 4.992938104255545e-06, |
| "loss": 0.1477, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 1.6263809057131815, |
| "learning_rate": 4.992884325068339e-06, |
| "loss": 0.0916, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 1.6207164559915699, |
| "learning_rate": 4.992830342173882e-06, |
| "loss": 0.1068, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 2.0552449766971823, |
| "learning_rate": 4.992776155576589e-06, |
| "loss": 0.1145, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 1.6692049909432523, |
| "learning_rate": 4.992721765280884e-06, |
| "loss": 0.1172, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 2.456621954888186, |
| "learning_rate": 4.992667171291215e-06, |
| "loss": 0.1267, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 1.5125250812884448, |
| "learning_rate": 4.992612373612042e-06, |
| "loss": 0.0661, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 2.0952324870431553, |
| "learning_rate": 4.99255737224784e-06, |
| "loss": 0.0917, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 1.4094336450761362, |
| "learning_rate": 4.9925021672031075e-06, |
| "loss": 0.0905, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 2.239902062561175, |
| "learning_rate": 4.992446758482353e-06, |
| "loss": 0.0995, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 2.696125395972354, |
| "learning_rate": 4.992391146090106e-06, |
| "loss": 0.1613, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 1.4853155964847005, |
| "learning_rate": 4.99233533003091e-06, |
| "loss": 0.0826, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 1.5393545957542452, |
| "learning_rate": 4.992279310309326e-06, |
| "loss": 0.1128, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 2.4236941073693283, |
| "learning_rate": 4.9922230869299316e-06, |
| "loss": 0.1607, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 1.6611888199243576, |
| "learning_rate": 4.992166659897321e-06, |
| "loss": 0.1005, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 1.3896864345667146, |
| "learning_rate": 4.992110029216106e-06, |
| "loss": 0.079, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.3647278081745937, |
| "learning_rate": 4.992053194890914e-06, |
| "loss": 0.0767, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 2.0323876810575525, |
| "learning_rate": 4.991996156926388e-06, |
| "loss": 0.101, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 1.948481701516796, |
| "learning_rate": 4.9919389153271904e-06, |
| "loss": 0.106, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 1.3512588403363923, |
| "learning_rate": 4.991881470097998e-06, |
| "loss": 0.0897, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 1.4862053800013564, |
| "learning_rate": 4.991823821243505e-06, |
| "loss": 0.0898, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 2.287612016528911, |
| "learning_rate": 4.991765968768422e-06, |
| "loss": 0.1048, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 1.8190624177647585, |
| "learning_rate": 4.991707912677477e-06, |
| "loss": 0.076, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 1.4178411985180965, |
| "learning_rate": 4.991649652975414e-06, |
| "loss": 0.062, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 1.7010811854624341, |
| "learning_rate": 4.991591189666994e-06, |
| "loss": 0.0928, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 1.7824920481002249, |
| "learning_rate": 4.991532522756993e-06, |
| "loss": 0.09, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 1.12093519239752, |
| "learning_rate": 4.991473652250207e-06, |
| "loss": 0.0564, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 1.4956629959050902, |
| "learning_rate": 4.991414578151445e-06, |
| "loss": 0.0777, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 3.467748085139679, |
| "learning_rate": 4.991355300465535e-06, |
| "loss": 0.193, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 1.746518786410603, |
| "learning_rate": 4.99129581919732e-06, |
| "loss": 0.0862, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 1.3513400373127227, |
| "learning_rate": 4.9912361343516616e-06, |
| "loss": 0.0588, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 1.7841617467512154, |
| "learning_rate": 4.991176245933437e-06, |
| "loss": 0.0982, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 1.6650575824861316, |
| "learning_rate": 4.9911161539475385e-06, |
| "loss": 0.0868, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 2.0850606622795667, |
| "learning_rate": 4.991055858398879e-06, |
| "loss": 0.1087, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 2.27094495258401, |
| "learning_rate": 4.990995359292384e-06, |
| "loss": 0.1177, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 1.8175215978998918, |
| "learning_rate": 4.990934656632997e-06, |
| "loss": 0.1029, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 1.9580713421337124, |
| "learning_rate": 4.990873750425679e-06, |
| "loss": 0.0842, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 1.5378181370134305, |
| "learning_rate": 4.990812640675406e-06, |
| "loss": 0.0813, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 1.4646500614646956, |
| "learning_rate": 4.990751327387174e-06, |
| "loss": 0.0642, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 1.7132953215338962, |
| "learning_rate": 4.99068981056599e-06, |
| "loss": 0.0921, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 2.020828034549401, |
| "learning_rate": 4.990628090216885e-06, |
| "loss": 0.1164, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 1.4167009033800524, |
| "learning_rate": 4.990566166344898e-06, |
| "loss": 0.0695, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 1.743676237886539, |
| "learning_rate": 4.990504038955092e-06, |
| "loss": 0.1083, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 1.8343720931834766, |
| "learning_rate": 4.990441708052542e-06, |
| "loss": 0.0985, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 1.4113998497835858, |
| "learning_rate": 4.9903791736423435e-06, |
| "loss": 0.081, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 1.8830612535708886, |
| "learning_rate": 4.9903164357296044e-06, |
| "loss": 0.0954, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 1.4208829323408247, |
| "learning_rate": 4.990253494319453e-06, |
| "loss": 0.0919, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 1.3671067756437636, |
| "learning_rate": 4.990190349417032e-06, |
| "loss": 0.0928, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 1.965673083316737, |
| "learning_rate": 4.990127001027501e-06, |
| "loss": 0.0849, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 1.3933093475773835, |
| "learning_rate": 4.990063449156037e-06, |
| "loss": 0.0735, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.8960360183192995, |
| "learning_rate": 4.989999693807832e-06, |
| "loss": 0.1141, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 1.8316795975938271, |
| "learning_rate": 4.989935734988098e-06, |
| "loss": 0.1084, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 1.6451238367574679, |
| "learning_rate": 4.98987157270206e-06, |
| "loss": 0.0739, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 2.0644883617404854, |
| "learning_rate": 4.989807206954961e-06, |
| "loss": 0.1125, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 1.322196438354388, |
| "learning_rate": 4.9897426377520605e-06, |
| "loss": 0.0792, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 2.568915637493138, |
| "learning_rate": 4.989677865098636e-06, |
| "loss": 0.1236, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 1.1659492648591403, |
| "learning_rate": 4.989612888999978e-06, |
| "loss": 0.0624, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 1.431829324891758, |
| "learning_rate": 4.9895477094614e-06, |
| "loss": 0.0855, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 1.1704367288212936, |
| "learning_rate": 4.989482326488225e-06, |
| "loss": 0.0741, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 1.6170438514885752, |
| "learning_rate": 4.989416740085796e-06, |
| "loss": 0.1057, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 1.639627544263893, |
| "learning_rate": 4.9893509502594735e-06, |
| "loss": 0.0784, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 1.6437318926278874, |
| "learning_rate": 4.9892849570146335e-06, |
| "loss": 0.1105, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 1.6588510281862943, |
| "learning_rate": 4.989218760356668e-06, |
| "loss": 0.106, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 1.692767253326721, |
| "learning_rate": 4.989152360290987e-06, |
| "loss": 0.1068, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 2.117777475502305, |
| "learning_rate": 4.989085756823015e-06, |
| "loss": 0.1274, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 1.6877038030416243, |
| "learning_rate": 4.989018949958197e-06, |
| "loss": 0.1001, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 2.018139319167573, |
| "learning_rate": 4.98895193970199e-06, |
| "loss": 0.0726, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 1.7601822979826238, |
| "learning_rate": 4.9888847260598705e-06, |
| "loss": 0.0884, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 2.153451550499006, |
| "learning_rate": 4.98881730903733e-06, |
| "loss": 0.1263, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 1.7037846763057773, |
| "learning_rate": 4.98874968863988e-06, |
| "loss": 0.1017, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 1.6373036503866722, |
| "learning_rate": 4.988681864873044e-06, |
| "loss": 0.0936, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 1.5043938510579566, |
| "learning_rate": 4.988613837742364e-06, |
| "loss": 0.0841, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 1.9480098961832564, |
| "learning_rate": 4.9885456072534015e-06, |
| "loss": 0.093, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 2.0743334215437845, |
| "learning_rate": 4.988477173411728e-06, |
| "loss": 0.1001, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 1.3686100112269117, |
| "learning_rate": 4.988408536222939e-06, |
| "loss": 0.0706, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 1.7072624744285279, |
| "learning_rate": 4.9883396956926416e-06, |
| "loss": 0.0883, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 1.2178991309049074, |
| "learning_rate": 4.988270651826462e-06, |
| "loss": 0.066, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 1.5734145514066031, |
| "learning_rate": 4.988201404630041e-06, |
| "loss": 0.0818, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 1.4266019263972631, |
| "learning_rate": 4.988131954109038e-06, |
| "loss": 0.0835, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 2.2620036917930633, |
| "learning_rate": 4.988062300269128e-06, |
| "loss": 0.1374, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 1.4975643248719304, |
| "learning_rate": 4.987992443116003e-06, |
| "loss": 0.0817, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 1.723916950757741, |
| "learning_rate": 4.987922382655372e-06, |
| "loss": 0.086, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 2.50033376989197, |
| "learning_rate": 4.987852118892958e-06, |
| "loss": 0.1498, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 2.0776125106096934, |
| "learning_rate": 4.987781651834503e-06, |
| "loss": 0.1258, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 2.186488732885297, |
| "learning_rate": 4.987710981485768e-06, |
| "loss": 0.1203, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 2.0497982262406698, |
| "learning_rate": 4.987640107852525e-06, |
| "loss": 0.1365, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 1.394060418907116, |
| "learning_rate": 4.987569030940567e-06, |
| "loss": 0.0811, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 1.5257209721345255, |
| "learning_rate": 4.987497750755702e-06, |
| "loss": 0.0665, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 2.328076306378438, |
| "learning_rate": 4.987426267303753e-06, |
| "loss": 0.1186, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 1.8266119344469305, |
| "learning_rate": 4.987354580590563e-06, |
| "loss": 0.1011, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 1.7369452160483552, |
| "learning_rate": 4.987282690621991e-06, |
| "loss": 0.117, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 1.8346392689418392, |
| "learning_rate": 4.987210597403907e-06, |
| "loss": 0.1, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 1.9402353280122917, |
| "learning_rate": 4.987138300942208e-06, |
| "loss": 0.0949, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 1.4819316275042067, |
| "learning_rate": 4.987065801242798e-06, |
| "loss": 0.0855, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 1.8440191145455884, |
| "learning_rate": 4.986993098311601e-06, |
| "loss": 0.1057, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 1.712390016283102, |
| "learning_rate": 4.986920192154561e-06, |
| "loss": 0.0917, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 1.2697535382377623, |
| "learning_rate": 4.986847082777632e-06, |
| "loss": 0.0729, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 1.5330396115730802, |
| "learning_rate": 4.986773770186791e-06, |
| "loss": 0.0966, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 2.359233717201702, |
| "learning_rate": 4.986700254388027e-06, |
| "loss": 0.1308, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 1.330733109747955, |
| "learning_rate": 4.986626535387349e-06, |
| "loss": 0.0728, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 1.7398719883146694, |
| "learning_rate": 4.9865526131907795e-06, |
| "loss": 0.0893, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 2.018839749017437, |
| "learning_rate": 4.9864784878043595e-06, |
| "loss": 0.1268, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 2.439244123753763, |
| "learning_rate": 4.986404159234146e-06, |
| "loss": 0.1047, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 1.4077243142655576, |
| "learning_rate": 4.986329627486213e-06, |
| "loss": 0.07, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 2.0634194365835583, |
| "learning_rate": 4.986254892566652e-06, |
| "loss": 0.1199, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 1.507898380305614, |
| "learning_rate": 4.9861799544815684e-06, |
| "loss": 0.0798, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 1.5689447325912511, |
| "learning_rate": 4.986104813237086e-06, |
| "loss": 0.0872, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 1.5434828853102547, |
| "learning_rate": 4.986029468839346e-06, |
| "loss": 0.0756, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 1.9546839136865664, |
| "learning_rate": 4.985953921294505e-06, |
| "loss": 0.129, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 1.4457360634551049, |
| "learning_rate": 4.985878170608736e-06, |
| "loss": 0.0651, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 1.7053082159754585, |
| "learning_rate": 4.985802216788228e-06, |
| "loss": 0.0786, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 2.0831330601859643, |
| "learning_rate": 4.98572605983919e-06, |
| "loss": 0.1087, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 1.3106266925763963, |
| "learning_rate": 4.985649699767842e-06, |
| "loss": 0.0666, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 1.5931730936354696, |
| "learning_rate": 4.985573136580427e-06, |
| "loss": 0.1015, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 1.3398175715153904, |
| "learning_rate": 4.9854963702832e-06, |
| "loss": 0.0706, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 1.4932070031671647, |
| "learning_rate": 4.985419400882433e-06, |
| "loss": 0.1009, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 2.05809614886543, |
| "learning_rate": 4.985342228384418e-06, |
| "loss": 0.1373, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 25.314485102746445, |
| "learning_rate": 4.985264852795459e-06, |
| "loss": 0.529, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 1.3496622625056474, |
| "learning_rate": 4.98518727412188e-06, |
| "loss": 0.0792, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 2.042157493841037, |
| "learning_rate": 4.98510949237002e-06, |
| "loss": 0.1127, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 2.093747109047391, |
| "learning_rate": 4.985031507546234e-06, |
| "loss": 0.0931, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 2.620290737475778, |
| "learning_rate": 4.984953319656896e-06, |
| "loss": 0.1258, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 1.7812499192074571, |
| "learning_rate": 4.984874928708395e-06, |
| "loss": 0.0934, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 1.9861134139953058, |
| "learning_rate": 4.984796334707136e-06, |
| "loss": 0.1105, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 9.71210910528449, |
| "learning_rate": 4.984717537659542e-06, |
| "loss": 0.119, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 1.2902315877318344, |
| "learning_rate": 4.984638537572052e-06, |
| "loss": 0.0591, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 1.693249076147672, |
| "learning_rate": 4.984559334451121e-06, |
| "loss": 0.0906, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 1.7045791781932, |
| "learning_rate": 4.984479928303221e-06, |
| "loss": 0.066, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 1.588345004423415, |
| "learning_rate": 4.984400319134841e-06, |
| "loss": 0.079, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 2.8167066456613368, |
| "learning_rate": 4.984320506952487e-06, |
| "loss": 0.1743, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 1.8409665855781128, |
| "learning_rate": 4.9842404917626796e-06, |
| "loss": 0.1009, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 1.5444918002986228, |
| "learning_rate": 4.984160273571959e-06, |
| "loss": 0.0952, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 1.9718645058282944, |
| "learning_rate": 4.9840798523868785e-06, |
| "loss": 0.1217, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 1.669853882784426, |
| "learning_rate": 4.983999228214011e-06, |
| "loss": 0.083, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 1.5445667787054873, |
| "learning_rate": 4.983918401059943e-06, |
| "loss": 0.0838, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 1.8477622601816133, |
| "learning_rate": 4.983837370931282e-06, |
| "loss": 0.1199, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 2.295804335093856, |
| "learning_rate": 4.983756137834647e-06, |
| "loss": 0.1561, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 2.1902816453958933, |
| "learning_rate": 4.9836747017766765e-06, |
| "loss": 0.1014, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 1.7414949549224419, |
| "learning_rate": 4.983593062764027e-06, |
| "loss": 0.1046, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 3.529761555914209, |
| "learning_rate": 4.983511220803367e-06, |
| "loss": 0.1573, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 1.5931351386368249, |
| "learning_rate": 4.983429175901386e-06, |
| "loss": 0.0896, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 1.4617184144821196, |
| "learning_rate": 4.983346928064788e-06, |
| "loss": 0.0698, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 1.564679441746091, |
| "learning_rate": 4.9832644773002935e-06, |
| "loss": 0.0955, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 1.4077890282448986, |
| "learning_rate": 4.98318182361464e-06, |
| "loss": 0.0887, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 1.6028267121804223, |
| "learning_rate": 4.9830989670145825e-06, |
| "loss": 0.0989, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 1.8479648547197383, |
| "learning_rate": 4.9830159075068905e-06, |
| "loss": 0.1009, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 1.8145495712184487, |
| "learning_rate": 4.9829326450983514e-06, |
| "loss": 0.1125, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 1.839873930402737, |
| "learning_rate": 4.98284917979577e-06, |
| "loss": 0.0975, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 2.3433237407057863, |
| "learning_rate": 4.9827655116059656e-06, |
| "loss": 0.1061, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 1.479552769836274, |
| "learning_rate": 4.9826816405357755e-06, |
| "loss": 0.105, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 1.0380040250679141, |
| "learning_rate": 4.982597566592054e-06, |
| "loss": 0.0522, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 2.2146611071914744, |
| "learning_rate": 4.982513289781671e-06, |
| "loss": 0.1403, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 1.4265466923705232, |
| "learning_rate": 4.982428810111512e-06, |
| "loss": 0.0846, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 1.4254072959974569, |
| "learning_rate": 4.9823441275884814e-06, |
| "loss": 0.0787, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 2.353200458571576, |
| "learning_rate": 4.982259242219499e-06, |
| "loss": 0.1114, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 1.3512279730893322, |
| "learning_rate": 4.9821741540115006e-06, |
| "loss": 0.0678, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 1.728060266498106, |
| "learning_rate": 4.982088862971441e-06, |
| "loss": 0.1129, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 1.8022543001727114, |
| "learning_rate": 4.982003369106287e-06, |
| "loss": 0.1036, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 1.2312712834502222, |
| "learning_rate": 4.981917672423028e-06, |
| "loss": 0.065, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 1.6183848549336255, |
| "learning_rate": 4.981831772928664e-06, |
| "loss": 0.0934, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 2.001713262915152, |
| "learning_rate": 4.981745670630216e-06, |
| "loss": 0.1356, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 2.0057745044552995, |
| "learning_rate": 4.981659365534718e-06, |
| "loss": 0.1285, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 2.299079022869691, |
| "learning_rate": 4.981572857649225e-06, |
| "loss": 0.1195, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 1.6869951958248894, |
| "learning_rate": 4.981486146980804e-06, |
| "loss": 0.0877, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 1.9301190501764922, |
| "learning_rate": 4.9813992335365415e-06, |
| "loss": 0.0977, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 1.6227704434432904, |
| "learning_rate": 4.98131211732354e-06, |
| "loss": 0.1035, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 1.632769015838627, |
| "learning_rate": 4.981224798348917e-06, |
| "loss": 0.0833, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 2.3862639707091082, |
| "learning_rate": 4.981137276619809e-06, |
| "loss": 0.1419, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 1.2625986411158334, |
| "learning_rate": 4.9810495521433675e-06, |
| "loss": 0.078, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 2.5081068393508157, |
| "learning_rate": 4.9809616249267616e-06, |
| "loss": 0.1478, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 1.9644808854065114, |
| "learning_rate": 4.980873494977174e-06, |
| "loss": 0.121, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 1.647433915922947, |
| "learning_rate": 4.98078516230181e-06, |
| "loss": 0.0865, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 1.5774273491436515, |
| "learning_rate": 4.980696626907884e-06, |
| "loss": 0.0887, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.5604062690588907, |
| "learning_rate": 4.980607888802633e-06, |
| "loss": 0.1, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 1.548442809835796, |
| "learning_rate": 4.980518947993307e-06, |
| "loss": 0.1005, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 1.6276180373825353, |
| "learning_rate": 4.980429804487176e-06, |
| "loss": 0.1006, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 1.5718547041391637, |
| "learning_rate": 4.980340458291521e-06, |
| "loss": 0.0858, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 1.3679183632524226, |
| "learning_rate": 4.980250909413646e-06, |
| "loss": 0.0901, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 1.7491296961984788, |
| "learning_rate": 4.980161157860867e-06, |
| "loss": 0.0888, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 2.0306839493761446, |
| "learning_rate": 4.980071203640519e-06, |
| "loss": 0.0893, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 1.353153596211688, |
| "learning_rate": 4.979981046759952e-06, |
| "loss": 0.0753, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 1.969605104045741, |
| "learning_rate": 4.979890687226533e-06, |
| "loss": 0.1033, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 2.085518332646124, |
| "learning_rate": 4.979800125047647e-06, |
| "loss": 0.0979, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 1.6181669031153556, |
| "learning_rate": 4.979709360230692e-06, |
| "loss": 0.0969, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 1.6760914355637484, |
| "learning_rate": 4.979618392783087e-06, |
| "loss": 0.0883, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 1.2907730003800948, |
| "learning_rate": 4.979527222712266e-06, |
| "loss": 0.0775, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 1.241096973502198, |
| "learning_rate": 4.9794358500256765e-06, |
| "loss": 0.0599, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 1.579037640818148, |
| "learning_rate": 4.979344274730786e-06, |
| "loss": 0.0831, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 2.225915719971972, |
| "learning_rate": 4.979252496835079e-06, |
| "loss": 0.1116, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 2.3031173397129923, |
| "learning_rate": 4.979160516346054e-06, |
| "loss": 0.1536, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 27.297310781833385, |
| "learning_rate": 4.979068333271227e-06, |
| "loss": 0.9223, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 2.4041431299507607, |
| "learning_rate": 4.978975947618131e-06, |
| "loss": 0.1184, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 1.6683861662324915, |
| "learning_rate": 4.978883359394316e-06, |
| "loss": 0.1301, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 1.9056814965685545, |
| "learning_rate": 4.978790568607347e-06, |
| "loss": 0.1001, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 1.9713836323302738, |
| "learning_rate": 4.9786975752648076e-06, |
| "loss": 0.1174, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 1.598376196967646, |
| "learning_rate": 4.978604379374295e-06, |
| "loss": 0.0986, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 1.5517923833736031, |
| "learning_rate": 4.978510980943427e-06, |
| "loss": 0.0807, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 2.004418653450344, |
| "learning_rate": 4.978417379979834e-06, |
| "loss": 0.1065, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 1.7753220163198007, |
| "learning_rate": 4.978323576491165e-06, |
| "loss": 0.0987, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 1.7384737383317277, |
| "learning_rate": 4.978229570485085e-06, |
| "loss": 0.1048, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 1.5352099211420311, |
| "learning_rate": 4.978135361969276e-06, |
| "loss": 0.0983, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 1.6028799125387194, |
| "learning_rate": 4.9780409509514375e-06, |
| "loss": 0.091, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 1.9664054893168261, |
| "learning_rate": 4.977946337439282e-06, |
| "loss": 0.1495, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 1.7122667851036462, |
| "learning_rate": 4.9778515214405436e-06, |
| "loss": 0.1139, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 1.7566455248377864, |
| "learning_rate": 4.977756502962967e-06, |
| "loss": 0.1097, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 1.1350501611425003, |
| "learning_rate": 4.97766128201432e-06, |
| "loss": 0.0629, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 1.2023067292666059, |
| "learning_rate": 4.977565858602381e-06, |
| "loss": 0.0782, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 1.628252441426902, |
| "learning_rate": 4.977470232734949e-06, |
| "loss": 0.0987, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 1.724322735405813, |
| "learning_rate": 4.977374404419838e-06, |
| "loss": 0.0903, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 1.470263169494043, |
| "learning_rate": 4.977278373664877e-06, |
| "loss": 0.0882, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 2.599396527432543, |
| "learning_rate": 4.977182140477916e-06, |
| "loss": 0.1209, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 1.6800447119151198, |
| "learning_rate": 4.977085704866817e-06, |
| "loss": 0.0776, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 1.5595540666125045, |
| "learning_rate": 4.97698906683946e-06, |
| "loss": 0.103, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 2.248635180290087, |
| "learning_rate": 4.9768922264037435e-06, |
| "loss": 0.1388, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 1.1547627152960565, |
| "learning_rate": 4.976795183567579e-06, |
| "loss": 0.0624, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 1.56353757750327, |
| "learning_rate": 4.976697938338898e-06, |
| "loss": 0.0856, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 1.2335181237621284, |
| "learning_rate": 4.976600490725645e-06, |
| "loss": 0.0644, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 1.900991648340467, |
| "learning_rate": 4.976502840735785e-06, |
| "loss": 0.153, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 1.3078243371858722, |
| "learning_rate": 4.976404988377297e-06, |
| "loss": 0.0621, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 2.0047686247285923, |
| "learning_rate": 4.976306933658176e-06, |
| "loss": 0.1136, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 1.8552855878852923, |
| "learning_rate": 4.976208676586435e-06, |
| "loss": 0.1284, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 1.8525936784229493, |
| "learning_rate": 4.976110217170104e-06, |
| "loss": 0.0917, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 1.4658188242525991, |
| "learning_rate": 4.976011555417228e-06, |
| "loss": 0.0749, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 1.1511032936840262, |
| "learning_rate": 4.975912691335869e-06, |
| "loss": 0.0761, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 1.458580259230844, |
| "learning_rate": 4.975813624934106e-06, |
| "loss": 0.0768, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 1.5627508232221192, |
| "learning_rate": 4.975714356220035e-06, |
| "loss": 0.0823, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 1.075721834306004, |
| "learning_rate": 4.975614885201766e-06, |
| "loss": 0.0504, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 1.6198884733457342, |
| "learning_rate": 4.975515211887429e-06, |
| "loss": 0.1024, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 1.6346417323820548, |
| "learning_rate": 4.9754153362851684e-06, |
| "loss": 0.0851, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 2.448143027911265, |
| "learning_rate": 4.975315258403145e-06, |
| "loss": 0.1479, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 1.6016068432961146, |
| "learning_rate": 4.975214978249537e-06, |
| "loss": 0.0886, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 1.4721161321318619, |
| "learning_rate": 4.975114495832539e-06, |
| "loss": 0.0976, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 1.7625335294527533, |
| "learning_rate": 4.975013811160362e-06, |
| "loss": 0.0898, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 1.9298670425360585, |
| "learning_rate": 4.974912924241233e-06, |
| "loss": 0.1027, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 1.4996755802132458, |
| "learning_rate": 4.974811835083397e-06, |
| "loss": 0.0978, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 2.1147277125940955, |
| "learning_rate": 4.974710543695114e-06, |
| "loss": 0.1063, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 2.529920688558412, |
| "learning_rate": 4.974609050084661e-06, |
| "loss": 0.1476, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 2.14209787933433, |
| "learning_rate": 4.974507354260332e-06, |
| "loss": 0.1261, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 1.9058176611193165, |
| "learning_rate": 4.974405456230436e-06, |
| "loss": 0.1203, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 1.8980074058725056, |
| "learning_rate": 4.974303356003301e-06, |
| "loss": 0.0996, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 1.4579903539692274, |
| "learning_rate": 4.974201053587268e-06, |
| "loss": 0.0943, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 1.3940386820106656, |
| "learning_rate": 4.9740985489907005e-06, |
| "loss": 0.0663, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 2.441971054754706, |
| "learning_rate": 4.973995842221971e-06, |
| "loss": 0.1245, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 1.919620601900113, |
| "learning_rate": 4.973892933289476e-06, |
| "loss": 0.1159, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 1.672712776153676, |
| "learning_rate": 4.97378982220162e-06, |
| "loss": 0.0981, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 1.2125382683302124, |
| "learning_rate": 4.973686508966832e-06, |
| "loss": 0.0601, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 1.222443145221144, |
| "learning_rate": 4.973582993593554e-06, |
| "loss": 0.0715, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 1.5223951861259333, |
| "learning_rate": 4.973479276090244e-06, |
| "loss": 0.0795, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 1.2392582362318094, |
| "learning_rate": 4.973375356465378e-06, |
| "loss": 0.061, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 1.7285156139774616, |
| "learning_rate": 4.973271234727447e-06, |
| "loss": 0.1201, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 1.4723786585295477, |
| "learning_rate": 4.97316691088496e-06, |
| "loss": 0.0885, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 2.25192801645438, |
| "learning_rate": 4.973062384946442e-06, |
| "loss": 0.135, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 1.1373098395352674, |
| "learning_rate": 4.9729576569204345e-06, |
| "loss": 0.0728, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 1.5300830315604266, |
| "learning_rate": 4.972852726815495e-06, |
| "loss": 0.0941, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 1.8026113068627658, |
| "learning_rate": 4.972747594640197e-06, |
| "loss": 0.1247, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 1.794104737159684, |
| "learning_rate": 4.9726422604031335e-06, |
| "loss": 0.095, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 1.1504559186965777, |
| "learning_rate": 4.97253672411291e-06, |
| "loss": 0.0674, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 1.4316672986650767, |
| "learning_rate": 4.972430985778152e-06, |
| "loss": 0.0702, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 1.5328603666600327, |
| "learning_rate": 4.972325045407499e-06, |
| "loss": 0.0675, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 3.2405357176859857, |
| "learning_rate": 4.972218903009608e-06, |
| "loss": 0.1212, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 1.5109558607242208, |
| "learning_rate": 4.972112558593153e-06, |
| "loss": 0.0938, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 1.264935168060258, |
| "learning_rate": 4.972006012166823e-06, |
| "loss": 0.0742, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 1.3461924059029533, |
| "learning_rate": 4.971899263739326e-06, |
| "loss": 0.0844, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 1.7441591810954875, |
| "learning_rate": 4.971792313319384e-06, |
| "loss": 0.1139, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 1.7027600325330141, |
| "learning_rate": 4.971685160915737e-06, |
| "loss": 0.0867, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 1.6301828004618641, |
| "learning_rate": 4.971577806537139e-06, |
| "loss": 0.0943, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 1.6173281507194255, |
| "learning_rate": 4.971470250192366e-06, |
| "loss": 0.1052, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 17.712189021618492, |
| "learning_rate": 4.9713624918902045e-06, |
| "loss": 0.3191, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 2.336934606774547, |
| "learning_rate": 4.971254531639461e-06, |
| "loss": 0.1347, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 1.8922827015678323, |
| "learning_rate": 4.971146369448957e-06, |
| "loss": 0.1144, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 1.7408688040721931, |
| "learning_rate": 4.971038005327532e-06, |
| "loss": 0.1143, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 1.9327103804196282, |
| "learning_rate": 4.970929439284039e-06, |
| "loss": 0.1377, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 2.0181579320929224, |
| "learning_rate": 4.970820671327351e-06, |
| "loss": 0.1259, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 1.1056426992050885, |
| "learning_rate": 4.9707117014663565e-06, |
| "loss": 0.0633, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 1.853338129642874, |
| "learning_rate": 4.97060252970996e-06, |
| "loss": 0.1215, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 1.6843406450831364, |
| "learning_rate": 4.970493156067081e-06, |
| "loss": 0.1016, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 1.1701908663612965, |
| "learning_rate": 4.970383580546658e-06, |
| "loss": 0.0731, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 1.7890527407391215, |
| "learning_rate": 4.970273803157645e-06, |
| "loss": 0.1097, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 1.4169073671700831, |
| "learning_rate": 4.970163823909013e-06, |
| "loss": 0.0845, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 1.5828589024944335, |
| "learning_rate": 4.970053642809748e-06, |
| "loss": 0.0921, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 1.6370747251722932, |
| "learning_rate": 4.969943259868853e-06, |
| "loss": 0.1088, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 2.023470308157194, |
| "learning_rate": 4.969832675095351e-06, |
| "loss": 0.1052, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 1.7462230999429424, |
| "learning_rate": 4.969721888498275e-06, |
| "loss": 0.1141, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 1.428774250085193, |
| "learning_rate": 4.96961090008668e-06, |
| "loss": 0.0824, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 1.6447081301063733, |
| "learning_rate": 4.969499709869635e-06, |
| "loss": 0.1324, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 2.0250820847646054, |
| "learning_rate": 4.969388317856225e-06, |
| "loss": 0.1122, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 2.060820071851061, |
| "learning_rate": 4.969276724055554e-06, |
| "loss": 0.128, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 1.8421595012757042, |
| "learning_rate": 4.969164928476741e-06, |
| "loss": 0.0929, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 1.8378761522798848, |
| "learning_rate": 4.969052931128919e-06, |
| "loss": 0.1038, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 1.4559119574869848, |
| "learning_rate": 4.968940732021243e-06, |
| "loss": 0.0884, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 1.9971887851212364, |
| "learning_rate": 4.9688283311628795e-06, |
| "loss": 0.1353, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 1.7386639848323409, |
| "learning_rate": 4.968715728563014e-06, |
| "loss": 0.1025, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2529572338489536, |
| "grad_norm": 1.260155855896464, |
| "learning_rate": 4.968602924230847e-06, |
| "loss": 0.0684, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2534121929026388, |
| "grad_norm": 2.3395689748358843, |
| "learning_rate": 4.968489918175598e-06, |
| "loss": 0.1151, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2538671519563239, |
| "grad_norm": 2.0737729432038137, |
| "learning_rate": 4.9683767104065014e-06, |
| "loss": 0.107, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2543221110100091, |
| "grad_norm": 1.4554456387078378, |
| "learning_rate": 4.968263300932806e-06, |
| "loss": 0.0674, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 1.236095562563839, |
| "learning_rate": 4.968149689763781e-06, |
| "loss": 0.0771, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2552320291173794, |
| "grad_norm": 1.6261579693523964, |
| "learning_rate": 4.968035876908708e-06, |
| "loss": 0.1033, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2556869881710646, |
| "grad_norm": 1.8267174614929946, |
| "learning_rate": 4.967921862376889e-06, |
| "loss": 0.1153, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.25614194722474976, |
| "grad_norm": 1.9897704292294367, |
| "learning_rate": 4.9678076461776415e-06, |
| "loss": 0.1168, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.25659690627843496, |
| "grad_norm": 1.9727936679798233, |
| "learning_rate": 4.9676932283202965e-06, |
| "loss": 0.1389, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2570518653321201, |
| "grad_norm": 1.8484690700205213, |
| "learning_rate": 4.967578608814205e-06, |
| "loss": 0.1024, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2575068243858053, |
| "grad_norm": 1.4833575893287436, |
| "learning_rate": 4.9674637876687345e-06, |
| "loss": 0.0959, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.25796178343949044, |
| "grad_norm": 1.0731244531443167, |
| "learning_rate": 4.967348764893265e-06, |
| "loss": 0.0652, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2584167424931756, |
| "grad_norm": 1.882586364820984, |
| "learning_rate": 4.967233540497197e-06, |
| "loss": 0.0887, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2588717015468608, |
| "grad_norm": 1.5585900206462215, |
| "learning_rate": 4.967118114489946e-06, |
| "loss": 0.0705, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.25932666060054593, |
| "grad_norm": 1.4304247727655925, |
| "learning_rate": 4.967002486880944e-06, |
| "loss": 0.0689, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25978161965423113, |
| "grad_norm": 1.996611084455256, |
| "learning_rate": 4.966886657679641e-06, |
| "loss": 0.1134, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2602365787079163, |
| "grad_norm": 2.573142554440562, |
| "learning_rate": 4.966770626895499e-06, |
| "loss": 0.137, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2606915377616015, |
| "grad_norm": 1.7759211248358038, |
| "learning_rate": 4.966654394538002e-06, |
| "loss": 0.097, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2611464968152866, |
| "grad_norm": 1.3021079669208342, |
| "learning_rate": 4.966537960616646e-06, |
| "loss": 0.0774, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.26160145586897177, |
| "grad_norm": 2.328733131052364, |
| "learning_rate": 4.9664213251409486e-06, |
| "loss": 0.1105, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26205641492265697, |
| "grad_norm": 2.281267812919593, |
| "learning_rate": 4.9663044881204375e-06, |
| "loss": 0.1556, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2625113739763421, |
| "grad_norm": 1.7215892787568372, |
| "learning_rate": 4.9661874495646615e-06, |
| "loss": 0.0917, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2629663330300273, |
| "grad_norm": 1.3072003221216781, |
| "learning_rate": 4.9660702094831845e-06, |
| "loss": 0.0818, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.26342129208371245, |
| "grad_norm": 2.141135787879026, |
| "learning_rate": 4.965952767885587e-06, |
| "loss": 0.1187, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.26387625113739765, |
| "grad_norm": 2.3440295569320857, |
| "learning_rate": 4.965835124781465e-06, |
| "loss": 0.1336, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2643312101910828, |
| "grad_norm": 1.2377586425554465, |
| "learning_rate": 4.965717280180432e-06, |
| "loss": 0.0771, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.26478616924476794, |
| "grad_norm": 1.5553208083958672, |
| "learning_rate": 4.965599234092118e-06, |
| "loss": 0.0906, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.26524112829845314, |
| "grad_norm": 1.676762616981095, |
| "learning_rate": 4.96548098652617e-06, |
| "loss": 0.1091, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2656960873521383, |
| "grad_norm": 1.8329426527347645, |
| "learning_rate": 4.965362537492249e-06, |
| "loss": 0.1171, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2661510464058235, |
| "grad_norm": 1.2752855217123082, |
| "learning_rate": 4.9652438870000356e-06, |
| "loss": 0.0726, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26660600545950863, |
| "grad_norm": 1.188941544645384, |
| "learning_rate": 4.965125035059224e-06, |
| "loss": 0.0801, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.26706096451319383, |
| "grad_norm": 1.4654127807937742, |
| "learning_rate": 4.965005981679527e-06, |
| "loss": 0.0839, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.267515923566879, |
| "grad_norm": 2.0288718475884107, |
| "learning_rate": 4.964886726870673e-06, |
| "loss": 0.1239, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2679708826205642, |
| "grad_norm": 1.972686660841513, |
| "learning_rate": 4.964767270642407e-06, |
| "loss": 0.1004, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2684258416742493, |
| "grad_norm": 1.6499743360699521, |
| "learning_rate": 4.964647613004491e-06, |
| "loss": 0.0976, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26888080072793447, |
| "grad_norm": 1.5661213245685233, |
| "learning_rate": 4.964527753966702e-06, |
| "loss": 0.0818, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.26933575978161967, |
| "grad_norm": 1.387453226127614, |
| "learning_rate": 4.964407693538834e-06, |
| "loss": 0.0813, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2697907188353048, |
| "grad_norm": 1.8652006740776592, |
| "learning_rate": 4.9642874317307e-06, |
| "loss": 0.1092, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.27024567788899, |
| "grad_norm": 1.6739291749648295, |
| "learning_rate": 4.964166968552124e-06, |
| "loss": 0.1262, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.27070063694267515, |
| "grad_norm": 1.4965319066427345, |
| "learning_rate": 4.9640463040129525e-06, |
| "loss": 0.0749, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.27115559599636035, |
| "grad_norm": 1.483777185503557, |
| "learning_rate": 4.963925438123044e-06, |
| "loss": 0.075, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2716105550500455, |
| "grad_norm": 1.646106287941782, |
| "learning_rate": 4.963804370892276e-06, |
| "loss": 0.0948, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.27206551410373064, |
| "grad_norm": 1.8923424637891237, |
| "learning_rate": 4.9636831023305405e-06, |
| "loss": 0.1296, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.27252047315741584, |
| "grad_norm": 1.453967822900046, |
| "learning_rate": 4.963561632447748e-06, |
| "loss": 0.0777, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.272975432211101, |
| "grad_norm": 1.2633146266239919, |
| "learning_rate": 4.9634399612538255e-06, |
| "loss": 0.0704, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2734303912647862, |
| "grad_norm": 24.856853600017228, |
| "learning_rate": 4.963318088758714e-06, |
| "loss": 0.4372, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.27388535031847133, |
| "grad_norm": 1.6301604814034822, |
| "learning_rate": 4.963196014972371e-06, |
| "loss": 0.0879, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.27434030937215653, |
| "grad_norm": 1.556460730817159, |
| "learning_rate": 4.963073739904775e-06, |
| "loss": 0.0893, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2747952684258417, |
| "grad_norm": 1.657318032059153, |
| "learning_rate": 4.962951263565915e-06, |
| "loss": 0.0933, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2752502274795268, |
| "grad_norm": 2.273490391362205, |
| "learning_rate": 4.962828585965801e-06, |
| "loss": 0.1038, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.275705186533212, |
| "grad_norm": 1.5114052665682505, |
| "learning_rate": 4.962705707114457e-06, |
| "loss": 0.097, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.27616014558689717, |
| "grad_norm": 1.7683179621585026, |
| "learning_rate": 4.962582627021923e-06, |
| "loss": 0.1127, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.27661510464058237, |
| "grad_norm": 1.8859941959717001, |
| "learning_rate": 4.962459345698258e-06, |
| "loss": 0.1152, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2770700636942675, |
| "grad_norm": 1.9839838015935523, |
| "learning_rate": 4.962335863153537e-06, |
| "loss": 0.1198, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2775250227479527, |
| "grad_norm": 1.3671283570292578, |
| "learning_rate": 4.962212179397847e-06, |
| "loss": 0.0876, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.27797998180163785, |
| "grad_norm": 1.4623540558631782, |
| "learning_rate": 4.962088294441299e-06, |
| "loss": 0.0754, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.278434940855323, |
| "grad_norm": 2.3501285954750806, |
| "learning_rate": 4.9619642082940135e-06, |
| "loss": 0.1, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2788898999090082, |
| "grad_norm": 1.6593172768016098, |
| "learning_rate": 4.9618399209661305e-06, |
| "loss": 0.0918, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.27934485896269334, |
| "grad_norm": 1.4913746956676242, |
| "learning_rate": 4.961715432467807e-06, |
| "loss": 0.0788, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.27979981801637854, |
| "grad_norm": 1.3335438953393988, |
| "learning_rate": 4.961590742809216e-06, |
| "loss": 0.0743, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2802547770700637, |
| "grad_norm": 1.4631866469804606, |
| "learning_rate": 4.961465852000545e-06, |
| "loss": 0.0869, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2807097361237489, |
| "grad_norm": 1.8021656107937525, |
| "learning_rate": 4.961340760052001e-06, |
| "loss": 0.0906, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.28116469517743403, |
| "grad_norm": 1.74213914067233, |
| "learning_rate": 4.961215466973806e-06, |
| "loss": 0.0926, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2816196542311192, |
| "grad_norm": 2.764803909834576, |
| "learning_rate": 4.961089972776197e-06, |
| "loss": 0.1823, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2820746132848044, |
| "grad_norm": 1.3665676735119967, |
| "learning_rate": 4.9609642774694285e-06, |
| "loss": 0.0734, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2825295723384895, |
| "grad_norm": 1.9426323562959267, |
| "learning_rate": 4.960838381063774e-06, |
| "loss": 0.0972, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2829845313921747, |
| "grad_norm": 2.3374254341147322, |
| "learning_rate": 4.960712283569521e-06, |
| "loss": 0.1411, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.28343949044585987, |
| "grad_norm": 2.2747894788958543, |
| "learning_rate": 4.960585984996971e-06, |
| "loss": 0.1033, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.28389444949954507, |
| "grad_norm": 1.7445142059152803, |
| "learning_rate": 4.960459485356447e-06, |
| "loss": 0.1222, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2843494085532302, |
| "grad_norm": 1.5220008831965313, |
| "learning_rate": 4.960332784658285e-06, |
| "loss": 0.1027, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28480436760691535, |
| "grad_norm": 2.1347326062219034, |
| "learning_rate": 4.960205882912839e-06, |
| "loss": 0.1237, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.28525932666060055, |
| "grad_norm": 2.5984695620436002, |
| "learning_rate": 4.9600787801304785e-06, |
| "loss": 0.1871, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.1207792848317375, |
| "learning_rate": 4.959951476321589e-06, |
| "loss": 0.1205, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2861692447679709, |
| "grad_norm": 1.1897630810057305, |
| "learning_rate": 4.959823971496575e-06, |
| "loss": 0.0773, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.28662420382165604, |
| "grad_norm": 3.4920069239312976, |
| "learning_rate": 4.959696265665853e-06, |
| "loss": 0.1897, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28707916287534124, |
| "grad_norm": 1.425742783647833, |
| "learning_rate": 4.959568358839862e-06, |
| "loss": 0.0635, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2875341219290264, |
| "grad_norm": 1.330689822741385, |
| "learning_rate": 4.95944025102905e-06, |
| "loss": 0.0722, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.28798908098271153, |
| "grad_norm": 1.99039564333339, |
| "learning_rate": 4.959311942243888e-06, |
| "loss": 0.1158, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.28844404003639673, |
| "grad_norm": 1.593751969696495, |
| "learning_rate": 4.95918343249486e-06, |
| "loss": 0.0861, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2888989990900819, |
| "grad_norm": 1.8945402616067804, |
| "learning_rate": 4.959054721792469e-06, |
| "loss": 0.1171, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2893539581437671, |
| "grad_norm": 1.4569740573581391, |
| "learning_rate": 4.958925810147231e-06, |
| "loss": 0.0777, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2898089171974522, |
| "grad_norm": 1.7102068304451903, |
| "learning_rate": 4.958796697569679e-06, |
| "loss": 0.0872, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2902638762511374, |
| "grad_norm": 1.5378977203553044, |
| "learning_rate": 4.958667384070365e-06, |
| "loss": 0.0796, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.29071883530482256, |
| "grad_norm": 1.9723232607058794, |
| "learning_rate": 4.958537869659855e-06, |
| "loss": 0.1204, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2911737943585077, |
| "grad_norm": 1.4856408560761394, |
| "learning_rate": 4.958408154348734e-06, |
| "loss": 0.0763, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2916287534121929, |
| "grad_norm": 1.7342797592944788, |
| "learning_rate": 4.9582782381476e-06, |
| "loss": 0.1104, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.29208371246587805, |
| "grad_norm": 2.179383476129295, |
| "learning_rate": 4.958148121067071e-06, |
| "loss": 0.1694, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.29253867151956325, |
| "grad_norm": 1.8609060135735762, |
| "learning_rate": 4.9580178031177775e-06, |
| "loss": 0.1303, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2929936305732484, |
| "grad_norm": 1.4742279064065518, |
| "learning_rate": 4.9578872843103694e-06, |
| "loss": 0.1001, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2934485896269336, |
| "grad_norm": 1.7670333338462736, |
| "learning_rate": 4.957756564655513e-06, |
| "loss": 0.1022, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.29390354868061874, |
| "grad_norm": 1.6630538784639108, |
| "learning_rate": 4.957625644163888e-06, |
| "loss": 0.1055, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2943585077343039, |
| "grad_norm": 1.9118546637397547, |
| "learning_rate": 4.957494522846194e-06, |
| "loss": 0.1029, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2948134667879891, |
| "grad_norm": 1.7468783195584092, |
| "learning_rate": 4.957363200713146e-06, |
| "loss": 0.13, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.29526842584167423, |
| "grad_norm": 1.4923304655802225, |
| "learning_rate": 4.957231677775475e-06, |
| "loss": 0.0846, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.29572338489535943, |
| "grad_norm": 2.0864859163635407, |
| "learning_rate": 4.957099954043928e-06, |
| "loss": 0.1363, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2961783439490446, |
| "grad_norm": 1.467640729386297, |
| "learning_rate": 4.956968029529269e-06, |
| "loss": 0.113, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2966333030027298, |
| "grad_norm": 1.5940129351295147, |
| "learning_rate": 4.956835904242277e-06, |
| "loss": 0.1121, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2970882620564149, |
| "grad_norm": 1.305300483782713, |
| "learning_rate": 4.9567035781937516e-06, |
| "loss": 0.0569, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.29754322111010006, |
| "grad_norm": 1.8626374769697236, |
| "learning_rate": 4.9565710513945024e-06, |
| "loss": 0.095, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.29799818016378526, |
| "grad_norm": 1.9350135167075724, |
| "learning_rate": 4.956438323855362e-06, |
| "loss": 0.11, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2984531392174704, |
| "grad_norm": 1.7292500874953625, |
| "learning_rate": 4.956305395587174e-06, |
| "loss": 0.1259, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2989080982711556, |
| "grad_norm": 1.7021672274359103, |
| "learning_rate": 4.956172266600802e-06, |
| "loss": 0.0857, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.29936305732484075, |
| "grad_norm": 1.2481942065304896, |
| "learning_rate": 4.956038936907125e-06, |
| "loss": 0.0776, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.29981801637852595, |
| "grad_norm": 1.4091727470459356, |
| "learning_rate": 4.955905406517036e-06, |
| "loss": 0.0706, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3002729754322111, |
| "grad_norm": 1.8640524340898077, |
| "learning_rate": 4.95577167544145e-06, |
| "loss": 0.1176, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.30072793448589624, |
| "grad_norm": 2.0619543797721698, |
| "learning_rate": 4.955637743691291e-06, |
| "loss": 0.1148, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.30118289353958144, |
| "grad_norm": 1.9364848961200234, |
| "learning_rate": 4.955503611277506e-06, |
| "loss": 0.0964, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3016378525932666, |
| "grad_norm": 1.5509916734065172, |
| "learning_rate": 4.955369278211055e-06, |
| "loss": 0.0824, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3020928116469518, |
| "grad_norm": 1.8848317603882998, |
| "learning_rate": 4.955234744502914e-06, |
| "loss": 0.1, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.30254777070063693, |
| "grad_norm": 1.7147002197137917, |
| "learning_rate": 4.955100010164079e-06, |
| "loss": 0.1042, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30300272975432213, |
| "grad_norm": 1.8287392204283686, |
| "learning_rate": 4.954965075205557e-06, |
| "loss": 0.0894, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3034576888080073, |
| "grad_norm": 3.2978505813072765, |
| "learning_rate": 4.9548299396383755e-06, |
| "loss": 0.1555, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3039126478616925, |
| "grad_norm": 1.733214316892207, |
| "learning_rate": 4.954694603473578e-06, |
| "loss": 0.0848, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3043676069153776, |
| "grad_norm": 2.1290440022616917, |
| "learning_rate": 4.954559066722222e-06, |
| "loss": 0.1329, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.30482256596906276, |
| "grad_norm": 1.7482728884321743, |
| "learning_rate": 4.954423329395385e-06, |
| "loss": 0.1135, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30527752502274796, |
| "grad_norm": 1.8272762006745102, |
| "learning_rate": 4.954287391504156e-06, |
| "loss": 0.1233, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3057324840764331, |
| "grad_norm": 2.276356474817249, |
| "learning_rate": 4.9541512530596455e-06, |
| "loss": 0.1426, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3061874431301183, |
| "grad_norm": 1.5212465132609405, |
| "learning_rate": 4.954014914072978e-06, |
| "loss": 0.0908, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.30664240218380345, |
| "grad_norm": 1.7081770141846233, |
| "learning_rate": 4.9538783745552934e-06, |
| "loss": 0.1069, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.30709736123748865, |
| "grad_norm": 2.2065783569813755, |
| "learning_rate": 4.95374163451775e-06, |
| "loss": 0.1303, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3075523202911738, |
| "grad_norm": 1.9717809133208803, |
| "learning_rate": 4.953604693971521e-06, |
| "loss": 0.0969, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.30800727934485894, |
| "grad_norm": 1.5094990032560427, |
| "learning_rate": 4.953467552927798e-06, |
| "loss": 0.059, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.30846223839854414, |
| "grad_norm": 2.5084055121202726, |
| "learning_rate": 4.9533302113977845e-06, |
| "loss": 0.141, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3089171974522293, |
| "grad_norm": 2.1105100650062814, |
| "learning_rate": 4.9531926693927055e-06, |
| "loss": 0.1162, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3093721565059145, |
| "grad_norm": 1.9374617838160508, |
| "learning_rate": 4.953054926923801e-06, |
| "loss": 0.1119, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.30982711555959963, |
| "grad_norm": 2.266159358282095, |
| "learning_rate": 4.952916984002325e-06, |
| "loss": 0.1188, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.31028207461328483, |
| "grad_norm": 2.1490900129362243, |
| "learning_rate": 4.95277884063955e-06, |
| "loss": 0.1337, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.31073703366697, |
| "grad_norm": 1.5330806658735066, |
| "learning_rate": 4.952640496846766e-06, |
| "loss": 0.109, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3111919927206551, |
| "grad_norm": 1.41231573264733, |
| "learning_rate": 4.952501952635276e-06, |
| "loss": 0.0837, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3116469517743403, |
| "grad_norm": 1.993511064296186, |
| "learning_rate": 4.952363208016402e-06, |
| "loss": 0.1272, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.31210191082802546, |
| "grad_norm": 1.6098606771380728, |
| "learning_rate": 4.952224263001482e-06, |
| "loss": 0.0816, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.31255686988171066, |
| "grad_norm": 1.2309412681015492, |
| "learning_rate": 4.952085117601868e-06, |
| "loss": 0.0692, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3130118289353958, |
| "grad_norm": 1.7997377974129165, |
| "learning_rate": 4.951945771828933e-06, |
| "loss": 0.1322, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.313466787989081, |
| "grad_norm": 1.3223154067967124, |
| "learning_rate": 4.951806225694061e-06, |
| "loss": 0.0979, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.31392174704276615, |
| "grad_norm": 1.9747397800251965, |
| "learning_rate": 4.951666479208658e-06, |
| "loss": 0.1184, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3143767060964513, |
| "grad_norm": 1.4466542632801185, |
| "learning_rate": 4.951526532384141e-06, |
| "loss": 0.085, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3148316651501365, |
| "grad_norm": 1.8649877852775587, |
| "learning_rate": 4.951386385231946e-06, |
| "loss": 0.1011, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.31528662420382164, |
| "grad_norm": 1.2680670071467166, |
| "learning_rate": 4.951246037763528e-06, |
| "loss": 0.0748, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.31574158325750684, |
| "grad_norm": 1.5151831279551418, |
| "learning_rate": 4.9511054899903524e-06, |
| "loss": 0.0874, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.316196542311192, |
| "grad_norm": 1.6436638497099227, |
| "learning_rate": 4.950964741923905e-06, |
| "loss": 0.0982, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3166515013648772, |
| "grad_norm": 1.5379093700813176, |
| "learning_rate": 4.950823793575688e-06, |
| "loss": 0.0857, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.31710646041856233, |
| "grad_norm": 2.4063943761092452, |
| "learning_rate": 4.950682644957218e-06, |
| "loss": 0.1253, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3175614194722475, |
| "grad_norm": 2.5063143673804844, |
| "learning_rate": 4.9505412960800295e-06, |
| "loss": 0.1511, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3180163785259327, |
| "grad_norm": 1.722833309256951, |
| "learning_rate": 4.950399746955673e-06, |
| "loss": 0.0999, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 1.8190148406823232, |
| "learning_rate": 4.950257997595716e-06, |
| "loss": 0.0895, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.318926296633303, |
| "grad_norm": 1.9186747250049239, |
| "learning_rate": 4.950116048011739e-06, |
| "loss": 0.0964, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.31938125568698816, |
| "grad_norm": 1.372930302125184, |
| "learning_rate": 4.949973898215344e-06, |
| "loss": 0.0589, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.31983621474067336, |
| "grad_norm": 1.9707430002902289, |
| "learning_rate": 4.949831548218146e-06, |
| "loss": 0.1054, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3202911737943585, |
| "grad_norm": 2.0845604349239832, |
| "learning_rate": 4.949688998031777e-06, |
| "loss": 0.1105, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.32074613284804365, |
| "grad_norm": 1.4969274131429369, |
| "learning_rate": 4.949546247667886e-06, |
| "loss": 0.0814, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.32120109190172885, |
| "grad_norm": 1.9940826155791407, |
| "learning_rate": 4.949403297138137e-06, |
| "loss": 0.1064, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.321656050955414, |
| "grad_norm": 1.7246519891154302, |
| "learning_rate": 4.949260146454212e-06, |
| "loss": 0.1093, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3221110100090992, |
| "grad_norm": 1.6890948945842699, |
| "learning_rate": 4.94911679562781e-06, |
| "loss": 0.0888, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.32256596906278434, |
| "grad_norm": 2.0455963687465837, |
| "learning_rate": 4.948973244670643e-06, |
| "loss": 0.1019, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.32302092811646954, |
| "grad_norm": 1.7678121189421865, |
| "learning_rate": 4.948829493594441e-06, |
| "loss": 0.0961, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3234758871701547, |
| "grad_norm": 1.3731566726245188, |
| "learning_rate": 4.9486855424109524e-06, |
| "loss": 0.072, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.32393084622383983, |
| "grad_norm": 1.4962983653581472, |
| "learning_rate": 4.948541391131939e-06, |
| "loss": 0.0905, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.32438580527752503, |
| "grad_norm": 1.4198695601427125, |
| "learning_rate": 4.948397039769181e-06, |
| "loss": 0.0616, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3248407643312102, |
| "grad_norm": 1.131377673368795, |
| "learning_rate": 4.948252488334474e-06, |
| "loss": 0.0526, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3252957233848954, |
| "grad_norm": 1.1969683311404917, |
| "learning_rate": 4.948107736839629e-06, |
| "loss": 0.0763, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3257506824385805, |
| "grad_norm": 1.6793927846583725, |
| "learning_rate": 4.947962785296476e-06, |
| "loss": 0.1153, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3262056414922657, |
| "grad_norm": 2.070694963019659, |
| "learning_rate": 4.9478176337168594e-06, |
| "loss": 0.1153, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.32666060054595086, |
| "grad_norm": 2.7729923804058516, |
| "learning_rate": 4.9476722821126386e-06, |
| "loss": 0.171, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.327115559599636, |
| "grad_norm": 1.4442284620787837, |
| "learning_rate": 4.9475267304956945e-06, |
| "loss": 0.0997, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3275705186533212, |
| "grad_norm": 2.0979816044129413, |
| "learning_rate": 4.947380978877917e-06, |
| "loss": 0.1138, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32802547770700635, |
| "grad_norm": 1.9982881232916472, |
| "learning_rate": 4.947235027271219e-06, |
| "loss": 0.1402, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.32848043676069155, |
| "grad_norm": 1.3317844805683108, |
| "learning_rate": 4.9470888756875265e-06, |
| "loss": 0.0707, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3289353958143767, |
| "grad_norm": 1.4665146144499257, |
| "learning_rate": 4.946942524138782e-06, |
| "loss": 0.075, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3293903548680619, |
| "grad_norm": 1.6321427811402383, |
| "learning_rate": 4.946795972636944e-06, |
| "loss": 0.0971, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.32984531392174704, |
| "grad_norm": 1.9541110640157349, |
| "learning_rate": 4.94664922119399e-06, |
| "loss": 0.1347, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3303002729754322, |
| "grad_norm": 1.664760132709453, |
| "learning_rate": 4.94650226982191e-06, |
| "loss": 0.0959, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3307552320291174, |
| "grad_norm": 2.509161708357272, |
| "learning_rate": 4.9463551185327115e-06, |
| "loss": 0.1885, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.33121019108280253, |
| "grad_norm": 1.7296886670922147, |
| "learning_rate": 4.946207767338422e-06, |
| "loss": 0.0867, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.33166515013648773, |
| "grad_norm": 1.5254904811287948, |
| "learning_rate": 4.9460602162510805e-06, |
| "loss": 0.09, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3321201091901729, |
| "grad_norm": 1.3404896968358107, |
| "learning_rate": 4.945912465282744e-06, |
| "loss": 0.0782, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3325750682438581, |
| "grad_norm": 1.79952897501454, |
| "learning_rate": 4.945764514445487e-06, |
| "loss": 0.1444, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3330300272975432, |
| "grad_norm": 2.48899319031489, |
| "learning_rate": 4.9456163637513986e-06, |
| "loss": 0.1136, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.33348498635122836, |
| "grad_norm": 1.8285171425829347, |
| "learning_rate": 4.945468013212585e-06, |
| "loss": 0.1052, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.33393994540491356, |
| "grad_norm": 1.7843881981445446, |
| "learning_rate": 4.945319462841169e-06, |
| "loss": 0.1116, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3343949044585987, |
| "grad_norm": 2.181301353034186, |
| "learning_rate": 4.94517071264929e-06, |
| "loss": 0.1251, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3348498635122839, |
| "grad_norm": 1.2980326592722402, |
| "learning_rate": 4.945021762649102e-06, |
| "loss": 0.0648, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.33530482256596905, |
| "grad_norm": 1.3874782347309536, |
| "learning_rate": 4.9448726128527776e-06, |
| "loss": 0.0978, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.33575978161965425, |
| "grad_norm": 1.8955499231356112, |
| "learning_rate": 4.944723263272504e-06, |
| "loss": 0.0998, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3362147406733394, |
| "grad_norm": 1.6102418502733031, |
| "learning_rate": 4.944573713920485e-06, |
| "loss": 0.1055, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.33666969972702454, |
| "grad_norm": 3.355056116777925, |
| "learning_rate": 4.944423964808943e-06, |
| "loss": 0.1831, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33712465878070974, |
| "grad_norm": 1.507329867530008, |
| "learning_rate": 4.944274015950113e-06, |
| "loss": 0.0889, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3375796178343949, |
| "grad_norm": 1.610548678904166, |
| "learning_rate": 4.944123867356249e-06, |
| "loss": 0.0752, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3380345768880801, |
| "grad_norm": 1.918715600058829, |
| "learning_rate": 4.943973519039619e-06, |
| "loss": 0.1335, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.33848953594176523, |
| "grad_norm": 1.3921163271356483, |
| "learning_rate": 4.943822971012511e-06, |
| "loss": 0.0727, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.33894449499545043, |
| "grad_norm": 1.2023922578586952, |
| "learning_rate": 4.943672223287226e-06, |
| "loss": 0.0628, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3393994540491356, |
| "grad_norm": 2.2794421985003317, |
| "learning_rate": 4.9435212758760815e-06, |
| "loss": 0.1404, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3398544131028208, |
| "grad_norm": 1.3986125533304865, |
| "learning_rate": 4.943370128791413e-06, |
| "loss": 0.0787, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3403093721565059, |
| "grad_norm": 1.5259961799310353, |
| "learning_rate": 4.943218782045574e-06, |
| "loss": 0.1079, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.34076433121019106, |
| "grad_norm": 1.8181192019120165, |
| "learning_rate": 4.943067235650927e-06, |
| "loss": 0.1195, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.34121929026387626, |
| "grad_norm": 1.831268771798402, |
| "learning_rate": 4.942915489619859e-06, |
| "loss": 0.1065, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3416742493175614, |
| "grad_norm": 1.7306841826817951, |
| "learning_rate": 4.9427635439647704e-06, |
| "loss": 0.1232, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3421292083712466, |
| "grad_norm": 1.7076927486745839, |
| "learning_rate": 4.942611398698075e-06, |
| "loss": 0.0912, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.34258416742493175, |
| "grad_norm": 1.7425991433970283, |
| "learning_rate": 4.942459053832208e-06, |
| "loss": 0.0997, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.34303912647861695, |
| "grad_norm": 1.809200639541382, |
| "learning_rate": 4.942306509379617e-06, |
| "loss": 0.1085, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3434940855323021, |
| "grad_norm": 1.293751880354007, |
| "learning_rate": 4.942153765352767e-06, |
| "loss": 0.0966, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34394904458598724, |
| "grad_norm": 1.2918089478267207, |
| "learning_rate": 4.94200082176414e-06, |
| "loss": 0.078, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.34440400363967244, |
| "grad_norm": 1.5059276244213293, |
| "learning_rate": 4.941847678626234e-06, |
| "loss": 0.0805, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3448589626933576, |
| "grad_norm": 1.4851814064844335, |
| "learning_rate": 4.941694335951563e-06, |
| "loss": 0.0983, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3453139217470428, |
| "grad_norm": 1.8989617812022122, |
| "learning_rate": 4.9415407937526575e-06, |
| "loss": 0.1107, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.34576888080072793, |
| "grad_norm": 1.8347292963195811, |
| "learning_rate": 4.9413870520420635e-06, |
| "loss": 0.1237, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34622383985441313, |
| "grad_norm": 1.5924498433598573, |
| "learning_rate": 4.941233110832346e-06, |
| "loss": 0.0735, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3466787989080983, |
| "grad_norm": 2.3326854621993984, |
| "learning_rate": 4.941078970136082e-06, |
| "loss": 0.1295, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3471337579617834, |
| "grad_norm": 1.7112828341096407, |
| "learning_rate": 4.940924629965869e-06, |
| "loss": 0.1162, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3475887170154686, |
| "grad_norm": 1.5436956280322631, |
| "learning_rate": 4.940770090334319e-06, |
| "loss": 0.0861, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.34804367606915376, |
| "grad_norm": 1.6236751771508604, |
| "learning_rate": 4.940615351254059e-06, |
| "loss": 0.0968, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.34849863512283896, |
| "grad_norm": 1.0400997330052792, |
| "learning_rate": 4.940460412737734e-06, |
| "loss": 0.0711, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3489535941765241, |
| "grad_norm": 1.623731539624473, |
| "learning_rate": 4.940305274798005e-06, |
| "loss": 0.0929, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3494085532302093, |
| "grad_norm": 1.3764287278870393, |
| "learning_rate": 4.940149937447549e-06, |
| "loss": 0.1002, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.34986351228389445, |
| "grad_norm": 1.1571526873015439, |
| "learning_rate": 4.939994400699061e-06, |
| "loss": 0.0659, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3503184713375796, |
| "grad_norm": 1.3670356182264325, |
| "learning_rate": 4.939838664565248e-06, |
| "loss": 0.0991, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3507734303912648, |
| "grad_norm": 1.2532975621868427, |
| "learning_rate": 4.939682729058839e-06, |
| "loss": 0.0713, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.35122838944494994, |
| "grad_norm": 1.3003896066972325, |
| "learning_rate": 4.939526594192574e-06, |
| "loss": 0.0784, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.35168334849863514, |
| "grad_norm": 1.4253255736587618, |
| "learning_rate": 4.939370259979213e-06, |
| "loss": 0.0826, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3521383075523203, |
| "grad_norm": 2.0399381310170766, |
| "learning_rate": 4.9392137264315295e-06, |
| "loss": 0.1293, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3525932666060055, |
| "grad_norm": 1.938165172266556, |
| "learning_rate": 4.939056993562316e-06, |
| "loss": 0.1407, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35304822565969063, |
| "grad_norm": 1.5665447950299711, |
| "learning_rate": 4.9389000613843805e-06, |
| "loss": 0.0942, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3535031847133758, |
| "grad_norm": 1.6514430942693614, |
| "learning_rate": 4.938742929910546e-06, |
| "loss": 0.0927, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.353958143767061, |
| "grad_norm": 1.0136329941515525, |
| "learning_rate": 4.938585599153652e-06, |
| "loss": 0.0676, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3544131028207461, |
| "grad_norm": 1.6808166258098367, |
| "learning_rate": 4.938428069126555e-06, |
| "loss": 0.1029, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3548680618744313, |
| "grad_norm": 1.6649052760273926, |
| "learning_rate": 4.9382703398421285e-06, |
| "loss": 0.0952, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35532302092811646, |
| "grad_norm": 1.734423574608651, |
| "learning_rate": 4.938112411313261e-06, |
| "loss": 0.1098, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.35577797998180166, |
| "grad_norm": 1.5154424391674823, |
| "learning_rate": 4.937954283552858e-06, |
| "loss": 0.0808, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3562329390354868, |
| "grad_norm": 1.6988796126790968, |
| "learning_rate": 4.93779595657384e-06, |
| "loss": 0.1066, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.35668789808917195, |
| "grad_norm": 2.050921985283142, |
| "learning_rate": 4.937637430389145e-06, |
| "loss": 0.1184, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 1.5678672253769157, |
| "learning_rate": 4.937478705011729e-06, |
| "loss": 0.0709, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3575978161965423, |
| "grad_norm": 1.5215473079480804, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.1086, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3580527752502275, |
| "grad_norm": 1.4009067409412712, |
| "learning_rate": 4.937160656730625e-06, |
| "loss": 0.1004, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.35850773430391264, |
| "grad_norm": 1.538795370618956, |
| "learning_rate": 4.9370013338529274e-06, |
| "loss": 0.0897, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.35896269335759784, |
| "grad_norm": 1.3446100123630027, |
| "learning_rate": 4.936841811834486e-06, |
| "loss": 0.0907, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.359417652411283, |
| "grad_norm": 1.9381081676057568, |
| "learning_rate": 4.936682090688337e-06, |
| "loss": 0.1534, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35987261146496813, |
| "grad_norm": 1.787589837431021, |
| "learning_rate": 4.936522170427531e-06, |
| "loss": 0.0919, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.36032757051865333, |
| "grad_norm": 1.7189621906826116, |
| "learning_rate": 4.936362051065136e-06, |
| "loss": 0.0799, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3607825295723385, |
| "grad_norm": 1.615638183805568, |
| "learning_rate": 4.936201732614238e-06, |
| "loss": 0.0898, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3612374886260237, |
| "grad_norm": 1.899483445293266, |
| "learning_rate": 4.9360412150879355e-06, |
| "loss": 0.1086, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3616924476797088, |
| "grad_norm": 1.8831302635176637, |
| "learning_rate": 4.935880498499346e-06, |
| "loss": 0.0951, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.362147406733394, |
| "grad_norm": 2.0172166216160594, |
| "learning_rate": 4.935719582861604e-06, |
| "loss": 0.0983, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.36260236578707916, |
| "grad_norm": 1.7713001106130557, |
| "learning_rate": 4.935558468187855e-06, |
| "loss": 0.1177, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3630573248407643, |
| "grad_norm": 2.049007453668216, |
| "learning_rate": 4.935397154491268e-06, |
| "loss": 0.1349, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3635122838944495, |
| "grad_norm": 2.02340700279538, |
| "learning_rate": 4.935235641785023e-06, |
| "loss": 0.1419, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.36396724294813465, |
| "grad_norm": 1.5504094804690502, |
| "learning_rate": 4.935073930082319e-06, |
| "loss": 0.1141, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36442220200181985, |
| "grad_norm": 1.3892292745868653, |
| "learning_rate": 4.93491201939637e-06, |
| "loss": 0.0859, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.364877161055505, |
| "grad_norm": 1.636711407623354, |
| "learning_rate": 4.934749909740408e-06, |
| "loss": 0.1168, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3653321201091902, |
| "grad_norm": 1.5867549476191922, |
| "learning_rate": 4.934587601127677e-06, |
| "loss": 0.0941, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.36578707916287534, |
| "grad_norm": 1.5019646850922737, |
| "learning_rate": 4.934425093571442e-06, |
| "loss": 0.0931, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3662420382165605, |
| "grad_norm": 1.5412581659446851, |
| "learning_rate": 4.934262387084984e-06, |
| "loss": 0.0931, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3666969972702457, |
| "grad_norm": 1.3579602631174856, |
| "learning_rate": 4.934099481681595e-06, |
| "loss": 0.0745, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.36715195632393083, |
| "grad_norm": 1.800459979497766, |
| "learning_rate": 4.933936377374589e-06, |
| "loss": 0.1072, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.36760691537761603, |
| "grad_norm": 1.1946995764469395, |
| "learning_rate": 4.933773074177293e-06, |
| "loss": 0.0848, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3680618744313012, |
| "grad_norm": 1.6651644751131276, |
| "learning_rate": 4.933609572103053e-06, |
| "loss": 0.0965, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3685168334849864, |
| "grad_norm": 1.913995880200427, |
| "learning_rate": 4.933445871165229e-06, |
| "loss": 0.1315, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3689717925386715, |
| "grad_norm": 1.5517430124798408, |
| "learning_rate": 4.933281971377197e-06, |
| "loss": 0.0856, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.36942675159235666, |
| "grad_norm": 1.474632001508129, |
| "learning_rate": 4.933117872752352e-06, |
| "loss": 0.0989, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.36988171064604186, |
| "grad_norm": 1.8862093944877263, |
| "learning_rate": 4.932953575304102e-06, |
| "loss": 0.1087, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.370336669699727, |
| "grad_norm": 1.6830668966166524, |
| "learning_rate": 4.932789079045873e-06, |
| "loss": 0.1213, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3707916287534122, |
| "grad_norm": 1.7198476556190763, |
| "learning_rate": 4.932624383991106e-06, |
| "loss": 0.1215, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.37124658780709735, |
| "grad_norm": 2.109229814604393, |
| "learning_rate": 4.9324594901532605e-06, |
| "loss": 0.1337, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.37170154686078255, |
| "grad_norm": 1.4154701665481155, |
| "learning_rate": 4.93229439754581e-06, |
| "loss": 0.0944, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3721565059144677, |
| "grad_norm": 1.973608289061544, |
| "learning_rate": 4.932129106182246e-06, |
| "loss": 0.0901, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.37261146496815284, |
| "grad_norm": 1.651833939526615, |
| "learning_rate": 4.931963616076075e-06, |
| "loss": 0.0876, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.37306642402183804, |
| "grad_norm": 1.3876140677966586, |
| "learning_rate": 4.93179792724082e-06, |
| "loss": 0.0791, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3735213830755232, |
| "grad_norm": 1.4201117298181156, |
| "learning_rate": 4.9316320396900195e-06, |
| "loss": 0.0857, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3739763421292084, |
| "grad_norm": 2.158894018361071, |
| "learning_rate": 4.9314659534372305e-06, |
| "loss": 0.1499, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.37443130118289353, |
| "grad_norm": 1.2722019893377066, |
| "learning_rate": 4.931299668496024e-06, |
| "loss": 0.0626, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.37488626023657873, |
| "grad_norm": 1.5889108253283166, |
| "learning_rate": 4.931133184879988e-06, |
| "loss": 0.1003, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.37534121929026387, |
| "grad_norm": 1.133918642525753, |
| "learning_rate": 4.930966502602727e-06, |
| "loss": 0.0714, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37579617834394907, |
| "grad_norm": 2.1296168633446615, |
| "learning_rate": 4.930799621677862e-06, |
| "loss": 0.1276, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3762511373976342, |
| "grad_norm": 2.018575113751553, |
| "learning_rate": 4.93063254211903e-06, |
| "loss": 0.134, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.37670609645131936, |
| "grad_norm": 1.2247931548507431, |
| "learning_rate": 4.930465263939882e-06, |
| "loss": 0.0617, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.37716105550500456, |
| "grad_norm": 2.032637719937323, |
| "learning_rate": 4.9302977871540894e-06, |
| "loss": 0.1191, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3776160145586897, |
| "grad_norm": 1.8922514826155596, |
| "learning_rate": 4.930130111775336e-06, |
| "loss": 0.1136, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3780709736123749, |
| "grad_norm": 1.2345527477299194, |
| "learning_rate": 4.9299622378173245e-06, |
| "loss": 0.0613, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.37852593266606005, |
| "grad_norm": 2.2369584057058693, |
| "learning_rate": 4.929794165293773e-06, |
| "loss": 0.1384, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.37898089171974525, |
| "grad_norm": 1.2980952577352378, |
| "learning_rate": 4.9296258942184145e-06, |
| "loss": 0.0889, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3794358507734304, |
| "grad_norm": 2.116237658876168, |
| "learning_rate": 4.929457424605e-06, |
| "loss": 0.1156, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.37989080982711554, |
| "grad_norm": 1.820103679143319, |
| "learning_rate": 4.929288756467296e-06, |
| "loss": 0.1224, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.38034576888080074, |
| "grad_norm": 1.6658306682266317, |
| "learning_rate": 4.929119889819086e-06, |
| "loss": 0.0871, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3808007279344859, |
| "grad_norm": 2.7831412779318128, |
| "learning_rate": 4.928950824674169e-06, |
| "loss": 0.1447, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3812556869881711, |
| "grad_norm": 1.460745158832598, |
| "learning_rate": 4.928781561046359e-06, |
| "loss": 0.0902, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3817106460418562, |
| "grad_norm": 1.544649379546627, |
| "learning_rate": 4.928612098949488e-06, |
| "loss": 0.0995, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 1.583411250445995, |
| "learning_rate": 4.9284424383974026e-06, |
| "loss": 0.1007, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38262056414922657, |
| "grad_norm": 1.2960669635575661, |
| "learning_rate": 4.928272579403969e-06, |
| "loss": 0.0679, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3830755232029117, |
| "grad_norm": 1.4865280371498417, |
| "learning_rate": 4.928102521983067e-06, |
| "loss": 0.1208, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3835304822565969, |
| "grad_norm": 2.1345090660254145, |
| "learning_rate": 4.9279322661485906e-06, |
| "loss": 0.1489, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.38398544131028206, |
| "grad_norm": 1.705469805887344, |
| "learning_rate": 4.927761811914455e-06, |
| "loss": 0.1084, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.38444040036396726, |
| "grad_norm": 1.358954041720105, |
| "learning_rate": 4.927591159294587e-06, |
| "loss": 0.0827, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3848953594176524, |
| "grad_norm": 1.8335314647218843, |
| "learning_rate": 4.927420308302933e-06, |
| "loss": 0.102, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3853503184713376, |
| "grad_norm": 1.710141204765745, |
| "learning_rate": 4.927249258953454e-06, |
| "loss": 0.1091, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.38580527752502275, |
| "grad_norm": 1.7784989569871608, |
| "learning_rate": 4.927078011260126e-06, |
| "loss": 0.1094, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3862602365787079, |
| "grad_norm": 1.9072996593932403, |
| "learning_rate": 4.926906565236943e-06, |
| "loss": 0.1255, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3867151956323931, |
| "grad_norm": 1.7435526255624214, |
| "learning_rate": 4.926734920897916e-06, |
| "loss": 0.1076, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38717015468607824, |
| "grad_norm": 1.3254342460194672, |
| "learning_rate": 4.926563078257071e-06, |
| "loss": 0.099, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.38762511373976344, |
| "grad_norm": 1.0985508710385608, |
| "learning_rate": 4.926391037328448e-06, |
| "loss": 0.0848, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3880800727934486, |
| "grad_norm": 1.6344858491886853, |
| "learning_rate": 4.926218798126108e-06, |
| "loss": 0.1102, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3885350318471338, |
| "grad_norm": 1.694464350768917, |
| "learning_rate": 4.926046360664124e-06, |
| "loss": 0.0868, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3889899909008189, |
| "grad_norm": 1.865189060623283, |
| "learning_rate": 4.925873724956588e-06, |
| "loss": 0.1152, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.38944494995450407, |
| "grad_norm": 1.794490671041637, |
| "learning_rate": 4.9257008910176065e-06, |
| "loss": 0.1443, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.38989990900818927, |
| "grad_norm": 1.6294296423553156, |
| "learning_rate": 4.925527858861302e-06, |
| "loss": 0.092, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3903548680618744, |
| "grad_norm": 1.7424555145921712, |
| "learning_rate": 4.925354628501814e-06, |
| "loss": 0.1002, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3908098271155596, |
| "grad_norm": 2.309513172607415, |
| "learning_rate": 4.925181199953299e-06, |
| "loss": 0.1288, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.39126478616924476, |
| "grad_norm": 1.3668641274774587, |
| "learning_rate": 4.9250075732299285e-06, |
| "loss": 0.0903, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39171974522292996, |
| "grad_norm": 1.7785057619158235, |
| "learning_rate": 4.92483374834589e-06, |
| "loss": 0.1181, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3921747042766151, |
| "grad_norm": 1.5234971151354315, |
| "learning_rate": 4.9246597253153884e-06, |
| "loss": 0.0935, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.39262966333030025, |
| "grad_norm": 1.1791645313929775, |
| "learning_rate": 4.924485504152644e-06, |
| "loss": 0.0822, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.39308462238398545, |
| "grad_norm": 1.5983057485508323, |
| "learning_rate": 4.924311084871892e-06, |
| "loss": 0.0966, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.3935395814376706, |
| "grad_norm": 1.6634965227764558, |
| "learning_rate": 4.924136467487387e-06, |
| "loss": 0.0759, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3939945404913558, |
| "grad_norm": 1.5231170961334706, |
| "learning_rate": 4.923961652013397e-06, |
| "loss": 0.0881, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.39444949954504094, |
| "grad_norm": 1.4495990250164725, |
| "learning_rate": 4.923786638464207e-06, |
| "loss": 0.0941, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.39490445859872614, |
| "grad_norm": 1.3390712595063252, |
| "learning_rate": 4.9236114268541196e-06, |
| "loss": 0.0846, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3953594176524113, |
| "grad_norm": 1.627122973701433, |
| "learning_rate": 4.923436017197451e-06, |
| "loss": 0.0819, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3958143767060964, |
| "grad_norm": 1.3377642278691055, |
| "learning_rate": 4.923260409508535e-06, |
| "loss": 0.088, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3962693357597816, |
| "grad_norm": 1.9694748985572026, |
| "learning_rate": 4.9230846038017214e-06, |
| "loss": 0.151, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.39672429481346677, |
| "grad_norm": 1.4923965061921258, |
| "learning_rate": 4.922908600091378e-06, |
| "loss": 0.0795, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.39717925386715197, |
| "grad_norm": 1.8057120373297069, |
| "learning_rate": 4.9227323983918835e-06, |
| "loss": 0.1439, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3976342129208371, |
| "grad_norm": 1.226146313826682, |
| "learning_rate": 4.922555998717639e-06, |
| "loss": 0.0845, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3980891719745223, |
| "grad_norm": 1.4188073442884932, |
| "learning_rate": 4.922379401083058e-06, |
| "loss": 0.0723, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39854413102820746, |
| "grad_norm": 1.6044422866063657, |
| "learning_rate": 4.922202605502573e-06, |
| "loss": 0.0981, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3989990900818926, |
| "grad_norm": 1.645096377490142, |
| "learning_rate": 4.922025611990629e-06, |
| "loss": 0.0882, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3994540491355778, |
| "grad_norm": 1.4988618969542298, |
| "learning_rate": 4.92184842056169e-06, |
| "loss": 0.0914, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.39990900818926295, |
| "grad_norm": 1.4716766649704647, |
| "learning_rate": 4.921671031230235e-06, |
| "loss": 0.0843, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.40036396724294815, |
| "grad_norm": 1.8151437273817552, |
| "learning_rate": 4.921493444010759e-06, |
| "loss": 0.1115, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4008189262966333, |
| "grad_norm": 1.3841092562389385, |
| "learning_rate": 4.921315658917774e-06, |
| "loss": 0.0821, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4012738853503185, |
| "grad_norm": 1.5281014710080694, |
| "learning_rate": 4.921137675965809e-06, |
| "loss": 0.0894, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.40172884440400364, |
| "grad_norm": 1.1860457913745353, |
| "learning_rate": 4.920959495169406e-06, |
| "loss": 0.0819, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4021838034576888, |
| "grad_norm": 1.9670434695091386, |
| "learning_rate": 4.920781116543126e-06, |
| "loss": 0.1198, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.402638762511374, |
| "grad_norm": 1.4837005110977715, |
| "learning_rate": 4.920602540101546e-06, |
| "loss": 0.0871, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4030937215650591, |
| "grad_norm": 1.8269163623820734, |
| "learning_rate": 4.920423765859257e-06, |
| "loss": 0.0956, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4035486806187443, |
| "grad_norm": 1.6998774179110374, |
| "learning_rate": 4.920244793830869e-06, |
| "loss": 0.0973, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.40400363967242947, |
| "grad_norm": 1.6596471546846747, |
| "learning_rate": 4.920065624031006e-06, |
| "loss": 0.1085, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.40445859872611467, |
| "grad_norm": 1.4077908132773769, |
| "learning_rate": 4.919886256474309e-06, |
| "loss": 0.0904, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4049135577797998, |
| "grad_norm": 1.7022215596121757, |
| "learning_rate": 4.919706691175435e-06, |
| "loss": 0.091, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40536851683348496, |
| "grad_norm": 2.1232813584307455, |
| "learning_rate": 4.919526928149058e-06, |
| "loss": 0.1366, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.40582347588717016, |
| "grad_norm": 1.6341211456957871, |
| "learning_rate": 4.919346967409867e-06, |
| "loss": 0.1108, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4062784349408553, |
| "grad_norm": 1.5324489468460818, |
| "learning_rate": 4.919166808972567e-06, |
| "loss": 0.1228, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4067333939945405, |
| "grad_norm": 2.099437608372934, |
| "learning_rate": 4.918986452851881e-06, |
| "loss": 0.1245, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.40718835304822565, |
| "grad_norm": 1.3588941988828955, |
| "learning_rate": 4.918805899062545e-06, |
| "loss": 0.0621, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.40764331210191085, |
| "grad_norm": 0.8277266375645331, |
| "learning_rate": 4.9186251476193146e-06, |
| "loss": 0.0499, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.408098271155596, |
| "grad_norm": 1.7852175335240448, |
| "learning_rate": 4.918444198536959e-06, |
| "loss": 0.1206, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.40855323020928114, |
| "grad_norm": 1.5382745011065326, |
| "learning_rate": 4.918263051830267e-06, |
| "loss": 0.1081, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.40900818926296634, |
| "grad_norm": 1.621296590196374, |
| "learning_rate": 4.918081707514037e-06, |
| "loss": 0.0881, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4094631483166515, |
| "grad_norm": 2.178092466242458, |
| "learning_rate": 4.917900165603091e-06, |
| "loss": 0.1364, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4099181073703367, |
| "grad_norm": 1.5880350908655525, |
| "learning_rate": 4.9177184261122624e-06, |
| "loss": 0.1073, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4103730664240218, |
| "grad_norm": 1.8483741427612825, |
| "learning_rate": 4.917536489056402e-06, |
| "loss": 0.0972, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.410828025477707, |
| "grad_norm": 1.5893537500919641, |
| "learning_rate": 4.9173543544503775e-06, |
| "loss": 0.0851, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.41128298453139217, |
| "grad_norm": 1.144493331243443, |
| "learning_rate": 4.917172022309072e-06, |
| "loss": 0.0637, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.41173794358507737, |
| "grad_norm": 1.139422632834299, |
| "learning_rate": 4.916989492647385e-06, |
| "loss": 0.065, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4121929026387625, |
| "grad_norm": 1.2858602055549935, |
| "learning_rate": 4.916806765480231e-06, |
| "loss": 0.079, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.41264786169244766, |
| "grad_norm": 1.9716514818564959, |
| "learning_rate": 4.9166238408225416e-06, |
| "loss": 0.161, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.41310282074613286, |
| "grad_norm": 1.6206512831659239, |
| "learning_rate": 4.916440718689267e-06, |
| "loss": 0.0958, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.413557779799818, |
| "grad_norm": 1.2472167749456646, |
| "learning_rate": 4.916257399095369e-06, |
| "loss": 0.0705, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4140127388535032, |
| "grad_norm": 1.1891048303298737, |
| "learning_rate": 4.916073882055827e-06, |
| "loss": 0.0671, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41446769790718835, |
| "grad_norm": 1.9533245506572903, |
| "learning_rate": 4.91589016758564e-06, |
| "loss": 0.1203, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.41492265696087355, |
| "grad_norm": 1.7223916244259532, |
| "learning_rate": 4.915706255699817e-06, |
| "loss": 0.1171, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4153776160145587, |
| "grad_norm": 2.042050502050582, |
| "learning_rate": 4.915522146413389e-06, |
| "loss": 0.152, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.41583257506824384, |
| "grad_norm": 1.5213892799482642, |
| "learning_rate": 4.9153378397413985e-06, |
| "loss": 0.1011, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.41628753412192904, |
| "grad_norm": 1.8893914267841023, |
| "learning_rate": 4.915153335698908e-06, |
| "loss": 0.1133, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4167424931756142, |
| "grad_norm": 1.7882796521112458, |
| "learning_rate": 4.914968634300994e-06, |
| "loss": 0.1081, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4171974522292994, |
| "grad_norm": 1.186974851727905, |
| "learning_rate": 4.914783735562748e-06, |
| "loss": 0.0791, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4176524112829845, |
| "grad_norm": 1.3276822787818023, |
| "learning_rate": 4.914598639499281e-06, |
| "loss": 0.0929, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4181073703366697, |
| "grad_norm": 1.3143453344689244, |
| "learning_rate": 4.914413346125717e-06, |
| "loss": 0.0907, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.41856232939035487, |
| "grad_norm": 1.2706441279848544, |
| "learning_rate": 4.914227855457199e-06, |
| "loss": 0.0797, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41901728844404, |
| "grad_norm": 1.8437493208675002, |
| "learning_rate": 4.914042167508881e-06, |
| "loss": 0.0851, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4194722474977252, |
| "grad_norm": 1.4975873837594447, |
| "learning_rate": 4.9138562822959416e-06, |
| "loss": 0.0735, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.41992720655141036, |
| "grad_norm": 1.8590378932388973, |
| "learning_rate": 4.913670199833566e-06, |
| "loss": 0.0955, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.42038216560509556, |
| "grad_norm": 1.6110342357827778, |
| "learning_rate": 4.913483920136961e-06, |
| "loss": 0.0904, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4208371246587807, |
| "grad_norm": 1.761284240310015, |
| "learning_rate": 4.91329744322135e-06, |
| "loss": 0.0967, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4212920837124659, |
| "grad_norm": 1.3709410104557458, |
| "learning_rate": 4.913110769101971e-06, |
| "loss": 0.0872, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.42174704276615105, |
| "grad_norm": 1.6539854986144262, |
| "learning_rate": 4.912923897794077e-06, |
| "loss": 0.0982, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4222020018198362, |
| "grad_norm": 1.6465498130671066, |
| "learning_rate": 4.912736829312938e-06, |
| "loss": 0.1093, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4226569608735214, |
| "grad_norm": 1.8873864205133448, |
| "learning_rate": 4.912549563673842e-06, |
| "loss": 0.1239, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.42311191992720654, |
| "grad_norm": 1.5496708014603886, |
| "learning_rate": 4.912362100892091e-06, |
| "loss": 0.1273, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42356687898089174, |
| "grad_norm": 1.1519662533075623, |
| "learning_rate": 4.912174440983002e-06, |
| "loss": 0.0729, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4240218380345769, |
| "grad_norm": 1.6674274772885138, |
| "learning_rate": 4.911986583961912e-06, |
| "loss": 0.1107, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4244767970882621, |
| "grad_norm": 1.8943327104641587, |
| "learning_rate": 4.91179852984417e-06, |
| "loss": 0.0989, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4249317561419472, |
| "grad_norm": 1.3387420389544245, |
| "learning_rate": 4.911610278645144e-06, |
| "loss": 0.0873, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.42538671519563237, |
| "grad_norm": 1.3086866571732974, |
| "learning_rate": 4.911421830380217e-06, |
| "loss": 0.0767, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42584167424931757, |
| "grad_norm": 2.04544186641041, |
| "learning_rate": 4.911233185064788e-06, |
| "loss": 0.1285, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4262966333030027, |
| "grad_norm": 1.6906012723967403, |
| "learning_rate": 4.911044342714272e-06, |
| "loss": 0.0997, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4267515923566879, |
| "grad_norm": 1.439162135385858, |
| "learning_rate": 4.9108553033440995e-06, |
| "loss": 0.0744, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.42720655141037306, |
| "grad_norm": 1.2593154408057343, |
| "learning_rate": 4.91066606696972e-06, |
| "loss": 0.074, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.42766151046405826, |
| "grad_norm": 1.7514521824191083, |
| "learning_rate": 4.910476633606597e-06, |
| "loss": 0.0971, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4281164695177434, |
| "grad_norm": 1.5625231909908295, |
| "learning_rate": 4.9102870032702075e-06, |
| "loss": 0.0689, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.5194579023544843, |
| "learning_rate": 4.910097175976049e-06, |
| "loss": 0.0824, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.42902638762511375, |
| "grad_norm": 1.4223453649486908, |
| "learning_rate": 4.909907151739634e-06, |
| "loss": 0.0747, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4294813466787989, |
| "grad_norm": 2.2121264200483393, |
| "learning_rate": 4.909716930576489e-06, |
| "loss": 0.1463, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4299363057324841, |
| "grad_norm": 1.5012792406542972, |
| "learning_rate": 4.909526512502158e-06, |
| "loss": 0.1241, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.43039126478616924, |
| "grad_norm": 1.6714102508168673, |
| "learning_rate": 4.9093358975322025e-06, |
| "loss": 0.1045, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.43084622383985444, |
| "grad_norm": 1.5613346147429912, |
| "learning_rate": 4.909145085682198e-06, |
| "loss": 0.1105, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4313011828935396, |
| "grad_norm": 1.4864622392832871, |
| "learning_rate": 4.908954076967737e-06, |
| "loss": 0.0831, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4317561419472247, |
| "grad_norm": 1.5530391149425158, |
| "learning_rate": 4.908762871404427e-06, |
| "loss": 0.1345, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4322111010009099, |
| "grad_norm": 1.5444429676980205, |
| "learning_rate": 4.908571469007893e-06, |
| "loss": 0.0886, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43266606005459507, |
| "grad_norm": 1.8034818342216412, |
| "learning_rate": 4.908379869793776e-06, |
| "loss": 0.1046, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.43312101910828027, |
| "grad_norm": 1.3153452614362922, |
| "learning_rate": 4.908188073777732e-06, |
| "loss": 0.0715, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4335759781619654, |
| "grad_norm": 2.0825682650521857, |
| "learning_rate": 4.9079960809754334e-06, |
| "loss": 0.135, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4340309372156506, |
| "grad_norm": 1.3431541090651076, |
| "learning_rate": 4.90780389140257e-06, |
| "loss": 0.0812, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.43448589626933576, |
| "grad_norm": 2.018134282960315, |
| "learning_rate": 4.907611505074846e-06, |
| "loss": 0.1001, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4349408553230209, |
| "grad_norm": 1.8270847906398506, |
| "learning_rate": 4.907418922007983e-06, |
| "loss": 0.1054, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4353958143767061, |
| "grad_norm": 1.5502670619333374, |
| "learning_rate": 4.907226142217717e-06, |
| "loss": 0.0832, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.43585077343039125, |
| "grad_norm": 1.5099564094926066, |
| "learning_rate": 4.9070331657198015e-06, |
| "loss": 0.093, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.43630573248407645, |
| "grad_norm": 1.6580816557213998, |
| "learning_rate": 4.906839992530006e-06, |
| "loss": 0.1133, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4367606915377616, |
| "grad_norm": 1.9468112171012433, |
| "learning_rate": 4.906646622664115e-06, |
| "loss": 0.1122, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4372156505914468, |
| "grad_norm": 1.3246750710377195, |
| "learning_rate": 4.906453056137931e-06, |
| "loss": 0.0572, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.43767060964513194, |
| "grad_norm": 2.1577598041780846, |
| "learning_rate": 4.90625929296727e-06, |
| "loss": 0.1419, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4381255686988171, |
| "grad_norm": 1.3649728107391488, |
| "learning_rate": 4.9060653331679665e-06, |
| "loss": 0.1026, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4385805277525023, |
| "grad_norm": 1.7954750394301047, |
| "learning_rate": 4.90587117675587e-06, |
| "loss": 0.124, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4390354868061874, |
| "grad_norm": 1.6192897762023186, |
| "learning_rate": 4.905676823746846e-06, |
| "loss": 0.102, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4394904458598726, |
| "grad_norm": 1.183156466195084, |
| "learning_rate": 4.9054822741567745e-06, |
| "loss": 0.0741, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.43994540491355777, |
| "grad_norm": 1.791057313794206, |
| "learning_rate": 4.905287528001555e-06, |
| "loss": 0.0986, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.44040036396724297, |
| "grad_norm": 1.5587372758795195, |
| "learning_rate": 4.905092585297102e-06, |
| "loss": 0.0959, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4408553230209281, |
| "grad_norm": 1.9086814389692623, |
| "learning_rate": 4.904897446059344e-06, |
| "loss": 0.1124, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.44131028207461326, |
| "grad_norm": 1.5518685718016205, |
| "learning_rate": 4.9047021103042255e-06, |
| "loss": 0.0802, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.44176524112829846, |
| "grad_norm": 1.5626634869227398, |
| "learning_rate": 4.904506578047712e-06, |
| "loss": 0.0966, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4422202001819836, |
| "grad_norm": 1.6777151282946248, |
| "learning_rate": 4.9043108493057785e-06, |
| "loss": 0.0946, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4426751592356688, |
| "grad_norm": 1.3918546303467518, |
| "learning_rate": 4.904114924094421e-06, |
| "loss": 0.0776, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.44313011828935395, |
| "grad_norm": 1.7054781101293177, |
| "learning_rate": 4.903918802429648e-06, |
| "loss": 0.1076, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.44358507734303915, |
| "grad_norm": 0.9435161970580179, |
| "learning_rate": 4.9037224843274875e-06, |
| "loss": 0.055, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4440400363967243, |
| "grad_norm": 1.8279732096534727, |
| "learning_rate": 4.903525969803979e-06, |
| "loss": 0.144, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.44449499545040944, |
| "grad_norm": 1.5827975534285916, |
| "learning_rate": 4.903329258875184e-06, |
| "loss": 0.0876, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.44494995450409464, |
| "grad_norm": 1.5817514212508765, |
| "learning_rate": 4.903132351557175e-06, |
| "loss": 0.1003, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4454049135577798, |
| "grad_norm": 1.55794858043461, |
| "learning_rate": 4.902935247866043e-06, |
| "loss": 0.0901, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 1.7648097170403771, |
| "learning_rate": 4.9027379478178935e-06, |
| "loss": 0.1117, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4463148316651501, |
| "grad_norm": 1.4493752053158233, |
| "learning_rate": 4.90254045142885e-06, |
| "loss": 0.0824, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4467697907188353, |
| "grad_norm": 1.4618354488172722, |
| "learning_rate": 4.90234275871505e-06, |
| "loss": 0.08, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.44722474977252047, |
| "grad_norm": 2.314057245131694, |
| "learning_rate": 4.9021448696926486e-06, |
| "loss": 0.1437, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.44767970882620567, |
| "grad_norm": 1.2365214796695643, |
| "learning_rate": 4.901946784377816e-06, |
| "loss": 0.0955, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4481346678798908, |
| "grad_norm": 1.2633152164234291, |
| "learning_rate": 4.90174850278674e-06, |
| "loss": 0.0803, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.44858962693357596, |
| "grad_norm": 1.5083171008818446, |
| "learning_rate": 4.901550024935623e-06, |
| "loss": 0.0942, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.44904458598726116, |
| "grad_norm": 1.1583463791947812, |
| "learning_rate": 4.901351350840683e-06, |
| "loss": 0.0786, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4494995450409463, |
| "grad_norm": 1.343367085202188, |
| "learning_rate": 4.901152480518155e-06, |
| "loss": 0.0724, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4499545040946315, |
| "grad_norm": 1.1159650914918346, |
| "learning_rate": 4.900953413984289e-06, |
| "loss": 0.0681, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.45040946314831665, |
| "grad_norm": 2.0950998044271025, |
| "learning_rate": 4.900754151255353e-06, |
| "loss": 0.1541, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.45086442220200185, |
| "grad_norm": 1.4260341278646986, |
| "learning_rate": 4.9005546923476305e-06, |
| "loss": 0.0707, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.451319381255687, |
| "grad_norm": 1.6502415030386688, |
| "learning_rate": 4.9003550372774185e-06, |
| "loss": 0.1111, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.45177434030937214, |
| "grad_norm": 1.280806174818392, |
| "learning_rate": 4.900155186061033e-06, |
| "loss": 0.0789, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.45222929936305734, |
| "grad_norm": 1.9745186799391785, |
| "learning_rate": 4.8999551387148045e-06, |
| "loss": 0.1125, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4526842584167425, |
| "grad_norm": 1.2542781615680096, |
| "learning_rate": 4.89975489525508e-06, |
| "loss": 0.0814, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4531392174704277, |
| "grad_norm": 1.5218729573521388, |
| "learning_rate": 4.899554455698223e-06, |
| "loss": 0.0849, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4535941765241128, |
| "grad_norm": 1.4911465655176248, |
| "learning_rate": 4.899353820060612e-06, |
| "loss": 0.0887, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.454049135577798, |
| "grad_norm": 1.8552177664529743, |
| "learning_rate": 4.899152988358643e-06, |
| "loss": 0.1153, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.45450409463148317, |
| "grad_norm": 1.3462289694693903, |
| "learning_rate": 4.898951960608725e-06, |
| "loss": 0.0768, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4549590536851683, |
| "grad_norm": 1.5105165626051191, |
| "learning_rate": 4.8987507368272865e-06, |
| "loss": 0.0916, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4554140127388535, |
| "grad_norm": 1.7874012401425645, |
| "learning_rate": 4.898549317030772e-06, |
| "loss": 0.1228, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.45586897179253866, |
| "grad_norm": 1.8678564128703685, |
| "learning_rate": 4.898347701235637e-06, |
| "loss": 0.1226, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.45632393084622386, |
| "grad_norm": 1.9367180322034927, |
| "learning_rate": 4.89814588945836e-06, |
| "loss": 0.1239, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.456778889899909, |
| "grad_norm": 1.8462049373063074, |
| "learning_rate": 4.89794388171543e-06, |
| "loss": 0.1106, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4572338489535942, |
| "grad_norm": 1.7977459529642075, |
| "learning_rate": 4.897741678023356e-06, |
| "loss": 0.1137, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.45768880800727935, |
| "grad_norm": 1.4317415496884898, |
| "learning_rate": 4.897539278398659e-06, |
| "loss": 0.0835, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4581437670609645, |
| "grad_norm": 1.947224769167489, |
| "learning_rate": 4.8973366828578804e-06, |
| "loss": 0.1087, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4585987261146497, |
| "grad_norm": 1.6840082807319827, |
| "learning_rate": 4.897133891417574e-06, |
| "loss": 0.1004, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.45905368516833484, |
| "grad_norm": 1.6722996299672828, |
| "learning_rate": 4.896930904094311e-06, |
| "loss": 0.0869, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.45950864422202004, |
| "grad_norm": 2.2431321251776986, |
| "learning_rate": 4.896727720904679e-06, |
| "loss": 0.121, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4599636032757052, |
| "grad_norm": 1.2761704386307018, |
| "learning_rate": 4.896524341865282e-06, |
| "loss": 0.0736, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4604185623293904, |
| "grad_norm": 1.6413390038739506, |
| "learning_rate": 4.896320766992737e-06, |
| "loss": 0.1286, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4608735213830755, |
| "grad_norm": 1.5251335582402008, |
| "learning_rate": 4.896116996303682e-06, |
| "loss": 0.0989, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.46132848043676067, |
| "grad_norm": 1.8038369878473837, |
| "learning_rate": 4.895913029814766e-06, |
| "loss": 0.097, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.46178343949044587, |
| "grad_norm": 2.012861641550116, |
| "learning_rate": 4.895708867542658e-06, |
| "loss": 0.1111, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.462238398544131, |
| "grad_norm": 1.7366035889417508, |
| "learning_rate": 4.895504509504039e-06, |
| "loss": 0.1029, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4626933575978162, |
| "grad_norm": 1.3763665767496873, |
| "learning_rate": 4.89529995571561e-06, |
| "loss": 0.0938, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.46314831665150136, |
| "grad_norm": 1.6906151679744952, |
| "learning_rate": 4.895095206194086e-06, |
| "loss": 0.1085, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.46360327570518656, |
| "grad_norm": 1.5053749521419235, |
| "learning_rate": 4.894890260956198e-06, |
| "loss": 0.0884, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4640582347588717, |
| "grad_norm": 1.5334372638839222, |
| "learning_rate": 4.8946851200186925e-06, |
| "loss": 0.1015, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46451319381255685, |
| "grad_norm": 1.576638091265577, |
| "learning_rate": 4.894479783398334e-06, |
| "loss": 0.0903, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.46496815286624205, |
| "grad_norm": 1.7368682352331435, |
| "learning_rate": 4.8942742511119004e-06, |
| "loss": 0.1029, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4654231119199272, |
| "grad_norm": 3.9669130222003455, |
| "learning_rate": 4.894068523176187e-06, |
| "loss": 0.2383, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4658780709736124, |
| "grad_norm": 1.5974114766744798, |
| "learning_rate": 4.8938625996080056e-06, |
| "loss": 0.1116, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.46633303002729753, |
| "grad_norm": 1.1252846797063132, |
| "learning_rate": 4.893656480424184e-06, |
| "loss": 0.0673, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46678798908098273, |
| "grad_norm": 1.5329254322284862, |
| "learning_rate": 4.893450165641564e-06, |
| "loss": 0.1066, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4672429481346679, |
| "grad_norm": 1.3116647286111784, |
| "learning_rate": 4.893243655277005e-06, |
| "loss": 0.086, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.467697907188353, |
| "grad_norm": 1.5621452726926597, |
| "learning_rate": 4.893036949347383e-06, |
| "loss": 0.0937, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4681528662420382, |
| "grad_norm": 1.44299341979305, |
| "learning_rate": 4.892830047869588e-06, |
| "loss": 0.0922, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.46860782529572337, |
| "grad_norm": 1.2004173985623205, |
| "learning_rate": 4.892622950860527e-06, |
| "loss": 0.0545, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46906278434940857, |
| "grad_norm": 1.2933675353670258, |
| "learning_rate": 4.892415658337123e-06, |
| "loss": 0.0938, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4695177434030937, |
| "grad_norm": 1.3899639516557423, |
| "learning_rate": 4.892208170316317e-06, |
| "loss": 0.0807, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4699727024567789, |
| "grad_norm": 1.2103198454795117, |
| "learning_rate": 4.892000486815062e-06, |
| "loss": 0.0724, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.47042766151046406, |
| "grad_norm": 1.4625912187815495, |
| "learning_rate": 4.891792607850328e-06, |
| "loss": 0.0944, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4708826205641492, |
| "grad_norm": 2.3778377956475074, |
| "learning_rate": 4.891584533439104e-06, |
| "loss": 0.1301, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4713375796178344, |
| "grad_norm": 1.6240877825800288, |
| "learning_rate": 4.891376263598393e-06, |
| "loss": 0.1056, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.47179253867151955, |
| "grad_norm": 1.377205820937822, |
| "learning_rate": 4.891167798345213e-06, |
| "loss": 0.0879, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.47224749772520475, |
| "grad_norm": 1.918358313853146, |
| "learning_rate": 4.890959137696598e-06, |
| "loss": 0.1218, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4727024567788899, |
| "grad_norm": 1.9802948601827106, |
| "learning_rate": 4.890750281669601e-06, |
| "loss": 0.0966, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4731574158325751, |
| "grad_norm": 1.209426799273833, |
| "learning_rate": 4.890541230281287e-06, |
| "loss": 0.0687, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47361237488626023, |
| "grad_norm": 1.714672711362897, |
| "learning_rate": 4.8903319835487385e-06, |
| "loss": 0.1119, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4740673339399454, |
| "grad_norm": 1.8426958086935912, |
| "learning_rate": 4.890122541489056e-06, |
| "loss": 0.1071, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4745222929936306, |
| "grad_norm": 1.5412332450392434, |
| "learning_rate": 4.889912904119353e-06, |
| "loss": 0.1194, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4749772520473157, |
| "grad_norm": 1.5900743055736573, |
| "learning_rate": 4.88970307145676e-06, |
| "loss": 0.0905, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4754322111010009, |
| "grad_norm": 1.299438309320783, |
| "learning_rate": 4.889493043518423e-06, |
| "loss": 0.0782, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.47588717015468607, |
| "grad_norm": 1.2775434133946648, |
| "learning_rate": 4.889282820321506e-06, |
| "loss": 0.067, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.47634212920837127, |
| "grad_norm": 2.0181187729173313, |
| "learning_rate": 4.889072401883187e-06, |
| "loss": 0.1039, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4767970882620564, |
| "grad_norm": 1.3673144633984753, |
| "learning_rate": 4.88886178822066e-06, |
| "loss": 0.0871, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.47725204731574156, |
| "grad_norm": 1.5512598399498212, |
| "learning_rate": 4.888650979351136e-06, |
| "loss": 0.0936, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.47770700636942676, |
| "grad_norm": 1.8862924775266208, |
| "learning_rate": 4.888439975291841e-06, |
| "loss": 0.149, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4781619654231119, |
| "grad_norm": 1.527860807788029, |
| "learning_rate": 4.888228776060017e-06, |
| "loss": 0.0981, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4786169244767971, |
| "grad_norm": 1.635801739367282, |
| "learning_rate": 4.888017381672923e-06, |
| "loss": 0.1004, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.47907188353048225, |
| "grad_norm": 1.496869794404093, |
| "learning_rate": 4.887805792147832e-06, |
| "loss": 0.0921, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.47952684258416745, |
| "grad_norm": 1.729233289880027, |
| "learning_rate": 4.887594007502036e-06, |
| "loss": 0.089, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4799818016378526, |
| "grad_norm": 1.9599768924005974, |
| "learning_rate": 4.887382027752838e-06, |
| "loss": 0.1029, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.48043676069153773, |
| "grad_norm": 1.6584360062505734, |
| "learning_rate": 4.8871698529175636e-06, |
| "loss": 0.1173, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.48089171974522293, |
| "grad_norm": 1.631421092772313, |
| "learning_rate": 4.886957483013549e-06, |
| "loss": 0.1231, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4813466787989081, |
| "grad_norm": 2.3766899063373996, |
| "learning_rate": 4.886744918058149e-06, |
| "loss": 0.13, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4818016378525933, |
| "grad_norm": 1.7346716794855597, |
| "learning_rate": 4.886532158068732e-06, |
| "loss": 0.0938, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4822565969062784, |
| "grad_norm": 1.5214305907929453, |
| "learning_rate": 4.886319203062683e-06, |
| "loss": 0.0761, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4827115559599636, |
| "grad_norm": 1.6073102647133055, |
| "learning_rate": 4.886106053057408e-06, |
| "loss": 0.0818, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.48316651501364877, |
| "grad_norm": 1.803380712114119, |
| "learning_rate": 4.88589270807032e-06, |
| "loss": 0.1231, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.48362147406733397, |
| "grad_norm": 1.5275199982317587, |
| "learning_rate": 4.885679168118855e-06, |
| "loss": 0.1105, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4840764331210191, |
| "grad_norm": 1.8472965185652206, |
| "learning_rate": 4.8854654332204635e-06, |
| "loss": 0.1324, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.48453139217470426, |
| "grad_norm": 1.41701925154465, |
| "learning_rate": 4.885251503392607e-06, |
| "loss": 0.0767, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48498635122838946, |
| "grad_norm": 2.00437974621472, |
| "learning_rate": 4.885037378652771e-06, |
| "loss": 0.1336, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4854413102820746, |
| "grad_norm": 1.4895968911800157, |
| "learning_rate": 4.884823059018451e-06, |
| "loss": 0.0726, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4858962693357598, |
| "grad_norm": 1.5673178312119351, |
| "learning_rate": 4.88460854450716e-06, |
| "loss": 0.0843, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.48635122838944495, |
| "grad_norm": 1.1450505304026162, |
| "learning_rate": 4.884393835136427e-06, |
| "loss": 0.073, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.48680618744313015, |
| "grad_norm": 1.5223195045028948, |
| "learning_rate": 4.884178930923799e-06, |
| "loss": 0.0823, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4872611464968153, |
| "grad_norm": 1.912651615279676, |
| "learning_rate": 4.883963831886834e-06, |
| "loss": 0.0989, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.48771610555050043, |
| "grad_norm": 1.6904540179044927, |
| "learning_rate": 4.8837485380431115e-06, |
| "loss": 0.0981, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.48817106460418563, |
| "grad_norm": 1.4559744514600277, |
| "learning_rate": 4.883533049410223e-06, |
| "loss": 0.0874, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4886260236578708, |
| "grad_norm": 1.9041018278788933, |
| "learning_rate": 4.8833173660057785e-06, |
| "loss": 0.1065, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.489080982711556, |
| "grad_norm": 1.582657768337463, |
| "learning_rate": 4.8831014878474004e-06, |
| "loss": 0.0993, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4895359417652411, |
| "grad_norm": 1.487895945323618, |
| "learning_rate": 4.882885414952732e-06, |
| "loss": 0.0887, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4899909008189263, |
| "grad_norm": 1.1105199391014717, |
| "learning_rate": 4.882669147339428e-06, |
| "loss": 0.0521, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.49044585987261147, |
| "grad_norm": 1.3448385373486804, |
| "learning_rate": 4.882452685025161e-06, |
| "loss": 0.0606, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4909008189262966, |
| "grad_norm": 1.9169790386878416, |
| "learning_rate": 4.88223602802762e-06, |
| "loss": 0.1103, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4913557779799818, |
| "grad_norm": 1.4350936971881065, |
| "learning_rate": 4.882019176364509e-06, |
| "loss": 0.1052, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.49181073703366696, |
| "grad_norm": 1.9005260167330429, |
| "learning_rate": 4.881802130053548e-06, |
| "loss": 0.1217, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.49226569608735216, |
| "grad_norm": 1.4814940279383466, |
| "learning_rate": 4.881584889112473e-06, |
| "loss": 0.079, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4927206551410373, |
| "grad_norm": 1.7134074599855604, |
| "learning_rate": 4.881367453559036e-06, |
| "loss": 0.1025, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4931756141947225, |
| "grad_norm": 1.2847311247280295, |
| "learning_rate": 4.881149823411005e-06, |
| "loss": 0.0587, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.49363057324840764, |
| "grad_norm": 1.196984822353409, |
| "learning_rate": 4.880931998686162e-06, |
| "loss": 0.0779, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4940855323020928, |
| "grad_norm": 2.247552936990941, |
| "learning_rate": 4.880713979402311e-06, |
| "loss": 0.1534, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.494540491355778, |
| "grad_norm": 2.5523444538687645, |
| "learning_rate": 4.880495765577263e-06, |
| "loss": 0.146, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.49499545040946313, |
| "grad_norm": 1.7690099480339412, |
| "learning_rate": 4.880277357228852e-06, |
| "loss": 0.084, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.49545040946314833, |
| "grad_norm": 1.2117156565437108, |
| "learning_rate": 4.880058754374923e-06, |
| "loss": 0.0833, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4959053685168335, |
| "grad_norm": 1.5484757487864966, |
| "learning_rate": 4.879839957033343e-06, |
| "loss": 0.0938, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4963603275705187, |
| "grad_norm": 1.5534223234923523, |
| "learning_rate": 4.879620965221987e-06, |
| "loss": 0.09, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4968152866242038, |
| "grad_norm": 1.3405465803260945, |
| "learning_rate": 4.879401778958755e-06, |
| "loss": 0.0784, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.49727024567788897, |
| "grad_norm": 1.3343510524547628, |
| "learning_rate": 4.8791823982615525e-06, |
| "loss": 0.064, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.49772520473157417, |
| "grad_norm": 1.2315640234775116, |
| "learning_rate": 4.878962823148308e-06, |
| "loss": 0.067, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4981801637852593, |
| "grad_norm": 1.654273388728327, |
| "learning_rate": 4.878743053636968e-06, |
| "loss": 0.0964, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4986351228389445, |
| "grad_norm": 1.3344367681027707, |
| "learning_rate": 4.878523089745485e-06, |
| "loss": 0.0865, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.49909008189262966, |
| "grad_norm": 1.0737534169537484, |
| "learning_rate": 4.878302931491837e-06, |
| "loss": 0.0722, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.49954504094631486, |
| "grad_norm": 1.2217058614506033, |
| "learning_rate": 4.8780825788940145e-06, |
| "loss": 0.0531, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.765512273684173, |
| "learning_rate": 4.877862031970023e-06, |
| "loss": 0.1016, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5004549590536852, |
| "grad_norm": 2.1360497116346444, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.1095, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5009099181073703, |
| "grad_norm": 1.5928570797543171, |
| "learning_rate": 4.877420355215637e-06, |
| "loss": 0.0909, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5013648771610555, |
| "grad_norm": 1.9221830556747463, |
| "learning_rate": 4.877199225421334e-06, |
| "loss": 0.123, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5018198362147407, |
| "grad_norm": 1.967973587212139, |
| "learning_rate": 4.8769779013730454e-06, |
| "loss": 0.1535, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5022747952684259, |
| "grad_norm": 2.02512821365078, |
| "learning_rate": 4.876756383088858e-06, |
| "loss": 0.1173, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.502729754322111, |
| "grad_norm": 1.3904167109659709, |
| "learning_rate": 4.876534670586872e-06, |
| "loss": 0.0839, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5031847133757962, |
| "grad_norm": 1.4435165077122623, |
| "learning_rate": 4.8763127638852045e-06, |
| "loss": 0.0924, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5036396724294814, |
| "grad_norm": 1.7029448773247835, |
| "learning_rate": 4.87609066300199e-06, |
| "loss": 0.1076, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5040946314831665, |
| "grad_norm": 1.750067106251082, |
| "learning_rate": 4.875868367955376e-06, |
| "loss": 0.1077, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5045495905368517, |
| "grad_norm": 1.9748651822243342, |
| "learning_rate": 4.87564587876353e-06, |
| "loss": 0.1294, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5050045495905369, |
| "grad_norm": 1.7656971074259822, |
| "learning_rate": 4.87542319544463e-06, |
| "loss": 0.0974, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5054595086442221, |
| "grad_norm": 1.4817675230155858, |
| "learning_rate": 4.875200318016873e-06, |
| "loss": 0.0959, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5059144676979072, |
| "grad_norm": 1.603234528593141, |
| "learning_rate": 4.8749772464984736e-06, |
| "loss": 0.115, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5063694267515924, |
| "grad_norm": 1.7632465098077008, |
| "learning_rate": 4.874753980907658e-06, |
| "loss": 0.1224, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5068243858052776, |
| "grad_norm": 1.409315497870279, |
| "learning_rate": 4.8745305212626714e-06, |
| "loss": 0.0886, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5072793448589626, |
| "grad_norm": 1.3116197456740595, |
| "learning_rate": 4.874306867581775e-06, |
| "loss": 0.0853, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5077343039126478, |
| "grad_norm": 1.1746077003548339, |
| "learning_rate": 4.874083019883242e-06, |
| "loss": 0.0543, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.508189262966333, |
| "grad_norm": 1.941012957682845, |
| "learning_rate": 4.873858978185367e-06, |
| "loss": 0.1137, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5086442220200182, |
| "grad_norm": 2.32531280724128, |
| "learning_rate": 4.8736347425064565e-06, |
| "loss": 0.1627, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5090991810737033, |
| "grad_norm": 1.638539845007192, |
| "learning_rate": 4.873410312864833e-06, |
| "loss": 0.0988, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5095541401273885, |
| "grad_norm": 1.5695637896435937, |
| "learning_rate": 4.8731856892788384e-06, |
| "loss": 0.0918, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5100090991810737, |
| "grad_norm": 2.011157500272583, |
| "learning_rate": 4.872960871766826e-06, |
| "loss": 0.1316, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5104640582347588, |
| "grad_norm": 1.3312452781498474, |
| "learning_rate": 4.8727358603471675e-06, |
| "loss": 0.1007, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.510919017288444, |
| "grad_norm": 1.9359844901160286, |
| "learning_rate": 4.872510655038249e-06, |
| "loss": 0.1026, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5113739763421292, |
| "grad_norm": 1.2898375591874278, |
| "learning_rate": 4.872285255858476e-06, |
| "loss": 0.0929, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5118289353958144, |
| "grad_norm": 1.920657444015991, |
| "learning_rate": 4.872059662826263e-06, |
| "loss": 0.1129, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5122838944494995, |
| "grad_norm": 1.4550346247477233, |
| "learning_rate": 4.8718338759600465e-06, |
| "loss": 0.0824, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5127388535031847, |
| "grad_norm": 1.5791466307448474, |
| "learning_rate": 4.871607895278278e-06, |
| "loss": 0.1206, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5131938125568699, |
| "grad_norm": 1.204733566103446, |
| "learning_rate": 4.871381720799421e-06, |
| "loss": 0.0665, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.513648771610555, |
| "grad_norm": 1.6684092224882034, |
| "learning_rate": 4.8711553525419595e-06, |
| "loss": 0.1075, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5141037306642402, |
| "grad_norm": 1.4239501992031698, |
| "learning_rate": 4.87092879052439e-06, |
| "loss": 0.0957, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5145586897179254, |
| "grad_norm": 1.0934030596754927, |
| "learning_rate": 4.8707020347652275e-06, |
| "loss": 0.0686, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5150136487716106, |
| "grad_norm": 1.5870890463044125, |
| "learning_rate": 4.870475085283001e-06, |
| "loss": 0.1027, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5154686078252957, |
| "grad_norm": 1.6559311395509346, |
| "learning_rate": 4.870247942096254e-06, |
| "loss": 0.1008, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5159235668789809, |
| "grad_norm": 1.155174213270752, |
| "learning_rate": 4.870020605223551e-06, |
| "loss": 0.0592, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5163785259326661, |
| "grad_norm": 1.6869955821352955, |
| "learning_rate": 4.869793074683466e-06, |
| "loss": 0.0913, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5168334849863512, |
| "grad_norm": 2.19769614213437, |
| "learning_rate": 4.8695653504945925e-06, |
| "loss": 0.1237, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5172884440400364, |
| "grad_norm": 2.393558826937421, |
| "learning_rate": 4.8693374326755405e-06, |
| "loss": 0.1401, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5177434030937216, |
| "grad_norm": 1.3656006242910685, |
| "learning_rate": 4.869109321244932e-06, |
| "loss": 0.09, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5181983621474068, |
| "grad_norm": 1.4542523027566732, |
| "learning_rate": 4.86888101622141e-06, |
| "loss": 0.0918, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5186533212010919, |
| "grad_norm": 1.443069001120561, |
| "learning_rate": 4.868652517623629e-06, |
| "loss": 0.066, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5191082802547771, |
| "grad_norm": 1.3192549477432447, |
| "learning_rate": 4.86842382547026e-06, |
| "loss": 0.07, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5195632393084623, |
| "grad_norm": 1.4610522043176968, |
| "learning_rate": 4.868194939779992e-06, |
| "loss": 0.0603, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5200181983621474, |
| "grad_norm": 1.3807495660521953, |
| "learning_rate": 4.867965860571529e-06, |
| "loss": 0.086, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5204731574158326, |
| "grad_norm": 1.7439827425180354, |
| "learning_rate": 4.867736587863589e-06, |
| "loss": 0.1175, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5209281164695178, |
| "grad_norm": 3.8341122094242586, |
| "learning_rate": 4.867507121674907e-06, |
| "loss": 0.1369, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.521383075523203, |
| "grad_norm": 1.6708528784620404, |
| "learning_rate": 4.867277462024235e-06, |
| "loss": 0.0788, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.521838034576888, |
| "grad_norm": 1.8971649447454588, |
| "learning_rate": 4.8670476089303395e-06, |
| "loss": 0.138, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5222929936305732, |
| "grad_norm": 1.8468924709684824, |
| "learning_rate": 4.866817562412003e-06, |
| "loss": 0.1438, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5227479526842584, |
| "grad_norm": 1.6403934984754582, |
| "learning_rate": 4.866587322488024e-06, |
| "loss": 0.1223, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5232029117379435, |
| "grad_norm": 2.6178432136946843, |
| "learning_rate": 4.866356889177216e-06, |
| "loss": 0.1626, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5236578707916287, |
| "grad_norm": 1.7176781702000803, |
| "learning_rate": 4.866126262498409e-06, |
| "loss": 0.1169, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5241128298453139, |
| "grad_norm": 2.4788262927152256, |
| "learning_rate": 4.865895442470449e-06, |
| "loss": 0.1366, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5245677888989991, |
| "grad_norm": 1.4130512402331137, |
| "learning_rate": 4.865664429112199e-06, |
| "loss": 0.075, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5250227479526842, |
| "grad_norm": 2.161183666624184, |
| "learning_rate": 4.8654332224425345e-06, |
| "loss": 0.1219, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5254777070063694, |
| "grad_norm": 1.7134676925151036, |
| "learning_rate": 4.865201822480349e-06, |
| "loss": 0.1068, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5259326660600546, |
| "grad_norm": 1.2631225946147446, |
| "learning_rate": 4.864970229244552e-06, |
| "loss": 0.0732, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5263876251137397, |
| "grad_norm": 1.151791721954015, |
| "learning_rate": 4.864738442754068e-06, |
| "loss": 0.0612, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5268425841674249, |
| "grad_norm": 1.298125985364791, |
| "learning_rate": 4.864506463027837e-06, |
| "loss": 0.0841, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5272975432211101, |
| "grad_norm": 1.828500217819582, |
| "learning_rate": 4.864274290084816e-06, |
| "loss": 0.1279, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5277525022747953, |
| "grad_norm": 1.872568934497448, |
| "learning_rate": 4.864041923943978e-06, |
| "loss": 0.1041, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5282074613284804, |
| "grad_norm": 1.59985877807279, |
| "learning_rate": 4.863809364624309e-06, |
| "loss": 0.0996, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5286624203821656, |
| "grad_norm": 1.4920832769727852, |
| "learning_rate": 4.863576612144814e-06, |
| "loss": 0.1002, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5291173794358508, |
| "grad_norm": 1.9606964487777765, |
| "learning_rate": 4.863343666524512e-06, |
| "loss": 0.1113, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5295723384895359, |
| "grad_norm": 2.2204981872927774, |
| "learning_rate": 4.863110527782437e-06, |
| "loss": 0.1106, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5300272975432211, |
| "grad_norm": 1.7885324238047555, |
| "learning_rate": 4.8628771959376435e-06, |
| "loss": 0.1085, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5304822565969063, |
| "grad_norm": 1.7918603713541985, |
| "learning_rate": 4.862643671009195e-06, |
| "loss": 0.1007, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5309372156505915, |
| "grad_norm": 1.0998167564155898, |
| "learning_rate": 4.862409953016175e-06, |
| "loss": 0.0968, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5313921747042766, |
| "grad_norm": 1.853940722458201, |
| "learning_rate": 4.862176041977683e-06, |
| "loss": 0.1216, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5318471337579618, |
| "grad_norm": 1.4646094216764547, |
| "learning_rate": 4.861941937912832e-06, |
| "loss": 0.1116, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.532302092811647, |
| "grad_norm": 1.2365450205781439, |
| "learning_rate": 4.861707640840752e-06, |
| "loss": 0.0819, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5327570518653321, |
| "grad_norm": 1.6463867940760566, |
| "learning_rate": 4.861473150780589e-06, |
| "loss": 0.1094, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5332120109190173, |
| "grad_norm": 1.686704498138834, |
| "learning_rate": 4.8612384677515054e-06, |
| "loss": 0.1071, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5336669699727025, |
| "grad_norm": 1.2716060091758528, |
| "learning_rate": 4.861003591772677e-06, |
| "loss": 0.0788, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5341219290263877, |
| "grad_norm": 1.596228030510201, |
| "learning_rate": 4.860768522863297e-06, |
| "loss": 0.0716, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5345768880800728, |
| "grad_norm": 1.6508703177098787, |
| "learning_rate": 4.860533261042574e-06, |
| "loss": 0.0977, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.535031847133758, |
| "grad_norm": 1.3185419902691182, |
| "learning_rate": 4.8602978063297336e-06, |
| "loss": 0.1103, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5354868061874432, |
| "grad_norm": 1.6903360885675578, |
| "learning_rate": 4.8600621587440155e-06, |
| "loss": 0.0933, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5359417652411284, |
| "grad_norm": 1.5059509187961821, |
| "learning_rate": 4.859826318304676e-06, |
| "loss": 0.1093, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5363967242948134, |
| "grad_norm": 1.156363062560368, |
| "learning_rate": 4.859590285030986e-06, |
| "loss": 0.091, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5368516833484986, |
| "grad_norm": 1.4254896552320762, |
| "learning_rate": 4.859354058942234e-06, |
| "loss": 0.099, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5373066424021838, |
| "grad_norm": 1.6756998416867424, |
| "learning_rate": 4.859117640057723e-06, |
| "loss": 0.1058, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5377616014558689, |
| "grad_norm": 1.906068462189616, |
| "learning_rate": 4.858881028396773e-06, |
| "loss": 0.1344, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5382165605095541, |
| "grad_norm": 1.6813817476503583, |
| "learning_rate": 4.8586442239787165e-06, |
| "loss": 0.0938, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5386715195632393, |
| "grad_norm": 1.4947308906180774, |
| "learning_rate": 4.858407226822906e-06, |
| "loss": 0.1089, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5391264786169245, |
| "grad_norm": 1.5326514903244322, |
| "learning_rate": 4.858170036948707e-06, |
| "loss": 0.0903, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5395814376706096, |
| "grad_norm": 1.3397075921608799, |
| "learning_rate": 4.857932654375503e-06, |
| "loss": 0.079, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5400363967242948, |
| "grad_norm": 2.3382844220202963, |
| "learning_rate": 4.857695079122691e-06, |
| "loss": 0.1606, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.54049135577798, |
| "grad_norm": 1.2780125171194971, |
| "learning_rate": 4.857457311209683e-06, |
| "loss": 0.0819, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5409463148316651, |
| "grad_norm": 1.3621256537302653, |
| "learning_rate": 4.857219350655911e-06, |
| "loss": 0.0837, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5414012738853503, |
| "grad_norm": 1.4753266540938175, |
| "learning_rate": 4.856981197480818e-06, |
| "loss": 0.092, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5418562329390355, |
| "grad_norm": 1.1486583975675493, |
| "learning_rate": 4.856742851703866e-06, |
| "loss": 0.0695, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5423111919927207, |
| "grad_norm": 1.6118421470322997, |
| "learning_rate": 4.856504313344531e-06, |
| "loss": 0.1306, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5427661510464058, |
| "grad_norm": 1.654223645513978, |
| "learning_rate": 4.8562655824223055e-06, |
| "loss": 0.0868, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.543221110100091, |
| "grad_norm": 1.166432446622458, |
| "learning_rate": 4.856026658956697e-06, |
| "loss": 0.0592, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5436760691537762, |
| "grad_norm": 1.2408147318232963, |
| "learning_rate": 4.8557875429672295e-06, |
| "loss": 0.0893, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5441310282074613, |
| "grad_norm": 1.4658290533217708, |
| "learning_rate": 4.855548234473444e-06, |
| "loss": 0.1193, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5445859872611465, |
| "grad_norm": 1.5813217399288642, |
| "learning_rate": 4.8553087334948935e-06, |
| "loss": 0.1027, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5450409463148317, |
| "grad_norm": 1.346354212639339, |
| "learning_rate": 4.855069040051149e-06, |
| "loss": 0.0842, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5454959053685169, |
| "grad_norm": 1.7976208225125645, |
| "learning_rate": 4.854829154161799e-06, |
| "loss": 0.1231, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.545950864422202, |
| "grad_norm": 1.468188785415714, |
| "learning_rate": 4.854589075846445e-06, |
| "loss": 0.0941, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5464058234758872, |
| "grad_norm": 1.2900368220049758, |
| "learning_rate": 4.854348805124704e-06, |
| "loss": 0.0866, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5468607825295724, |
| "grad_norm": 1.465762931238317, |
| "learning_rate": 4.85410834201621e-06, |
| "loss": 0.0917, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5473157415832575, |
| "grad_norm": 2.030229358227215, |
| "learning_rate": 4.8538676865406155e-06, |
| "loss": 0.1367, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5477707006369427, |
| "grad_norm": 1.3216014713960686, |
| "learning_rate": 4.853626838717582e-06, |
| "loss": 0.0744, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5482256596906279, |
| "grad_norm": 1.34429128033589, |
| "learning_rate": 4.853385798566793e-06, |
| "loss": 0.072, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5486806187443131, |
| "grad_norm": 1.3681907039168972, |
| "learning_rate": 4.8531445661079444e-06, |
| "loss": 0.0772, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5491355777979982, |
| "grad_norm": 1.7634866119794534, |
| "learning_rate": 4.852903141360749e-06, |
| "loss": 0.1093, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5495905368516834, |
| "grad_norm": 1.3755217621758322, |
| "learning_rate": 4.852661524344933e-06, |
| "loss": 0.0706, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5500454959053686, |
| "grad_norm": 1.8792585200640362, |
| "learning_rate": 4.852419715080244e-06, |
| "loss": 0.1248, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5505004549590536, |
| "grad_norm": 1.3604609211138492, |
| "learning_rate": 4.852177713586437e-06, |
| "loss": 0.0849, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5509554140127388, |
| "grad_norm": 1.3077627182539715, |
| "learning_rate": 4.85193551988329e-06, |
| "loss": 0.0876, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.551410373066424, |
| "grad_norm": 1.5010970994642232, |
| "learning_rate": 4.851693133990594e-06, |
| "loss": 0.0887, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5518653321201092, |
| "grad_norm": 1.2366263332853158, |
| "learning_rate": 4.851450555928155e-06, |
| "loss": 0.0677, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5523202911737943, |
| "grad_norm": 1.9682815492889902, |
| "learning_rate": 4.851207785715797e-06, |
| "loss": 0.1605, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5527752502274795, |
| "grad_norm": 1.4810464832161876, |
| "learning_rate": 4.850964823373355e-06, |
| "loss": 0.1194, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5532302092811647, |
| "grad_norm": 1.269367325606048, |
| "learning_rate": 4.850721668920685e-06, |
| "loss": 0.0869, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5536851683348498, |
| "grad_norm": 1.811102361348233, |
| "learning_rate": 4.850478322377657e-06, |
| "loss": 0.113, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.554140127388535, |
| "grad_norm": 1.8234155506944059, |
| "learning_rate": 4.8502347837641536e-06, |
| "loss": 0.1337, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5545950864422202, |
| "grad_norm": 1.5374689189034605, |
| "learning_rate": 4.8499910531000776e-06, |
| "loss": 0.0923, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5550500454959054, |
| "grad_norm": 2.1434119748623583, |
| "learning_rate": 4.849747130405346e-06, |
| "loss": 0.1165, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5555050045495905, |
| "grad_norm": 1.5741068071079671, |
| "learning_rate": 4.849503015699889e-06, |
| "loss": 0.0833, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5559599636032757, |
| "grad_norm": 1.4450089536449229, |
| "learning_rate": 4.849258709003657e-06, |
| "loss": 0.0874, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5564149226569609, |
| "grad_norm": 2.0523390040501206, |
| "learning_rate": 4.849014210336612e-06, |
| "loss": 0.1311, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.556869881710646, |
| "grad_norm": 1.6272370459349303, |
| "learning_rate": 4.848769519718734e-06, |
| "loss": 0.1283, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5573248407643312, |
| "grad_norm": 1.7795199436155464, |
| "learning_rate": 4.848524637170018e-06, |
| "loss": 0.1053, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5577797998180164, |
| "grad_norm": 2.039787438198539, |
| "learning_rate": 4.848279562710474e-06, |
| "loss": 0.119, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5582347588717016, |
| "grad_norm": 1.048713205847522, |
| "learning_rate": 4.848034296360129e-06, |
| "loss": 0.0613, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5586897179253867, |
| "grad_norm": 1.2246704661323997, |
| "learning_rate": 4.847788838139025e-06, |
| "loss": 0.0907, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5591446769790719, |
| "grad_norm": 1.4248227073394217, |
| "learning_rate": 4.847543188067219e-06, |
| "loss": 0.0831, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5595996360327571, |
| "grad_norm": 1.6554531335771108, |
| "learning_rate": 4.847297346164786e-06, |
| "loss": 0.098, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5600545950864422, |
| "grad_norm": 1.6618601198336995, |
| "learning_rate": 4.8470513124518134e-06, |
| "loss": 0.1067, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5605095541401274, |
| "grad_norm": 1.910127735430222, |
| "learning_rate": 4.8468050869484075e-06, |
| "loss": 0.1153, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5609645131938126, |
| "grad_norm": 1.662154262618556, |
| "learning_rate": 4.846558669674688e-06, |
| "loss": 0.0858, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5614194722474978, |
| "grad_norm": 1.666011221920497, |
| "learning_rate": 4.8463120606507904e-06, |
| "loss": 0.087, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5618744313011829, |
| "grad_norm": 1.8392638033651618, |
| "learning_rate": 4.846065259896867e-06, |
| "loss": 0.1007, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5623293903548681, |
| "grad_norm": 1.823608778063299, |
| "learning_rate": 4.845818267433086e-06, |
| "loss": 0.1234, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5627843494085533, |
| "grad_norm": 1.6001337547517656, |
| "learning_rate": 4.845571083279629e-06, |
| "loss": 0.0992, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5632393084622384, |
| "grad_norm": 1.244896894294659, |
| "learning_rate": 4.845323707456696e-06, |
| "loss": 0.0911, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5636942675159236, |
| "grad_norm": 1.6134676145738456, |
| "learning_rate": 4.845076139984502e-06, |
| "loss": 0.0988, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5641492265696088, |
| "grad_norm": 1.817921705994322, |
| "learning_rate": 4.844828380883274e-06, |
| "loss": 0.1137, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.564604185623294, |
| "grad_norm": 1.223760267965902, |
| "learning_rate": 4.844580430173261e-06, |
| "loss": 0.0912, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.565059144676979, |
| "grad_norm": 1.0223923432784907, |
| "learning_rate": 4.8443322878747236e-06, |
| "loss": 0.0549, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.5655141037306642, |
| "grad_norm": 1.4179515952754742, |
| "learning_rate": 4.844083954007938e-06, |
| "loss": 0.0909, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.5659690627843494, |
| "grad_norm": 1.964821324684815, |
| "learning_rate": 4.843835428593198e-06, |
| "loss": 0.1331, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.5664240218380345, |
| "grad_norm": 1.8460290937807686, |
| "learning_rate": 4.84358671165081e-06, |
| "loss": 0.1355, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.5668789808917197, |
| "grad_norm": 1.9533421795112815, |
| "learning_rate": 4.843337803201102e-06, |
| "loss": 0.1493, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.5673339399454049, |
| "grad_norm": 1.7429301575956597, |
| "learning_rate": 4.8430887032644094e-06, |
| "loss": 0.1208, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.5677888989990901, |
| "grad_norm": 1.6048397609024965, |
| "learning_rate": 4.842839411861089e-06, |
| "loss": 0.1016, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.5682438580527752, |
| "grad_norm": 1.5611018277418034, |
| "learning_rate": 4.842589929011513e-06, |
| "loss": 0.0996, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.5686988171064604, |
| "grad_norm": 1.549763833499855, |
| "learning_rate": 4.8423402547360665e-06, |
| "loss": 0.1047, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5691537761601456, |
| "grad_norm": 1.5794849405940026, |
| "learning_rate": 4.842090389055153e-06, |
| "loss": 0.0885, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.5696087352138307, |
| "grad_norm": 1.340948229500544, |
| "learning_rate": 4.841840331989189e-06, |
| "loss": 0.082, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.5700636942675159, |
| "grad_norm": 1.187480617941468, |
| "learning_rate": 4.841590083558608e-06, |
| "loss": 0.0757, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.5705186533212011, |
| "grad_norm": 1.6889387454247615, |
| "learning_rate": 4.841339643783861e-06, |
| "loss": 0.1007, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.5709736123748863, |
| "grad_norm": 1.8032486510427874, |
| "learning_rate": 4.841089012685412e-06, |
| "loss": 0.1387, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.518781686351209, |
| "learning_rate": 4.840838190283741e-06, |
| "loss": 0.1073, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.5718835304822566, |
| "grad_norm": 1.2622352263295604, |
| "learning_rate": 4.8405871765993435e-06, |
| "loss": 0.0611, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.5723384895359418, |
| "grad_norm": 1.3733958676153404, |
| "learning_rate": 4.840335971652732e-06, |
| "loss": 0.0806, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.5727934485896269, |
| "grad_norm": 1.414930922234482, |
| "learning_rate": 4.840084575464434e-06, |
| "loss": 0.0967, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.5732484076433121, |
| "grad_norm": 1.3132222404269749, |
| "learning_rate": 4.839832988054992e-06, |
| "loss": 0.0844, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5737033666969973, |
| "grad_norm": 1.4304276264926878, |
| "learning_rate": 4.839581209444966e-06, |
| "loss": 0.08, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.5741583257506825, |
| "grad_norm": 1.6261976055252851, |
| "learning_rate": 4.839329239654927e-06, |
| "loss": 0.1086, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.5746132848043676, |
| "grad_norm": 1.4905660158866907, |
| "learning_rate": 4.839077078705468e-06, |
| "loss": 0.0758, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.5750682438580528, |
| "grad_norm": 1.6218355961437578, |
| "learning_rate": 4.838824726617194e-06, |
| "loss": 0.1066, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.575523202911738, |
| "grad_norm": 1.7405100413536567, |
| "learning_rate": 4.838572183410725e-06, |
| "loss": 0.1103, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5759781619654231, |
| "grad_norm": 1.5825357430240847, |
| "learning_rate": 4.838319449106697e-06, |
| "loss": 0.1026, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.5764331210191083, |
| "grad_norm": 1.4234319951879078, |
| "learning_rate": 4.838066523725764e-06, |
| "loss": 0.0761, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.5768880800727935, |
| "grad_norm": 1.4883172887933762, |
| "learning_rate": 4.837813407288594e-06, |
| "loss": 0.0989, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.5773430391264787, |
| "grad_norm": 1.437934945090456, |
| "learning_rate": 4.837560099815869e-06, |
| "loss": 0.0874, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.5777979981801638, |
| "grad_norm": 1.6175863411283686, |
| "learning_rate": 4.837306601328289e-06, |
| "loss": 0.1074, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.578252957233849, |
| "grad_norm": 1.3546376195879695, |
| "learning_rate": 4.837052911846569e-06, |
| "loss": 0.099, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.5787079162875342, |
| "grad_norm": 1.615443707505004, |
| "learning_rate": 4.836799031391439e-06, |
| "loss": 0.1093, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.5791628753412192, |
| "grad_norm": 0.7225881399048506, |
| "learning_rate": 4.836544959983645e-06, |
| "loss": 0.0439, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.5796178343949044, |
| "grad_norm": 2.1011993101699926, |
| "learning_rate": 4.8362906976439485e-06, |
| "loss": 0.1277, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.5800727934485896, |
| "grad_norm": 2.000601957434587, |
| "learning_rate": 4.836036244393127e-06, |
| "loss": 0.1495, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5805277525022748, |
| "grad_norm": 1.6950265520988297, |
| "learning_rate": 4.835781600251973e-06, |
| "loss": 0.0976, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.5809827115559599, |
| "grad_norm": 1.3727073330890776, |
| "learning_rate": 4.835526765241295e-06, |
| "loss": 0.0828, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.5814376706096451, |
| "grad_norm": 1.5570369931283408, |
| "learning_rate": 4.835271739381917e-06, |
| "loss": 0.1109, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.5818926296633303, |
| "grad_norm": 1.0713801990040446, |
| "learning_rate": 4.835016522694678e-06, |
| "loss": 0.0757, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.5823475887170154, |
| "grad_norm": 1.942364052088125, |
| "learning_rate": 4.834761115200434e-06, |
| "loss": 0.1642, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5828025477707006, |
| "grad_norm": 1.7377055370855508, |
| "learning_rate": 4.834505516920055e-06, |
| "loss": 0.1187, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.5832575068243858, |
| "grad_norm": 1.6956294426437164, |
| "learning_rate": 4.834249727874428e-06, |
| "loss": 0.1051, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.583712465878071, |
| "grad_norm": 1.4102019730152917, |
| "learning_rate": 4.833993748084455e-06, |
| "loss": 0.0704, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.5841674249317561, |
| "grad_norm": 1.2666669426637933, |
| "learning_rate": 4.833737577571052e-06, |
| "loss": 0.072, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.5846223839854413, |
| "grad_norm": 1.451859405282776, |
| "learning_rate": 4.833481216355153e-06, |
| "loss": 0.0833, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5850773430391265, |
| "grad_norm": 2.2038986828884846, |
| "learning_rate": 4.833224664457709e-06, |
| "loss": 0.1247, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5855323020928116, |
| "grad_norm": 2.170783563626466, |
| "learning_rate": 4.83296792189968e-06, |
| "loss": 0.0991, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5859872611464968, |
| "grad_norm": 1.8083451546198175, |
| "learning_rate": 4.83271098870205e-06, |
| "loss": 0.1067, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.586442220200182, |
| "grad_norm": 1.764270130263968, |
| "learning_rate": 4.832453864885811e-06, |
| "loss": 0.1181, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.5868971792538672, |
| "grad_norm": 1.3642172399097685, |
| "learning_rate": 4.832196550471976e-06, |
| "loss": 0.0844, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5873521383075523, |
| "grad_norm": 1.4693026944828678, |
| "learning_rate": 4.831939045481571e-06, |
| "loss": 0.1103, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5878070973612375, |
| "grad_norm": 1.370206188315079, |
| "learning_rate": 4.8316813499356375e-06, |
| "loss": 0.0914, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5882620564149227, |
| "grad_norm": 1.3729593032500749, |
| "learning_rate": 4.831423463855235e-06, |
| "loss": 0.0719, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5887170154686078, |
| "grad_norm": 1.4507728916778564, |
| "learning_rate": 4.8311653872614345e-06, |
| "loss": 0.086, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.589171974522293, |
| "grad_norm": 1.3070476542527247, |
| "learning_rate": 4.830907120175327e-06, |
| "loss": 0.077, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5896269335759782, |
| "grad_norm": 2.4221015667648045, |
| "learning_rate": 4.830648662618015e-06, |
| "loss": 0.1596, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5900818926296634, |
| "grad_norm": 1.103239260506278, |
| "learning_rate": 4.83039001461062e-06, |
| "loss": 0.0581, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5905368516833485, |
| "grad_norm": 1.8298909001729466, |
| "learning_rate": 4.830131176174276e-06, |
| "loss": 0.1082, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5909918107370337, |
| "grad_norm": 1.9201560834557836, |
| "learning_rate": 4.829872147330136e-06, |
| "loss": 0.1147, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5914467697907189, |
| "grad_norm": 1.332697111328447, |
| "learning_rate": 4.829612928099366e-06, |
| "loss": 0.0906, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.591901728844404, |
| "grad_norm": 1.2286901595425765, |
| "learning_rate": 4.829353518503147e-06, |
| "loss": 0.0741, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.5923566878980892, |
| "grad_norm": 1.21692580464079, |
| "learning_rate": 4.829093918562678e-06, |
| "loss": 0.0657, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.5928116469517744, |
| "grad_norm": 1.776387560928479, |
| "learning_rate": 4.828834128299173e-06, |
| "loss": 0.122, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5932666060054596, |
| "grad_norm": 2.2576443805946003, |
| "learning_rate": 4.828574147733859e-06, |
| "loss": 0.1395, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5937215650591446, |
| "grad_norm": 1.6394742041639938, |
| "learning_rate": 4.828313976887982e-06, |
| "loss": 0.0886, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5941765241128298, |
| "grad_norm": 1.729743531966717, |
| "learning_rate": 4.8280536157828e-06, |
| "loss": 0.1191, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.594631483166515, |
| "grad_norm": 1.4769755060752687, |
| "learning_rate": 4.827793064439592e-06, |
| "loss": 0.0965, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.5950864422202001, |
| "grad_norm": 1.4080505436977253, |
| "learning_rate": 4.8275323228796455e-06, |
| "loss": 0.0874, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5955414012738853, |
| "grad_norm": 0.9123649868426729, |
| "learning_rate": 4.8272713911242695e-06, |
| "loss": 0.0402, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5959963603275705, |
| "grad_norm": 1.1294729714943839, |
| "learning_rate": 4.827010269194785e-06, |
| "loss": 0.0631, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5964513193812557, |
| "grad_norm": 1.9689287013341512, |
| "learning_rate": 4.8267489571125295e-06, |
| "loss": 0.1181, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5969062784349408, |
| "grad_norm": 2.330161760291491, |
| "learning_rate": 4.826487454898857e-06, |
| "loss": 0.1448, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.597361237488626, |
| "grad_norm": 1.2992174727337271, |
| "learning_rate": 4.826225762575136e-06, |
| "loss": 0.0857, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5978161965423112, |
| "grad_norm": 1.4247199067825551, |
| "learning_rate": 4.825963880162752e-06, |
| "loss": 0.0863, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5982711555959963, |
| "grad_norm": 2.01495341050897, |
| "learning_rate": 4.825701807683102e-06, |
| "loss": 0.1072, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5987261146496815, |
| "grad_norm": 1.7412264774469277, |
| "learning_rate": 4.825439545157603e-06, |
| "loss": 0.1092, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5991810737033667, |
| "grad_norm": 1.4724909601046332, |
| "learning_rate": 4.825177092607687e-06, |
| "loss": 0.0999, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5996360327570519, |
| "grad_norm": 1.3473250398166379, |
| "learning_rate": 4.8249144500547995e-06, |
| "loss": 0.0847, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.600090991810737, |
| "grad_norm": 1.3069589653313691, |
| "learning_rate": 4.824651617520402e-06, |
| "loss": 0.0669, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6005459508644222, |
| "grad_norm": 1.5442197540840334, |
| "learning_rate": 4.824388595025972e-06, |
| "loss": 0.1178, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6010009099181074, |
| "grad_norm": 1.5331976112900332, |
| "learning_rate": 4.824125382593003e-06, |
| "loss": 0.0874, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6014558689717925, |
| "grad_norm": 1.4665462333148995, |
| "learning_rate": 4.823861980243003e-06, |
| "loss": 0.1106, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6019108280254777, |
| "grad_norm": 1.909519129682131, |
| "learning_rate": 4.823598387997497e-06, |
| "loss": 0.1163, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6023657870791629, |
| "grad_norm": 1.5641688210807196, |
| "learning_rate": 4.823334605878024e-06, |
| "loss": 0.0797, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6028207461328481, |
| "grad_norm": 1.572854435679942, |
| "learning_rate": 4.82307063390614e-06, |
| "loss": 0.09, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6032757051865332, |
| "grad_norm": 1.6242534333910885, |
| "learning_rate": 4.822806472103413e-06, |
| "loss": 0.1031, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6037306642402184, |
| "grad_norm": 1.3730669374310474, |
| "learning_rate": 4.822542120491431e-06, |
| "loss": 0.0842, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6041856232939036, |
| "grad_norm": 1.12030081002078, |
| "learning_rate": 4.822277579091796e-06, |
| "loss": 0.0933, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6046405823475887, |
| "grad_norm": 1.2764536589561721, |
| "learning_rate": 4.822012847926125e-06, |
| "loss": 0.0795, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6050955414012739, |
| "grad_norm": 1.4682540895282241, |
| "learning_rate": 4.821747927016049e-06, |
| "loss": 0.0834, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6055505004549591, |
| "grad_norm": 1.5003874511683086, |
| "learning_rate": 4.821482816383219e-06, |
| "loss": 0.1096, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6060054595086443, |
| "grad_norm": 1.2445527510541503, |
| "learning_rate": 4.821217516049296e-06, |
| "loss": 0.0789, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6064604185623294, |
| "grad_norm": 1.3266125786690217, |
| "learning_rate": 4.82095202603596e-06, |
| "loss": 0.0796, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6069153776160146, |
| "grad_norm": 1.5070167125246237, |
| "learning_rate": 4.820686346364906e-06, |
| "loss": 0.0924, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6073703366696998, |
| "grad_norm": 1.9776742406411276, |
| "learning_rate": 4.820420477057843e-06, |
| "loss": 0.1066, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.607825295723385, |
| "grad_norm": 1.7020369242588063, |
| "learning_rate": 4.820154418136498e-06, |
| "loss": 0.1212, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.60828025477707, |
| "grad_norm": 1.8050978290349085, |
| "learning_rate": 4.819888169622612e-06, |
| "loss": 0.1102, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6087352138307552, |
| "grad_norm": 1.4892394361348396, |
| "learning_rate": 4.819621731537942e-06, |
| "loss": 0.1139, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6091901728844404, |
| "grad_norm": 1.4499858080485506, |
| "learning_rate": 4.819355103904259e-06, |
| "loss": 0.0833, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6096451319381255, |
| "grad_norm": 1.5725512633612637, |
| "learning_rate": 4.81908828674335e-06, |
| "loss": 0.0915, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6101000909918107, |
| "grad_norm": 1.122002936682905, |
| "learning_rate": 4.81882128007702e-06, |
| "loss": 0.0706, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6105550500454959, |
| "grad_norm": 1.6231339345844462, |
| "learning_rate": 4.818554083927086e-06, |
| "loss": 0.0989, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6110100090991811, |
| "grad_norm": 1.5566168283978299, |
| "learning_rate": 4.818286698315383e-06, |
| "loss": 0.0802, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6114649681528662, |
| "grad_norm": 1.5209649714120241, |
| "learning_rate": 4.818019123263761e-06, |
| "loss": 0.1202, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6119199272065514, |
| "grad_norm": 1.5198574931775437, |
| "learning_rate": 4.817751358794084e-06, |
| "loss": 0.0824, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6123748862602366, |
| "grad_norm": 1.3969905074954028, |
| "learning_rate": 4.8174834049282325e-06, |
| "loss": 0.1004, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6128298453139217, |
| "grad_norm": 2.1750619266428455, |
| "learning_rate": 4.817215261688104e-06, |
| "loss": 0.1479, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6132848043676069, |
| "grad_norm": 1.4757724334002973, |
| "learning_rate": 4.816946929095607e-06, |
| "loss": 0.0816, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.6137397634212921, |
| "grad_norm": 1.5237192624117821, |
| "learning_rate": 4.816678407172671e-06, |
| "loss": 0.1043, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6141947224749773, |
| "grad_norm": 1.369442898723999, |
| "learning_rate": 4.816409695941238e-06, |
| "loss": 0.092, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6146496815286624, |
| "grad_norm": 1.3552993829733393, |
| "learning_rate": 4.816140795423265e-06, |
| "loss": 0.0896, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6151046405823476, |
| "grad_norm": 1.914785073036727, |
| "learning_rate": 4.8158717056407255e-06, |
| "loss": 0.1405, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6155595996360328, |
| "grad_norm": 2.510056256789934, |
| "learning_rate": 4.815602426615609e-06, |
| "loss": 0.1347, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6160145586897179, |
| "grad_norm": 1.6994784582879867, |
| "learning_rate": 4.815332958369919e-06, |
| "loss": 0.1043, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.6164695177434031, |
| "grad_norm": 1.609212664276651, |
| "learning_rate": 4.815063300925677e-06, |
| "loss": 0.0801, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6169244767970883, |
| "grad_norm": 1.3059644313522971, |
| "learning_rate": 4.814793454304915e-06, |
| "loss": 0.0962, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6173794358507735, |
| "grad_norm": 1.316795599125537, |
| "learning_rate": 4.814523418529686e-06, |
| "loss": 0.0945, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6178343949044586, |
| "grad_norm": 1.458401958119273, |
| "learning_rate": 4.814253193622056e-06, |
| "loss": 0.0931, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6182893539581438, |
| "grad_norm": 1.5782519499021963, |
| "learning_rate": 4.813982779604106e-06, |
| "loss": 0.086, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.618744313011829, |
| "grad_norm": 1.4337607882677579, |
| "learning_rate": 4.813712176497933e-06, |
| "loss": 0.1051, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6191992720655141, |
| "grad_norm": 1.7873980657918327, |
| "learning_rate": 4.813441384325649e-06, |
| "loss": 0.1049, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6196542311191993, |
| "grad_norm": 1.3606232019090971, |
| "learning_rate": 4.813170403109383e-06, |
| "loss": 0.0708, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6201091901728845, |
| "grad_norm": 1.3563405384219576, |
| "learning_rate": 4.8128992328712774e-06, |
| "loss": 0.086, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6205641492265697, |
| "grad_norm": 1.3192980800606737, |
| "learning_rate": 4.812627873633492e-06, |
| "loss": 0.0781, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6210191082802548, |
| "grad_norm": 1.7840648545688607, |
| "learning_rate": 4.8123563254182e-06, |
| "loss": 0.1361, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.62147406733394, |
| "grad_norm": 1.8322981514345795, |
| "learning_rate": 4.8120845882475924e-06, |
| "loss": 0.1282, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6219290263876252, |
| "grad_norm": 2.0823134031423267, |
| "learning_rate": 4.8118126621438734e-06, |
| "loss": 0.1303, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6223839854413102, |
| "grad_norm": 1.8738406581860008, |
| "learning_rate": 4.811540547129263e-06, |
| "loss": 0.1603, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6228389444949954, |
| "grad_norm": 1.7465048715810059, |
| "learning_rate": 4.811268243225999e-06, |
| "loss": 0.1157, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6232939035486806, |
| "grad_norm": 1.3620940982420815, |
| "learning_rate": 4.810995750456331e-06, |
| "loss": 0.0794, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6237488626023658, |
| "grad_norm": 1.7874358637623151, |
| "learning_rate": 4.810723068842526e-06, |
| "loss": 0.1272, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6242038216560509, |
| "grad_norm": 1.7579304475520012, |
| "learning_rate": 4.810450198406867e-06, |
| "loss": 0.1185, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.6246587807097361, |
| "grad_norm": 2.467789845960662, |
| "learning_rate": 4.810177139171653e-06, |
| "loss": 0.1557, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6251137397634213, |
| "grad_norm": 1.1425822722647716, |
| "learning_rate": 4.809903891159195e-06, |
| "loss": 0.0657, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6255686988171064, |
| "grad_norm": 2.016266262602286, |
| "learning_rate": 4.809630454391822e-06, |
| "loss": 0.107, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6260236578707916, |
| "grad_norm": 1.7559713706649986, |
| "learning_rate": 4.80935682889188e-06, |
| "loss": 0.1506, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6264786169244768, |
| "grad_norm": 1.4915046053791412, |
| "learning_rate": 4.809083014681726e-06, |
| "loss": 0.1212, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.626933575978162, |
| "grad_norm": 1.632149901378183, |
| "learning_rate": 4.808809011783735e-06, |
| "loss": 0.1266, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6273885350318471, |
| "grad_norm": 1.3124240257866033, |
| "learning_rate": 4.808534820220299e-06, |
| "loss": 0.0837, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6278434940855323, |
| "grad_norm": 1.7180772149333445, |
| "learning_rate": 4.8082604400138226e-06, |
| "loss": 0.1287, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6282984531392175, |
| "grad_norm": 1.071227301223936, |
| "learning_rate": 4.807985871186726e-06, |
| "loss": 0.0776, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6287534121929026, |
| "grad_norm": 1.7108717630459847, |
| "learning_rate": 4.8077111137614484e-06, |
| "loss": 0.0991, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6292083712465878, |
| "grad_norm": 1.6365913346705507, |
| "learning_rate": 4.8074361677604394e-06, |
| "loss": 0.1004, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.629663330300273, |
| "grad_norm": 1.6392222223495618, |
| "learning_rate": 4.807161033206168e-06, |
| "loss": 0.1002, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6301182893539582, |
| "grad_norm": 1.687969288374962, |
| "learning_rate": 4.806885710121114e-06, |
| "loss": 0.1099, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6305732484076433, |
| "grad_norm": 1.4063826448960124, |
| "learning_rate": 4.806610198527779e-06, |
| "loss": 0.0896, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6310282074613285, |
| "grad_norm": 1.540144583948253, |
| "learning_rate": 4.8063344984486755e-06, |
| "loss": 0.0879, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6314831665150137, |
| "grad_norm": 1.5064915998503037, |
| "learning_rate": 4.806058609906331e-06, |
| "loss": 0.0962, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6319381255686988, |
| "grad_norm": 2.1627291975031104, |
| "learning_rate": 4.805782532923292e-06, |
| "loss": 0.128, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.632393084622384, |
| "grad_norm": 1.647216495001309, |
| "learning_rate": 4.805506267522116e-06, |
| "loss": 0.1248, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6328480436760692, |
| "grad_norm": 1.9302875416620158, |
| "learning_rate": 4.80522981372538e-06, |
| "loss": 0.1297, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6333030027297544, |
| "grad_norm": 1.1401865771531547, |
| "learning_rate": 4.804953171555674e-06, |
| "loss": 0.077, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6337579617834395, |
| "grad_norm": 2.3827232130583513, |
| "learning_rate": 4.8046763410356046e-06, |
| "loss": 0.1231, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6342129208371247, |
| "grad_norm": 2.132009387110179, |
| "learning_rate": 4.804399322187791e-06, |
| "loss": 0.1363, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6346678798908099, |
| "grad_norm": 1.914550517915578, |
| "learning_rate": 4.8041221150348725e-06, |
| "loss": 0.1408, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.635122838944495, |
| "grad_norm": 1.5194825054621766, |
| "learning_rate": 4.8038447195995e-06, |
| "loss": 0.1107, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6355777979981801, |
| "grad_norm": 1.6908421741011026, |
| "learning_rate": 4.80356713590434e-06, |
| "loss": 0.1057, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6360327570518653, |
| "grad_norm": 1.957264325451557, |
| "learning_rate": 4.803289363972078e-06, |
| "loss": 0.1279, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6364877161055505, |
| "grad_norm": 1.429753125674933, |
| "learning_rate": 4.8030114038254094e-06, |
| "loss": 0.0906, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6369426751592356, |
| "grad_norm": 1.574683320179916, |
| "learning_rate": 4.80273325548705e-06, |
| "loss": 0.0951, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6373976342129208, |
| "grad_norm": 1.422366848550457, |
| "learning_rate": 4.802454918979728e-06, |
| "loss": 0.0906, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.637852593266606, |
| "grad_norm": 1.9963358207494448, |
| "learning_rate": 4.802176394326187e-06, |
| "loss": 0.1483, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6383075523202911, |
| "grad_norm": 1.781860008561357, |
| "learning_rate": 4.801897681549188e-06, |
| "loss": 0.0878, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6387625113739763, |
| "grad_norm": 1.635142292837631, |
| "learning_rate": 4.801618780671506e-06, |
| "loss": 0.1054, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6392174704276615, |
| "grad_norm": 1.3235648640664877, |
| "learning_rate": 4.801339691715932e-06, |
| "loss": 0.0939, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6396724294813467, |
| "grad_norm": 1.2245139670763607, |
| "learning_rate": 4.8010604147052695e-06, |
| "loss": 0.0625, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6401273885350318, |
| "grad_norm": 1.4675657307946148, |
| "learning_rate": 4.800780949662343e-06, |
| "loss": 0.0994, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.640582347588717, |
| "grad_norm": 1.493372713452032, |
| "learning_rate": 4.800501296609986e-06, |
| "loss": 0.0841, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6410373066424022, |
| "grad_norm": 1.7340947187812135, |
| "learning_rate": 4.800221455571053e-06, |
| "loss": 0.1088, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6414922656960873, |
| "grad_norm": 1.2980113793311265, |
| "learning_rate": 4.7999414265684105e-06, |
| "loss": 0.0852, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6419472247497725, |
| "grad_norm": 1.4464636793664913, |
| "learning_rate": 4.79966120962494e-06, |
| "loss": 0.0976, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6424021838034577, |
| "grad_norm": 1.4659649640116845, |
| "learning_rate": 4.799380804763542e-06, |
| "loss": 0.0901, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 1.703460078887615, |
| "learning_rate": 4.799100212007128e-06, |
| "loss": 0.1074, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.643312101910828, |
| "grad_norm": 1.3106092828093312, |
| "learning_rate": 4.7988194313786275e-06, |
| "loss": 0.0736, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6437670609645132, |
| "grad_norm": 0.9724381635858095, |
| "learning_rate": 4.798538462900984e-06, |
| "loss": 0.0657, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6442220200181984, |
| "grad_norm": 1.3180852195340405, |
| "learning_rate": 4.798257306597157e-06, |
| "loss": 0.0791, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6446769790718835, |
| "grad_norm": 1.3806990093425773, |
| "learning_rate": 4.797975962490122e-06, |
| "loss": 0.102, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6451319381255687, |
| "grad_norm": 1.0796594549250105, |
| "learning_rate": 4.797694430602869e-06, |
| "loss": 0.0521, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6455868971792539, |
| "grad_norm": 1.8299905872463706, |
| "learning_rate": 4.797412710958405e-06, |
| "loss": 0.1117, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6460418562329391, |
| "grad_norm": 1.7103989898617438, |
| "learning_rate": 4.797130803579747e-06, |
| "loss": 0.1034, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6464968152866242, |
| "grad_norm": 1.9920043416958193, |
| "learning_rate": 4.796848708489935e-06, |
| "loss": 0.1314, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6469517743403094, |
| "grad_norm": 1.55952000492946, |
| "learning_rate": 4.796566425712018e-06, |
| "loss": 0.1094, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6474067333939946, |
| "grad_norm": 1.569073968162044, |
| "learning_rate": 4.796283955269065e-06, |
| "loss": 0.1288, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6478616924476797, |
| "grad_norm": 1.9345498009875362, |
| "learning_rate": 4.796001297184156e-06, |
| "loss": 0.1276, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6483166515013649, |
| "grad_norm": 1.996849276778458, |
| "learning_rate": 4.79571845148039e-06, |
| "loss": 0.1443, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6487716105550501, |
| "grad_norm": 1.1655015182194328, |
| "learning_rate": 4.795435418180879e-06, |
| "loss": 0.0895, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6492265696087353, |
| "grad_norm": 1.6476688817001566, |
| "learning_rate": 4.795152197308753e-06, |
| "loss": 0.0993, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.6496815286624203, |
| "grad_norm": 1.0099999351331836, |
| "learning_rate": 4.794868788887154e-06, |
| "loss": 0.0671, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6501364877161055, |
| "grad_norm": 1.8391539690012708, |
| "learning_rate": 4.79458519293924e-06, |
| "loss": 0.1345, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6505914467697907, |
| "grad_norm": 1.4752057458255263, |
| "learning_rate": 4.794301409488187e-06, |
| "loss": 0.0873, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6510464058234758, |
| "grad_norm": 1.2943024580621056, |
| "learning_rate": 4.7940174385571835e-06, |
| "loss": 0.0802, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.651501364877161, |
| "grad_norm": 1.3918512180039062, |
| "learning_rate": 4.793733280169435e-06, |
| "loss": 0.0993, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6519563239308462, |
| "grad_norm": 2.2174420994103574, |
| "learning_rate": 4.7934489343481614e-06, |
| "loss": 0.1425, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6524112829845314, |
| "grad_norm": 1.774834870886046, |
| "learning_rate": 4.7931644011165975e-06, |
| "loss": 0.0982, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6528662420382165, |
| "grad_norm": 1.2208864014501382, |
| "learning_rate": 4.792879680497995e-06, |
| "loss": 0.0807, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6533212010919017, |
| "grad_norm": 1.8182347519697841, |
| "learning_rate": 4.79259477251562e-06, |
| "loss": 0.1194, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6537761601455869, |
| "grad_norm": 1.8801650010523618, |
| "learning_rate": 4.792309677192753e-06, |
| "loss": 0.1326, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.654231119199272, |
| "grad_norm": 1.776650087976607, |
| "learning_rate": 4.79202439455269e-06, |
| "loss": 0.0995, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6546860782529572, |
| "grad_norm": 1.2419464528847455, |
| "learning_rate": 4.791738924618745e-06, |
| "loss": 0.0819, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6551410373066424, |
| "grad_norm": 1.3878814997047564, |
| "learning_rate": 4.791453267414245e-06, |
| "loss": 0.077, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6555959963603276, |
| "grad_norm": 1.3963850212985605, |
| "learning_rate": 4.7911674229625316e-06, |
| "loss": 0.0797, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6560509554140127, |
| "grad_norm": 1.9634000929904991, |
| "learning_rate": 4.790881391286963e-06, |
| "loss": 0.1173, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6565059144676979, |
| "grad_norm": 1.5553330936936114, |
| "learning_rate": 4.790595172410914e-06, |
| "loss": 0.099, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.6569608735213831, |
| "grad_norm": 1.9255393679593797, |
| "learning_rate": 4.79030876635777e-06, |
| "loss": 0.1353, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6574158325750682, |
| "grad_norm": 1.461167870438619, |
| "learning_rate": 4.790022173150938e-06, |
| "loss": 0.1049, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6578707916287534, |
| "grad_norm": 1.0062740037097007, |
| "learning_rate": 4.789735392813835e-06, |
| "loss": 0.0594, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6583257506824386, |
| "grad_norm": 1.4058443933458273, |
| "learning_rate": 4.789448425369896e-06, |
| "loss": 0.0872, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.6587807097361238, |
| "grad_norm": 1.5311615159042697, |
| "learning_rate": 4.789161270842571e-06, |
| "loss": 0.0939, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.6592356687898089, |
| "grad_norm": 1.6595649465936542, |
| "learning_rate": 4.7888739292553235e-06, |
| "loss": 0.1248, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.6596906278434941, |
| "grad_norm": 1.7051412400140817, |
| "learning_rate": 4.788586400631636e-06, |
| "loss": 0.1197, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6601455868971793, |
| "grad_norm": 1.2115114973668668, |
| "learning_rate": 4.788298684995003e-06, |
| "loss": 0.0905, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.6606005459508644, |
| "grad_norm": 1.4239694731611245, |
| "learning_rate": 4.7880107823689355e-06, |
| "loss": 0.0801, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.6610555050045496, |
| "grad_norm": 1.5925606772355265, |
| "learning_rate": 4.787722692776958e-06, |
| "loss": 0.1183, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.6615104640582348, |
| "grad_norm": 1.7931970729363222, |
| "learning_rate": 4.787434416242615e-06, |
| "loss": 0.1189, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.66196542311192, |
| "grad_norm": 2.3171059544303874, |
| "learning_rate": 4.787145952789461e-06, |
| "loss": 0.1436, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.6624203821656051, |
| "grad_norm": 1.4441484331538328, |
| "learning_rate": 4.786857302441069e-06, |
| "loss": 0.0781, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.6628753412192903, |
| "grad_norm": 1.690439275216053, |
| "learning_rate": 4.786568465221025e-06, |
| "loss": 0.1111, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.6633303002729755, |
| "grad_norm": 1.6812302333143159, |
| "learning_rate": 4.7862794411529315e-06, |
| "loss": 0.1175, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.6637852593266605, |
| "grad_norm": 1.9541579133281037, |
| "learning_rate": 4.7859902302604075e-06, |
| "loss": 0.1329, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.6642402183803457, |
| "grad_norm": 1.8591409223722424, |
| "learning_rate": 4.785700832567085e-06, |
| "loss": 0.1211, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.664695177434031, |
| "grad_norm": 1.325162611861324, |
| "learning_rate": 4.785411248096613e-06, |
| "loss": 0.0743, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.6651501364877161, |
| "grad_norm": 1.3065112220161235, |
| "learning_rate": 4.785121476872654e-06, |
| "loss": 0.1034, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.6656050955414012, |
| "grad_norm": 1.5925894626386907, |
| "learning_rate": 4.784831518918888e-06, |
| "loss": 0.1196, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.6660600545950864, |
| "grad_norm": 1.1820283205821733, |
| "learning_rate": 4.784541374259008e-06, |
| "loss": 0.0769, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.6665150136487716, |
| "grad_norm": 1.571736758093102, |
| "learning_rate": 4.7842510429167244e-06, |
| "loss": 0.1, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.6669699727024567, |
| "grad_norm": 1.5876822973446192, |
| "learning_rate": 4.783960524915761e-06, |
| "loss": 0.1214, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.6674249317561419, |
| "grad_norm": 1.5160576603586384, |
| "learning_rate": 4.783669820279858e-06, |
| "loss": 0.0979, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.6678798908098271, |
| "grad_norm": 1.2434477128547956, |
| "learning_rate": 4.783378929032769e-06, |
| "loss": 0.0824, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.6683348498635123, |
| "grad_norm": 1.46291955617626, |
| "learning_rate": 4.783087851198267e-06, |
| "loss": 0.0942, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.6687898089171974, |
| "grad_norm": 1.7951492565076614, |
| "learning_rate": 4.7827965868001356e-06, |
| "loss": 0.1192, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6692447679708826, |
| "grad_norm": 1.4406289448080234, |
| "learning_rate": 4.782505135862176e-06, |
| "loss": 0.1009, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.6696997270245678, |
| "grad_norm": 1.4538780681359404, |
| "learning_rate": 4.782213498408205e-06, |
| "loss": 0.1012, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.6701546860782529, |
| "grad_norm": 1.4490300401257787, |
| "learning_rate": 4.781921674462053e-06, |
| "loss": 0.0782, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.6706096451319381, |
| "grad_norm": 1.8860995116874109, |
| "learning_rate": 4.781629664047566e-06, |
| "loss": 0.1148, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.6710646041856233, |
| "grad_norm": 1.3918036510588907, |
| "learning_rate": 4.781337467188607e-06, |
| "loss": 0.1025, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.6715195632393085, |
| "grad_norm": 2.3859380054935344, |
| "learning_rate": 4.781045083909053e-06, |
| "loss": 0.1219, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.6719745222929936, |
| "grad_norm": 1.9401784591368603, |
| "learning_rate": 4.780752514232796e-06, |
| "loss": 0.1022, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.6724294813466788, |
| "grad_norm": 1.374892200929808, |
| "learning_rate": 4.780459758183743e-06, |
| "loss": 0.0896, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.672884440400364, |
| "grad_norm": 1.4250914966637114, |
| "learning_rate": 4.780166815785817e-06, |
| "loss": 0.0907, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.6733393994540491, |
| "grad_norm": 1.3888650548243648, |
| "learning_rate": 4.7798736870629554e-06, |
| "loss": 0.1102, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6737943585077343, |
| "grad_norm": 1.5225956652456023, |
| "learning_rate": 4.779580372039113e-06, |
| "loss": 0.0809, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.6742493175614195, |
| "grad_norm": 2.133500594182355, |
| "learning_rate": 4.779286870738256e-06, |
| "loss": 0.1069, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.6747042766151047, |
| "grad_norm": 1.6417529269403512, |
| "learning_rate": 4.778993183184371e-06, |
| "loss": 0.0879, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.6751592356687898, |
| "grad_norm": 2.188184230975794, |
| "learning_rate": 4.778699309401453e-06, |
| "loss": 0.1196, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.675614194722475, |
| "grad_norm": 1.366654497975806, |
| "learning_rate": 4.7784052494135195e-06, |
| "loss": 0.0952, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.6760691537761602, |
| "grad_norm": 2.2251300669835734, |
| "learning_rate": 4.778111003244596e-06, |
| "loss": 0.0962, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.6765241128298453, |
| "grad_norm": 1.2239477453163228, |
| "learning_rate": 4.777816570918731e-06, |
| "loss": 0.0771, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.6769790718835305, |
| "grad_norm": 1.4442063624509236, |
| "learning_rate": 4.777521952459982e-06, |
| "loss": 0.0881, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.6774340309372157, |
| "grad_norm": 1.792892312265488, |
| "learning_rate": 4.777227147892424e-06, |
| "loss": 0.108, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.6778889899909009, |
| "grad_norm": 1.5848897809985478, |
| "learning_rate": 4.776932157240147e-06, |
| "loss": 0.0973, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.678343949044586, |
| "grad_norm": 1.5924788947742, |
| "learning_rate": 4.776636980527257e-06, |
| "loss": 0.0997, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.6787989080982711, |
| "grad_norm": 1.7689554235448024, |
| "learning_rate": 4.776341617777874e-06, |
| "loss": 0.0907, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.6792538671519563, |
| "grad_norm": 1.561936690334899, |
| "learning_rate": 4.776046069016135e-06, |
| "loss": 0.1045, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.6797088262056415, |
| "grad_norm": 1.978023029084926, |
| "learning_rate": 4.775750334266188e-06, |
| "loss": 0.1316, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.6801637852593266, |
| "grad_norm": 1.2221171400180673, |
| "learning_rate": 4.775454413552202e-06, |
| "loss": 0.0708, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.6806187443130118, |
| "grad_norm": 2.2916692264154848, |
| "learning_rate": 4.775158306898358e-06, |
| "loss": 0.1045, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.681073703366697, |
| "grad_norm": 1.5270730953843772, |
| "learning_rate": 4.774862014328849e-06, |
| "loss": 0.087, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.6815286624203821, |
| "grad_norm": 1.5001501033936573, |
| "learning_rate": 4.774565535867892e-06, |
| "loss": 0.083, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.6819836214740673, |
| "grad_norm": 2.228962091730558, |
| "learning_rate": 4.77426887153971e-06, |
| "loss": 0.132, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.6824385805277525, |
| "grad_norm": 1.7756631467911705, |
| "learning_rate": 4.773972021368546e-06, |
| "loss": 0.1156, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6828935395814377, |
| "grad_norm": 1.9028113721779674, |
| "learning_rate": 4.773674985378658e-06, |
| "loss": 0.1692, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.6833484986351228, |
| "grad_norm": 1.591856567558633, |
| "learning_rate": 4.773377763594319e-06, |
| "loss": 0.0829, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.683803457688808, |
| "grad_norm": 1.7330424169213765, |
| "learning_rate": 4.773080356039814e-06, |
| "loss": 0.1079, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.6842584167424932, |
| "grad_norm": 1.3093378510726064, |
| "learning_rate": 4.772782762739448e-06, |
| "loss": 0.0919, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.6847133757961783, |
| "grad_norm": 1.5644465201102973, |
| "learning_rate": 4.772484983717539e-06, |
| "loss": 0.096, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.6851683348498635, |
| "grad_norm": 1.7535246249527565, |
| "learning_rate": 4.77218701899842e-06, |
| "loss": 0.1025, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.6856232939035487, |
| "grad_norm": 1.682557507776212, |
| "learning_rate": 4.771888868606438e-06, |
| "loss": 0.1245, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.6860782529572339, |
| "grad_norm": 1.1063626083550568, |
| "learning_rate": 4.771590532565957e-06, |
| "loss": 0.0628, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.686533212010919, |
| "grad_norm": 1.447485907138006, |
| "learning_rate": 4.771292010901357e-06, |
| "loss": 0.0756, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.6869881710646042, |
| "grad_norm": 1.9968564435349099, |
| "learning_rate": 4.77099330363703e-06, |
| "loss": 0.1121, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6874431301182894, |
| "grad_norm": 1.331414088559165, |
| "learning_rate": 4.770694410797387e-06, |
| "loss": 0.0918, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.6878980891719745, |
| "grad_norm": 1.7374051988448433, |
| "learning_rate": 4.770395332406851e-06, |
| "loss": 0.1046, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.6883530482256597, |
| "grad_norm": 1.5590482284052172, |
| "learning_rate": 4.770096068489861e-06, |
| "loss": 0.1045, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.6888080072793449, |
| "grad_norm": 1.2266167614387768, |
| "learning_rate": 4.769796619070872e-06, |
| "loss": 0.0877, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.6892629663330301, |
| "grad_norm": 1.1438287132644533, |
| "learning_rate": 4.769496984174353e-06, |
| "loss": 0.0759, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.6897179253867152, |
| "grad_norm": 1.5191110521315079, |
| "learning_rate": 4.769197163824791e-06, |
| "loss": 0.0839, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.6901728844404004, |
| "grad_norm": 1.5352637302100918, |
| "learning_rate": 4.768897158046683e-06, |
| "loss": 0.0927, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.6906278434940856, |
| "grad_norm": 1.224151460496261, |
| "learning_rate": 4.768596966864546e-06, |
| "loss": 0.0758, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.6910828025477707, |
| "grad_norm": 2.097275342036678, |
| "learning_rate": 4.76829659030291e-06, |
| "loss": 0.1606, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.6915377616014559, |
| "grad_norm": 1.773445388033648, |
| "learning_rate": 4.767996028386319e-06, |
| "loss": 0.1071, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6919927206551411, |
| "grad_norm": 1.798404416562804, |
| "learning_rate": 4.767695281139336e-06, |
| "loss": 0.0882, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.6924476797088263, |
| "grad_norm": 1.6643609283655776, |
| "learning_rate": 4.767394348586535e-06, |
| "loss": 0.0986, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.6929026387625113, |
| "grad_norm": 1.8351458616302123, |
| "learning_rate": 4.767093230752507e-06, |
| "loss": 0.1398, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.6933575978161965, |
| "grad_norm": 1.695947028633324, |
| "learning_rate": 4.766791927661859e-06, |
| "loss": 0.1217, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.6938125568698817, |
| "grad_norm": 1.706097971198418, |
| "learning_rate": 4.766490439339211e-06, |
| "loss": 0.0852, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.6942675159235668, |
| "grad_norm": 1.6641835764066073, |
| "learning_rate": 4.7661887658092e-06, |
| "loss": 0.1078, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.694722474977252, |
| "grad_norm": 1.4721263946542316, |
| "learning_rate": 4.765886907096477e-06, |
| "loss": 0.1046, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.6951774340309372, |
| "grad_norm": 1.7677748922664356, |
| "learning_rate": 4.7655848632257084e-06, |
| "loss": 0.1257, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.6956323930846224, |
| "grad_norm": 1.5849838243983163, |
| "learning_rate": 4.7652826342215764e-06, |
| "loss": 0.113, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.6960873521383075, |
| "grad_norm": 1.8167247958495556, |
| "learning_rate": 4.764980220108777e-06, |
| "loss": 0.1308, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6965423111919927, |
| "grad_norm": 2.259597776447737, |
| "learning_rate": 4.764677620912022e-06, |
| "loss": 0.1488, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.6969972702456779, |
| "grad_norm": 1.3871244274886438, |
| "learning_rate": 4.764374836656041e-06, |
| "loss": 0.1014, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.697452229299363, |
| "grad_norm": 1.261518456907349, |
| "learning_rate": 4.764071867365571e-06, |
| "loss": 0.0998, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.6979071883530482, |
| "grad_norm": 1.7720377742538196, |
| "learning_rate": 4.763768713065375e-06, |
| "loss": 0.1003, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.6983621474067334, |
| "grad_norm": 1.9316342411609453, |
| "learning_rate": 4.763465373780223e-06, |
| "loss": 0.1218, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6988171064604186, |
| "grad_norm": 1.7090441393124594, |
| "learning_rate": 4.763161849534902e-06, |
| "loss": 0.1016, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.6992720655141037, |
| "grad_norm": 1.0772372058571478, |
| "learning_rate": 4.762858140354214e-06, |
| "loss": 0.0795, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.6997270245677889, |
| "grad_norm": 1.5989783419371975, |
| "learning_rate": 4.7625542462629785e-06, |
| "loss": 0.1051, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7001819836214741, |
| "grad_norm": 0.9329076652331691, |
| "learning_rate": 4.762250167286027e-06, |
| "loss": 0.0492, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7006369426751592, |
| "grad_norm": 1.7557978189042716, |
| "learning_rate": 4.761945903448209e-06, |
| "loss": 0.1336, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7010919017288444, |
| "grad_norm": 1.1252616618728841, |
| "learning_rate": 4.761641454774386e-06, |
| "loss": 0.085, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7015468607825296, |
| "grad_norm": 1.9520354546929128, |
| "learning_rate": 4.761336821289436e-06, |
| "loss": 0.158, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7020018198362148, |
| "grad_norm": 1.088110444545801, |
| "learning_rate": 4.761032003018254e-06, |
| "loss": 0.0667, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.7024567788898999, |
| "grad_norm": 1.353551986968956, |
| "learning_rate": 4.760726999985748e-06, |
| "loss": 0.0748, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7029117379435851, |
| "grad_norm": 1.2483430565784006, |
| "learning_rate": 4.7604218122168406e-06, |
| "loss": 0.0821, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7033666969972703, |
| "grad_norm": 2.014581699156683, |
| "learning_rate": 4.760116439736471e-06, |
| "loss": 0.1376, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7038216560509554, |
| "grad_norm": 2.2990546871467386, |
| "learning_rate": 4.759810882569591e-06, |
| "loss": 0.1528, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7042766151046406, |
| "grad_norm": 1.062682027844058, |
| "learning_rate": 4.759505140741172e-06, |
| "loss": 0.0646, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7047315741583258, |
| "grad_norm": 2.1924162550625863, |
| "learning_rate": 4.759199214276196e-06, |
| "loss": 0.1277, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.705186533212011, |
| "grad_norm": 1.4339312162219853, |
| "learning_rate": 4.758893103199665e-06, |
| "loss": 0.1056, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7056414922656961, |
| "grad_norm": 1.6814902406994063, |
| "learning_rate": 4.758586807536588e-06, |
| "loss": 0.0968, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.7060964513193813, |
| "grad_norm": 1.055808036587697, |
| "learning_rate": 4.758280327311998e-06, |
| "loss": 0.0624, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7065514103730665, |
| "grad_norm": 2.092612313664783, |
| "learning_rate": 4.757973662550938e-06, |
| "loss": 0.1076, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7070063694267515, |
| "grad_norm": 1.2099784449421243, |
| "learning_rate": 4.757666813278466e-06, |
| "loss": 0.0791, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7074613284804367, |
| "grad_norm": 1.7701219392423706, |
| "learning_rate": 4.757359779519659e-06, |
| "loss": 0.1158, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.707916287534122, |
| "grad_norm": 1.9442818433331057, |
| "learning_rate": 4.757052561299604e-06, |
| "loss": 0.1498, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7083712465878071, |
| "grad_norm": 2.1934930579734417, |
| "learning_rate": 4.756745158643407e-06, |
| "loss": 0.1446, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.7088262056414922, |
| "grad_norm": 1.852211386061071, |
| "learning_rate": 4.7564375715761865e-06, |
| "loss": 0.1163, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7092811646951774, |
| "grad_norm": 0.8096640629799587, |
| "learning_rate": 4.756129800123078e-06, |
| "loss": 0.0398, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7097361237488626, |
| "grad_norm": 1.414444864803518, |
| "learning_rate": 4.755821844309232e-06, |
| "loss": 0.1126, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7101910828025477, |
| "grad_norm": 1.598441885528022, |
| "learning_rate": 4.75551370415981e-06, |
| "loss": 0.1008, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7106460418562329, |
| "grad_norm": 1.7052656116179543, |
| "learning_rate": 4.755205379699996e-06, |
| "loss": 0.105, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7111010009099181, |
| "grad_norm": 1.570140158085679, |
| "learning_rate": 4.75489687095498e-06, |
| "loss": 0.103, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7115559599636033, |
| "grad_norm": 1.6128979312038125, |
| "learning_rate": 4.754588177949977e-06, |
| "loss": 0.0947, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7120109190172884, |
| "grad_norm": 1.5157416875909306, |
| "learning_rate": 4.7542793007102086e-06, |
| "loss": 0.0826, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7124658780709736, |
| "grad_norm": 1.7615482286425264, |
| "learning_rate": 4.7539702392609165e-06, |
| "loss": 0.1349, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7129208371246588, |
| "grad_norm": 1.1762971295347604, |
| "learning_rate": 4.753660993627356e-06, |
| "loss": 0.0649, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.7133757961783439, |
| "grad_norm": 2.155472421625263, |
| "learning_rate": 4.753351563834795e-06, |
| "loss": 0.1308, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7138307552320291, |
| "grad_norm": 1.7676905218706818, |
| "learning_rate": 4.753041949908521e-06, |
| "loss": 0.1034, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 1.8250626593850294, |
| "learning_rate": 4.752732151873834e-06, |
| "loss": 0.1, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7147406733393995, |
| "grad_norm": 1.4984330035047126, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 0.1107, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7151956323930846, |
| "grad_norm": 1.2161952645703746, |
| "learning_rate": 4.752112003580495e-06, |
| "loss": 0.0772, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7156505914467698, |
| "grad_norm": 1.8268634010084226, |
| "learning_rate": 4.751801653372518e-06, |
| "loss": 0.0853, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.716105550500455, |
| "grad_norm": 1.6855455239576989, |
| "learning_rate": 4.751491119157481e-06, |
| "loss": 0.1055, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7165605095541401, |
| "grad_norm": 1.1214993041730539, |
| "learning_rate": 4.751180400960756e-06, |
| "loss": 0.0653, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7170154686078253, |
| "grad_norm": 1.8475828844832658, |
| "learning_rate": 4.7508694988077355e-06, |
| "loss": 0.1416, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7174704276615105, |
| "grad_norm": 1.4469787222461497, |
| "learning_rate": 4.750558412723824e-06, |
| "loss": 0.0766, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7179253867151957, |
| "grad_norm": 1.6682547194818422, |
| "learning_rate": 4.750247142734442e-06, |
| "loss": 0.073, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7183803457688808, |
| "grad_norm": 1.8235039708297685, |
| "learning_rate": 4.749935688865026e-06, |
| "loss": 0.1299, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.718835304822566, |
| "grad_norm": 1.2674959382982702, |
| "learning_rate": 4.749624051141026e-06, |
| "loss": 0.0639, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7192902638762512, |
| "grad_norm": 1.1814301599394401, |
| "learning_rate": 4.7493122295879076e-06, |
| "loss": 0.074, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7197452229299363, |
| "grad_norm": 1.8607689058316668, |
| "learning_rate": 4.7490002242311525e-06, |
| "loss": 0.1202, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7202001819836215, |
| "grad_norm": 1.40248476110639, |
| "learning_rate": 4.748688035096255e-06, |
| "loss": 0.0831, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7206551410373067, |
| "grad_norm": 1.376835864910441, |
| "learning_rate": 4.748375662208726e-06, |
| "loss": 0.0627, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7211101000909919, |
| "grad_norm": 1.7445419287373105, |
| "learning_rate": 4.748063105594092e-06, |
| "loss": 0.1182, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.721565059144677, |
| "grad_norm": 1.6298546358892563, |
| "learning_rate": 4.747750365277892e-06, |
| "loss": 0.1203, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7220200181983621, |
| "grad_norm": 1.78857652271692, |
| "learning_rate": 4.747437441285684e-06, |
| "loss": 0.0845, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.7224749772520473, |
| "grad_norm": 1.5543624854659128, |
| "learning_rate": 4.747124333643038e-06, |
| "loss": 0.1067, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7229299363057324, |
| "grad_norm": 1.6938973264546118, |
| "learning_rate": 4.746811042375538e-06, |
| "loss": 0.1092, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7233848953594176, |
| "grad_norm": 1.4339359801015907, |
| "learning_rate": 4.746497567508787e-06, |
| "loss": 0.1009, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7238398544131028, |
| "grad_norm": 1.370915821139941, |
| "learning_rate": 4.7461839090684e-06, |
| "loss": 0.0967, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.724294813466788, |
| "grad_norm": 1.65404522408881, |
| "learning_rate": 4.745870067080007e-06, |
| "loss": 0.0936, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7247497725204731, |
| "grad_norm": 2.5744395171768026, |
| "learning_rate": 4.7455560415692545e-06, |
| "loss": 0.1734, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7252047315741583, |
| "grad_norm": 1.6130757907987123, |
| "learning_rate": 4.745241832561803e-06, |
| "loss": 0.0782, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7256596906278435, |
| "grad_norm": 1.3264278567683987, |
| "learning_rate": 4.744927440083329e-06, |
| "loss": 0.0883, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7261146496815286, |
| "grad_norm": 1.4845169251283168, |
| "learning_rate": 4.744612864159522e-06, |
| "loss": 0.0866, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7265696087352138, |
| "grad_norm": 1.867201501230081, |
| "learning_rate": 4.7442981048160895e-06, |
| "loss": 0.1239, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.727024567788899, |
| "grad_norm": 1.5395932028522379, |
| "learning_rate": 4.74398316207875e-06, |
| "loss": 0.0937, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7274795268425842, |
| "grad_norm": 2.47394198911153, |
| "learning_rate": 4.74366803597324e-06, |
| "loss": 0.1896, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7279344858962693, |
| "grad_norm": 1.6788148875306355, |
| "learning_rate": 4.743352726525311e-06, |
| "loss": 0.1001, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7283894449499545, |
| "grad_norm": 1.1785705121541328, |
| "learning_rate": 4.743037233760728e-06, |
| "loss": 0.0723, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7288444040036397, |
| "grad_norm": 1.5889628523330563, |
| "learning_rate": 4.742721557705271e-06, |
| "loss": 0.0978, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7292993630573248, |
| "grad_norm": 1.3734642738638374, |
| "learning_rate": 4.7424056983847374e-06, |
| "loss": 0.0961, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.72975432211101, |
| "grad_norm": 1.6433399521175855, |
| "learning_rate": 4.7420896558249366e-06, |
| "loss": 0.1037, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7302092811646952, |
| "grad_norm": 1.0189389361932368, |
| "learning_rate": 4.741773430051694e-06, |
| "loss": 0.0571, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7306642402183804, |
| "grad_norm": 1.8326786415176635, |
| "learning_rate": 4.74145702109085e-06, |
| "loss": 0.1069, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7311191992720655, |
| "grad_norm": 1.9145632983548877, |
| "learning_rate": 4.741140428968261e-06, |
| "loss": 0.1155, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7315741583257507, |
| "grad_norm": 0.8975672007604479, |
| "learning_rate": 4.740823653709797e-06, |
| "loss": 0.0594, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7320291173794359, |
| "grad_norm": 1.1104882324072687, |
| "learning_rate": 4.740506695341343e-06, |
| "loss": 0.0774, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.732484076433121, |
| "grad_norm": 1.8804023117943707, |
| "learning_rate": 4.740189553888801e-06, |
| "loss": 0.1265, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7329390354868062, |
| "grad_norm": 1.3783166591523974, |
| "learning_rate": 4.739872229378085e-06, |
| "loss": 0.0849, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7333939945404914, |
| "grad_norm": 1.5383875985636057, |
| "learning_rate": 4.739554721835125e-06, |
| "loss": 0.0764, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7338489535941766, |
| "grad_norm": 1.7836575489679842, |
| "learning_rate": 4.739237031285867e-06, |
| "loss": 0.1208, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7343039126478617, |
| "grad_norm": 2.0374287466508343, |
| "learning_rate": 4.738919157756272e-06, |
| "loss": 0.1283, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7347588717015469, |
| "grad_norm": 1.4713023421634537, |
| "learning_rate": 4.738601101272313e-06, |
| "loss": 0.1143, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7352138307552321, |
| "grad_norm": 1.3004252033026868, |
| "learning_rate": 4.738282861859983e-06, |
| "loss": 0.0785, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7356687898089171, |
| "grad_norm": 1.7078107635335555, |
| "learning_rate": 4.737964439545284e-06, |
| "loss": 0.0989, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7361237488626023, |
| "grad_norm": 1.482235192071265, |
| "learning_rate": 4.737645834354238e-06, |
| "loss": 0.0889, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7365787079162875, |
| "grad_norm": 1.3632184750760454, |
| "learning_rate": 4.737327046312879e-06, |
| "loss": 0.0728, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7370336669699727, |
| "grad_norm": 1.6185932631828381, |
| "learning_rate": 4.737008075447259e-06, |
| "loss": 0.0967, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7374886260236578, |
| "grad_norm": 1.7060869720795129, |
| "learning_rate": 4.73668892178344e-06, |
| "loss": 0.1054, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.737943585077343, |
| "grad_norm": 1.672488053873089, |
| "learning_rate": 4.736369585347503e-06, |
| "loss": 0.1172, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7383985441310282, |
| "grad_norm": 2.009207481858011, |
| "learning_rate": 4.736050066165544e-06, |
| "loss": 0.1104, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7388535031847133, |
| "grad_norm": 1.7386909135986017, |
| "learning_rate": 4.735730364263671e-06, |
| "loss": 0.1142, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7393084622383985, |
| "grad_norm": 1.6299431755796778, |
| "learning_rate": 4.735410479668009e-06, |
| "loss": 0.109, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7397634212920837, |
| "grad_norm": 1.5971057123643035, |
| "learning_rate": 4.735090412404697e-06, |
| "loss": 0.1037, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7402183803457689, |
| "grad_norm": 1.4066558803560258, |
| "learning_rate": 4.734770162499891e-06, |
| "loss": 0.0718, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.740673339399454, |
| "grad_norm": 1.3437849408188942, |
| "learning_rate": 4.734449729979759e-06, |
| "loss": 0.0775, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7411282984531392, |
| "grad_norm": 1.8126383722195984, |
| "learning_rate": 4.734129114870486e-06, |
| "loss": 0.1097, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7415832575068244, |
| "grad_norm": 1.7276681892706887, |
| "learning_rate": 4.733808317198271e-06, |
| "loss": 0.075, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7420382165605095, |
| "grad_norm": 1.4303092464154914, |
| "learning_rate": 4.733487336989327e-06, |
| "loss": 0.0839, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7424931756141947, |
| "grad_norm": 1.8755052783018096, |
| "learning_rate": 4.733166174269886e-06, |
| "loss": 0.1156, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7429481346678799, |
| "grad_norm": 1.4937298948438007, |
| "learning_rate": 4.732844829066189e-06, |
| "loss": 0.1005, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7434030937215651, |
| "grad_norm": 1.641256737556786, |
| "learning_rate": 4.732523301404497e-06, |
| "loss": 0.1038, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7438580527752502, |
| "grad_norm": 1.8968655868657809, |
| "learning_rate": 4.732201591311082e-06, |
| "loss": 0.1318, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7443130118289354, |
| "grad_norm": 1.5647661977098755, |
| "learning_rate": 4.731879698812233e-06, |
| "loss": 0.1295, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7447679708826206, |
| "grad_norm": 1.3130665672457837, |
| "learning_rate": 4.731557623934255e-06, |
| "loss": 0.0797, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7452229299363057, |
| "grad_norm": 2.2524036787204236, |
| "learning_rate": 4.7312353667034645e-06, |
| "loss": 0.1549, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7456778889899909, |
| "grad_norm": 2.171706574250327, |
| "learning_rate": 4.730912927146197e-06, |
| "loss": 0.1221, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7461328480436761, |
| "grad_norm": 1.3055559061415911, |
| "learning_rate": 4.7305903052888e-06, |
| "loss": 0.0797, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7465878070973613, |
| "grad_norm": 1.9092438244747783, |
| "learning_rate": 4.730267501157636e-06, |
| "loss": 0.1211, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7470427661510464, |
| "grad_norm": 1.3873103303782754, |
| "learning_rate": 4.729944514779084e-06, |
| "loss": 0.0863, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7474977252047316, |
| "grad_norm": 1.3769315994876887, |
| "learning_rate": 4.729621346179536e-06, |
| "loss": 0.095, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7479526842584168, |
| "grad_norm": 1.3309888167219324, |
| "learning_rate": 4.7292979953854e-06, |
| "loss": 0.091, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7484076433121019, |
| "grad_norm": 1.3388937850633889, |
| "learning_rate": 4.7289744624231004e-06, |
| "loss": 0.0715, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7488626023657871, |
| "grad_norm": 2.9889212809141026, |
| "learning_rate": 4.728650747319073e-06, |
| "loss": 0.1403, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7493175614194723, |
| "grad_norm": 1.7436207494414042, |
| "learning_rate": 4.728326850099771e-06, |
| "loss": 0.11, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7497725204731575, |
| "grad_norm": 1.2990437768947476, |
| "learning_rate": 4.728002770791663e-06, |
| "loss": 0.0982, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7502274795268425, |
| "grad_norm": 1.344045724677696, |
| "learning_rate": 4.727678509421229e-06, |
| "loss": 0.0922, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7506824385805277, |
| "grad_norm": 1.1045854705826224, |
| "learning_rate": 4.727354066014968e-06, |
| "loss": 0.0704, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7511373976342129, |
| "grad_norm": 1.5988720844668791, |
| "learning_rate": 4.727029440599391e-06, |
| "loss": 0.1066, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7515923566878981, |
| "grad_norm": 1.3512878420396681, |
| "learning_rate": 4.726704633201025e-06, |
| "loss": 0.074, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7520473157415832, |
| "grad_norm": 1.669678273086279, |
| "learning_rate": 4.726379643846412e-06, |
| "loss": 0.1167, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.7525022747952684, |
| "grad_norm": 1.8860050110009976, |
| "learning_rate": 4.726054472562109e-06, |
| "loss": 0.1203, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.7529572338489536, |
| "grad_norm": 1.1328199081442367, |
| "learning_rate": 4.725729119374687e-06, |
| "loss": 0.0715, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7534121929026387, |
| "grad_norm": 1.369550149899098, |
| "learning_rate": 4.725403584310734e-06, |
| "loss": 0.0788, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.7538671519563239, |
| "grad_norm": 1.7251897843263797, |
| "learning_rate": 4.725077867396849e-06, |
| "loss": 0.0951, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.7543221110100091, |
| "grad_norm": 1.4350282883675796, |
| "learning_rate": 4.724751968659648e-06, |
| "loss": 0.1096, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.7547770700636943, |
| "grad_norm": 1.9342343144020262, |
| "learning_rate": 4.724425888125764e-06, |
| "loss": 0.125, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.7552320291173794, |
| "grad_norm": 1.6341803441145442, |
| "learning_rate": 4.724099625821842e-06, |
| "loss": 0.0945, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7556869881710646, |
| "grad_norm": 1.545830512814091, |
| "learning_rate": 4.723773181774543e-06, |
| "loss": 0.0961, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.7561419472247498, |
| "grad_norm": 1.7914456776458303, |
| "learning_rate": 4.723446556010542e-06, |
| "loss": 0.1092, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.7565969062784349, |
| "grad_norm": 1.2264032188306588, |
| "learning_rate": 4.7231197485565275e-06, |
| "loss": 0.096, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.7570518653321201, |
| "grad_norm": 1.838239870158386, |
| "learning_rate": 4.722792759439209e-06, |
| "loss": 0.129, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.7575068243858053, |
| "grad_norm": 1.8429853108458891, |
| "learning_rate": 4.722465588685302e-06, |
| "loss": 0.147, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.7579617834394905, |
| "grad_norm": 1.2105825230064677, |
| "learning_rate": 4.722138236321545e-06, |
| "loss": 0.0666, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.7584167424931756, |
| "grad_norm": 1.5830454148486297, |
| "learning_rate": 4.721810702374687e-06, |
| "loss": 0.0912, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.7588717015468608, |
| "grad_norm": 1.298617622670505, |
| "learning_rate": 4.721482986871491e-06, |
| "loss": 0.0787, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.759326660600546, |
| "grad_norm": 2.2458643789106105, |
| "learning_rate": 4.721155089838738e-06, |
| "loss": 0.1031, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.7597816196542311, |
| "grad_norm": 1.414978172323641, |
| "learning_rate": 4.720827011303222e-06, |
| "loss": 0.0909, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7602365787079163, |
| "grad_norm": 1.8584646816819383, |
| "learning_rate": 4.720498751291751e-06, |
| "loss": 0.1186, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.7606915377616015, |
| "grad_norm": 1.924659600493317, |
| "learning_rate": 4.72017030983115e-06, |
| "loss": 0.142, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.7611464968152867, |
| "grad_norm": 1.7938185155065802, |
| "learning_rate": 4.7198416869482575e-06, |
| "loss": 0.1118, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.7616014558689718, |
| "grad_norm": 2.011921232392505, |
| "learning_rate": 4.719512882669926e-06, |
| "loss": 0.1518, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.762056414922657, |
| "grad_norm": 1.2982227487003852, |
| "learning_rate": 4.719183897023027e-06, |
| "loss": 0.0693, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7625113739763422, |
| "grad_norm": 1.9792808150985886, |
| "learning_rate": 4.718854730034441e-06, |
| "loss": 0.1061, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.7629663330300273, |
| "grad_norm": 1.4920604204792802, |
| "learning_rate": 4.718525381731066e-06, |
| "loss": 0.0911, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.7634212920837125, |
| "grad_norm": 1.8909028849207012, |
| "learning_rate": 4.718195852139816e-06, |
| "loss": 0.1248, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.7638762511373977, |
| "grad_norm": 2.241114950796753, |
| "learning_rate": 4.717866141287618e-06, |
| "loss": 0.1693, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.7643312101910829, |
| "grad_norm": 1.6580808599028765, |
| "learning_rate": 4.717536249201416e-06, |
| "loss": 0.0957, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7647861692447679, |
| "grad_norm": 1.2734902743152507, |
| "learning_rate": 4.7172061759081646e-06, |
| "loss": 0.0793, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.7652411282984531, |
| "grad_norm": 1.5750820786177648, |
| "learning_rate": 4.716875921434838e-06, |
| "loss": 0.103, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.7656960873521383, |
| "grad_norm": 1.7945590491479675, |
| "learning_rate": 4.716545485808421e-06, |
| "loss": 0.1025, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.7661510464058234, |
| "grad_norm": 1.6260623314970664, |
| "learning_rate": 4.716214869055918e-06, |
| "loss": 0.1065, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.7666060054595086, |
| "grad_norm": 1.6283865070296875, |
| "learning_rate": 4.715884071204344e-06, |
| "loss": 0.1118, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7670609645131938, |
| "grad_norm": 1.3932748974606075, |
| "learning_rate": 4.715553092280731e-06, |
| "loss": 0.1022, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.767515923566879, |
| "grad_norm": 1.847468209296495, |
| "learning_rate": 4.7152219323121246e-06, |
| "loss": 0.1118, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.7679708826205641, |
| "grad_norm": 2.2984795110772978, |
| "learning_rate": 4.714890591325586e-06, |
| "loss": 0.1499, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.7684258416742493, |
| "grad_norm": 1.3869963208937237, |
| "learning_rate": 4.714559069348189e-06, |
| "loss": 0.082, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.7688808007279345, |
| "grad_norm": 1.8609913938687341, |
| "learning_rate": 4.714227366407027e-06, |
| "loss": 0.1227, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7693357597816196, |
| "grad_norm": 1.3603756636552151, |
| "learning_rate": 4.7138954825292035e-06, |
| "loss": 0.0837, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.7697907188353048, |
| "grad_norm": 1.4363757354044508, |
| "learning_rate": 4.71356341774184e-06, |
| "loss": 0.1016, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.77024567788899, |
| "grad_norm": 1.7266309692154667, |
| "learning_rate": 4.713231172072069e-06, |
| "loss": 0.1035, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.7707006369426752, |
| "grad_norm": 1.7559578771562407, |
| "learning_rate": 4.712898745547043e-06, |
| "loss": 0.1108, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.7711555959963603, |
| "grad_norm": 1.7003164181986268, |
| "learning_rate": 4.712566138193923e-06, |
| "loss": 0.1331, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.7716105550500455, |
| "grad_norm": 1.3433623006567543, |
| "learning_rate": 4.712233350039892e-06, |
| "loss": 0.0875, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.7720655141037307, |
| "grad_norm": 1.4038710385167128, |
| "learning_rate": 4.711900381112141e-06, |
| "loss": 0.0781, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.7725204731574158, |
| "grad_norm": 1.169674714312164, |
| "learning_rate": 4.71156723143788e-06, |
| "loss": 0.0811, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.772975432211101, |
| "grad_norm": 1.4291280792428351, |
| "learning_rate": 4.711233901044332e-06, |
| "loss": 0.0969, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.7734303912647862, |
| "grad_norm": 1.4493582239493352, |
| "learning_rate": 4.710900389958735e-06, |
| "loss": 0.1001, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7738853503184714, |
| "grad_norm": 1.942694182484742, |
| "learning_rate": 4.710566698208343e-06, |
| "loss": 0.1557, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.7743403093721565, |
| "grad_norm": 1.4803492252427144, |
| "learning_rate": 4.710232825820424e-06, |
| "loss": 0.0986, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.7747952684258417, |
| "grad_norm": 0.9755297818504668, |
| "learning_rate": 4.709898772822258e-06, |
| "loss": 0.0561, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.7752502274795269, |
| "grad_norm": 1.1791809604326482, |
| "learning_rate": 4.709564539241145e-06, |
| "loss": 0.0757, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.775705186533212, |
| "grad_norm": 1.2009047371661077, |
| "learning_rate": 4.709230125104396e-06, |
| "loss": 0.0884, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.7761601455868972, |
| "grad_norm": 1.1974081374389889, |
| "learning_rate": 4.708895530439339e-06, |
| "loss": 0.065, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.7766151046405824, |
| "grad_norm": 1.2627750540057827, |
| "learning_rate": 4.708560755273313e-06, |
| "loss": 0.0583, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.7770700636942676, |
| "grad_norm": 2.358488367779091, |
| "learning_rate": 4.7082257996336765e-06, |
| "loss": 0.1537, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.7775250227479527, |
| "grad_norm": 1.294851907930423, |
| "learning_rate": 4.707890663547801e-06, |
| "loss": 0.0933, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.7779799818016379, |
| "grad_norm": 1.4515186083780571, |
| "learning_rate": 4.7075553470430695e-06, |
| "loss": 0.1129, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.778434940855323, |
| "grad_norm": 2.243497892180013, |
| "learning_rate": 4.707219850146885e-06, |
| "loss": 0.1131, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.7788898999090081, |
| "grad_norm": 1.4994790836976213, |
| "learning_rate": 4.706884172886662e-06, |
| "loss": 0.1208, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.7793448589626933, |
| "grad_norm": 1.3372579159675713, |
| "learning_rate": 4.706548315289831e-06, |
| "loss": 0.0832, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.7797998180163785, |
| "grad_norm": 1.4278251897253653, |
| "learning_rate": 4.706212277383836e-06, |
| "loss": 0.0825, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.7802547770700637, |
| "grad_norm": 1.6733465665912612, |
| "learning_rate": 4.705876059196136e-06, |
| "loss": 0.1136, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.7807097361237488, |
| "grad_norm": 1.623215614746565, |
| "learning_rate": 4.705539660754208e-06, |
| "loss": 0.1073, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.781164695177434, |
| "grad_norm": 1.3278600542007308, |
| "learning_rate": 4.705203082085538e-06, |
| "loss": 0.0784, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.7816196542311192, |
| "grad_norm": 1.9615616543104608, |
| "learning_rate": 4.70486632321763e-06, |
| "loss": 0.1246, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.7820746132848043, |
| "grad_norm": 1.6162561833650906, |
| "learning_rate": 4.7045293841780034e-06, |
| "loss": 0.1011, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.7825295723384895, |
| "grad_norm": 1.1315087094780059, |
| "learning_rate": 4.704192264994193e-06, |
| "loss": 0.0728, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7829845313921747, |
| "grad_norm": 1.1842216997924317, |
| "learning_rate": 4.703854965693743e-06, |
| "loss": 0.0623, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.7834394904458599, |
| "grad_norm": 1.7587767128013225, |
| "learning_rate": 4.703517486304218e-06, |
| "loss": 0.1177, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.783894449499545, |
| "grad_norm": 1.339870047242403, |
| "learning_rate": 4.703179826853195e-06, |
| "loss": 0.0885, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.7843494085532302, |
| "grad_norm": 1.614840853064282, |
| "learning_rate": 4.702841987368265e-06, |
| "loss": 0.0832, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.7848043676069154, |
| "grad_norm": 1.333156859451678, |
| "learning_rate": 4.702503967877038e-06, |
| "loss": 0.0642, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.7852593266606005, |
| "grad_norm": 2.263050031311358, |
| "learning_rate": 4.702165768407132e-06, |
| "loss": 0.1535, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 1.6639628191313198, |
| "learning_rate": 4.701827388986185e-06, |
| "loss": 0.1104, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.7861692447679709, |
| "grad_norm": 1.5141275873838496, |
| "learning_rate": 4.701488829641845e-06, |
| "loss": 0.0812, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.7866242038216561, |
| "grad_norm": 1.3656110267192454, |
| "learning_rate": 4.701150090401782e-06, |
| "loss": 0.0882, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.7870791628753412, |
| "grad_norm": 1.2883384323068632, |
| "learning_rate": 4.700811171293673e-06, |
| "loss": 0.0874, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7875341219290264, |
| "grad_norm": 1.3671667581027154, |
| "learning_rate": 4.700472072345214e-06, |
| "loss": 0.105, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.7879890809827116, |
| "grad_norm": 1.5338726176497217, |
| "learning_rate": 4.700132793584113e-06, |
| "loss": 0.0953, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.7884440400363967, |
| "grad_norm": 1.442558179107586, |
| "learning_rate": 4.699793335038098e-06, |
| "loss": 0.0888, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.7888989990900819, |
| "grad_norm": 1.5541388069508446, |
| "learning_rate": 4.699453696734905e-06, |
| "loss": 0.0976, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.7893539581437671, |
| "grad_norm": 1.9432135282290401, |
| "learning_rate": 4.699113878702288e-06, |
| "loss": 0.141, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.7898089171974523, |
| "grad_norm": 1.0899335114210718, |
| "learning_rate": 4.698773880968017e-06, |
| "loss": 0.0587, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.7902638762511374, |
| "grad_norm": 0.8364235505875384, |
| "learning_rate": 4.698433703559874e-06, |
| "loss": 0.0407, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.7907188353048226, |
| "grad_norm": 0.9990422189505855, |
| "learning_rate": 4.698093346505656e-06, |
| "loss": 0.0577, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.7911737943585078, |
| "grad_norm": 1.6422962944234947, |
| "learning_rate": 4.697752809833177e-06, |
| "loss": 0.1103, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.7916287534121929, |
| "grad_norm": 1.9416159087916842, |
| "learning_rate": 4.697412093570263e-06, |
| "loss": 0.1188, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.792083712465878, |
| "grad_norm": 1.2746246892088609, |
| "learning_rate": 4.697071197744756e-06, |
| "loss": 0.0764, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.7925386715195633, |
| "grad_norm": 1.377041852744143, |
| "learning_rate": 4.6967301223845115e-06, |
| "loss": 0.0716, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.7929936305732485, |
| "grad_norm": 1.3143519637208256, |
| "learning_rate": 4.696388867517403e-06, |
| "loss": 0.0962, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.7934485896269335, |
| "grad_norm": 1.3897826616270026, |
| "learning_rate": 4.696047433171316e-06, |
| "loss": 0.0653, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.7939035486806187, |
| "grad_norm": 1.570502952913152, |
| "learning_rate": 4.695705819374149e-06, |
| "loss": 0.0789, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.7943585077343039, |
| "grad_norm": 1.2543848649594032, |
| "learning_rate": 4.695364026153818e-06, |
| "loss": 0.0804, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.794813466787989, |
| "grad_norm": 1.6107183046475493, |
| "learning_rate": 4.695022053538253e-06, |
| "loss": 0.0953, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.7952684258416742, |
| "grad_norm": 1.9045899767745609, |
| "learning_rate": 4.694679901555398e-06, |
| "loss": 0.1102, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.7957233848953594, |
| "grad_norm": 5.2618436961914385, |
| "learning_rate": 4.694337570233213e-06, |
| "loss": 0.1912, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.7961783439490446, |
| "grad_norm": 1.474591164189007, |
| "learning_rate": 4.693995059599672e-06, |
| "loss": 0.0896, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7966333030027297, |
| "grad_norm": 1.8428893564579294, |
| "learning_rate": 4.693652369682762e-06, |
| "loss": 0.1262, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.7970882620564149, |
| "grad_norm": 1.364047783874895, |
| "learning_rate": 4.693309500510487e-06, |
| "loss": 0.0655, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.7975432211101001, |
| "grad_norm": 1.410719847190726, |
| "learning_rate": 4.692966452110864e-06, |
| "loss": 0.0692, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.7979981801637852, |
| "grad_norm": 1.529856723780921, |
| "learning_rate": 4.6926232245119265e-06, |
| "loss": 0.1243, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.7984531392174704, |
| "grad_norm": 2.1726237253640917, |
| "learning_rate": 4.69227981774172e-06, |
| "loss": 0.1196, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.7989080982711556, |
| "grad_norm": 1.754980663107314, |
| "learning_rate": 4.691936231828308e-06, |
| "loss": 0.0992, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.7993630573248408, |
| "grad_norm": 1.4886622831847642, |
| "learning_rate": 4.691592466799766e-06, |
| "loss": 0.1047, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.7998180163785259, |
| "grad_norm": 1.513862751936672, |
| "learning_rate": 4.691248522684184e-06, |
| "loss": 0.098, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8002729754322111, |
| "grad_norm": 1.2579558512837248, |
| "learning_rate": 4.690904399509668e-06, |
| "loss": 0.0682, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8007279344858963, |
| "grad_norm": 2.2566273953343217, |
| "learning_rate": 4.69056009730434e-06, |
| "loss": 0.1303, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8011828935395814, |
| "grad_norm": 2.00717633449877, |
| "learning_rate": 4.690215616096332e-06, |
| "loss": 0.1321, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8016378525932666, |
| "grad_norm": 1.7602932248302383, |
| "learning_rate": 4.689870955913796e-06, |
| "loss": 0.1131, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8020928116469518, |
| "grad_norm": 1.0775248513760711, |
| "learning_rate": 4.689526116784894e-06, |
| "loss": 0.0681, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.802547770700637, |
| "grad_norm": 1.7261882758690685, |
| "learning_rate": 4.689181098737805e-06, |
| "loss": 0.0936, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8030027297543221, |
| "grad_norm": 0.9831258196463247, |
| "learning_rate": 4.6888359018007235e-06, |
| "loss": 0.0582, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8034576888080073, |
| "grad_norm": 1.32544308940629, |
| "learning_rate": 4.6884905260018565e-06, |
| "loss": 0.0818, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8039126478616925, |
| "grad_norm": 1.5291799564685358, |
| "learning_rate": 4.688144971369427e-06, |
| "loss": 0.0942, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8043676069153776, |
| "grad_norm": 1.6829263485608386, |
| "learning_rate": 4.687799237931673e-06, |
| "loss": 0.0901, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8048225659690628, |
| "grad_norm": 1.7708955797300434, |
| "learning_rate": 4.687453325716844e-06, |
| "loss": 0.1385, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.805277525022748, |
| "grad_norm": 1.7812931298131665, |
| "learning_rate": 4.687107234753208e-06, |
| "loss": 0.0999, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8057324840764332, |
| "grad_norm": 1.7046411065133646, |
| "learning_rate": 4.686760965069046e-06, |
| "loss": 0.0926, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8061874431301183, |
| "grad_norm": 1.3399041945985406, |
| "learning_rate": 4.686414516692653e-06, |
| "loss": 0.106, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8066424021838035, |
| "grad_norm": 1.3995619121814422, |
| "learning_rate": 4.68606788965234e-06, |
| "loss": 0.0946, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8070973612374887, |
| "grad_norm": 2.0667835204996154, |
| "learning_rate": 4.68572108397643e-06, |
| "loss": 0.1627, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8075523202911737, |
| "grad_norm": 1.578406572399004, |
| "learning_rate": 4.6853740996932645e-06, |
| "loss": 0.1036, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8080072793448589, |
| "grad_norm": 1.4979933750877805, |
| "learning_rate": 4.685026936831196e-06, |
| "loss": 0.0957, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8084622383985441, |
| "grad_norm": 1.3907524265462887, |
| "learning_rate": 4.684679595418595e-06, |
| "loss": 0.0755, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8089171974522293, |
| "grad_norm": 1.60762031761907, |
| "learning_rate": 4.684332075483843e-06, |
| "loss": 0.078, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8093721565059144, |
| "grad_norm": 1.7415989004852326, |
| "learning_rate": 4.6839843770553374e-06, |
| "loss": 0.1354, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8098271155595996, |
| "grad_norm": 1.2070058349523003, |
| "learning_rate": 4.683636500161491e-06, |
| "loss": 0.0801, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8102820746132848, |
| "grad_norm": 2.223701811300553, |
| "learning_rate": 4.683288444830732e-06, |
| "loss": 0.155, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8107370336669699, |
| "grad_norm": 1.386561374212108, |
| "learning_rate": 4.6829402110915015e-06, |
| "loss": 0.0788, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8111919927206551, |
| "grad_norm": 1.5941557438159017, |
| "learning_rate": 4.682591798972253e-06, |
| "loss": 0.0965, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8116469517743403, |
| "grad_norm": 1.2984908291809154, |
| "learning_rate": 4.682243208501461e-06, |
| "loss": 0.0785, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8121019108280255, |
| "grad_norm": 1.3916802407448716, |
| "learning_rate": 4.681894439707609e-06, |
| "loss": 0.0707, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8125568698817106, |
| "grad_norm": 1.275188321376173, |
| "learning_rate": 4.681545492619195e-06, |
| "loss": 0.0845, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8130118289353958, |
| "grad_norm": 1.6216179048744568, |
| "learning_rate": 4.681196367264736e-06, |
| "loss": 0.1003, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.813466787989081, |
| "grad_norm": 1.6523815915742526, |
| "learning_rate": 4.680847063672761e-06, |
| "loss": 0.1067, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8139217470427661, |
| "grad_norm": 1.2993987478913556, |
| "learning_rate": 4.680497581871811e-06, |
| "loss": 0.0935, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8143767060964513, |
| "grad_norm": 1.9221257157278642, |
| "learning_rate": 4.680147921890447e-06, |
| "loss": 0.1051, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8148316651501365, |
| "grad_norm": 1.6348625375140673, |
| "learning_rate": 4.67979808375724e-06, |
| "loss": 0.0903, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8152866242038217, |
| "grad_norm": 1.591568957822019, |
| "learning_rate": 4.679448067500777e-06, |
| "loss": 0.0909, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8157415832575068, |
| "grad_norm": 10.097792186896083, |
| "learning_rate": 4.67909787314966e-06, |
| "loss": 0.2083, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.816196542311192, |
| "grad_norm": 1.7756594176954685, |
| "learning_rate": 4.678747500732505e-06, |
| "loss": 0.12, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8166515013648772, |
| "grad_norm": 1.292124863258569, |
| "learning_rate": 4.6783969502779455e-06, |
| "loss": 0.091, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8171064604185623, |
| "grad_norm": 1.6688778356922547, |
| "learning_rate": 4.6780462218146236e-06, |
| "loss": 0.1165, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.8175614194722475, |
| "grad_norm": 1.7180044442845732, |
| "learning_rate": 4.6776953153712005e-06, |
| "loss": 0.136, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8180163785259327, |
| "grad_norm": 1.480949834935634, |
| "learning_rate": 4.67734423097635e-06, |
| "loss": 0.1014, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8184713375796179, |
| "grad_norm": 1.1428278249187394, |
| "learning_rate": 4.676992968658762e-06, |
| "loss": 0.095, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.818926296633303, |
| "grad_norm": 1.2399013375886503, |
| "learning_rate": 4.67664152844714e-06, |
| "loss": 0.0844, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8193812556869882, |
| "grad_norm": 1.7138722388873069, |
| "learning_rate": 4.676289910370202e-06, |
| "loss": 0.0843, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8198362147406734, |
| "grad_norm": 2.1122540273521917, |
| "learning_rate": 4.675938114456682e-06, |
| "loss": 0.1242, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8202911737943585, |
| "grad_norm": 1.2962455851687862, |
| "learning_rate": 4.675586140735323e-06, |
| "loss": 0.0865, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8207461328480437, |
| "grad_norm": 1.758157481346009, |
| "learning_rate": 4.675233989234891e-06, |
| "loss": 0.11, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8212010919017289, |
| "grad_norm": 1.1367235297205203, |
| "learning_rate": 4.67488165998416e-06, |
| "loss": 0.0712, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.821656050955414, |
| "grad_norm": 1.7362201964597195, |
| "learning_rate": 4.674529153011922e-06, |
| "loss": 0.125, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8221110100090991, |
| "grad_norm": 1.8194542425018903, |
| "learning_rate": 4.674176468346982e-06, |
| "loss": 0.1211, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8225659690627843, |
| "grad_norm": 1.266991443456481, |
| "learning_rate": 4.673823606018158e-06, |
| "loss": 0.0728, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8230209281164695, |
| "grad_norm": 1.5932681379034783, |
| "learning_rate": 4.673470566054288e-06, |
| "loss": 0.1112, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8234758871701547, |
| "grad_norm": 1.6394037488609823, |
| "learning_rate": 4.673117348484217e-06, |
| "loss": 0.0904, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8239308462238398, |
| "grad_norm": 1.6075069533857789, |
| "learning_rate": 4.672763953336811e-06, |
| "loss": 0.0851, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.824385805277525, |
| "grad_norm": 2.0887533751903953, |
| "learning_rate": 4.672410380640946e-06, |
| "loss": 0.1259, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8248407643312102, |
| "grad_norm": 1.4047419878725926, |
| "learning_rate": 4.672056630425516e-06, |
| "loss": 0.0926, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8252957233848953, |
| "grad_norm": 1.3090222394661126, |
| "learning_rate": 4.671702702719426e-06, |
| "loss": 0.0815, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8257506824385805, |
| "grad_norm": 1.4844416960380937, |
| "learning_rate": 4.671348597551599e-06, |
| "loss": 0.1006, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8262056414922657, |
| "grad_norm": 1.5208683282093747, |
| "learning_rate": 4.670994314950971e-06, |
| "loss": 0.0889, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8266606005459509, |
| "grad_norm": 1.4736427666739214, |
| "learning_rate": 4.6706398549464905e-06, |
| "loss": 0.0733, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.827115559599636, |
| "grad_norm": 0.9170331048659165, |
| "learning_rate": 4.670285217567124e-06, |
| "loss": 0.0444, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8275705186533212, |
| "grad_norm": 1.53493498663163, |
| "learning_rate": 4.6699304028418516e-06, |
| "loss": 0.1041, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8280254777070064, |
| "grad_norm": 1.8019365470508273, |
| "learning_rate": 4.669575410799665e-06, |
| "loss": 0.0978, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8284804367606915, |
| "grad_norm": 1.2273449371459113, |
| "learning_rate": 4.669220241469573e-06, |
| "loss": 0.0943, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8289353958143767, |
| "grad_norm": 1.3479797120899357, |
| "learning_rate": 4.668864894880599e-06, |
| "loss": 0.0971, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8293903548680619, |
| "grad_norm": 4.546793784746178, |
| "learning_rate": 4.668509371061781e-06, |
| "loss": 0.2014, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8298453139217471, |
| "grad_norm": 2.1255643817651406, |
| "learning_rate": 4.668153670042171e-06, |
| "loss": 0.1161, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8303002729754322, |
| "grad_norm": 1.2727939490852902, |
| "learning_rate": 4.667797791850833e-06, |
| "loss": 0.0747, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8307552320291174, |
| "grad_norm": 1.4060666687371306, |
| "learning_rate": 4.6674417365168495e-06, |
| "loss": 0.0844, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.8312101910828026, |
| "grad_norm": 1.761485619412532, |
| "learning_rate": 4.667085504069315e-06, |
| "loss": 0.1116, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8316651501364877, |
| "grad_norm": 1.9328273968129432, |
| "learning_rate": 4.66672909453734e-06, |
| "loss": 0.1475, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8321201091901729, |
| "grad_norm": 8.632824223723155, |
| "learning_rate": 4.6663725079500485e-06, |
| "loss": 0.1256, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8325750682438581, |
| "grad_norm": 1.6100163480643372, |
| "learning_rate": 4.666015744336578e-06, |
| "loss": 0.0792, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8330300272975433, |
| "grad_norm": 1.0641733424560582, |
| "learning_rate": 4.665658803726083e-06, |
| "loss": 0.0793, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.8334849863512284, |
| "grad_norm": 1.177632875705685, |
| "learning_rate": 4.6653016861477315e-06, |
| "loss": 0.0757, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8339399454049136, |
| "grad_norm": 1.3697702067415636, |
| "learning_rate": 4.664944391630704e-06, |
| "loss": 0.0762, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8343949044585988, |
| "grad_norm": 1.529924651084583, |
| "learning_rate": 4.664586920204197e-06, |
| "loss": 0.0942, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8348498635122839, |
| "grad_norm": 1.5597346422795266, |
| "learning_rate": 4.664229271897422e-06, |
| "loss": 0.0807, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.835304822565969, |
| "grad_norm": 1.1979216966579072, |
| "learning_rate": 4.663871446739606e-06, |
| "loss": 0.1023, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8357597816196543, |
| "grad_norm": 1.9885793595161931, |
| "learning_rate": 4.663513444759986e-06, |
| "loss": 0.1081, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8362147406733395, |
| "grad_norm": 1.803945786045615, |
| "learning_rate": 4.663155265987818e-06, |
| "loss": 0.1046, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8366696997270245, |
| "grad_norm": 1.4426781581496406, |
| "learning_rate": 4.66279691045237e-06, |
| "loss": 0.0862, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8371246587807097, |
| "grad_norm": 2.026712182903179, |
| "learning_rate": 4.662438378182927e-06, |
| "loss": 0.1318, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8375796178343949, |
| "grad_norm": 1.3508804333427062, |
| "learning_rate": 4.662079669208783e-06, |
| "loss": 0.0855, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.83803457688808, |
| "grad_norm": 1.8060788779967734, |
| "learning_rate": 4.661720783559254e-06, |
| "loss": 0.118, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8384895359417652, |
| "grad_norm": 1.3650594998874732, |
| "learning_rate": 4.661361721263664e-06, |
| "loss": 0.0792, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8389444949954504, |
| "grad_norm": 1.5308874946054334, |
| "learning_rate": 4.661002482351355e-06, |
| "loss": 0.092, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8393994540491356, |
| "grad_norm": 1.5787726497224914, |
| "learning_rate": 4.660643066851682e-06, |
| "loss": 0.0864, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8398544131028207, |
| "grad_norm": 1.3050034033987155, |
| "learning_rate": 4.6602834747940155e-06, |
| "loss": 0.0882, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8403093721565059, |
| "grad_norm": 1.3933693896920527, |
| "learning_rate": 4.6599237062077385e-06, |
| "loss": 0.082, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8407643312101911, |
| "grad_norm": 1.4441581935162036, |
| "learning_rate": 4.65956376112225e-06, |
| "loss": 0.0845, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8412192902638762, |
| "grad_norm": 1.0740044757490639, |
| "learning_rate": 4.659203639566965e-06, |
| "loss": 0.0579, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8416742493175614, |
| "grad_norm": 1.4897808416015064, |
| "learning_rate": 4.658843341571308e-06, |
| "loss": 0.0928, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8421292083712466, |
| "grad_norm": 1.2736939992740985, |
| "learning_rate": 4.6584828671647235e-06, |
| "loss": 0.0678, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8425841674249318, |
| "grad_norm": 1.7454965393572843, |
| "learning_rate": 4.658122216376666e-06, |
| "loss": 0.1273, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8430391264786169, |
| "grad_norm": 2.203665099645685, |
| "learning_rate": 4.657761389236607e-06, |
| "loss": 0.1483, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8434940855323021, |
| "grad_norm": 2.2587200410334796, |
| "learning_rate": 4.657400385774032e-06, |
| "loss": 0.1392, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8439490445859873, |
| "grad_norm": 1.4611165706322784, |
| "learning_rate": 4.65703920601844e-06, |
| "loss": 0.108, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8444040036396724, |
| "grad_norm": 1.4687908651365826, |
| "learning_rate": 4.656677849999345e-06, |
| "loss": 0.0861, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8448589626933576, |
| "grad_norm": 1.2067561638456004, |
| "learning_rate": 4.656316317746275e-06, |
| "loss": 0.0591, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8453139217470428, |
| "grad_norm": 1.4053103637325914, |
| "learning_rate": 4.655954609288775e-06, |
| "loss": 0.0913, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.845768880800728, |
| "grad_norm": 1.5935104104332813, |
| "learning_rate": 4.655592724656399e-06, |
| "loss": 0.101, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8462238398544131, |
| "grad_norm": 1.1076063342242028, |
| "learning_rate": 4.655230663878721e-06, |
| "loss": 0.0682, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8466787989080983, |
| "grad_norm": 1.3547571746668823, |
| "learning_rate": 4.654868426985326e-06, |
| "loss": 0.0783, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.8471337579617835, |
| "grad_norm": 2.189936562068025, |
| "learning_rate": 4.654506014005814e-06, |
| "loss": 0.1489, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.8475887170154686, |
| "grad_norm": 1.8695626003234893, |
| "learning_rate": 4.6541434249698e-06, |
| "loss": 0.1257, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.8480436760691538, |
| "grad_norm": 1.0837368326622652, |
| "learning_rate": 4.6537806599069144e-06, |
| "loss": 0.0739, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.848498635122839, |
| "grad_norm": 1.7007189798619473, |
| "learning_rate": 4.653417718846799e-06, |
| "loss": 0.1008, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8489535941765242, |
| "grad_norm": 2.0060980879747476, |
| "learning_rate": 4.6530546018191126e-06, |
| "loss": 0.1085, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.8494085532302093, |
| "grad_norm": 2.2978969469139594, |
| "learning_rate": 4.652691308853526e-06, |
| "loss": 0.0987, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.8498635122838945, |
| "grad_norm": 1.5780683821835888, |
| "learning_rate": 4.652327839979729e-06, |
| "loss": 0.0968, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.8503184713375797, |
| "grad_norm": 1.4785104975671204, |
| "learning_rate": 4.651964195227419e-06, |
| "loss": 0.0789, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.8507734303912647, |
| "grad_norm": 1.3276789113426002, |
| "learning_rate": 4.651600374626315e-06, |
| "loss": 0.1042, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8512283894449499, |
| "grad_norm": 1.2972649195872694, |
| "learning_rate": 4.651236378206144e-06, |
| "loss": 0.0856, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.8516833484986351, |
| "grad_norm": 1.7278397992212022, |
| "learning_rate": 4.650872205996651e-06, |
| "loss": 0.1148, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.8521383075523203, |
| "grad_norm": 1.8216586488949666, |
| "learning_rate": 4.650507858027595e-06, |
| "loss": 0.1079, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.8525932666060054, |
| "grad_norm": 1.4139889414338878, |
| "learning_rate": 4.6501433343287475e-06, |
| "loss": 0.0875, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.8530482256596906, |
| "grad_norm": 1.7248584454041247, |
| "learning_rate": 4.6497786349298975e-06, |
| "loss": 0.0971, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8535031847133758, |
| "grad_norm": 1.4315172966956178, |
| "learning_rate": 4.649413759860846e-06, |
| "loss": 0.0786, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.8539581437670609, |
| "grad_norm": 1.753091182567414, |
| "learning_rate": 4.649048709151408e-06, |
| "loss": 0.1209, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.8544131028207461, |
| "grad_norm": 1.1669060948418768, |
| "learning_rate": 4.648683482831415e-06, |
| "loss": 0.0806, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.8548680618744313, |
| "grad_norm": 1.9265886537142733, |
| "learning_rate": 4.648318080930711e-06, |
| "loss": 0.1334, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.8553230209281165, |
| "grad_norm": 4.314311448137681, |
| "learning_rate": 4.647952503479154e-06, |
| "loss": 0.178, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8557779799818016, |
| "grad_norm": 1.7124103845535494, |
| "learning_rate": 4.6475867505066195e-06, |
| "loss": 0.1141, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.8562329390354868, |
| "grad_norm": 1.7108302555198733, |
| "learning_rate": 4.647220822042995e-06, |
| "loss": 0.123, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.856687898089172, |
| "grad_norm": 1.5372453395034074, |
| "learning_rate": 4.64685471811818e-06, |
| "loss": 0.0801, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 1.5126705629909598, |
| "learning_rate": 4.646488438762094e-06, |
| "loss": 0.1193, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.8575978161965423, |
| "grad_norm": 1.9269079385312733, |
| "learning_rate": 4.646121984004666e-06, |
| "loss": 0.1244, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.8580527752502275, |
| "grad_norm": 2.2684708844494144, |
| "learning_rate": 4.64575535387584e-06, |
| "loss": 0.1279, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.8585077343039127, |
| "grad_norm": 1.670087782048151, |
| "learning_rate": 4.645388548405578e-06, |
| "loss": 0.1023, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.8589626933575978, |
| "grad_norm": 1.8607124006440674, |
| "learning_rate": 4.645021567623852e-06, |
| "loss": 0.1094, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.859417652411283, |
| "grad_norm": 1.5175623385645085, |
| "learning_rate": 4.644654411560651e-06, |
| "loss": 0.0996, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.8598726114649682, |
| "grad_norm": 1.6936906161685268, |
| "learning_rate": 4.644287080245975e-06, |
| "loss": 0.0967, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8603275705186533, |
| "grad_norm": 1.702801567909897, |
| "learning_rate": 4.643919573709843e-06, |
| "loss": 0.1202, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.8607825295723385, |
| "grad_norm": 1.8484706111091351, |
| "learning_rate": 4.6435518919822854e-06, |
| "loss": 0.1189, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.8612374886260237, |
| "grad_norm": 1.3074850022658593, |
| "learning_rate": 4.643184035093348e-06, |
| "loss": 0.0765, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.8616924476797089, |
| "grad_norm": 1.5927282725036787, |
| "learning_rate": 4.642816003073089e-06, |
| "loss": 0.0905, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.862147406733394, |
| "grad_norm": 1.2778897141301788, |
| "learning_rate": 4.6424477959515836e-06, |
| "loss": 0.0949, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8626023657870792, |
| "grad_norm": 1.4290175115369155, |
| "learning_rate": 4.642079413758919e-06, |
| "loss": 0.0913, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.8630573248407644, |
| "grad_norm": 1.4520897103313697, |
| "learning_rate": 4.641710856525199e-06, |
| "loss": 0.0896, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.8635122838944495, |
| "grad_norm": 1.6787474017663324, |
| "learning_rate": 4.641342124280539e-06, |
| "loss": 0.1299, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.8639672429481347, |
| "grad_norm": 1.1624658106387618, |
| "learning_rate": 4.6409732170550705e-06, |
| "loss": 0.0734, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.8644222020018199, |
| "grad_norm": 1.4729283765471304, |
| "learning_rate": 4.64060413487894e-06, |
| "loss": 0.1152, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.864877161055505, |
| "grad_norm": 1.2495961485560472, |
| "learning_rate": 4.640234877782306e-06, |
| "loss": 0.07, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.8653321201091901, |
| "grad_norm": 1.3795532525061756, |
| "learning_rate": 4.639865445795344e-06, |
| "loss": 0.0664, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.8657870791628753, |
| "grad_norm": 1.1223729388488364, |
| "learning_rate": 4.63949583894824e-06, |
| "loss": 0.0746, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.8662420382165605, |
| "grad_norm": 1.8028240933781334, |
| "learning_rate": 4.639126057271199e-06, |
| "loss": 0.1168, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.8666969972702456, |
| "grad_norm": 2.2543814411508585, |
| "learning_rate": 4.6387561007944355e-06, |
| "loss": 0.1905, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.8671519563239308, |
| "grad_norm": 1.6271318494510778, |
| "learning_rate": 4.638385969548183e-06, |
| "loss": 0.1309, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.867606915377616, |
| "grad_norm": 1.2925438472778907, |
| "learning_rate": 4.638015663562686e-06, |
| "loss": 0.0935, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.8680618744313012, |
| "grad_norm": 1.5349623239831232, |
| "learning_rate": 4.637645182868204e-06, |
| "loss": 0.0955, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.8685168334849863, |
| "grad_norm": 1.1090852503032294, |
| "learning_rate": 4.637274527495011e-06, |
| "loss": 0.0627, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.8689717925386715, |
| "grad_norm": 1.3576365082507504, |
| "learning_rate": 4.6369036974733955e-06, |
| "loss": 0.0892, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8694267515923567, |
| "grad_norm": 1.3013579320639557, |
| "learning_rate": 4.63653269283366e-06, |
| "loss": 0.0872, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.8698817106460418, |
| "grad_norm": 1.3423951865701553, |
| "learning_rate": 4.636161513606122e-06, |
| "loss": 0.1124, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.870336669699727, |
| "grad_norm": 1.5125877550581834, |
| "learning_rate": 4.6357901598211105e-06, |
| "loss": 0.1136, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.8707916287534122, |
| "grad_norm": 1.3908271970593282, |
| "learning_rate": 4.635418631508974e-06, |
| "loss": 0.0879, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.8712465878070974, |
| "grad_norm": 1.269444632546315, |
| "learning_rate": 4.635046928700069e-06, |
| "loss": 0.0963, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.8717015468607825, |
| "grad_norm": 1.5552630147374251, |
| "learning_rate": 4.634675051424771e-06, |
| "loss": 0.0938, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.8721565059144677, |
| "grad_norm": 1.2657711758454817, |
| "learning_rate": 4.634302999713468e-06, |
| "loss": 0.0772, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.8726114649681529, |
| "grad_norm": 2.5181910772046394, |
| "learning_rate": 4.633930773596563e-06, |
| "loss": 0.1552, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.873066424021838, |
| "grad_norm": 1.850598086876328, |
| "learning_rate": 4.633558373104472e-06, |
| "loss": 0.1332, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.8735213830755232, |
| "grad_norm": 1.3941232502211163, |
| "learning_rate": 4.633185798267625e-06, |
| "loss": 0.1069, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8739763421292084, |
| "grad_norm": 1.7644105148701474, |
| "learning_rate": 4.632813049116467e-06, |
| "loss": 0.112, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.8744313011828936, |
| "grad_norm": 1.6230790274291094, |
| "learning_rate": 4.63244012568146e-06, |
| "loss": 0.0926, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.8748862602365787, |
| "grad_norm": 1.8353813347257433, |
| "learning_rate": 4.632067027993076e-06, |
| "loss": 0.1008, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.8753412192902639, |
| "grad_norm": 1.6963512631134225, |
| "learning_rate": 4.631693756081802e-06, |
| "loss": 0.1085, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.8757961783439491, |
| "grad_norm": 1.3821835844572639, |
| "learning_rate": 4.631320309978141e-06, |
| "loss": 0.1048, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.8762511373976342, |
| "grad_norm": 1.8006435232211586, |
| "learning_rate": 4.630946689712609e-06, |
| "loss": 0.1161, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.8767060964513194, |
| "grad_norm": 1.4677928424313746, |
| "learning_rate": 4.630572895315737e-06, |
| "loss": 0.0869, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.8771610555050046, |
| "grad_norm": 1.0039983293317123, |
| "learning_rate": 4.63019892681807e-06, |
| "loss": 0.0677, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.8776160145586898, |
| "grad_norm": 1.0380886358048869, |
| "learning_rate": 4.629824784250166e-06, |
| "loss": 0.0718, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.8780709736123748, |
| "grad_norm": 1.2258052916757236, |
| "learning_rate": 4.629450467642599e-06, |
| "loss": 0.0766, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.87852593266606, |
| "grad_norm": 1.2915914597688039, |
| "learning_rate": 4.629075977025957e-06, |
| "loss": 0.0936, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.8789808917197452, |
| "grad_norm": 1.5105085754665042, |
| "learning_rate": 4.62870131243084e-06, |
| "loss": 0.1132, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.8794358507734303, |
| "grad_norm": 1.1778695514626771, |
| "learning_rate": 4.628326473887865e-06, |
| "loss": 0.0628, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.8798908098271155, |
| "grad_norm": 1.3631999608539114, |
| "learning_rate": 4.627951461427663e-06, |
| "loss": 0.0732, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.8803457688808007, |
| "grad_norm": 1.2567766444164166, |
| "learning_rate": 4.627576275080876e-06, |
| "loss": 0.0753, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.8808007279344859, |
| "grad_norm": 1.3480297690817413, |
| "learning_rate": 4.627200914878165e-06, |
| "loss": 0.0896, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.881255686988171, |
| "grad_norm": 1.7364720181858189, |
| "learning_rate": 4.6268253808502005e-06, |
| "loss": 0.1262, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.8817106460418562, |
| "grad_norm": 1.0626111071231377, |
| "learning_rate": 4.626449673027671e-06, |
| "loss": 0.0606, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.8821656050955414, |
| "grad_norm": 1.3200320755167188, |
| "learning_rate": 4.626073791441278e-06, |
| "loss": 0.0863, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.8826205641492265, |
| "grad_norm": 1.6285352743033554, |
| "learning_rate": 4.625697736121735e-06, |
| "loss": 0.1074, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8830755232029117, |
| "grad_norm": 1.9619338739386996, |
| "learning_rate": 4.6253215070997735e-06, |
| "loss": 0.144, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.8835304822565969, |
| "grad_norm": 1.3282837474813922, |
| "learning_rate": 4.624945104406135e-06, |
| "loss": 0.0848, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.8839854413102821, |
| "grad_norm": 1.445768351624459, |
| "learning_rate": 4.624568528071579e-06, |
| "loss": 0.0781, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.8844404003639672, |
| "grad_norm": 1.4862896876471505, |
| "learning_rate": 4.624191778126879e-06, |
| "loss": 0.1004, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.8848953594176524, |
| "grad_norm": 1.764835736083021, |
| "learning_rate": 4.623814854602818e-06, |
| "loss": 0.126, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.8853503184713376, |
| "grad_norm": 1.5473555941944201, |
| "learning_rate": 4.623437757530198e-06, |
| "loss": 0.102, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.8858052775250227, |
| "grad_norm": 1.208960470604565, |
| "learning_rate": 4.623060486939835e-06, |
| "loss": 0.0877, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.8862602365787079, |
| "grad_norm": 2.075304609707155, |
| "learning_rate": 4.622683042862556e-06, |
| "loss": 0.1296, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.8867151956323931, |
| "grad_norm": 1.3616698341531555, |
| "learning_rate": 4.622305425329205e-06, |
| "loss": 0.0809, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.8871701546860783, |
| "grad_norm": 1.037693244784599, |
| "learning_rate": 4.621927634370638e-06, |
| "loss": 0.069, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8876251137397634, |
| "grad_norm": 1.385175285733036, |
| "learning_rate": 4.621549670017727e-06, |
| "loss": 0.0852, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.8880800727934486, |
| "grad_norm": 1.511337977341827, |
| "learning_rate": 4.6211715323013595e-06, |
| "loss": 0.0951, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.8885350318471338, |
| "grad_norm": 1.3257350384392963, |
| "learning_rate": 4.6207932212524325e-06, |
| "loss": 0.1089, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.8889899909008189, |
| "grad_norm": 1.644272679286999, |
| "learning_rate": 4.620414736901861e-06, |
| "loss": 0.1032, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.8894449499545041, |
| "grad_norm": 1.467980610687172, |
| "learning_rate": 4.620036079280573e-06, |
| "loss": 0.1087, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.8898999090081893, |
| "grad_norm": 1.7890199290097137, |
| "learning_rate": 4.619657248419511e-06, |
| "loss": 0.1292, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.8903548680618745, |
| "grad_norm": 1.2684892744943759, |
| "learning_rate": 4.61927824434963e-06, |
| "loss": 0.084, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.8908098271155596, |
| "grad_norm": 1.5369979197668246, |
| "learning_rate": 4.6188990671019015e-06, |
| "loss": 0.1069, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.8912647861692448, |
| "grad_norm": 1.4903088456543865, |
| "learning_rate": 4.618519716707311e-06, |
| "loss": 0.1046, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.89171974522293, |
| "grad_norm": 1.4234136171740401, |
| "learning_rate": 4.618140193196856e-06, |
| "loss": 0.0954, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.892174704276615, |
| "grad_norm": 1.4256868330149868, |
| "learning_rate": 4.61776049660155e-06, |
| "loss": 0.1061, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.8926296633303002, |
| "grad_norm": 1.0896312490692992, |
| "learning_rate": 4.61738062695242e-06, |
| "loss": 0.0685, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.8930846223839854, |
| "grad_norm": 1.6230164914703122, |
| "learning_rate": 4.617000584280506e-06, |
| "loss": 0.078, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.8935395814376706, |
| "grad_norm": 1.532700386976957, |
| "learning_rate": 4.616620368616866e-06, |
| "loss": 0.1004, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.8939945404913557, |
| "grad_norm": 2.1875161300164088, |
| "learning_rate": 4.616239979992568e-06, |
| "loss": 0.1662, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.8944494995450409, |
| "grad_norm": 1.5587825874123524, |
| "learning_rate": 4.615859418438695e-06, |
| "loss": 0.0949, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.8949044585987261, |
| "grad_norm": 1.0305034082384465, |
| "learning_rate": 4.615478683986345e-06, |
| "loss": 0.0661, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.8953594176524113, |
| "grad_norm": 2.128047847661886, |
| "learning_rate": 4.6150977766666315e-06, |
| "loss": 0.1668, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.8958143767060964, |
| "grad_norm": 3.489557977843618, |
| "learning_rate": 4.614716696510679e-06, |
| "loss": 0.1712, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.8962693357597816, |
| "grad_norm": 1.7769003003393216, |
| "learning_rate": 4.614335443549628e-06, |
| "loss": 0.1045, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8967242948134668, |
| "grad_norm": 1.1115237845484403, |
| "learning_rate": 4.613954017814633e-06, |
| "loss": 0.0622, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.8971792538671519, |
| "grad_norm": 1.5013208339193664, |
| "learning_rate": 4.613572419336862e-06, |
| "loss": 0.0901, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.8976342129208371, |
| "grad_norm": 1.8914963811365617, |
| "learning_rate": 4.613190648147497e-06, |
| "loss": 0.122, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.8980891719745223, |
| "grad_norm": 1.5463380525844979, |
| "learning_rate": 4.612808704277736e-06, |
| "loss": 0.1085, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.8985441310282075, |
| "grad_norm": 2.025461784041365, |
| "learning_rate": 4.612426587758789e-06, |
| "loss": 0.1584, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.8989990900818926, |
| "grad_norm": 1.3179625167865827, |
| "learning_rate": 4.612044298621881e-06, |
| "loss": 0.0899, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.8994540491355778, |
| "grad_norm": 1.9415947301083212, |
| "learning_rate": 4.611661836898252e-06, |
| "loss": 0.1089, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.899909008189263, |
| "grad_norm": 1.7954614950390768, |
| "learning_rate": 4.611279202619151e-06, |
| "loss": 0.1164, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9003639672429481, |
| "grad_norm": 1.6871453686213593, |
| "learning_rate": 4.61089639581585e-06, |
| "loss": 0.1137, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9008189262966333, |
| "grad_norm": 1.7734698119689802, |
| "learning_rate": 4.610513416519628e-06, |
| "loss": 0.1057, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9012738853503185, |
| "grad_norm": 1.4584208401217962, |
| "learning_rate": 4.6101302647617806e-06, |
| "loss": 0.1015, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9017288444040037, |
| "grad_norm": 1.6070445694078421, |
| "learning_rate": 4.609746940573617e-06, |
| "loss": 0.0938, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9021838034576888, |
| "grad_norm": 1.7455436886266977, |
| "learning_rate": 4.609363443986461e-06, |
| "loss": 0.1054, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.902638762511374, |
| "grad_norm": 1.539177132214102, |
| "learning_rate": 4.60897977503165e-06, |
| "loss": 0.1065, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9030937215650592, |
| "grad_norm": 2.1232448783228466, |
| "learning_rate": 4.608595933740536e-06, |
| "loss": 0.1509, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9035486806187443, |
| "grad_norm": 1.8306612455581022, |
| "learning_rate": 4.608211920144485e-06, |
| "loss": 0.1507, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9040036396724295, |
| "grad_norm": 1.8906981305909403, |
| "learning_rate": 4.607827734274876e-06, |
| "loss": 0.1279, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.9044585987261147, |
| "grad_norm": 1.2043844526137453, |
| "learning_rate": 4.607443376163104e-06, |
| "loss": 0.0587, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9049135577797999, |
| "grad_norm": 1.5805357354670655, |
| "learning_rate": 4.607058845840576e-06, |
| "loss": 0.0901, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.905368516833485, |
| "grad_norm": 1.0726472977315278, |
| "learning_rate": 4.606674143338714e-06, |
| "loss": 0.0632, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9058234758871702, |
| "grad_norm": 1.2973447399415932, |
| "learning_rate": 4.606289268688955e-06, |
| "loss": 0.0695, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9062784349408554, |
| "grad_norm": 1.5444078623124102, |
| "learning_rate": 4.605904221922749e-06, |
| "loss": 0.097, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9067333939945404, |
| "grad_norm": 1.2266104902516182, |
| "learning_rate": 4.6055190030715605e-06, |
| "loss": 0.0813, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9071883530482256, |
| "grad_norm": 1.367812480965606, |
| "learning_rate": 4.605133612166868e-06, |
| "loss": 0.0849, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9076433121019108, |
| "grad_norm": 1.477324926137721, |
| "learning_rate": 4.604748049240162e-06, |
| "loss": 0.1038, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.908098271155596, |
| "grad_norm": 1.451806186182134, |
| "learning_rate": 4.604362314322951e-06, |
| "loss": 0.0968, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9085532302092811, |
| "grad_norm": 1.3200703707312873, |
| "learning_rate": 4.603976407446756e-06, |
| "loss": 0.0824, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9090081892629663, |
| "grad_norm": 1.4302611512368968, |
| "learning_rate": 4.603590328643108e-06, |
| "loss": 0.0697, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9094631483166515, |
| "grad_norm": 1.3896738683560679, |
| "learning_rate": 4.60320407794356e-06, |
| "loss": 0.0845, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9099181073703366, |
| "grad_norm": 1.2636123499087144, |
| "learning_rate": 4.602817655379672e-06, |
| "loss": 0.0956, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9103730664240218, |
| "grad_norm": 1.6260464547343871, |
| "learning_rate": 4.602431060983022e-06, |
| "loss": 0.0964, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.910828025477707, |
| "grad_norm": 1.6435883240849032, |
| "learning_rate": 4.6020442947852e-06, |
| "loss": 0.1204, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9112829845313922, |
| "grad_norm": 1.1627579803449954, |
| "learning_rate": 4.6016573568178105e-06, |
| "loss": 0.0658, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9117379435850773, |
| "grad_norm": 1.4830323644470826, |
| "learning_rate": 4.601270247112473e-06, |
| "loss": 0.1393, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.9121929026387625, |
| "grad_norm": 1.749363317911088, |
| "learning_rate": 4.60088296570082e-06, |
| "loss": 0.0958, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9126478616924477, |
| "grad_norm": 1.0989290307772166, |
| "learning_rate": 4.600495512614499e-06, |
| "loss": 0.0725, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.9131028207461328, |
| "grad_norm": 1.9104698820808548, |
| "learning_rate": 4.60010788788517e-06, |
| "loss": 0.1416, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.913557779799818, |
| "grad_norm": 1.5990058826046467, |
| "learning_rate": 4.5997200915445095e-06, |
| "loss": 0.0988, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9140127388535032, |
| "grad_norm": 1.3776065339397323, |
| "learning_rate": 4.599332123624204e-06, |
| "loss": 0.0954, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.9144676979071884, |
| "grad_norm": 1.5565907276733302, |
| "learning_rate": 4.598943984155959e-06, |
| "loss": 0.1119, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9149226569608735, |
| "grad_norm": 1.3912866638954073, |
| "learning_rate": 4.598555673171489e-06, |
| "loss": 0.0786, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.9153776160145587, |
| "grad_norm": 1.205108886240087, |
| "learning_rate": 4.5981671907025275e-06, |
| "loss": 0.0718, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9158325750682439, |
| "grad_norm": 1.5559249472503263, |
| "learning_rate": 4.597778536780818e-06, |
| "loss": 0.0844, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.916287534121929, |
| "grad_norm": 1.5416078099963733, |
| "learning_rate": 4.597389711438121e-06, |
| "loss": 0.0985, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.9167424931756142, |
| "grad_norm": 1.1809384394020321, |
| "learning_rate": 4.597000714706207e-06, |
| "loss": 0.0586, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9171974522292994, |
| "grad_norm": 2.074795994465729, |
| "learning_rate": 4.596611546616865e-06, |
| "loss": 0.1433, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9176524112829846, |
| "grad_norm": 1.613757709909132, |
| "learning_rate": 4.596222207201896e-06, |
| "loss": 0.0977, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9181073703366697, |
| "grad_norm": 2.252514307571058, |
| "learning_rate": 4.595832696493115e-06, |
| "loss": 0.163, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9185623293903549, |
| "grad_norm": 1.3450701602494668, |
| "learning_rate": 4.59544301452235e-06, |
| "loss": 0.0838, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9190172884440401, |
| "grad_norm": 1.8222310782671887, |
| "learning_rate": 4.595053161321444e-06, |
| "loss": 0.0969, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9194722474977252, |
| "grad_norm": 1.355290969082531, |
| "learning_rate": 4.594663136922256e-06, |
| "loss": 0.0864, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.9199272065514104, |
| "grad_norm": 1.2303249878491525, |
| "learning_rate": 4.594272941356655e-06, |
| "loss": 0.0766, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.9203821656050956, |
| "grad_norm": 1.8690439214006958, |
| "learning_rate": 4.593882574656528e-06, |
| "loss": 0.123, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9208371246587808, |
| "grad_norm": 1.602742311595863, |
| "learning_rate": 4.5934920368537724e-06, |
| "loss": 0.0911, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9212920837124658, |
| "grad_norm": 1.4118302094020563, |
| "learning_rate": 4.593101327980301e-06, |
| "loss": 0.0983, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.921747042766151, |
| "grad_norm": 1.4886683990661254, |
| "learning_rate": 4.592710448068043e-06, |
| "loss": 0.1092, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.9222020018198362, |
| "grad_norm": 1.3005237265557243, |
| "learning_rate": 4.592319397148936e-06, |
| "loss": 0.0734, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9226569608735213, |
| "grad_norm": 1.5293431637156654, |
| "learning_rate": 4.5919281752549386e-06, |
| "loss": 0.0939, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9231119199272065, |
| "grad_norm": 1.9189898686105098, |
| "learning_rate": 4.5915367824180165e-06, |
| "loss": 0.1316, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.9235668789808917, |
| "grad_norm": 1.0704164238908853, |
| "learning_rate": 4.591145218670154e-06, |
| "loss": 0.0643, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9240218380345769, |
| "grad_norm": 1.7428021756867538, |
| "learning_rate": 4.590753484043348e-06, |
| "loss": 0.1002, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.924476797088262, |
| "grad_norm": 2.077744811906698, |
| "learning_rate": 4.590361578569609e-06, |
| "loss": 0.1469, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9249317561419472, |
| "grad_norm": 1.4526612982168714, |
| "learning_rate": 4.589969502280962e-06, |
| "loss": 0.0794, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9253867151956324, |
| "grad_norm": 1.127350657053563, |
| "learning_rate": 4.589577255209445e-06, |
| "loss": 0.0825, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9258416742493175, |
| "grad_norm": 1.5418218878324004, |
| "learning_rate": 4.589184837387112e-06, |
| "loss": 0.105, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9262966333030027, |
| "grad_norm": 1.3295279516903347, |
| "learning_rate": 4.588792248846028e-06, |
| "loss": 0.0806, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9267515923566879, |
| "grad_norm": 2.133194300217007, |
| "learning_rate": 4.588399489618274e-06, |
| "loss": 0.1101, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9272065514103731, |
| "grad_norm": 1.127900412980361, |
| "learning_rate": 4.588006559735945e-06, |
| "loss": 0.0822, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9276615104640582, |
| "grad_norm": 1.352574531632525, |
| "learning_rate": 4.587613459231149e-06, |
| "loss": 0.0883, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9281164695177434, |
| "grad_norm": 1.563891551285797, |
| "learning_rate": 4.5872201881360105e-06, |
| "loss": 0.108, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 1.4992265474183826, |
| "learning_rate": 4.586826746482662e-06, |
| "loss": 0.1117, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9290263876251137, |
| "grad_norm": 1.2789604094060583, |
| "learning_rate": 4.586433134303257e-06, |
| "loss": 0.0848, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9294813466787989, |
| "grad_norm": 1.5307618549025637, |
| "learning_rate": 4.586039351629959e-06, |
| "loss": 0.1006, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.9299363057324841, |
| "grad_norm": 1.0708631866469716, |
| "learning_rate": 4.585645398494944e-06, |
| "loss": 0.0654, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.9303912647861693, |
| "grad_norm": 1.3019370361509508, |
| "learning_rate": 4.585251274930406e-06, |
| "loss": 0.0953, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9308462238398544, |
| "grad_norm": 1.9864224586066603, |
| "learning_rate": 4.584856980968552e-06, |
| "loss": 0.1184, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9313011828935396, |
| "grad_norm": 1.405575544197549, |
| "learning_rate": 4.584462516641599e-06, |
| "loss": 0.0941, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.9317561419472248, |
| "grad_norm": 1.4689077044508474, |
| "learning_rate": 4.584067881981784e-06, |
| "loss": 0.0895, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.9322111010009099, |
| "grad_norm": 1.135702737688899, |
| "learning_rate": 4.583673077021352e-06, |
| "loss": 0.0632, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9326660600545951, |
| "grad_norm": 1.4668776418008769, |
| "learning_rate": 4.583278101792567e-06, |
| "loss": 0.1013, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9331210191082803, |
| "grad_norm": 1.4404528025951508, |
| "learning_rate": 4.582882956327704e-06, |
| "loss": 0.0827, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9335759781619655, |
| "grad_norm": 1.8675992636120633, |
| "learning_rate": 4.58248764065905e-06, |
| "loss": 0.1034, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9340309372156506, |
| "grad_norm": 1.3878361039518945, |
| "learning_rate": 4.582092154818912e-06, |
| "loss": 0.099, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9344858962693358, |
| "grad_norm": 1.562944151678329, |
| "learning_rate": 4.581696498839605e-06, |
| "loss": 0.1111, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.934940855323021, |
| "grad_norm": 1.3973553853526035, |
| "learning_rate": 4.581300672753462e-06, |
| "loss": 0.0983, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.935395814376706, |
| "grad_norm": 1.2166463674221564, |
| "learning_rate": 4.580904676592826e-06, |
| "loss": 0.066, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9358507734303912, |
| "grad_norm": 1.2616231646039247, |
| "learning_rate": 4.580508510390057e-06, |
| "loss": 0.064, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9363057324840764, |
| "grad_norm": 1.9200531356431896, |
| "learning_rate": 4.580112174177529e-06, |
| "loss": 0.1314, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.9367606915377616, |
| "grad_norm": 1.3598186993146166, |
| "learning_rate": 4.5797156679876274e-06, |
| "loss": 0.081, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9372156505914467, |
| "grad_norm": 1.4701853511040646, |
| "learning_rate": 4.5793189918527524e-06, |
| "loss": 0.1156, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9376706096451319, |
| "grad_norm": 1.4188407331643451, |
| "learning_rate": 4.5789221458053205e-06, |
| "loss": 0.0933, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9381255686988171, |
| "grad_norm": 1.907331731563626, |
| "learning_rate": 4.578525129877759e-06, |
| "loss": 0.0997, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9385805277525022, |
| "grad_norm": 1.3277724749366069, |
| "learning_rate": 4.5781279441025105e-06, |
| "loss": 0.1062, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.9390354868061874, |
| "grad_norm": 1.5198259374517775, |
| "learning_rate": 4.577730588512031e-06, |
| "loss": 0.0935, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9394904458598726, |
| "grad_norm": 1.8061595559890593, |
| "learning_rate": 4.577333063138791e-06, |
| "loss": 0.1106, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9399454049135578, |
| "grad_norm": 1.2937016104475862, |
| "learning_rate": 4.576935368015274e-06, |
| "loss": 0.0754, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9404003639672429, |
| "grad_norm": 1.739125309652933, |
| "learning_rate": 4.576537503173978e-06, |
| "loss": 0.1485, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9408553230209281, |
| "grad_norm": 1.7162057258895547, |
| "learning_rate": 4.576139468647415e-06, |
| "loss": 0.1067, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.9413102820746133, |
| "grad_norm": 1.2858589124219557, |
| "learning_rate": 4.575741264468111e-06, |
| "loss": 0.0721, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.9417652411282984, |
| "grad_norm": 1.7879372382669043, |
| "learning_rate": 4.575342890668603e-06, |
| "loss": 0.1117, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9422202001819836, |
| "grad_norm": 2.2199652906323903, |
| "learning_rate": 4.574944347281448e-06, |
| "loss": 0.1667, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.9426751592356688, |
| "grad_norm": 1.8603229351203365, |
| "learning_rate": 4.5745456343392114e-06, |
| "loss": 0.0966, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.943130118289354, |
| "grad_norm": 1.519516751229683, |
| "learning_rate": 4.574146751874473e-06, |
| "loss": 0.1081, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.9435850773430391, |
| "grad_norm": 1.550278973906248, |
| "learning_rate": 4.57374769991983e-06, |
| "loss": 0.1, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.9440400363967243, |
| "grad_norm": 1.2078297904504105, |
| "learning_rate": 4.573348478507888e-06, |
| "loss": 0.0778, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9444949954504095, |
| "grad_norm": 2.3422905994337397, |
| "learning_rate": 4.5729490876712725e-06, |
| "loss": 0.183, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.9449499545040946, |
| "grad_norm": 1.4654776784670076, |
| "learning_rate": 4.572549527442619e-06, |
| "loss": 0.1011, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.9454049135577798, |
| "grad_norm": 1.9579159955836072, |
| "learning_rate": 4.572149797854578e-06, |
| "loss": 0.0992, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.945859872611465, |
| "grad_norm": 1.2715287260379102, |
| "learning_rate": 4.571749898939813e-06, |
| "loss": 0.0774, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.9463148316651502, |
| "grad_norm": 1.3137532408543675, |
| "learning_rate": 4.5713498307310024e-06, |
| "loss": 0.0767, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9467697907188353, |
| "grad_norm": 1.1469787080077873, |
| "learning_rate": 4.570949593260837e-06, |
| "loss": 0.0657, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.9472247497725205, |
| "grad_norm": 1.4149315544143606, |
| "learning_rate": 4.570549186562024e-06, |
| "loss": 0.0973, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.9476797088262057, |
| "grad_norm": 1.653104733877059, |
| "learning_rate": 4.570148610667281e-06, |
| "loss": 0.1008, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.9481346678798908, |
| "grad_norm": 2.1146488269160493, |
| "learning_rate": 4.569747865609343e-06, |
| "loss": 0.137, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.948589626933576, |
| "grad_norm": 1.8861095602948619, |
| "learning_rate": 4.569346951420957e-06, |
| "loss": 0.1312, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9490445859872612, |
| "grad_norm": 1.7581609580097275, |
| "learning_rate": 4.568945868134882e-06, |
| "loss": 0.1201, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.9494995450409464, |
| "grad_norm": 1.6957832968300124, |
| "learning_rate": 4.568544615783894e-06, |
| "loss": 0.1165, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.9499545040946314, |
| "grad_norm": 1.2934181985102529, |
| "learning_rate": 4.568143194400782e-06, |
| "loss": 0.0779, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.9504094631483166, |
| "grad_norm": 1.453041608889173, |
| "learning_rate": 4.567741604018348e-06, |
| "loss": 0.0931, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.9508644222020018, |
| "grad_norm": 1.9468778720901316, |
| "learning_rate": 4.567339844669407e-06, |
| "loss": 0.123, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9513193812556869, |
| "grad_norm": 1.236496096780351, |
| "learning_rate": 4.566937916386791e-06, |
| "loss": 0.0929, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.9517743403093721, |
| "grad_norm": 1.2079482887760136, |
| "learning_rate": 4.566535819203342e-06, |
| "loss": 0.0836, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.9522292993630573, |
| "grad_norm": 1.6071846832424066, |
| "learning_rate": 4.566133553151918e-06, |
| "loss": 0.116, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.9526842584167425, |
| "grad_norm": 1.5917288384663462, |
| "learning_rate": 4.565731118265392e-06, |
| "loss": 0.1023, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.9531392174704276, |
| "grad_norm": 1.9630737595748788, |
| "learning_rate": 4.5653285145766465e-06, |
| "loss": 0.1162, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.9535941765241128, |
| "grad_norm": 0.9081536366325619, |
| "learning_rate": 4.564925742118583e-06, |
| "loss": 0.0504, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.954049135577798, |
| "grad_norm": 1.7249991192643004, |
| "learning_rate": 4.564522800924111e-06, |
| "loss": 0.1385, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.9545040946314831, |
| "grad_norm": 1.2321759022595342, |
| "learning_rate": 4.56411969102616e-06, |
| "loss": 0.0828, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.9549590536851683, |
| "grad_norm": 1.6485112601533525, |
| "learning_rate": 4.5637164124576695e-06, |
| "loss": 0.1108, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.9554140127388535, |
| "grad_norm": 1.5011949500492594, |
| "learning_rate": 4.563312965251594e-06, |
| "loss": 0.0789, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9558689717925387, |
| "grad_norm": 1.717044211260813, |
| "learning_rate": 4.562909349440899e-06, |
| "loss": 0.101, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.9563239308462238, |
| "grad_norm": 1.9690794320469502, |
| "learning_rate": 4.5625055650585695e-06, |
| "loss": 0.0887, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.956778889899909, |
| "grad_norm": 1.541908934769312, |
| "learning_rate": 4.562101612137599e-06, |
| "loss": 0.1032, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.9572338489535942, |
| "grad_norm": 1.1708242416085117, |
| "learning_rate": 4.561697490710998e-06, |
| "loss": 0.0786, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.9576888080072793, |
| "grad_norm": 1.7420543259304238, |
| "learning_rate": 4.561293200811787e-06, |
| "loss": 0.1027, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9581437670609645, |
| "grad_norm": 1.8800626433332384, |
| "learning_rate": 4.560888742473005e-06, |
| "loss": 0.126, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.9585987261146497, |
| "grad_norm": 1.7463314395249052, |
| "learning_rate": 4.560484115727703e-06, |
| "loss": 0.1144, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.9590536851683349, |
| "grad_norm": 1.3208911722315206, |
| "learning_rate": 4.560079320608942e-06, |
| "loss": 0.0831, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.95950864422202, |
| "grad_norm": 1.1738917496826642, |
| "learning_rate": 4.5596743571498035e-06, |
| "loss": 0.0845, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.9599636032757052, |
| "grad_norm": 1.5293179012953557, |
| "learning_rate": 4.5592692253833775e-06, |
| "loss": 0.0872, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9604185623293904, |
| "grad_norm": 1.3928507698223558, |
| "learning_rate": 4.5588639253427705e-06, |
| "loss": 0.0768, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.9608735213830755, |
| "grad_norm": 1.3009781755226655, |
| "learning_rate": 4.558458457061101e-06, |
| "loss": 0.0775, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.9613284804367607, |
| "grad_norm": 1.4250614240937063, |
| "learning_rate": 4.5580528205715024e-06, |
| "loss": 0.0891, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.9617834394904459, |
| "grad_norm": 1.9113281468795669, |
| "learning_rate": 4.557647015907121e-06, |
| "loss": 0.1099, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.9622383985441311, |
| "grad_norm": 1.2894594976227116, |
| "learning_rate": 4.557241043101118e-06, |
| "loss": 0.0779, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.9626933575978162, |
| "grad_norm": 1.6810992823881177, |
| "learning_rate": 4.556834902186667e-06, |
| "loss": 0.1397, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.9631483166515014, |
| "grad_norm": 1.5393066887459634, |
| "learning_rate": 4.556428593196956e-06, |
| "loss": 0.0935, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.9636032757051866, |
| "grad_norm": 2.3178365688110505, |
| "learning_rate": 4.556022116165189e-06, |
| "loss": 0.1796, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.9640582347588716, |
| "grad_norm": 1.646496340430589, |
| "learning_rate": 4.555615471124578e-06, |
| "loss": 0.0953, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.9645131938125568, |
| "grad_norm": 1.5571147184879746, |
| "learning_rate": 4.555208658108354e-06, |
| "loss": 0.084, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.964968152866242, |
| "grad_norm": 1.4911160901088942, |
| "learning_rate": 4.55480167714976e-06, |
| "loss": 0.0834, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.9654231119199272, |
| "grad_norm": 1.3047219270900265, |
| "learning_rate": 4.554394528282052e-06, |
| "loss": 0.1113, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.9658780709736123, |
| "grad_norm": 1.2420084154140223, |
| "learning_rate": 4.553987211538501e-06, |
| "loss": 0.0754, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.9663330300272975, |
| "grad_norm": 1.7628843384857757, |
| "learning_rate": 4.5535797269523906e-06, |
| "loss": 0.1157, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.9667879890809827, |
| "grad_norm": 1.3728918544083577, |
| "learning_rate": 4.55317207455702e-06, |
| "loss": 0.0886, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.9672429481346679, |
| "grad_norm": 1.3436653964528689, |
| "learning_rate": 4.552764254385697e-06, |
| "loss": 0.1031, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.967697907188353, |
| "grad_norm": 1.3924013137185407, |
| "learning_rate": 4.552356266471751e-06, |
| "loss": 0.0802, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.9681528662420382, |
| "grad_norm": 1.5484652367308942, |
| "learning_rate": 4.55194811084852e-06, |
| "loss": 0.1083, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.9686078252957234, |
| "grad_norm": 2.249626354585024, |
| "learning_rate": 4.551539787549354e-06, |
| "loss": 0.141, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.9690627843494085, |
| "grad_norm": 1.1787037168147345, |
| "learning_rate": 4.551131296607623e-06, |
| "loss": 0.0661, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9695177434030937, |
| "grad_norm": 1.3998025414191242, |
| "learning_rate": 4.550722638056703e-06, |
| "loss": 0.0778, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.9699727024567789, |
| "grad_norm": 1.3210786504328669, |
| "learning_rate": 4.550313811929993e-06, |
| "loss": 0.0768, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.9704276615104641, |
| "grad_norm": 1.5377350977690776, |
| "learning_rate": 4.549904818260895e-06, |
| "loss": 0.0925, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.9708826205641492, |
| "grad_norm": 1.6844155831955263, |
| "learning_rate": 4.549495657082834e-06, |
| "loss": 0.1208, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.9713375796178344, |
| "grad_norm": 1.4036987190850851, |
| "learning_rate": 4.549086328429242e-06, |
| "loss": 0.0736, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.9717925386715196, |
| "grad_norm": 1.7761277537947966, |
| "learning_rate": 4.548676832333569e-06, |
| "loss": 0.1008, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.9722474977252047, |
| "grad_norm": 1.455686991970635, |
| "learning_rate": 4.548267168829279e-06, |
| "loss": 0.0829, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.9727024567788899, |
| "grad_norm": 1.7910623421009375, |
| "learning_rate": 4.547857337949844e-06, |
| "loss": 0.0997, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.9731574158325751, |
| "grad_norm": 1.3409903276734334, |
| "learning_rate": 4.5474473397287556e-06, |
| "loss": 0.0757, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.9736123748862603, |
| "grad_norm": 1.2780097230437193, |
| "learning_rate": 4.547037174199517e-06, |
| "loss": 0.0828, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9740673339399454, |
| "grad_norm": 1.9829718978602024, |
| "learning_rate": 4.546626841395645e-06, |
| "loss": 0.1136, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.9745222929936306, |
| "grad_norm": 2.3916912753219504, |
| "learning_rate": 4.54621634135067e-06, |
| "loss": 0.106, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.9749772520473158, |
| "grad_norm": 1.7857367523019798, |
| "learning_rate": 4.545805674098136e-06, |
| "loss": 0.1179, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.9754322111010009, |
| "grad_norm": 1.8192016060510356, |
| "learning_rate": 4.545394839671601e-06, |
| "loss": 0.1153, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.9758871701546861, |
| "grad_norm": 1.7666738715643184, |
| "learning_rate": 4.544983838104637e-06, |
| "loss": 0.1155, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.9763421292083713, |
| "grad_norm": 1.669261781086701, |
| "learning_rate": 4.544572669430828e-06, |
| "loss": 0.1137, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.9767970882620565, |
| "grad_norm": 2.0898641673496603, |
| "learning_rate": 4.544161333683775e-06, |
| "loss": 0.1475, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.9772520473157416, |
| "grad_norm": 1.520441107938051, |
| "learning_rate": 4.543749830897088e-06, |
| "loss": 0.1196, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.9777070063694268, |
| "grad_norm": 1.778574007165815, |
| "learning_rate": 4.543338161104395e-06, |
| "loss": 0.1186, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.978161965423112, |
| "grad_norm": 1.4187578330640795, |
| "learning_rate": 4.542926324339335e-06, |
| "loss": 0.085, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.978616924476797, |
| "grad_norm": 1.7565219742471405, |
| "learning_rate": 4.542514320635561e-06, |
| "loss": 0.1288, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.9790718835304822, |
| "grad_norm": 1.8973174985472165, |
| "learning_rate": 4.542102150026741e-06, |
| "loss": 0.1309, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.9795268425841674, |
| "grad_norm": 1.8442728665682142, |
| "learning_rate": 4.541689812546556e-06, |
| "loss": 0.122, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.9799818016378526, |
| "grad_norm": 1.697144335252138, |
| "learning_rate": 4.541277308228698e-06, |
| "loss": 0.1176, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.9804367606915377, |
| "grad_norm": 1.435709999735577, |
| "learning_rate": 4.540864637106879e-06, |
| "loss": 0.0963, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.9808917197452229, |
| "grad_norm": 1.527626504741261, |
| "learning_rate": 4.540451799214817e-06, |
| "loss": 0.0698, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.9813466787989081, |
| "grad_norm": 1.3910853805372232, |
| "learning_rate": 4.540038794586248e-06, |
| "loss": 0.0872, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.9818016378525932, |
| "grad_norm": 1.2876712484569046, |
| "learning_rate": 4.539625623254923e-06, |
| "loss": 0.0746, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.9822565969062784, |
| "grad_norm": 1.9040440120946815, |
| "learning_rate": 4.539212285254601e-06, |
| "loss": 0.1374, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.9827115559599636, |
| "grad_norm": 1.368387986825355, |
| "learning_rate": 4.5387987806190615e-06, |
| "loss": 0.077, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9831665150136488, |
| "grad_norm": 1.5628650011793674, |
| "learning_rate": 4.538385109382093e-06, |
| "loss": 0.1122, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.9836214740673339, |
| "grad_norm": 1.536108666612923, |
| "learning_rate": 4.537971271577498e-06, |
| "loss": 0.113, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.9840764331210191, |
| "grad_norm": 1.1870522930285077, |
| "learning_rate": 4.537557267239093e-06, |
| "loss": 0.0829, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.9845313921747043, |
| "grad_norm": 1.1107704199660413, |
| "learning_rate": 4.537143096400712e-06, |
| "loss": 0.0727, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.9849863512283894, |
| "grad_norm": 1.6702825882583634, |
| "learning_rate": 4.536728759096195e-06, |
| "loss": 0.1012, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.9854413102820746, |
| "grad_norm": 1.4945842814197994, |
| "learning_rate": 4.536314255359402e-06, |
| "loss": 0.0859, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.9858962693357598, |
| "grad_norm": 1.5267042639312454, |
| "learning_rate": 4.535899585224204e-06, |
| "loss": 0.0904, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.986351228389445, |
| "grad_norm": 1.1363362545968427, |
| "learning_rate": 4.535484748724486e-06, |
| "loss": 0.0713, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.9868061874431301, |
| "grad_norm": 1.1419506135274826, |
| "learning_rate": 4.535069745894147e-06, |
| "loss": 0.092, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.9872611464968153, |
| "grad_norm": 1.4152116084634339, |
| "learning_rate": 4.534654576767098e-06, |
| "loss": 0.0964, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9877161055505005, |
| "grad_norm": 1.8913935291732986, |
| "learning_rate": 4.534239241377266e-06, |
| "loss": 0.1623, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.9881710646041856, |
| "grad_norm": 1.7108674711326843, |
| "learning_rate": 4.5338237397585895e-06, |
| "loss": 0.1366, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.9886260236578708, |
| "grad_norm": 1.867375764653619, |
| "learning_rate": 4.533408071945021e-06, |
| "loss": 0.0921, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.989080982711556, |
| "grad_norm": 1.6894369905832938, |
| "learning_rate": 4.532992237970528e-06, |
| "loss": 0.1123, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.9895359417652412, |
| "grad_norm": 1.2564113420047616, |
| "learning_rate": 4.532576237869091e-06, |
| "loss": 0.0639, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.9899909008189263, |
| "grad_norm": 1.1715905637128303, |
| "learning_rate": 4.5321600716747025e-06, |
| "loss": 0.0671, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.9904458598726115, |
| "grad_norm": 1.4423653806452021, |
| "learning_rate": 4.531743739421369e-06, |
| "loss": 0.0846, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.9909008189262967, |
| "grad_norm": 1.3936572340730857, |
| "learning_rate": 4.531327241143114e-06, |
| "loss": 0.0757, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.9913557779799818, |
| "grad_norm": 1.6605247652982158, |
| "learning_rate": 4.530910576873969e-06, |
| "loss": 0.0911, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.991810737033667, |
| "grad_norm": 1.4040852794165468, |
| "learning_rate": 4.530493746647984e-06, |
| "loss": 0.0822, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9922656960873522, |
| "grad_norm": 1.5953857991116012, |
| "learning_rate": 4.530076750499219e-06, |
| "loss": 0.1092, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.9927206551410374, |
| "grad_norm": 1.683735567141584, |
| "learning_rate": 4.52965958846175e-06, |
| "loss": 0.1118, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.9931756141947224, |
| "grad_norm": 1.2145587847450323, |
| "learning_rate": 4.529242260569665e-06, |
| "loss": 0.0771, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.9936305732484076, |
| "grad_norm": 2.1272377858020364, |
| "learning_rate": 4.528824766857067e-06, |
| "loss": 0.1349, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.9940855323020928, |
| "grad_norm": 1.6139933135537738, |
| "learning_rate": 4.5284071073580715e-06, |
| "loss": 0.1055, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.9945404913557779, |
| "grad_norm": 1.4819652610036196, |
| "learning_rate": 4.527989282106807e-06, |
| "loss": 0.0803, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.9949954504094631, |
| "grad_norm": 2.09368743936611, |
| "learning_rate": 4.527571291137416e-06, |
| "loss": 0.1076, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.9954504094631483, |
| "grad_norm": 1.7222186608794936, |
| "learning_rate": 4.527153134484056e-06, |
| "loss": 0.1173, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.9959053685168335, |
| "grad_norm": 1.3187846074985496, |
| "learning_rate": 4.5267348121808965e-06, |
| "loss": 0.0808, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.9963603275705186, |
| "grad_norm": 1.346265507805684, |
| "learning_rate": 4.526316324262121e-06, |
| "loss": 0.112, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9968152866242038, |
| "grad_norm": 1.341519514219881, |
| "learning_rate": 4.525897670761926e-06, |
| "loss": 0.0725, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.997270245677889, |
| "grad_norm": 1.5713885768234184, |
| "learning_rate": 4.525478851714522e-06, |
| "loss": 0.0977, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.9977252047315741, |
| "grad_norm": 1.5698524319504634, |
| "learning_rate": 4.525059867154133e-06, |
| "loss": 0.0995, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.9981801637852593, |
| "grad_norm": 1.3144114191589358, |
| "learning_rate": 4.5246407171149975e-06, |
| "loss": 0.0923, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.9986351228389445, |
| "grad_norm": 1.5066334421883691, |
| "learning_rate": 4.5242214016313655e-06, |
| "loss": 0.0944, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.9990900818926297, |
| "grad_norm": 1.0767315858549367, |
| "learning_rate": 4.523801920737501e-06, |
| "loss": 0.0692, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.9995450409463148, |
| "grad_norm": 1.362736303528153, |
| "learning_rate": 4.523382274467684e-06, |
| "loss": 0.0916, |
| "step": 2197 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.0570283557217577, |
| "learning_rate": 4.522962462856206e-06, |
| "loss": 0.0478, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.000454959053685, |
| "grad_norm": 0.7592988875540077, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.0248, |
| "step": 2199 |
| }, |
| { |
| "epoch": 1.0009099181073704, |
| "grad_norm": 1.368980443137241, |
| "learning_rate": 4.522122343745495e-06, |
| "loss": 0.0579, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.0013648771610555, |
| "grad_norm": 1.0628585042902956, |
| "learning_rate": 4.521702036314915e-06, |
| "loss": 0.0561, |
| "step": 2201 |
| }, |
| { |
| "epoch": 1.0018198362147406, |
| "grad_norm": 0.7147794512980257, |
| "learning_rate": 4.521281563679973e-06, |
| "loss": 0.0292, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.0022747952684259, |
| "grad_norm": 1.1709050671007968, |
| "learning_rate": 4.5208609258750314e-06, |
| "loss": 0.0464, |
| "step": 2203 |
| }, |
| { |
| "epoch": 1.002729754322111, |
| "grad_norm": 0.9609189064016738, |
| "learning_rate": 4.52044012293446e-06, |
| "loss": 0.0451, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.0031847133757963, |
| "grad_norm": 1.354272672445694, |
| "learning_rate": 4.520019154892646e-06, |
| "loss": 0.0555, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.0036396724294814, |
| "grad_norm": 1.2952803538667241, |
| "learning_rate": 4.519598021783989e-06, |
| "loss": 0.0539, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.0040946314831665, |
| "grad_norm": 1.5494612726642303, |
| "learning_rate": 4.519176723642903e-06, |
| "loss": 0.0762, |
| "step": 2207 |
| }, |
| { |
| "epoch": 1.0045495905368518, |
| "grad_norm": 1.4196552945601155, |
| "learning_rate": 4.518755260503813e-06, |
| "loss": 0.0606, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.0050045495905369, |
| "grad_norm": 0.9459922110368411, |
| "learning_rate": 4.51833363240116e-06, |
| "loss": 0.0401, |
| "step": 2209 |
| }, |
| { |
| "epoch": 1.005459508644222, |
| "grad_norm": 0.9836663939604657, |
| "learning_rate": 4.517911839369398e-06, |
| "loss": 0.0414, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.0059144676979073, |
| "grad_norm": 1.0781133888503407, |
| "learning_rate": 4.517489881442993e-06, |
| "loss": 0.0347, |
| "step": 2211 |
| }, |
| { |
| "epoch": 1.0063694267515924, |
| "grad_norm": 0.8497781985930463, |
| "learning_rate": 4.517067758656424e-06, |
| "loss": 0.0266, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.0068243858052774, |
| "grad_norm": 1.113725479756549, |
| "learning_rate": 4.516645471044188e-06, |
| "loss": 0.0328, |
| "step": 2213 |
| }, |
| { |
| "epoch": 1.0072793448589628, |
| "grad_norm": 1.0208334208547818, |
| "learning_rate": 4.516223018640791e-06, |
| "loss": 0.0373, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.0077343039126478, |
| "grad_norm": 1.3882067298362941, |
| "learning_rate": 4.515800401480754e-06, |
| "loss": 0.0457, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.008189262966333, |
| "grad_norm": 1.0420659203678968, |
| "learning_rate": 4.515377619598612e-06, |
| "loss": 0.0462, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.0086442220200182, |
| "grad_norm": 1.3078136737284674, |
| "learning_rate": 4.514954673028913e-06, |
| "loss": 0.0508, |
| "step": 2217 |
| }, |
| { |
| "epoch": 1.0090991810737033, |
| "grad_norm": 1.1934759478904275, |
| "learning_rate": 4.5145315618062155e-06, |
| "loss": 0.0448, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.0095541401273886, |
| "grad_norm": 1.2215784077070255, |
| "learning_rate": 4.514108285965098e-06, |
| "loss": 0.0427, |
| "step": 2219 |
| }, |
| { |
| "epoch": 1.0100090991810737, |
| "grad_norm": 1.178079318605842, |
| "learning_rate": 4.513684845540146e-06, |
| "loss": 0.0481, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0104640582347588, |
| "grad_norm": 1.3970520403755995, |
| "learning_rate": 4.5132612405659625e-06, |
| "loss": 0.0674, |
| "step": 2221 |
| }, |
| { |
| "epoch": 1.0109190172884441, |
| "grad_norm": 0.9508725726357258, |
| "learning_rate": 4.5128374710771625e-06, |
| "loss": 0.0287, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.0113739763421292, |
| "grad_norm": 1.3884527475726967, |
| "learning_rate": 4.512413537108374e-06, |
| "loss": 0.0678, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.0118289353958143, |
| "grad_norm": 1.2731469735444418, |
| "learning_rate": 4.511989438694239e-06, |
| "loss": 0.0373, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.0122838944494996, |
| "grad_norm": 0.9413972115792746, |
| "learning_rate": 4.511565175869415e-06, |
| "loss": 0.0234, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.0127388535031847, |
| "grad_norm": 1.1035710509489105, |
| "learning_rate": 4.511140748668566e-06, |
| "loss": 0.0471, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.0131938125568698, |
| "grad_norm": 0.8934820398484401, |
| "learning_rate": 4.510716157126379e-06, |
| "loss": 0.0337, |
| "step": 2227 |
| }, |
| { |
| "epoch": 1.013648771610555, |
| "grad_norm": 1.171377332070442, |
| "learning_rate": 4.510291401277548e-06, |
| "loss": 0.046, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.0141037306642402, |
| "grad_norm": 1.6191391065606777, |
| "learning_rate": 4.509866481156781e-06, |
| "loss": 0.0587, |
| "step": 2229 |
| }, |
| { |
| "epoch": 1.0145586897179253, |
| "grad_norm": 0.949496302030369, |
| "learning_rate": 4.509441396798802e-06, |
| "loss": 0.0319, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0150136487716106, |
| "grad_norm": 1.4372242490862157, |
| "learning_rate": 4.5090161482383475e-06, |
| "loss": 0.0476, |
| "step": 2231 |
| }, |
| { |
| "epoch": 1.0154686078252957, |
| "grad_norm": 0.9759482506446853, |
| "learning_rate": 4.508590735510166e-06, |
| "loss": 0.0453, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.015923566878981, |
| "grad_norm": 1.2867276257383993, |
| "learning_rate": 4.508165158649019e-06, |
| "loss": 0.0476, |
| "step": 2233 |
| }, |
| { |
| "epoch": 1.016378525932666, |
| "grad_norm": 1.1431990503227307, |
| "learning_rate": 4.507739417689685e-06, |
| "loss": 0.0312, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.0168334849863512, |
| "grad_norm": 1.2730945448369781, |
| "learning_rate": 4.507313512666953e-06, |
| "loss": 0.0389, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.0172884440400365, |
| "grad_norm": 0.9169087871477506, |
| "learning_rate": 4.506887443615625e-06, |
| "loss": 0.0328, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.0177434030937216, |
| "grad_norm": 1.367545758269456, |
| "learning_rate": 4.506461210570518e-06, |
| "loss": 0.0498, |
| "step": 2237 |
| }, |
| { |
| "epoch": 1.0181983621474067, |
| "grad_norm": 1.2176469297056225, |
| "learning_rate": 4.506034813566462e-06, |
| "loss": 0.0481, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.018653321201092, |
| "grad_norm": 1.157275144047969, |
| "learning_rate": 4.505608252638301e-06, |
| "loss": 0.0548, |
| "step": 2239 |
| }, |
| { |
| "epoch": 1.019108280254777, |
| "grad_norm": 2.133899749818285, |
| "learning_rate": 4.50518152782089e-06, |
| "loss": 0.0566, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0195632393084622, |
| "grad_norm": 0.6961268619707563, |
| "learning_rate": 4.504754639149101e-06, |
| "loss": 0.0253, |
| "step": 2241 |
| }, |
| { |
| "epoch": 1.0200181983621475, |
| "grad_norm": 0.7706214739124562, |
| "learning_rate": 4.504327586657814e-06, |
| "loss": 0.027, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.0204731574158326, |
| "grad_norm": 1.2000984203375016, |
| "learning_rate": 4.50390037038193e-06, |
| "loss": 0.0432, |
| "step": 2243 |
| }, |
| { |
| "epoch": 1.0209281164695176, |
| "grad_norm": 1.2252238966595805, |
| "learning_rate": 4.503472990356357e-06, |
| "loss": 0.0534, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.021383075523203, |
| "grad_norm": 1.0732643874827672, |
| "learning_rate": 4.503045446616018e-06, |
| "loss": 0.0325, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.021838034576888, |
| "grad_norm": 0.8837962801111581, |
| "learning_rate": 4.502617739195852e-06, |
| "loss": 0.0354, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.0222929936305734, |
| "grad_norm": 1.0014557088446234, |
| "learning_rate": 4.502189868130807e-06, |
| "loss": 0.0338, |
| "step": 2247 |
| }, |
| { |
| "epoch": 1.0227479526842584, |
| "grad_norm": 1.4138137492437213, |
| "learning_rate": 4.501761833455849e-06, |
| "loss": 0.0632, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.0232029117379435, |
| "grad_norm": 1.3089171005061577, |
| "learning_rate": 4.501333635205952e-06, |
| "loss": 0.0436, |
| "step": 2249 |
| }, |
| { |
| "epoch": 1.0236578707916288, |
| "grad_norm": 0.964576801162002, |
| "learning_rate": 4.5009052734161095e-06, |
| "loss": 0.0413, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.024112829845314, |
| "grad_norm": 0.8959038326952967, |
| "learning_rate": 4.500476748121324e-06, |
| "loss": 0.0431, |
| "step": 2251 |
| }, |
| { |
| "epoch": 1.024567788898999, |
| "grad_norm": 1.1431759011872913, |
| "learning_rate": 4.500048059356613e-06, |
| "loss": 0.0496, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.0250227479526843, |
| "grad_norm": 0.7061798756393048, |
| "learning_rate": 4.499619207157007e-06, |
| "loss": 0.0266, |
| "step": 2253 |
| }, |
| { |
| "epoch": 1.0254777070063694, |
| "grad_norm": 1.0664245996990538, |
| "learning_rate": 4.499190191557549e-06, |
| "loss": 0.0463, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.0259326660600545, |
| "grad_norm": 1.308102690321987, |
| "learning_rate": 4.498761012593296e-06, |
| "loss": 0.0523, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.0263876251137398, |
| "grad_norm": 1.184292634608403, |
| "learning_rate": 4.498331670299321e-06, |
| "loss": 0.0479, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.026842584167425, |
| "grad_norm": 1.3603086018990007, |
| "learning_rate": 4.497902164710704e-06, |
| "loss": 0.0451, |
| "step": 2257 |
| }, |
| { |
| "epoch": 1.02729754322111, |
| "grad_norm": 1.8635596633040872, |
| "learning_rate": 4.497472495862547e-06, |
| "loss": 0.0915, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.0277525022747953, |
| "grad_norm": 1.1753309212652938, |
| "learning_rate": 4.497042663789957e-06, |
| "loss": 0.0383, |
| "step": 2259 |
| }, |
| { |
| "epoch": 1.0282074613284804, |
| "grad_norm": 1.1584048851077327, |
| "learning_rate": 4.496612668528059e-06, |
| "loss": 0.0412, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.0286624203821657, |
| "grad_norm": 1.2262777215723997, |
| "learning_rate": 4.496182510111991e-06, |
| "loss": 0.07, |
| "step": 2261 |
| }, |
| { |
| "epoch": 1.0291173794358508, |
| "grad_norm": 1.2242297765277614, |
| "learning_rate": 4.495752188576902e-06, |
| "loss": 0.058, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.0295723384895359, |
| "grad_norm": 1.130646862008688, |
| "learning_rate": 4.4953217039579574e-06, |
| "loss": 0.0409, |
| "step": 2263 |
| }, |
| { |
| "epoch": 1.0300272975432212, |
| "grad_norm": 1.3756941584521463, |
| "learning_rate": 4.494891056290335e-06, |
| "loss": 0.058, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.0304822565969063, |
| "grad_norm": 1.0105322252819338, |
| "learning_rate": 4.494460245609223e-06, |
| "loss": 0.0357, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.0309372156505914, |
| "grad_norm": 1.446748004387351, |
| "learning_rate": 4.494029271949827e-06, |
| "loss": 0.0586, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.0313921747042767, |
| "grad_norm": 0.9047699993675258, |
| "learning_rate": 4.493598135347363e-06, |
| "loss": 0.0397, |
| "step": 2267 |
| }, |
| { |
| "epoch": 1.0318471337579618, |
| "grad_norm": 1.6965514138213933, |
| "learning_rate": 4.493166835837064e-06, |
| "loss": 0.049, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.0323020928116469, |
| "grad_norm": 1.460647237292739, |
| "learning_rate": 4.492735373454171e-06, |
| "loss": 0.059, |
| "step": 2269 |
| }, |
| { |
| "epoch": 1.0327570518653322, |
| "grad_norm": 1.1515338357687457, |
| "learning_rate": 4.492303748233943e-06, |
| "loss": 0.0457, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0332120109190173, |
| "grad_norm": 1.3354844049104573, |
| "learning_rate": 4.49187196021165e-06, |
| "loss": 0.0628, |
| "step": 2271 |
| }, |
| { |
| "epoch": 1.0336669699727024, |
| "grad_norm": 1.1925357013920974, |
| "learning_rate": 4.491440009422575e-06, |
| "loss": 0.0434, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.0341219290263877, |
| "grad_norm": 0.8659843507172138, |
| "learning_rate": 4.491007895902016e-06, |
| "loss": 0.0242, |
| "step": 2273 |
| }, |
| { |
| "epoch": 1.0345768880800728, |
| "grad_norm": 1.18711902494128, |
| "learning_rate": 4.490575619685283e-06, |
| "loss": 0.0436, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.035031847133758, |
| "grad_norm": 1.6475885386844928, |
| "learning_rate": 4.4901431808077e-06, |
| "loss": 0.058, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.0354868061874432, |
| "grad_norm": 1.8261487405648387, |
| "learning_rate": 4.489710579304603e-06, |
| "loss": 0.0521, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.0359417652411282, |
| "grad_norm": 1.1947212739973332, |
| "learning_rate": 4.489277815211343e-06, |
| "loss": 0.0392, |
| "step": 2277 |
| }, |
| { |
| "epoch": 1.0363967242948136, |
| "grad_norm": 1.268312083567863, |
| "learning_rate": 4.488844888563284e-06, |
| "loss": 0.0548, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.0368516833484986, |
| "grad_norm": 1.2124770977332124, |
| "learning_rate": 4.488411799395802e-06, |
| "loss": 0.0368, |
| "step": 2279 |
| }, |
| { |
| "epoch": 1.0373066424021837, |
| "grad_norm": 1.09936939122518, |
| "learning_rate": 4.487978547744287e-06, |
| "loss": 0.0434, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.037761601455869, |
| "grad_norm": 0.9268802621262473, |
| "learning_rate": 4.487545133644143e-06, |
| "loss": 0.0284, |
| "step": 2281 |
| }, |
| { |
| "epoch": 1.0382165605095541, |
| "grad_norm": 1.1529258944159122, |
| "learning_rate": 4.487111557130787e-06, |
| "loss": 0.0448, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.0386715195632392, |
| "grad_norm": 1.5946150882381405, |
| "learning_rate": 4.486677818239647e-06, |
| "loss": 0.0589, |
| "step": 2283 |
| }, |
| { |
| "epoch": 1.0391264786169245, |
| "grad_norm": 1.4208771624851846, |
| "learning_rate": 4.486243917006169e-06, |
| "loss": 0.0573, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.0395814376706096, |
| "grad_norm": 1.1717268544019273, |
| "learning_rate": 4.485809853465807e-06, |
| "loss": 0.0347, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.0400363967242947, |
| "grad_norm": 2.048346044240694, |
| "learning_rate": 4.4853756276540315e-06, |
| "loss": 0.0545, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.04049135577798, |
| "grad_norm": 1.1427723194882247, |
| "learning_rate": 4.484941239606326e-06, |
| "loss": 0.0326, |
| "step": 2287 |
| }, |
| { |
| "epoch": 1.040946314831665, |
| "grad_norm": 1.3094056778220102, |
| "learning_rate": 4.484506689358186e-06, |
| "loss": 0.0446, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.0414012738853504, |
| "grad_norm": 0.9866459461152235, |
| "learning_rate": 4.484071976945121e-06, |
| "loss": 0.0346, |
| "step": 2289 |
| }, |
| { |
| "epoch": 1.0418562329390355, |
| "grad_norm": 1.5902938369289412, |
| "learning_rate": 4.483637102402655e-06, |
| "loss": 0.0608, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.0423111919927206, |
| "grad_norm": 1.457900328820472, |
| "learning_rate": 4.4832020657663224e-06, |
| "loss": 0.0445, |
| "step": 2291 |
| }, |
| { |
| "epoch": 1.042766151046406, |
| "grad_norm": 0.8030067177194667, |
| "learning_rate": 4.482766867071673e-06, |
| "loss": 0.021, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.043221110100091, |
| "grad_norm": 1.5694199801125026, |
| "learning_rate": 4.482331506354269e-06, |
| "loss": 0.0603, |
| "step": 2293 |
| }, |
| { |
| "epoch": 1.043676069153776, |
| "grad_norm": 1.6224658858589436, |
| "learning_rate": 4.4818959836496876e-06, |
| "loss": 0.039, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.0441310282074614, |
| "grad_norm": 1.4465179948532605, |
| "learning_rate": 4.481460298993515e-06, |
| "loss": 0.0487, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.0445859872611465, |
| "grad_norm": 1.4085468589555914, |
| "learning_rate": 4.481024452421357e-06, |
| "loss": 0.0609, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.0450409463148316, |
| "grad_norm": 1.3760032065105168, |
| "learning_rate": 4.480588443968825e-06, |
| "loss": 0.0437, |
| "step": 2297 |
| }, |
| { |
| "epoch": 1.0454959053685169, |
| "grad_norm": 1.148069204226395, |
| "learning_rate": 4.4801522736715505e-06, |
| "loss": 0.0411, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.045950864422202, |
| "grad_norm": 1.2063041041062295, |
| "learning_rate": 4.479715941565174e-06, |
| "loss": 0.0387, |
| "step": 2299 |
| }, |
| { |
| "epoch": 1.046405823475887, |
| "grad_norm": 1.1651396969098329, |
| "learning_rate": 4.4792794476853514e-06, |
| "loss": 0.0421, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0468607825295724, |
| "grad_norm": 1.4497876497955704, |
| "learning_rate": 4.47884279206775e-06, |
| "loss": 0.0458, |
| "step": 2301 |
| }, |
| { |
| "epoch": 1.0473157415832575, |
| "grad_norm": 0.6954207079711388, |
| "learning_rate": 4.478405974748054e-06, |
| "loss": 0.0283, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.0477707006369428, |
| "grad_norm": 1.08704023013193, |
| "learning_rate": 4.477968995761954e-06, |
| "loss": 0.0494, |
| "step": 2303 |
| }, |
| { |
| "epoch": 1.0482256596906279, |
| "grad_norm": 1.2023016480567608, |
| "learning_rate": 4.477531855145161e-06, |
| "loss": 0.0464, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.048680618744313, |
| "grad_norm": 1.119732724667165, |
| "learning_rate": 4.477094552933395e-06, |
| "loss": 0.0343, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.0491355777979983, |
| "grad_norm": 0.995862987950808, |
| "learning_rate": 4.476657089162391e-06, |
| "loss": 0.0446, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.0495905368516834, |
| "grad_norm": 1.4599886761016245, |
| "learning_rate": 4.476219463867897e-06, |
| "loss": 0.0559, |
| "step": 2307 |
| }, |
| { |
| "epoch": 1.0500454959053684, |
| "grad_norm": 1.179376461270977, |
| "learning_rate": 4.475781677085671e-06, |
| "loss": 0.0345, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.0505004549590538, |
| "grad_norm": 1.2567995856763843, |
| "learning_rate": 4.4753437288514904e-06, |
| "loss": 0.0417, |
| "step": 2309 |
| }, |
| { |
| "epoch": 1.0509554140127388, |
| "grad_norm": 1.4176863768351469, |
| "learning_rate": 4.47490561920114e-06, |
| "loss": 0.0413, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.051410373066424, |
| "grad_norm": 1.3572620241361109, |
| "learning_rate": 4.474467348170421e-06, |
| "loss": 0.0547, |
| "step": 2311 |
| }, |
| { |
| "epoch": 1.0518653321201092, |
| "grad_norm": 1.3581302259371113, |
| "learning_rate": 4.474028915795148e-06, |
| "loss": 0.0556, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.0523202911737943, |
| "grad_norm": 0.9190038205266854, |
| "learning_rate": 4.473590322111145e-06, |
| "loss": 0.0349, |
| "step": 2313 |
| }, |
| { |
| "epoch": 1.0527752502274794, |
| "grad_norm": 1.1932241075740775, |
| "learning_rate": 4.473151567154255e-06, |
| "loss": 0.0441, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.0532302092811647, |
| "grad_norm": 0.9977743247298095, |
| "learning_rate": 4.472712650960328e-06, |
| "loss": 0.0403, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.0536851683348498, |
| "grad_norm": 1.2303515067871835, |
| "learning_rate": 4.472273573565234e-06, |
| "loss": 0.055, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.0541401273885351, |
| "grad_norm": 1.5989593561515982, |
| "learning_rate": 4.471834335004849e-06, |
| "loss": 0.0499, |
| "step": 2317 |
| }, |
| { |
| "epoch": 1.0545950864422202, |
| "grad_norm": 1.635120592011771, |
| "learning_rate": 4.471394935315067e-06, |
| "loss": 0.0536, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.0550500454959053, |
| "grad_norm": 1.3079418412147372, |
| "learning_rate": 4.470955374531794e-06, |
| "loss": 0.048, |
| "step": 2319 |
| }, |
| { |
| "epoch": 1.0555050045495906, |
| "grad_norm": 1.2174909036277741, |
| "learning_rate": 4.470515652690947e-06, |
| "loss": 0.0468, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.0559599636032757, |
| "grad_norm": 1.1872482616328512, |
| "learning_rate": 4.470075769828461e-06, |
| "loss": 0.0375, |
| "step": 2321 |
| }, |
| { |
| "epoch": 1.0564149226569608, |
| "grad_norm": 1.2259206871771855, |
| "learning_rate": 4.46963572598028e-06, |
| "loss": 0.0436, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.056869881710646, |
| "grad_norm": 1.1033426053144877, |
| "learning_rate": 4.469195521182362e-06, |
| "loss": 0.0426, |
| "step": 2323 |
| }, |
| { |
| "epoch": 1.0573248407643312, |
| "grad_norm": 1.2649171038278944, |
| "learning_rate": 4.468755155470679e-06, |
| "loss": 0.0486, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.0577797998180163, |
| "grad_norm": 1.0279391958160191, |
| "learning_rate": 4.468314628881214e-06, |
| "loss": 0.0319, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.0582347588717016, |
| "grad_norm": 1.0845310516292441, |
| "learning_rate": 4.467873941449969e-06, |
| "loss": 0.0463, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.0586897179253867, |
| "grad_norm": 1.2529975095990527, |
| "learning_rate": 4.46743309321295e-06, |
| "loss": 0.0432, |
| "step": 2327 |
| }, |
| { |
| "epoch": 1.0591446769790718, |
| "grad_norm": 1.5472044718879958, |
| "learning_rate": 4.466992084206185e-06, |
| "loss": 0.0737, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.059599636032757, |
| "grad_norm": 1.26171029661481, |
| "learning_rate": 4.466550914465709e-06, |
| "loss": 0.046, |
| "step": 2329 |
| }, |
| { |
| "epoch": 1.0600545950864422, |
| "grad_norm": 1.2412151656611115, |
| "learning_rate": 4.466109584027573e-06, |
| "loss": 0.0568, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0605095541401275, |
| "grad_norm": 1.157086807558091, |
| "learning_rate": 4.465668092927841e-06, |
| "loss": 0.0527, |
| "step": 2331 |
| }, |
| { |
| "epoch": 1.0609645131938126, |
| "grad_norm": 1.5024263673511569, |
| "learning_rate": 4.465226441202589e-06, |
| "loss": 0.0593, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.0614194722474977, |
| "grad_norm": 1.5602918486987227, |
| "learning_rate": 4.464784628887908e-06, |
| "loss": 0.0418, |
| "step": 2333 |
| }, |
| { |
| "epoch": 1.061874431301183, |
| "grad_norm": 1.1908963791759075, |
| "learning_rate": 4.4643426560199e-06, |
| "loss": 0.0399, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.062329390354868, |
| "grad_norm": 1.224987867569317, |
| "learning_rate": 4.46390052263468e-06, |
| "loss": 0.0431, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.0627843494085532, |
| "grad_norm": 0.9964070335279412, |
| "learning_rate": 4.463458228768378e-06, |
| "loss": 0.0286, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.0632393084622385, |
| "grad_norm": 1.1029659276137886, |
| "learning_rate": 4.463015774457137e-06, |
| "loss": 0.0442, |
| "step": 2337 |
| }, |
| { |
| "epoch": 1.0636942675159236, |
| "grad_norm": 1.8790987697202004, |
| "learning_rate": 4.462573159737113e-06, |
| "loss": 0.0744, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.0641492265696086, |
| "grad_norm": 1.0214393343973027, |
| "learning_rate": 4.462130384644472e-06, |
| "loss": 0.0417, |
| "step": 2339 |
| }, |
| { |
| "epoch": 1.064604185623294, |
| "grad_norm": 1.6623580705853476, |
| "learning_rate": 4.461687449215397e-06, |
| "loss": 0.0567, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.065059144676979, |
| "grad_norm": 1.2103435389100787, |
| "learning_rate": 4.4612443534860826e-06, |
| "loss": 0.0473, |
| "step": 2341 |
| }, |
| { |
| "epoch": 1.0655141037306644, |
| "grad_norm": 1.4489440150290915, |
| "learning_rate": 4.460801097492737e-06, |
| "loss": 0.0437, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.0659690627843494, |
| "grad_norm": 0.7822321591520786, |
| "learning_rate": 4.460357681271579e-06, |
| "loss": 0.0306, |
| "step": 2343 |
| }, |
| { |
| "epoch": 1.0664240218380345, |
| "grad_norm": 1.3403186761851023, |
| "learning_rate": 4.4599141048588454e-06, |
| "loss": 0.0614, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.0668789808917198, |
| "grad_norm": 1.2783250768017846, |
| "learning_rate": 4.4594703682907825e-06, |
| "loss": 0.0591, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.067333939945405, |
| "grad_norm": 1.2525758133587013, |
| "learning_rate": 4.459026471603649e-06, |
| "loss": 0.0577, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.06778889899909, |
| "grad_norm": 1.1241808251725347, |
| "learning_rate": 4.45858241483372e-06, |
| "loss": 0.0446, |
| "step": 2347 |
| }, |
| { |
| "epoch": 1.0682438580527753, |
| "grad_norm": 1.6188338672425098, |
| "learning_rate": 4.458138198017281e-06, |
| "loss": 0.0491, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.0686988171064604, |
| "grad_norm": 1.5172950408699082, |
| "learning_rate": 4.457693821190631e-06, |
| "loss": 0.0553, |
| "step": 2349 |
| }, |
| { |
| "epoch": 1.0691537761601455, |
| "grad_norm": 1.5780672259398312, |
| "learning_rate": 4.4572492843900815e-06, |
| "loss": 0.0525, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0696087352138308, |
| "grad_norm": 1.0727949839427282, |
| "learning_rate": 4.456804587651961e-06, |
| "loss": 0.0342, |
| "step": 2351 |
| }, |
| { |
| "epoch": 1.070063694267516, |
| "grad_norm": 0.9599229329286044, |
| "learning_rate": 4.456359731012606e-06, |
| "loss": 0.0339, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.070518653321201, |
| "grad_norm": 1.2954163297410495, |
| "learning_rate": 4.455914714508369e-06, |
| "loss": 0.0359, |
| "step": 2353 |
| }, |
| { |
| "epoch": 1.0709736123748863, |
| "grad_norm": 0.7369480798255574, |
| "learning_rate": 4.455469538175614e-06, |
| "loss": 0.0262, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 1.414642082838209, |
| "learning_rate": 4.455024202050719e-06, |
| "loss": 0.0479, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.0718835304822565, |
| "grad_norm": 1.2543141826227884, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 0.0437, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.0723384895359418, |
| "grad_norm": 1.2446138398178868, |
| "learning_rate": 4.454133050570087e-06, |
| "loss": 0.0399, |
| "step": 2357 |
| }, |
| { |
| "epoch": 1.0727934485896269, |
| "grad_norm": 1.158413385216241, |
| "learning_rate": 4.453687235287169e-06, |
| "loss": 0.0456, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.0732484076433122, |
| "grad_norm": 1.0349949382172452, |
| "learning_rate": 4.453241260357754e-06, |
| "loss": 0.0387, |
| "step": 2359 |
| }, |
| { |
| "epoch": 1.0737033666969973, |
| "grad_norm": 0.9528710326100024, |
| "learning_rate": 4.452795125818283e-06, |
| "loss": 0.0302, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.0741583257506824, |
| "grad_norm": 1.6748707696298286, |
| "learning_rate": 4.4523488317052146e-06, |
| "loss": 0.0695, |
| "step": 2361 |
| }, |
| { |
| "epoch": 1.0746132848043677, |
| "grad_norm": 0.9943025075099621, |
| "learning_rate": 4.451902378055015e-06, |
| "loss": 0.0347, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.0750682438580528, |
| "grad_norm": 1.0111268480823534, |
| "learning_rate": 4.451455764904169e-06, |
| "loss": 0.0355, |
| "step": 2363 |
| }, |
| { |
| "epoch": 1.0755232029117379, |
| "grad_norm": 1.51613146359701, |
| "learning_rate": 4.45100899228917e-06, |
| "loss": 0.0683, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.0759781619654232, |
| "grad_norm": 1.1003665142463364, |
| "learning_rate": 4.4505620602465275e-06, |
| "loss": 0.0412, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.0764331210191083, |
| "grad_norm": 1.3747566768643154, |
| "learning_rate": 4.450114968812761e-06, |
| "loss": 0.0526, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.0768880800727934, |
| "grad_norm": 1.2353340866512168, |
| "learning_rate": 4.449667718024406e-06, |
| "loss": 0.0443, |
| "step": 2367 |
| }, |
| { |
| "epoch": 1.0773430391264787, |
| "grad_norm": 1.4934658349375112, |
| "learning_rate": 4.449220307918011e-06, |
| "loss": 0.0615, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.0777979981801638, |
| "grad_norm": 1.0941994239908737, |
| "learning_rate": 4.448772738530134e-06, |
| "loss": 0.0334, |
| "step": 2369 |
| }, |
| { |
| "epoch": 1.078252957233849, |
| "grad_norm": 0.9334140149905326, |
| "learning_rate": 4.44832500989735e-06, |
| "loss": 0.0331, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0787079162875342, |
| "grad_norm": 1.1527853076015333, |
| "learning_rate": 4.447877122056243e-06, |
| "loss": 0.0446, |
| "step": 2371 |
| }, |
| { |
| "epoch": 1.0791628753412192, |
| "grad_norm": 1.4488730770087763, |
| "learning_rate": 4.447429075043416e-06, |
| "loss": 0.0414, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.0796178343949046, |
| "grad_norm": 0.993988849176869, |
| "learning_rate": 4.4469808688954786e-06, |
| "loss": 0.0323, |
| "step": 2373 |
| }, |
| { |
| "epoch": 1.0800727934485896, |
| "grad_norm": 1.2678099813158608, |
| "learning_rate": 4.446532503649058e-06, |
| "loss": 0.0414, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.0805277525022747, |
| "grad_norm": 1.044004897105538, |
| "learning_rate": 4.44608397934079e-06, |
| "loss": 0.0378, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.08098271155596, |
| "grad_norm": 1.1946064960107736, |
| "learning_rate": 4.445635296007329e-06, |
| "loss": 0.0473, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.0814376706096451, |
| "grad_norm": 1.2008540897846665, |
| "learning_rate": 4.445186453685339e-06, |
| "loss": 0.0489, |
| "step": 2377 |
| }, |
| { |
| "epoch": 1.0818926296633302, |
| "grad_norm": 1.101478249984827, |
| "learning_rate": 4.444737452411494e-06, |
| "loss": 0.035, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.0823475887170155, |
| "grad_norm": 1.315452085320974, |
| "learning_rate": 4.444288292222488e-06, |
| "loss": 0.0448, |
| "step": 2379 |
| }, |
| { |
| "epoch": 1.0828025477707006, |
| "grad_norm": 1.2177621407572365, |
| "learning_rate": 4.443838973155023e-06, |
| "loss": 0.0509, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.0832575068243857, |
| "grad_norm": 1.4009233335818028, |
| "learning_rate": 4.443389495245816e-06, |
| "loss": 0.0545, |
| "step": 2381 |
| }, |
| { |
| "epoch": 1.083712465878071, |
| "grad_norm": 1.0273018558001004, |
| "learning_rate": 4.442939858531594e-06, |
| "loss": 0.0411, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.084167424931756, |
| "grad_norm": 1.4618310572678548, |
| "learning_rate": 4.442490063049103e-06, |
| "loss": 0.0516, |
| "step": 2383 |
| }, |
| { |
| "epoch": 1.0846223839854412, |
| "grad_norm": 1.1857494134666622, |
| "learning_rate": 4.442040108835095e-06, |
| "loss": 0.0309, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.0850773430391265, |
| "grad_norm": 1.151479651667381, |
| "learning_rate": 4.44158999592634e-06, |
| "loss": 0.0453, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.0855323020928116, |
| "grad_norm": 1.2171109689922353, |
| "learning_rate": 4.441139724359617e-06, |
| "loss": 0.0347, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.085987261146497, |
| "grad_norm": 1.1600797404646812, |
| "learning_rate": 4.440689294171724e-06, |
| "loss": 0.0482, |
| "step": 2387 |
| }, |
| { |
| "epoch": 1.086442220200182, |
| "grad_norm": 1.470721297467342, |
| "learning_rate": 4.440238705399465e-06, |
| "loss": 0.0402, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.086897179253867, |
| "grad_norm": 1.156528424871924, |
| "learning_rate": 4.439787958079662e-06, |
| "loss": 0.0431, |
| "step": 2389 |
| }, |
| { |
| "epoch": 1.0873521383075524, |
| "grad_norm": 0.9548457677170976, |
| "learning_rate": 4.439337052249146e-06, |
| "loss": 0.0312, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.0878070973612375, |
| "grad_norm": 1.2408812515312437, |
| "learning_rate": 4.4388859879447645e-06, |
| "loss": 0.0487, |
| "step": 2391 |
| }, |
| { |
| "epoch": 1.0882620564149226, |
| "grad_norm": 3.049539174474021, |
| "learning_rate": 4.438434765203376e-06, |
| "loss": 0.0857, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.0887170154686079, |
| "grad_norm": 1.2492037273967551, |
| "learning_rate": 4.4379833840618524e-06, |
| "loss": 0.0538, |
| "step": 2393 |
| }, |
| { |
| "epoch": 1.089171974522293, |
| "grad_norm": 1.2399469969104553, |
| "learning_rate": 4.4375318445570785e-06, |
| "loss": 0.0377, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.089626933575978, |
| "grad_norm": 1.4535531788977818, |
| "learning_rate": 4.437080146725951e-06, |
| "loss": 0.0472, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.0900818926296634, |
| "grad_norm": 1.1893861617416726, |
| "learning_rate": 4.436628290605384e-06, |
| "loss": 0.0515, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.0905368516833485, |
| "grad_norm": 0.9305630982590819, |
| "learning_rate": 4.436176276232297e-06, |
| "loss": 0.0298, |
| "step": 2397 |
| }, |
| { |
| "epoch": 1.0909918107370338, |
| "grad_norm": 1.3876854731609625, |
| "learning_rate": 4.4357241036436294e-06, |
| "loss": 0.0416, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.0914467697907189, |
| "grad_norm": 0.7914778789700057, |
| "learning_rate": 4.435271772876329e-06, |
| "loss": 0.0327, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.091901728844404, |
| "grad_norm": 1.3724103614749033, |
| "learning_rate": 4.434819283967359e-06, |
| "loss": 0.0552, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0923566878980893, |
| "grad_norm": 1.2536063130707569, |
| "learning_rate": 4.434366636953695e-06, |
| "loss": 0.035, |
| "step": 2401 |
| }, |
| { |
| "epoch": 1.0928116469517744, |
| "grad_norm": 0.9368763383307603, |
| "learning_rate": 4.433913831872324e-06, |
| "loss": 0.0256, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.0932666060054594, |
| "grad_norm": 1.4085211062041956, |
| "learning_rate": 4.43346086876025e-06, |
| "loss": 0.0551, |
| "step": 2403 |
| }, |
| { |
| "epoch": 1.0937215650591448, |
| "grad_norm": 1.5982437692959348, |
| "learning_rate": 4.433007747654484e-06, |
| "loss": 0.0503, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.0941765241128298, |
| "grad_norm": 1.1834519484288195, |
| "learning_rate": 4.432554468592054e-06, |
| "loss": 0.0435, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.094631483166515, |
| "grad_norm": 1.485944123175508, |
| "learning_rate": 4.432101031610001e-06, |
| "loss": 0.0539, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.0950864422202002, |
| "grad_norm": 1.2727051287798419, |
| "learning_rate": 4.431647436745376e-06, |
| "loss": 0.04, |
| "step": 2407 |
| }, |
| { |
| "epoch": 1.0955414012738853, |
| "grad_norm": 1.4565264120912131, |
| "learning_rate": 4.431193684035246e-06, |
| "loss": 0.0461, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.0959963603275704, |
| "grad_norm": 1.40974844506133, |
| "learning_rate": 4.43073977351669e-06, |
| "loss": 0.0573, |
| "step": 2409 |
| }, |
| { |
| "epoch": 1.0964513193812557, |
| "grad_norm": 1.1482231418541697, |
| "learning_rate": 4.430285705226799e-06, |
| "loss": 0.0287, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0969062784349408, |
| "grad_norm": 1.2918850459740896, |
| "learning_rate": 4.429831479202676e-06, |
| "loss": 0.0446, |
| "step": 2411 |
| }, |
| { |
| "epoch": 1.097361237488626, |
| "grad_norm": 0.9307131764504681, |
| "learning_rate": 4.429377095481441e-06, |
| "loss": 0.0372, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.0978161965423112, |
| "grad_norm": 1.128804280095948, |
| "learning_rate": 4.428922554100221e-06, |
| "loss": 0.0469, |
| "step": 2413 |
| }, |
| { |
| "epoch": 1.0982711555959963, |
| "grad_norm": 0.9576886710266964, |
| "learning_rate": 4.428467855096163e-06, |
| "loss": 0.0329, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.0987261146496816, |
| "grad_norm": 1.7900226665283667, |
| "learning_rate": 4.428012998506419e-06, |
| "loss": 0.0506, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.0991810737033667, |
| "grad_norm": 1.4872860841375592, |
| "learning_rate": 4.42755798436816e-06, |
| "loss": 0.0549, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.0996360327570518, |
| "grad_norm": 1.2872956233366672, |
| "learning_rate": 4.427102812718568e-06, |
| "loss": 0.0502, |
| "step": 2417 |
| }, |
| { |
| "epoch": 1.100090991810737, |
| "grad_norm": 1.1697026711762988, |
| "learning_rate": 4.426647483594836e-06, |
| "loss": 0.0438, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.1005459508644222, |
| "grad_norm": 1.1730865927933076, |
| "learning_rate": 4.4261919970341724e-06, |
| "loss": 0.059, |
| "step": 2419 |
| }, |
| { |
| "epoch": 1.1010009099181073, |
| "grad_norm": 0.8069565798505858, |
| "learning_rate": 4.425736353073798e-06, |
| "loss": 0.0308, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.1014558689717926, |
| "grad_norm": 1.4916540523215134, |
| "learning_rate": 4.425280551750945e-06, |
| "loss": 0.0531, |
| "step": 2421 |
| }, |
| { |
| "epoch": 1.1019108280254777, |
| "grad_norm": 1.1538939365796403, |
| "learning_rate": 4.42482459310286e-06, |
| "loss": 0.0363, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.1023657870791628, |
| "grad_norm": 1.560333283306957, |
| "learning_rate": 4.424368477166801e-06, |
| "loss": 0.0566, |
| "step": 2423 |
| }, |
| { |
| "epoch": 1.102820746132848, |
| "grad_norm": 1.3465375615780066, |
| "learning_rate": 4.423912203980041e-06, |
| "loss": 0.0515, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.1032757051865332, |
| "grad_norm": 1.1269590237117892, |
| "learning_rate": 4.423455773579865e-06, |
| "loss": 0.0323, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.1037306642402185, |
| "grad_norm": 0.9500399093059193, |
| "learning_rate": 4.422999186003568e-06, |
| "loss": 0.0305, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.1041856232939036, |
| "grad_norm": 0.8342180356166348, |
| "learning_rate": 4.422542441288462e-06, |
| "loss": 0.0235, |
| "step": 2427 |
| }, |
| { |
| "epoch": 1.1046405823475887, |
| "grad_norm": 1.2824767128116152, |
| "learning_rate": 4.42208553947187e-06, |
| "loss": 0.0549, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.105095541401274, |
| "grad_norm": 1.1301510416758715, |
| "learning_rate": 4.4216284805911275e-06, |
| "loss": 0.0357, |
| "step": 2429 |
| }, |
| { |
| "epoch": 1.105550500454959, |
| "grad_norm": 1.5252855186654373, |
| "learning_rate": 4.421171264683584e-06, |
| "loss": 0.0603, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.1060054595086442, |
| "grad_norm": 1.9340330730215476, |
| "learning_rate": 4.4207138917866e-06, |
| "loss": 0.0746, |
| "step": 2431 |
| }, |
| { |
| "epoch": 1.1064604185623295, |
| "grad_norm": 1.4780786403437716, |
| "learning_rate": 4.420256361937551e-06, |
| "loss": 0.0665, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.1069153776160146, |
| "grad_norm": 0.8433657341894452, |
| "learning_rate": 4.419798675173824e-06, |
| "loss": 0.029, |
| "step": 2433 |
| }, |
| { |
| "epoch": 1.1073703366696996, |
| "grad_norm": 1.24322553403791, |
| "learning_rate": 4.419340831532819e-06, |
| "loss": 0.054, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.107825295723385, |
| "grad_norm": 1.3688683953060277, |
| "learning_rate": 4.418882831051949e-06, |
| "loss": 0.0544, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.10828025477707, |
| "grad_norm": 1.316852383217834, |
| "learning_rate": 4.418424673768639e-06, |
| "loss": 0.0381, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.1087352138307551, |
| "grad_norm": 1.1724429999169874, |
| "learning_rate": 4.417966359720329e-06, |
| "loss": 0.0326, |
| "step": 2437 |
| }, |
| { |
| "epoch": 1.1091901728844404, |
| "grad_norm": 1.1568832581928536, |
| "learning_rate": 4.417507888944469e-06, |
| "loss": 0.0405, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.1096451319381255, |
| "grad_norm": 1.5251902460106344, |
| "learning_rate": 4.417049261478525e-06, |
| "loss": 0.0575, |
| "step": 2439 |
| }, |
| { |
| "epoch": 1.1101000909918108, |
| "grad_norm": 1.0120545747432346, |
| "learning_rate": 4.416590477359971e-06, |
| "loss": 0.0334, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.110555050045496, |
| "grad_norm": 0.7914216990176248, |
| "learning_rate": 4.416131536626299e-06, |
| "loss": 0.0356, |
| "step": 2441 |
| }, |
| { |
| "epoch": 1.111010009099181, |
| "grad_norm": 1.2910637898716506, |
| "learning_rate": 4.415672439315011e-06, |
| "loss": 0.0515, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.1114649681528663, |
| "grad_norm": 0.9051320988874424, |
| "learning_rate": 4.415213185463623e-06, |
| "loss": 0.0356, |
| "step": 2443 |
| }, |
| { |
| "epoch": 1.1119199272065514, |
| "grad_norm": 1.0269698815028385, |
| "learning_rate": 4.414753775109661e-06, |
| "loss": 0.0329, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.1123748862602365, |
| "grad_norm": 1.2419026252261667, |
| "learning_rate": 4.414294208290669e-06, |
| "loss": 0.0517, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.1128298453139218, |
| "grad_norm": 1.819082869630631, |
| "learning_rate": 4.413834485044199e-06, |
| "loss": 0.0451, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.113284804367607, |
| "grad_norm": 0.9979243205463798, |
| "learning_rate": 4.413374605407817e-06, |
| "loss": 0.0383, |
| "step": 2447 |
| }, |
| { |
| "epoch": 1.113739763421292, |
| "grad_norm": 1.224800546943753, |
| "learning_rate": 4.412914569419103e-06, |
| "loss": 0.0434, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.1141947224749773, |
| "grad_norm": 1.0669150265973117, |
| "learning_rate": 4.412454377115649e-06, |
| "loss": 0.0408, |
| "step": 2449 |
| }, |
| { |
| "epoch": 1.1146496815286624, |
| "grad_norm": 1.123671491866875, |
| "learning_rate": 4.411994028535061e-06, |
| "loss": 0.0441, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.1151046405823477, |
| "grad_norm": 1.2722514561723057, |
| "learning_rate": 4.411533523714954e-06, |
| "loss": 0.0383, |
| "step": 2451 |
| }, |
| { |
| "epoch": 1.1155595996360328, |
| "grad_norm": 1.5834385006640326, |
| "learning_rate": 4.41107286269296e-06, |
| "loss": 0.0589, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.1160145586897179, |
| "grad_norm": 1.1455499260049011, |
| "learning_rate": 4.410612045506722e-06, |
| "loss": 0.0449, |
| "step": 2453 |
| }, |
| { |
| "epoch": 1.1164695177434032, |
| "grad_norm": 1.0371099501390657, |
| "learning_rate": 4.410151072193897e-06, |
| "loss": 0.0362, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.1169244767970883, |
| "grad_norm": 1.4081161167961136, |
| "learning_rate": 4.409689942792152e-06, |
| "loss": 0.0518, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.1173794358507734, |
| "grad_norm": 1.7214027846335829, |
| "learning_rate": 4.409228657339168e-06, |
| "loss": 0.0692, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.1178343949044587, |
| "grad_norm": 1.12403321816502, |
| "learning_rate": 4.4087672158726415e-06, |
| "loss": 0.0366, |
| "step": 2457 |
| }, |
| { |
| "epoch": 1.1182893539581438, |
| "grad_norm": 1.0823504834469402, |
| "learning_rate": 4.408305618430277e-06, |
| "loss": 0.0345, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.1187443130118289, |
| "grad_norm": 1.216846306576934, |
| "learning_rate": 4.407843865049797e-06, |
| "loss": 0.0372, |
| "step": 2459 |
| }, |
| { |
| "epoch": 1.1191992720655142, |
| "grad_norm": 1.1113939155438939, |
| "learning_rate": 4.40738195576893e-06, |
| "loss": 0.0451, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1196542311191993, |
| "grad_norm": 0.891204400606777, |
| "learning_rate": 4.406919890625424e-06, |
| "loss": 0.0292, |
| "step": 2461 |
| }, |
| { |
| "epoch": 1.1201091901728844, |
| "grad_norm": 1.2915935448419529, |
| "learning_rate": 4.406457669657036e-06, |
| "loss": 0.0516, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.1205641492265697, |
| "grad_norm": 1.6069379871364107, |
| "learning_rate": 4.405995292901537e-06, |
| "loss": 0.08, |
| "step": 2463 |
| }, |
| { |
| "epoch": 1.1210191082802548, |
| "grad_norm": 1.5035973769422863, |
| "learning_rate": 4.40553276039671e-06, |
| "loss": 0.0601, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.1214740673339398, |
| "grad_norm": 1.0151523691387854, |
| "learning_rate": 4.4050700721803505e-06, |
| "loss": 0.0379, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.1219290263876252, |
| "grad_norm": 1.7711601032802793, |
| "learning_rate": 4.404607228290269e-06, |
| "loss": 0.0576, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.1223839854413102, |
| "grad_norm": 0.954038569483112, |
| "learning_rate": 4.404144228764285e-06, |
| "loss": 0.0404, |
| "step": 2467 |
| }, |
| { |
| "epoch": 1.1228389444949956, |
| "grad_norm": 1.5447139108146788, |
| "learning_rate": 4.403681073640235e-06, |
| "loss": 0.0658, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.1232939035486806, |
| "grad_norm": 0.9377135115431458, |
| "learning_rate": 4.403217762955963e-06, |
| "loss": 0.0263, |
| "step": 2469 |
| }, |
| { |
| "epoch": 1.1237488626023657, |
| "grad_norm": 1.1143437864668884, |
| "learning_rate": 4.402754296749331e-06, |
| "loss": 0.0566, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.124203821656051, |
| "grad_norm": 1.196886172258259, |
| "learning_rate": 4.402290675058211e-06, |
| "loss": 0.0443, |
| "step": 2471 |
| }, |
| { |
| "epoch": 1.1246587807097361, |
| "grad_norm": 1.2121878881495864, |
| "learning_rate": 4.401826897920487e-06, |
| "loss": 0.0424, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.1251137397634212, |
| "grad_norm": 1.0141547702640479, |
| "learning_rate": 4.4013629653740575e-06, |
| "loss": 0.0377, |
| "step": 2473 |
| }, |
| { |
| "epoch": 1.1255686988171065, |
| "grad_norm": 1.0861470170547507, |
| "learning_rate": 4.400898877456833e-06, |
| "loss": 0.0404, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.1260236578707916, |
| "grad_norm": 0.9496965385334126, |
| "learning_rate": 4.400434634206737e-06, |
| "loss": 0.0344, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.1264786169244767, |
| "grad_norm": 1.3817398545762736, |
| "learning_rate": 4.399970235661705e-06, |
| "loss": 0.0447, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.126933575978162, |
| "grad_norm": 1.5813045704011366, |
| "learning_rate": 4.399505681859685e-06, |
| "loss": 0.0523, |
| "step": 2477 |
| }, |
| { |
| "epoch": 1.127388535031847, |
| "grad_norm": 1.2440959913644623, |
| "learning_rate": 4.399040972838639e-06, |
| "loss": 0.0386, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.1278434940855324, |
| "grad_norm": 1.4133175073097664, |
| "learning_rate": 4.398576108636541e-06, |
| "loss": 0.0416, |
| "step": 2479 |
| }, |
| { |
| "epoch": 1.1282984531392175, |
| "grad_norm": 1.124024448878979, |
| "learning_rate": 4.398111089291378e-06, |
| "loss": 0.0439, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1287534121929026, |
| "grad_norm": 1.2930145274113767, |
| "learning_rate": 4.3976459148411464e-06, |
| "loss": 0.0418, |
| "step": 2481 |
| }, |
| { |
| "epoch": 1.129208371246588, |
| "grad_norm": 1.4204451546522527, |
| "learning_rate": 4.3971805853238616e-06, |
| "loss": 0.0638, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.129663330300273, |
| "grad_norm": 1.4700979876317655, |
| "learning_rate": 4.396715100777547e-06, |
| "loss": 0.0379, |
| "step": 2483 |
| }, |
| { |
| "epoch": 1.130118289353958, |
| "grad_norm": 1.634898199167989, |
| "learning_rate": 4.39624946124024e-06, |
| "loss": 0.0529, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.1305732484076434, |
| "grad_norm": 1.0585835936713908, |
| "learning_rate": 4.39578366674999e-06, |
| "loss": 0.044, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.1310282074613285, |
| "grad_norm": 1.3973600769419179, |
| "learning_rate": 4.395317717344861e-06, |
| "loss": 0.0432, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.1314831665150136, |
| "grad_norm": 1.798219021657043, |
| "learning_rate": 4.394851613062927e-06, |
| "loss": 0.0638, |
| "step": 2487 |
| }, |
| { |
| "epoch": 1.1319381255686989, |
| "grad_norm": 1.0582181682092058, |
| "learning_rate": 4.394385353942275e-06, |
| "loss": 0.0332, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.132393084622384, |
| "grad_norm": 1.6181012659879317, |
| "learning_rate": 4.393918940021008e-06, |
| "loss": 0.049, |
| "step": 2489 |
| }, |
| { |
| "epoch": 1.132848043676069, |
| "grad_norm": 1.2537106795729136, |
| "learning_rate": 4.393452371337238e-06, |
| "loss": 0.0562, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1333030027297544, |
| "grad_norm": 0.9402110423643004, |
| "learning_rate": 4.39298564792909e-06, |
| "loss": 0.0323, |
| "step": 2491 |
| }, |
| { |
| "epoch": 1.1337579617834395, |
| "grad_norm": 0.9503033355772303, |
| "learning_rate": 4.392518769834705e-06, |
| "loss": 0.035, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.1342129208371245, |
| "grad_norm": 1.4777783162394809, |
| "learning_rate": 4.392051737092231e-06, |
| "loss": 0.0414, |
| "step": 2493 |
| }, |
| { |
| "epoch": 1.1346678798908099, |
| "grad_norm": 1.0763667953448761, |
| "learning_rate": 4.391584549739834e-06, |
| "loss": 0.0403, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.135122838944495, |
| "grad_norm": 1.3091193169121618, |
| "learning_rate": 4.391117207815691e-06, |
| "loss": 0.0503, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.1355777979981803, |
| "grad_norm": 1.1971659092014446, |
| "learning_rate": 4.3906497113579895e-06, |
| "loss": 0.0438, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.1360327570518653, |
| "grad_norm": 1.280791387170822, |
| "learning_rate": 4.390182060404931e-06, |
| "loss": 0.0346, |
| "step": 2497 |
| }, |
| { |
| "epoch": 1.1364877161055504, |
| "grad_norm": 1.121184192345378, |
| "learning_rate": 4.389714254994732e-06, |
| "loss": 0.0441, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.1369426751592357, |
| "grad_norm": 1.475854207845472, |
| "learning_rate": 4.389246295165617e-06, |
| "loss": 0.0486, |
| "step": 2499 |
| }, |
| { |
| "epoch": 1.1373976342129208, |
| "grad_norm": 0.9970049368117108, |
| "learning_rate": 4.388778180955826e-06, |
| "loss": 0.0327, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.137852593266606, |
| "grad_norm": 1.4145433757639123, |
| "learning_rate": 4.388309912403612e-06, |
| "loss": 0.0596, |
| "step": 2501 |
| }, |
| { |
| "epoch": 1.1383075523202912, |
| "grad_norm": 1.1343574838051573, |
| "learning_rate": 4.38784148954724e-06, |
| "loss": 0.0448, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.1387625113739763, |
| "grad_norm": 1.0484194331273002, |
| "learning_rate": 4.387372912424987e-06, |
| "loss": 0.0355, |
| "step": 2503 |
| }, |
| { |
| "epoch": 1.1392174704276614, |
| "grad_norm": 1.7053028178852092, |
| "learning_rate": 4.386904181075142e-06, |
| "loss": 0.0705, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.1396724294813467, |
| "grad_norm": 1.4330442148223927, |
| "learning_rate": 4.386435295536008e-06, |
| "loss": 0.0507, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.1401273885350318, |
| "grad_norm": 0.9348889254085606, |
| "learning_rate": 4.385966255845902e-06, |
| "loss": 0.0292, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.1405823475887171, |
| "grad_norm": 1.2174866946182084, |
| "learning_rate": 4.38549706204315e-06, |
| "loss": 0.0414, |
| "step": 2507 |
| }, |
| { |
| "epoch": 1.1410373066424022, |
| "grad_norm": 1.2934017758365686, |
| "learning_rate": 4.385027714166094e-06, |
| "loss": 0.0427, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.1414922656960873, |
| "grad_norm": 1.201346914344506, |
| "learning_rate": 4.384558212253084e-06, |
| "loss": 0.0341, |
| "step": 2509 |
| }, |
| { |
| "epoch": 1.1419472247497726, |
| "grad_norm": 1.3185084669060936, |
| "learning_rate": 4.384088556342488e-06, |
| "loss": 0.0375, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1424021838034577, |
| "grad_norm": 1.4235313585038782, |
| "learning_rate": 4.383618746472686e-06, |
| "loss": 0.045, |
| "step": 2511 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 1.2494910789819569, |
| "learning_rate": 4.383148782682064e-06, |
| "loss": 0.0467, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.143312101910828, |
| "grad_norm": 1.7266592945727848, |
| "learning_rate": 4.382678665009028e-06, |
| "loss": 0.0635, |
| "step": 2513 |
| }, |
| { |
| "epoch": 1.1437670609645132, |
| "grad_norm": 1.0979278755311446, |
| "learning_rate": 4.382208393491994e-06, |
| "loss": 0.0436, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.1442220200181983, |
| "grad_norm": 1.8838897508978143, |
| "learning_rate": 4.381737968169389e-06, |
| "loss": 0.0461, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.1446769790718836, |
| "grad_norm": 1.3766516415962329, |
| "learning_rate": 4.381267389079657e-06, |
| "loss": 0.0451, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.1451319381255687, |
| "grad_norm": 0.9711460067189062, |
| "learning_rate": 4.380796656261248e-06, |
| "loss": 0.0351, |
| "step": 2517 |
| }, |
| { |
| "epoch": 1.1455868971792538, |
| "grad_norm": 1.1491830597989745, |
| "learning_rate": 4.38032576975263e-06, |
| "loss": 0.039, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.146041856232939, |
| "grad_norm": 1.3340694838907248, |
| "learning_rate": 4.3798547295922825e-06, |
| "loss": 0.0507, |
| "step": 2519 |
| }, |
| { |
| "epoch": 1.1464968152866242, |
| "grad_norm": 1.734429381445876, |
| "learning_rate": 4.3793835358186955e-06, |
| "loss": 0.0492, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1469517743403093, |
| "grad_norm": 1.5672845093843653, |
| "learning_rate": 4.378912188470374e-06, |
| "loss": 0.0659, |
| "step": 2521 |
| }, |
| { |
| "epoch": 1.1474067333939946, |
| "grad_norm": 1.3205326253235095, |
| "learning_rate": 4.378440687585832e-06, |
| "loss": 0.0523, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.1478616924476797, |
| "grad_norm": 1.2623261051330958, |
| "learning_rate": 4.3779690332036005e-06, |
| "loss": 0.0452, |
| "step": 2523 |
| }, |
| { |
| "epoch": 1.148316651501365, |
| "grad_norm": 1.6057808240455016, |
| "learning_rate": 4.3774972253622205e-06, |
| "loss": 0.0614, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.14877161055505, |
| "grad_norm": 1.5280671053096613, |
| "learning_rate": 4.377025264100246e-06, |
| "loss": 0.0611, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.1492265696087351, |
| "grad_norm": 1.5093730950142752, |
| "learning_rate": 4.376553149456244e-06, |
| "loss": 0.0442, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.1496815286624205, |
| "grad_norm": 1.1139894261338705, |
| "learning_rate": 4.376080881468793e-06, |
| "loss": 0.0296, |
| "step": 2527 |
| }, |
| { |
| "epoch": 1.1501364877161055, |
| "grad_norm": 0.9637638948077641, |
| "learning_rate": 4.375608460176483e-06, |
| "loss": 0.0385, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.1505914467697906, |
| "grad_norm": 1.3874618298487023, |
| "learning_rate": 4.375135885617922e-06, |
| "loss": 0.0376, |
| "step": 2529 |
| }, |
| { |
| "epoch": 1.151046405823476, |
| "grad_norm": 1.0108739599653256, |
| "learning_rate": 4.3746631578317236e-06, |
| "loss": 0.042, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.151501364877161, |
| "grad_norm": 1.0477978478269183, |
| "learning_rate": 4.374190276856517e-06, |
| "loss": 0.0338, |
| "step": 2531 |
| }, |
| { |
| "epoch": 1.1519563239308463, |
| "grad_norm": 1.4108994571069893, |
| "learning_rate": 4.373717242730946e-06, |
| "loss": 0.0425, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.1524112829845314, |
| "grad_norm": 1.1644049374336485, |
| "learning_rate": 4.373244055493663e-06, |
| "loss": 0.034, |
| "step": 2533 |
| }, |
| { |
| "epoch": 1.1528662420382165, |
| "grad_norm": 1.4519406400703538, |
| "learning_rate": 4.372770715183336e-06, |
| "loss": 0.0395, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.1533212010919018, |
| "grad_norm": 2.2708530673350813, |
| "learning_rate": 4.372297221838642e-06, |
| "loss": 0.0738, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.153776160145587, |
| "grad_norm": 1.115588114418083, |
| "learning_rate": 4.3718235754982755e-06, |
| "loss": 0.0479, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.154231119199272, |
| "grad_norm": 1.3912239053761137, |
| "learning_rate": 4.371349776200939e-06, |
| "loss": 0.0646, |
| "step": 2537 |
| }, |
| { |
| "epoch": 1.1546860782529573, |
| "grad_norm": 1.1895266675210927, |
| "learning_rate": 4.37087582398535e-06, |
| "loss": 0.0452, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.1551410373066424, |
| "grad_norm": 0.8756359432109283, |
| "learning_rate": 4.370401718890237e-06, |
| "loss": 0.0319, |
| "step": 2539 |
| }, |
| { |
| "epoch": 1.1555959963603275, |
| "grad_norm": 1.1404809023588176, |
| "learning_rate": 4.369927460954342e-06, |
| "loss": 0.0419, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1560509554140128, |
| "grad_norm": 1.034953677525622, |
| "learning_rate": 4.36945305021642e-06, |
| "loss": 0.0447, |
| "step": 2541 |
| }, |
| { |
| "epoch": 1.156505914467698, |
| "grad_norm": 1.07838689845768, |
| "learning_rate": 4.368978486715237e-06, |
| "loss": 0.052, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.156960873521383, |
| "grad_norm": 1.1088180827845917, |
| "learning_rate": 4.368503770489573e-06, |
| "loss": 0.0428, |
| "step": 2543 |
| }, |
| { |
| "epoch": 1.1574158325750683, |
| "grad_norm": 1.0432412519466965, |
| "learning_rate": 4.368028901578218e-06, |
| "loss": 0.0424, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.1578707916287534, |
| "grad_norm": 1.233831910090493, |
| "learning_rate": 4.367553880019977e-06, |
| "loss": 0.0416, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.1583257506824385, |
| "grad_norm": 1.0183822315736946, |
| "learning_rate": 4.367078705853667e-06, |
| "loss": 0.0284, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.1587807097361238, |
| "grad_norm": 1.3978418402664838, |
| "learning_rate": 4.366603379118117e-06, |
| "loss": 0.0491, |
| "step": 2547 |
| }, |
| { |
| "epoch": 1.1592356687898089, |
| "grad_norm": 1.5608020148859059, |
| "learning_rate": 4.366127899852169e-06, |
| "loss": 0.0541, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.159690627843494, |
| "grad_norm": 1.5401142178479585, |
| "learning_rate": 4.365652268094675e-06, |
| "loss": 0.0371, |
| "step": 2549 |
| }, |
| { |
| "epoch": 1.1601455868971793, |
| "grad_norm": 1.2935963958785663, |
| "learning_rate": 4.365176483884504e-06, |
| "loss": 0.0425, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.1606005459508644, |
| "grad_norm": 1.0286225573192664, |
| "learning_rate": 4.364700547260533e-06, |
| "loss": 0.0324, |
| "step": 2551 |
| }, |
| { |
| "epoch": 1.1610555050045497, |
| "grad_norm": 1.0731921270914817, |
| "learning_rate": 4.3642244582616545e-06, |
| "loss": 0.0417, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.1615104640582348, |
| "grad_norm": 1.3742028973162175, |
| "learning_rate": 4.363748216926772e-06, |
| "loss": 0.0469, |
| "step": 2553 |
| }, |
| { |
| "epoch": 1.1619654231119199, |
| "grad_norm": 1.44817553523058, |
| "learning_rate": 4.363271823294802e-06, |
| "loss": 0.0449, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.1624203821656052, |
| "grad_norm": 1.7296257496846295, |
| "learning_rate": 4.362795277404673e-06, |
| "loss": 0.0581, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.1628753412192903, |
| "grad_norm": 0.8285482219963878, |
| "learning_rate": 4.362318579295326e-06, |
| "loss": 0.0245, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.1633303002729753, |
| "grad_norm": 1.3742522135018411, |
| "learning_rate": 4.361841729005715e-06, |
| "loss": 0.062, |
| "step": 2557 |
| }, |
| { |
| "epoch": 1.1637852593266607, |
| "grad_norm": 1.275915845948076, |
| "learning_rate": 4.361364726574806e-06, |
| "loss": 0.0362, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.1642402183803457, |
| "grad_norm": 1.0204881246754238, |
| "learning_rate": 4.360887572041578e-06, |
| "loss": 0.0482, |
| "step": 2559 |
| }, |
| { |
| "epoch": 1.164695177434031, |
| "grad_norm": 1.5641162257475847, |
| "learning_rate": 4.36041026544502e-06, |
| "loss": 0.0634, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1651501364877161, |
| "grad_norm": 0.9569151636268276, |
| "learning_rate": 4.359932806824138e-06, |
| "loss": 0.0391, |
| "step": 2561 |
| }, |
| { |
| "epoch": 1.1656050955414012, |
| "grad_norm": 1.1571738424155176, |
| "learning_rate": 4.359455196217946e-06, |
| "loss": 0.0309, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.1660600545950865, |
| "grad_norm": 0.9720311264059563, |
| "learning_rate": 4.358977433665471e-06, |
| "loss": 0.0289, |
| "step": 2563 |
| }, |
| { |
| "epoch": 1.1665150136487716, |
| "grad_norm": 1.1195718793798541, |
| "learning_rate": 4.3584995192057565e-06, |
| "loss": 0.0489, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.1669699727024567, |
| "grad_norm": 0.8563415290407096, |
| "learning_rate": 4.358021452877854e-06, |
| "loss": 0.0302, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.167424931756142, |
| "grad_norm": 1.0377841509592145, |
| "learning_rate": 4.357543234720829e-06, |
| "loss": 0.0421, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.1678798908098271, |
| "grad_norm": 1.417493067242251, |
| "learning_rate": 4.357064864773761e-06, |
| "loss": 0.0513, |
| "step": 2567 |
| }, |
| { |
| "epoch": 1.1683348498635122, |
| "grad_norm": 1.2490536851099554, |
| "learning_rate": 4.3565863430757375e-06, |
| "loss": 0.0465, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.1687898089171975, |
| "grad_norm": 1.2999928751001002, |
| "learning_rate": 4.356107669665862e-06, |
| "loss": 0.0439, |
| "step": 2569 |
| }, |
| { |
| "epoch": 1.1692447679708826, |
| "grad_norm": 1.2578059399065435, |
| "learning_rate": 4.355628844583249e-06, |
| "loss": 0.0481, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1696997270245677, |
| "grad_norm": 1.2347557622000964, |
| "learning_rate": 4.355149867867029e-06, |
| "loss": 0.0401, |
| "step": 2571 |
| }, |
| { |
| "epoch": 1.170154686078253, |
| "grad_norm": 1.1439428966642318, |
| "learning_rate": 4.354670739556338e-06, |
| "loss": 0.0269, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.170609645131938, |
| "grad_norm": 1.207997182803568, |
| "learning_rate": 4.35419145969033e-06, |
| "loss": 0.054, |
| "step": 2573 |
| }, |
| { |
| "epoch": 1.1710646041856232, |
| "grad_norm": 0.9958085388233581, |
| "learning_rate": 4.35371202830817e-06, |
| "loss": 0.0347, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.1715195632393085, |
| "grad_norm": 1.4469523807416915, |
| "learning_rate": 4.353232445449034e-06, |
| "loss": 0.0478, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.1719745222929936, |
| "grad_norm": 1.2192178084336096, |
| "learning_rate": 4.352752711152112e-06, |
| "loss": 0.0385, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.1724294813466787, |
| "grad_norm": 1.0288045332506803, |
| "learning_rate": 4.352272825456605e-06, |
| "loss": 0.0383, |
| "step": 2577 |
| }, |
| { |
| "epoch": 1.172884440400364, |
| "grad_norm": 0.9071458989549619, |
| "learning_rate": 4.3517927884017275e-06, |
| "loss": 0.0344, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.173339399454049, |
| "grad_norm": 1.33906299902172, |
| "learning_rate": 4.351312600026706e-06, |
| "loss": 0.0529, |
| "step": 2579 |
| }, |
| { |
| "epoch": 1.1737943585077344, |
| "grad_norm": 1.1311450064261168, |
| "learning_rate": 4.350832260370779e-06, |
| "loss": 0.0366, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1742493175614195, |
| "grad_norm": 1.9426905665431426, |
| "learning_rate": 4.350351769473198e-06, |
| "loss": 0.0532, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.1747042766151046, |
| "grad_norm": 1.2781318500543644, |
| "learning_rate": 4.349871127373226e-06, |
| "loss": 0.0487, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.1751592356687899, |
| "grad_norm": 1.6790567518399873, |
| "learning_rate": 4.349390334110141e-06, |
| "loss": 0.0713, |
| "step": 2583 |
| }, |
| { |
| "epoch": 1.175614194722475, |
| "grad_norm": 1.0221229397935976, |
| "learning_rate": 4.348909389723228e-06, |
| "loss": 0.0375, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.17606915377616, |
| "grad_norm": 1.266818927759103, |
| "learning_rate": 4.348428294251791e-06, |
| "loss": 0.0572, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.1765241128298454, |
| "grad_norm": 1.1645878298799628, |
| "learning_rate": 4.34794704773514e-06, |
| "loss": 0.0362, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.1769790718835305, |
| "grad_norm": 1.630299911836864, |
| "learning_rate": 4.347465650212602e-06, |
| "loss": 0.0496, |
| "step": 2587 |
| }, |
| { |
| "epoch": 1.1774340309372158, |
| "grad_norm": 1.4291932771494225, |
| "learning_rate": 4.346984101723513e-06, |
| "loss": 0.055, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.1778889899909009, |
| "grad_norm": 1.1155617857423723, |
| "learning_rate": 4.3465024023072255e-06, |
| "loss": 0.0446, |
| "step": 2589 |
| }, |
| { |
| "epoch": 1.178343949044586, |
| "grad_norm": 1.102429871166694, |
| "learning_rate": 4.3460205520031006e-06, |
| "loss": 0.0386, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1787989080982713, |
| "grad_norm": 1.581559071083143, |
| "learning_rate": 4.345538550850512e-06, |
| "loss": 0.0577, |
| "step": 2591 |
| }, |
| { |
| "epoch": 1.1792538671519563, |
| "grad_norm": 1.1692163124504829, |
| "learning_rate": 4.345056398888847e-06, |
| "loss": 0.0414, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.1797088262056414, |
| "grad_norm": 1.2967463744472663, |
| "learning_rate": 4.3445740961575066e-06, |
| "loss": 0.0459, |
| "step": 2593 |
| }, |
| { |
| "epoch": 1.1801637852593267, |
| "grad_norm": 1.1801818713644656, |
| "learning_rate": 4.3440916426959e-06, |
| "loss": 0.0447, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.1806187443130118, |
| "grad_norm": 1.056332937803461, |
| "learning_rate": 4.343609038543452e-06, |
| "loss": 0.0427, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.181073703366697, |
| "grad_norm": 0.8087418066841546, |
| "learning_rate": 4.3431262837396e-06, |
| "loss": 0.0283, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.1815286624203822, |
| "grad_norm": 1.3717396641256512, |
| "learning_rate": 4.342643378323791e-06, |
| "loss": 0.0413, |
| "step": 2597 |
| }, |
| { |
| "epoch": 1.1819836214740673, |
| "grad_norm": 1.1227194071656794, |
| "learning_rate": 4.342160322335487e-06, |
| "loss": 0.0504, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.1824385805277524, |
| "grad_norm": 1.8008906018414683, |
| "learning_rate": 4.34167711581416e-06, |
| "loss": 0.0531, |
| "step": 2599 |
| }, |
| { |
| "epoch": 1.1828935395814377, |
| "grad_norm": 1.2487182183391359, |
| "learning_rate": 4.3411937587992955e-06, |
| "loss": 0.0506, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1833484986351228, |
| "grad_norm": 1.320687044420774, |
| "learning_rate": 4.340710251330393e-06, |
| "loss": 0.037, |
| "step": 2601 |
| }, |
| { |
| "epoch": 1.183803457688808, |
| "grad_norm": 1.4687079679462933, |
| "learning_rate": 4.34022659344696e-06, |
| "loss": 0.0483, |
| "step": 2602 |
| }, |
| { |
| "epoch": 1.1842584167424932, |
| "grad_norm": 1.7281068422393309, |
| "learning_rate": 4.339742785188521e-06, |
| "loss": 0.0504, |
| "step": 2603 |
| }, |
| { |
| "epoch": 1.1847133757961783, |
| "grad_norm": 1.4966277369670473, |
| "learning_rate": 4.339258826594611e-06, |
| "loss": 0.0565, |
| "step": 2604 |
| }, |
| { |
| "epoch": 1.1851683348498634, |
| "grad_norm": 1.0309253275774168, |
| "learning_rate": 4.338774717704774e-06, |
| "loss": 0.0305, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.1856232939035487, |
| "grad_norm": 1.6778028117255999, |
| "learning_rate": 4.338290458558572e-06, |
| "loss": 0.0655, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.1860782529572338, |
| "grad_norm": 0.9073764223244197, |
| "learning_rate": 4.3378060491955744e-06, |
| "loss": 0.0269, |
| "step": 2607 |
| }, |
| { |
| "epoch": 1.186533212010919, |
| "grad_norm": 1.4009552856469054, |
| "learning_rate": 4.337321489655366e-06, |
| "loss": 0.0528, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.1869881710646042, |
| "grad_norm": 1.2237165912590953, |
| "learning_rate": 4.336836779977543e-06, |
| "loss": 0.0452, |
| "step": 2609 |
| }, |
| { |
| "epoch": 1.1874431301182893, |
| "grad_norm": 1.1089915681263793, |
| "learning_rate": 4.336351920201714e-06, |
| "loss": 0.0503, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1878980891719746, |
| "grad_norm": 1.1575829526678958, |
| "learning_rate": 4.335866910367498e-06, |
| "loss": 0.0316, |
| "step": 2611 |
| }, |
| { |
| "epoch": 1.1883530482256597, |
| "grad_norm": 1.5899164379309811, |
| "learning_rate": 4.3353817505145294e-06, |
| "loss": 0.0591, |
| "step": 2612 |
| }, |
| { |
| "epoch": 1.1888080072793448, |
| "grad_norm": 1.0938077922062897, |
| "learning_rate": 4.334896440682452e-06, |
| "loss": 0.0462, |
| "step": 2613 |
| }, |
| { |
| "epoch": 1.18926296633303, |
| "grad_norm": 1.263020825895642, |
| "learning_rate": 4.334410980910924e-06, |
| "loss": 0.0625, |
| "step": 2614 |
| }, |
| { |
| "epoch": 1.1897179253867152, |
| "grad_norm": 1.375825558928864, |
| "learning_rate": 4.333925371239615e-06, |
| "loss": 0.0357, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.1901728844404005, |
| "grad_norm": 1.5744528953975865, |
| "learning_rate": 4.3334396117082065e-06, |
| "loss": 0.0597, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.1906278434940856, |
| "grad_norm": 1.491527583156796, |
| "learning_rate": 4.332953702356393e-06, |
| "loss": 0.0689, |
| "step": 2617 |
| }, |
| { |
| "epoch": 1.1910828025477707, |
| "grad_norm": 1.2695873066043095, |
| "learning_rate": 4.33246764322388e-06, |
| "loss": 0.0493, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.191537761601456, |
| "grad_norm": 1.267946927066454, |
| "learning_rate": 4.331981434350387e-06, |
| "loss": 0.0462, |
| "step": 2619 |
| }, |
| { |
| "epoch": 1.191992720655141, |
| "grad_norm": 1.5583866575742822, |
| "learning_rate": 4.331495075775644e-06, |
| "loss": 0.0726, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1924476797088261, |
| "grad_norm": 1.3067997154538695, |
| "learning_rate": 4.331008567539395e-06, |
| "loss": 0.0574, |
| "step": 2621 |
| }, |
| { |
| "epoch": 1.1929026387625115, |
| "grad_norm": 1.68657006877326, |
| "learning_rate": 4.330521909681394e-06, |
| "loss": 0.057, |
| "step": 2622 |
| }, |
| { |
| "epoch": 1.1933575978161965, |
| "grad_norm": 1.070230342746352, |
| "learning_rate": 4.330035102241409e-06, |
| "loss": 0.0494, |
| "step": 2623 |
| }, |
| { |
| "epoch": 1.1938125568698816, |
| "grad_norm": 0.9723243113806012, |
| "learning_rate": 4.32954814525922e-06, |
| "loss": 0.0332, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.194267515923567, |
| "grad_norm": 1.2685431020720037, |
| "learning_rate": 4.329061038774619e-06, |
| "loss": 0.0507, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.194722474977252, |
| "grad_norm": 1.0835779865853132, |
| "learning_rate": 4.32857378282741e-06, |
| "loss": 0.0479, |
| "step": 2626 |
| }, |
| { |
| "epoch": 1.1951774340309371, |
| "grad_norm": 1.0387725026250871, |
| "learning_rate": 4.328086377457409e-06, |
| "loss": 0.0334, |
| "step": 2627 |
| }, |
| { |
| "epoch": 1.1956323930846224, |
| "grad_norm": 1.4314410760599618, |
| "learning_rate": 4.327598822704444e-06, |
| "loss": 0.0535, |
| "step": 2628 |
| }, |
| { |
| "epoch": 1.1960873521383075, |
| "grad_norm": 1.3961863059099942, |
| "learning_rate": 4.327111118608357e-06, |
| "loss": 0.0674, |
| "step": 2629 |
| }, |
| { |
| "epoch": 1.1965423111919926, |
| "grad_norm": 1.333893251244688, |
| "learning_rate": 4.326623265209001e-06, |
| "loss": 0.05, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.196997270245678, |
| "grad_norm": 1.322580892246249, |
| "learning_rate": 4.326135262546241e-06, |
| "loss": 0.0421, |
| "step": 2631 |
| }, |
| { |
| "epoch": 1.197452229299363, |
| "grad_norm": 1.9375107994278074, |
| "learning_rate": 4.325647110659954e-06, |
| "loss": 0.0712, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.197907188353048, |
| "grad_norm": 1.168082719041356, |
| "learning_rate": 4.325158809590028e-06, |
| "loss": 0.0333, |
| "step": 2633 |
| }, |
| { |
| "epoch": 1.1983621474067334, |
| "grad_norm": 0.9816497240960296, |
| "learning_rate": 4.324670359376368e-06, |
| "loss": 0.038, |
| "step": 2634 |
| }, |
| { |
| "epoch": 1.1988171064604185, |
| "grad_norm": 1.1935314907676322, |
| "learning_rate": 4.3241817600588865e-06, |
| "loss": 0.0425, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.1992720655141038, |
| "grad_norm": 0.7280480133520848, |
| "learning_rate": 4.3236930116775086e-06, |
| "loss": 0.0241, |
| "step": 2636 |
| }, |
| { |
| "epoch": 1.199727024567789, |
| "grad_norm": 1.6138573788858979, |
| "learning_rate": 4.323204114272174e-06, |
| "loss": 0.0652, |
| "step": 2637 |
| }, |
| { |
| "epoch": 1.200181983621474, |
| "grad_norm": 1.3500478748040605, |
| "learning_rate": 4.3227150678828335e-06, |
| "loss": 0.0458, |
| "step": 2638 |
| }, |
| { |
| "epoch": 1.2006369426751593, |
| "grad_norm": 1.1897012603509638, |
| "learning_rate": 4.322225872549448e-06, |
| "loss": 0.0407, |
| "step": 2639 |
| }, |
| { |
| "epoch": 1.2010919017288444, |
| "grad_norm": 1.5282207173547417, |
| "learning_rate": 4.321736528311994e-06, |
| "loss": 0.0582, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.2015468607825295, |
| "grad_norm": 1.3392160941035591, |
| "learning_rate": 4.321247035210456e-06, |
| "loss": 0.0448, |
| "step": 2641 |
| }, |
| { |
| "epoch": 1.2020018198362148, |
| "grad_norm": 1.3785022604960386, |
| "learning_rate": 4.320757393284837e-06, |
| "loss": 0.0397, |
| "step": 2642 |
| }, |
| { |
| "epoch": 1.2024567788898999, |
| "grad_norm": 1.285523733844365, |
| "learning_rate": 4.3202676025751455e-06, |
| "loss": 0.056, |
| "step": 2643 |
| }, |
| { |
| "epoch": 1.2029117379435852, |
| "grad_norm": 1.281764404944773, |
| "learning_rate": 4.319777663121406e-06, |
| "loss": 0.0525, |
| "step": 2644 |
| }, |
| { |
| "epoch": 1.2033666969972703, |
| "grad_norm": 2.0617531919811154, |
| "learning_rate": 4.319287574963653e-06, |
| "loss": 0.072, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.2038216560509554, |
| "grad_norm": 1.151027870688013, |
| "learning_rate": 4.318797338141936e-06, |
| "loss": 0.0388, |
| "step": 2646 |
| }, |
| { |
| "epoch": 1.2042766151046407, |
| "grad_norm": 1.3251849606667694, |
| "learning_rate": 4.318306952696314e-06, |
| "loss": 0.052, |
| "step": 2647 |
| }, |
| { |
| "epoch": 1.2047315741583258, |
| "grad_norm": 0.9553323646012558, |
| "learning_rate": 4.317816418666859e-06, |
| "loss": 0.0362, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.2051865332120109, |
| "grad_norm": 1.2558683237040615, |
| "learning_rate": 4.317325736093656e-06, |
| "loss": 0.0411, |
| "step": 2649 |
| }, |
| { |
| "epoch": 1.2056414922656962, |
| "grad_norm": 1.3582780841204083, |
| "learning_rate": 4.316834905016801e-06, |
| "loss": 0.0501, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.2060964513193813, |
| "grad_norm": 1.4044619964142426, |
| "learning_rate": 4.3163439254764015e-06, |
| "loss": 0.0427, |
| "step": 2651 |
| }, |
| { |
| "epoch": 1.2065514103730663, |
| "grad_norm": 1.3865901268810323, |
| "learning_rate": 4.31585279751258e-06, |
| "loss": 0.0441, |
| "step": 2652 |
| }, |
| { |
| "epoch": 1.2070063694267517, |
| "grad_norm": 1.0581399940422287, |
| "learning_rate": 4.315361521165467e-06, |
| "loss": 0.0316, |
| "step": 2653 |
| }, |
| { |
| "epoch": 1.2074613284804367, |
| "grad_norm": 0.9498409377999157, |
| "learning_rate": 4.314870096475209e-06, |
| "loss": 0.031, |
| "step": 2654 |
| }, |
| { |
| "epoch": 1.2079162875341218, |
| "grad_norm": 1.3395763349079761, |
| "learning_rate": 4.3143785234819624e-06, |
| "loss": 0.0392, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.2083712465878071, |
| "grad_norm": 1.1668460538335044, |
| "learning_rate": 4.3138868022258974e-06, |
| "loss": 0.037, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.2088262056414922, |
| "grad_norm": 1.3236957800006974, |
| "learning_rate": 4.313394932747194e-06, |
| "loss": 0.0463, |
| "step": 2657 |
| }, |
| { |
| "epoch": 1.2092811646951773, |
| "grad_norm": 1.0304307373040038, |
| "learning_rate": 4.312902915086045e-06, |
| "loss": 0.0411, |
| "step": 2658 |
| }, |
| { |
| "epoch": 1.2097361237488626, |
| "grad_norm": 1.617438952674323, |
| "learning_rate": 4.312410749282658e-06, |
| "loss": 0.0644, |
| "step": 2659 |
| }, |
| { |
| "epoch": 1.2101910828025477, |
| "grad_norm": 1.6676288833209185, |
| "learning_rate": 4.311918435377248e-06, |
| "loss": 0.061, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.210646041856233, |
| "grad_norm": 1.2394524290208426, |
| "learning_rate": 4.311425973410047e-06, |
| "loss": 0.0466, |
| "step": 2661 |
| }, |
| { |
| "epoch": 1.2111010009099181, |
| "grad_norm": 1.1019703553584292, |
| "learning_rate": 4.310933363421296e-06, |
| "loss": 0.0438, |
| "step": 2662 |
| }, |
| { |
| "epoch": 1.2115559599636032, |
| "grad_norm": 1.2307980815737782, |
| "learning_rate": 4.310440605451248e-06, |
| "loss": 0.0506, |
| "step": 2663 |
| }, |
| { |
| "epoch": 1.2120109190172885, |
| "grad_norm": 1.593712071886966, |
| "learning_rate": 4.30994769954017e-06, |
| "loss": 0.0453, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.2124658780709736, |
| "grad_norm": 1.4947965857246563, |
| "learning_rate": 4.30945464572834e-06, |
| "loss": 0.0598, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.2129208371246587, |
| "grad_norm": 1.0674292570554222, |
| "learning_rate": 4.3089614440560465e-06, |
| "loss": 0.0329, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.213375796178344, |
| "grad_norm": 1.2203614750189777, |
| "learning_rate": 4.3084680945635946e-06, |
| "loss": 0.0462, |
| "step": 2667 |
| }, |
| { |
| "epoch": 1.213830755232029, |
| "grad_norm": 1.7186029559337777, |
| "learning_rate": 4.307974597291296e-06, |
| "loss": 0.0718, |
| "step": 2668 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 1.067668191281136, |
| "learning_rate": 4.307480952279478e-06, |
| "loss": 0.0281, |
| "step": 2669 |
| }, |
| { |
| "epoch": 1.2147406733393995, |
| "grad_norm": 0.8765223532562123, |
| "learning_rate": 4.3069871595684795e-06, |
| "loss": 0.0274, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2151956323930846, |
| "grad_norm": 1.2905532614569657, |
| "learning_rate": 4.30649321919865e-06, |
| "loss": 0.0461, |
| "step": 2671 |
| }, |
| { |
| "epoch": 1.21565059144677, |
| "grad_norm": 1.5131575179066197, |
| "learning_rate": 4.305999131210353e-06, |
| "loss": 0.0485, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.216105550500455, |
| "grad_norm": 1.5581230353431827, |
| "learning_rate": 4.305504895643963e-06, |
| "loss": 0.0517, |
| "step": 2673 |
| }, |
| { |
| "epoch": 1.21656050955414, |
| "grad_norm": 1.185733143097644, |
| "learning_rate": 4.305010512539867e-06, |
| "loss": 0.0393, |
| "step": 2674 |
| }, |
| { |
| "epoch": 1.2170154686078254, |
| "grad_norm": 1.7014068344022533, |
| "learning_rate": 4.304515981938462e-06, |
| "loss": 0.0582, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.2174704276615105, |
| "grad_norm": 1.2375425236050612, |
| "learning_rate": 4.304021303880161e-06, |
| "loss": 0.0506, |
| "step": 2676 |
| }, |
| { |
| "epoch": 1.2179253867151956, |
| "grad_norm": 1.3982335408942175, |
| "learning_rate": 4.303526478405386e-06, |
| "loss": 0.0439, |
| "step": 2677 |
| }, |
| { |
| "epoch": 1.2183803457688809, |
| "grad_norm": 1.139069195563892, |
| "learning_rate": 4.3030315055545715e-06, |
| "loss": 0.0497, |
| "step": 2678 |
| }, |
| { |
| "epoch": 1.218835304822566, |
| "grad_norm": 1.2256378742793013, |
| "learning_rate": 4.302536385368165e-06, |
| "loss": 0.0482, |
| "step": 2679 |
| }, |
| { |
| "epoch": 1.219290263876251, |
| "grad_norm": 1.0532517244142978, |
| "learning_rate": 4.3020411178866246e-06, |
| "loss": 0.027, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2197452229299364, |
| "grad_norm": 1.0158835721348332, |
| "learning_rate": 4.3015457031504226e-06, |
| "loss": 0.0338, |
| "step": 2681 |
| }, |
| { |
| "epoch": 1.2202001819836215, |
| "grad_norm": 1.3209441539963251, |
| "learning_rate": 4.301050141200041e-06, |
| "loss": 0.0413, |
| "step": 2682 |
| }, |
| { |
| "epoch": 1.2206551410373065, |
| "grad_norm": 1.2029700094175324, |
| "learning_rate": 4.300554432075975e-06, |
| "loss": 0.0426, |
| "step": 2683 |
| }, |
| { |
| "epoch": 1.2211101000909919, |
| "grad_norm": 2.1891001967424497, |
| "learning_rate": 4.300058575818733e-06, |
| "loss": 0.0815, |
| "step": 2684 |
| }, |
| { |
| "epoch": 1.221565059144677, |
| "grad_norm": 1.5555726562312315, |
| "learning_rate": 4.299562572468833e-06, |
| "loss": 0.0613, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.222020018198362, |
| "grad_norm": 0.876349999904215, |
| "learning_rate": 4.299066422066807e-06, |
| "loss": 0.024, |
| "step": 2686 |
| }, |
| { |
| "epoch": 1.2224749772520473, |
| "grad_norm": 1.6071265872782081, |
| "learning_rate": 4.2985701246531965e-06, |
| "loss": 0.0595, |
| "step": 2687 |
| }, |
| { |
| "epoch": 1.2229299363057324, |
| "grad_norm": 1.1252656710992743, |
| "learning_rate": 4.2980736802685575e-06, |
| "loss": 0.0365, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.2233848953594177, |
| "grad_norm": 1.2454277763206345, |
| "learning_rate": 4.297577088953458e-06, |
| "loss": 0.042, |
| "step": 2689 |
| }, |
| { |
| "epoch": 1.2238398544131028, |
| "grad_norm": 1.3331402174546207, |
| "learning_rate": 4.2970803507484756e-06, |
| "loss": 0.0443, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.224294813466788, |
| "grad_norm": 0.8807425462365368, |
| "learning_rate": 4.296583465694204e-06, |
| "loss": 0.0247, |
| "step": 2691 |
| }, |
| { |
| "epoch": 1.2247497725204732, |
| "grad_norm": 1.5107994300897551, |
| "learning_rate": 4.296086433831244e-06, |
| "loss": 0.0562, |
| "step": 2692 |
| }, |
| { |
| "epoch": 1.2252047315741583, |
| "grad_norm": 1.1896975196354314, |
| "learning_rate": 4.295589255200212e-06, |
| "loss": 0.0409, |
| "step": 2693 |
| }, |
| { |
| "epoch": 1.2256596906278434, |
| "grad_norm": 1.6408631876378263, |
| "learning_rate": 4.295091929841734e-06, |
| "loss": 0.0605, |
| "step": 2694 |
| }, |
| { |
| "epoch": 1.2261146496815287, |
| "grad_norm": 1.4729994818109902, |
| "learning_rate": 4.2945944577964516e-06, |
| "loss": 0.0641, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.2265696087352138, |
| "grad_norm": 0.9615140420824617, |
| "learning_rate": 4.294096839105013e-06, |
| "loss": 0.0313, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.2270245677888991, |
| "grad_norm": 1.2488451650637187, |
| "learning_rate": 4.293599073808083e-06, |
| "loss": 0.0248, |
| "step": 2697 |
| }, |
| { |
| "epoch": 1.2274795268425842, |
| "grad_norm": 1.0912415382975869, |
| "learning_rate": 4.293101161946337e-06, |
| "loss": 0.0445, |
| "step": 2698 |
| }, |
| { |
| "epoch": 1.2279344858962693, |
| "grad_norm": 1.1751753225474721, |
| "learning_rate": 4.292603103560462e-06, |
| "loss": 0.0368, |
| "step": 2699 |
| }, |
| { |
| "epoch": 1.2283894449499546, |
| "grad_norm": 1.1326874563295564, |
| "learning_rate": 4.292104898691157e-06, |
| "loss": 0.0404, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2288444040036397, |
| "grad_norm": 1.5032483835067327, |
| "learning_rate": 4.291606547379131e-06, |
| "loss": 0.0475, |
| "step": 2701 |
| }, |
| { |
| "epoch": 1.2292993630573248, |
| "grad_norm": 1.027713182816127, |
| "learning_rate": 4.291108049665109e-06, |
| "loss": 0.0408, |
| "step": 2702 |
| }, |
| { |
| "epoch": 1.22975432211101, |
| "grad_norm": 1.5746683457870045, |
| "learning_rate": 4.290609405589827e-06, |
| "loss": 0.0274, |
| "step": 2703 |
| }, |
| { |
| "epoch": 1.2302092811646952, |
| "grad_norm": 1.2638695199357213, |
| "learning_rate": 4.29011061519403e-06, |
| "loss": 0.054, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.2306642402183803, |
| "grad_norm": 1.4052005740754276, |
| "learning_rate": 4.289611678518478e-06, |
| "loss": 0.0534, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.2311191992720656, |
| "grad_norm": 1.3735183505459223, |
| "learning_rate": 4.289112595603941e-06, |
| "loss": 0.0377, |
| "step": 2706 |
| }, |
| { |
| "epoch": 1.2315741583257507, |
| "grad_norm": 1.4478720172943578, |
| "learning_rate": 4.288613366491202e-06, |
| "loss": 0.0543, |
| "step": 2707 |
| }, |
| { |
| "epoch": 1.2320291173794358, |
| "grad_norm": 1.7455696934218505, |
| "learning_rate": 4.288113991221057e-06, |
| "loss": 0.0603, |
| "step": 2708 |
| }, |
| { |
| "epoch": 1.232484076433121, |
| "grad_norm": 1.1210761028191916, |
| "learning_rate": 4.2876144698343115e-06, |
| "loss": 0.0475, |
| "step": 2709 |
| }, |
| { |
| "epoch": 1.2329390354868062, |
| "grad_norm": 1.1060112184650446, |
| "learning_rate": 4.287114802371783e-06, |
| "loss": 0.04, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.2333939945404913, |
| "grad_norm": 1.2712429142254253, |
| "learning_rate": 4.286614988874304e-06, |
| "loss": 0.0563, |
| "step": 2711 |
| }, |
| { |
| "epoch": 1.2338489535941766, |
| "grad_norm": 1.2328935475809604, |
| "learning_rate": 4.286115029382717e-06, |
| "loss": 0.0552, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.2343039126478617, |
| "grad_norm": 1.855500596222532, |
| "learning_rate": 4.285614923937876e-06, |
| "loss": 0.0682, |
| "step": 2713 |
| }, |
| { |
| "epoch": 1.2347588717015467, |
| "grad_norm": 1.1666260050179595, |
| "learning_rate": 4.285114672580647e-06, |
| "loss": 0.0363, |
| "step": 2714 |
| }, |
| { |
| "epoch": 1.235213830755232, |
| "grad_norm": 0.9861248154816342, |
| "learning_rate": 4.284614275351907e-06, |
| "loss": 0.0453, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.2356687898089171, |
| "grad_norm": 1.6523912699677337, |
| "learning_rate": 4.2841137322925495e-06, |
| "loss": 0.0675, |
| "step": 2716 |
| }, |
| { |
| "epoch": 1.2361237488626025, |
| "grad_norm": 1.155544966260493, |
| "learning_rate": 4.283613043443474e-06, |
| "loss": 0.04, |
| "step": 2717 |
| }, |
| { |
| "epoch": 1.2365787079162875, |
| "grad_norm": 1.5051110722840872, |
| "learning_rate": 4.2831122088455955e-06, |
| "loss": 0.0501, |
| "step": 2718 |
| }, |
| { |
| "epoch": 1.2370336669699726, |
| "grad_norm": 1.4761491749577835, |
| "learning_rate": 4.2826112285398395e-06, |
| "loss": 0.0651, |
| "step": 2719 |
| }, |
| { |
| "epoch": 1.237488626023658, |
| "grad_norm": 1.1967962508548793, |
| "learning_rate": 4.282110102567145e-06, |
| "loss": 0.027, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.237943585077343, |
| "grad_norm": 1.1493057900219714, |
| "learning_rate": 4.28160883096846e-06, |
| "loss": 0.041, |
| "step": 2721 |
| }, |
| { |
| "epoch": 1.2383985441310281, |
| "grad_norm": 1.0985145023097473, |
| "learning_rate": 4.281107413784747e-06, |
| "loss": 0.0274, |
| "step": 2722 |
| }, |
| { |
| "epoch": 1.2388535031847134, |
| "grad_norm": 0.7388333736423212, |
| "learning_rate": 4.28060585105698e-06, |
| "loss": 0.0282, |
| "step": 2723 |
| }, |
| { |
| "epoch": 1.2393084622383985, |
| "grad_norm": 1.1903720740600707, |
| "learning_rate": 4.280104142826143e-06, |
| "loss": 0.0499, |
| "step": 2724 |
| }, |
| { |
| "epoch": 1.2397634212920838, |
| "grad_norm": 1.723316755925373, |
| "learning_rate": 4.2796022891332355e-06, |
| "loss": 0.0566, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.240218380345769, |
| "grad_norm": 1.216488630920899, |
| "learning_rate": 4.279100290019265e-06, |
| "loss": 0.0462, |
| "step": 2726 |
| }, |
| { |
| "epoch": 1.240673339399454, |
| "grad_norm": 1.1748663632265637, |
| "learning_rate": 4.278598145525253e-06, |
| "loss": 0.0532, |
| "step": 2727 |
| }, |
| { |
| "epoch": 1.2411282984531393, |
| "grad_norm": 1.0632256527535209, |
| "learning_rate": 4.278095855692233e-06, |
| "loss": 0.0448, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.2415832575068244, |
| "grad_norm": 1.0009224728607076, |
| "learning_rate": 4.277593420561249e-06, |
| "loss": 0.0294, |
| "step": 2729 |
| }, |
| { |
| "epoch": 1.2420382165605095, |
| "grad_norm": 1.1473518056754541, |
| "learning_rate": 4.277090840173359e-06, |
| "loss": 0.0328, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2424931756141948, |
| "grad_norm": 1.5670630246372301, |
| "learning_rate": 4.276588114569631e-06, |
| "loss": 0.0618, |
| "step": 2731 |
| }, |
| { |
| "epoch": 1.24294813466788, |
| "grad_norm": 0.9616041915829661, |
| "learning_rate": 4.2760852437911436e-06, |
| "loss": 0.0344, |
| "step": 2732 |
| }, |
| { |
| "epoch": 1.243403093721565, |
| "grad_norm": 0.8928694681439103, |
| "learning_rate": 4.2755822278789926e-06, |
| "loss": 0.0301, |
| "step": 2733 |
| }, |
| { |
| "epoch": 1.2438580527752503, |
| "grad_norm": 0.7742130363736376, |
| "learning_rate": 4.2750790668742795e-06, |
| "loss": 0.03, |
| "step": 2734 |
| }, |
| { |
| "epoch": 1.2443130118289354, |
| "grad_norm": 1.3690067259997272, |
| "learning_rate": 4.274575760818122e-06, |
| "loss": 0.0627, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.2447679708826205, |
| "grad_norm": 1.2398077927504156, |
| "learning_rate": 4.274072309751646e-06, |
| "loss": 0.042, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.2452229299363058, |
| "grad_norm": 2.119418114608392, |
| "learning_rate": 4.273568713715993e-06, |
| "loss": 0.084, |
| "step": 2737 |
| }, |
| { |
| "epoch": 1.2456778889899909, |
| "grad_norm": 1.5538300114961436, |
| "learning_rate": 4.2730649727523145e-06, |
| "loss": 0.0699, |
| "step": 2738 |
| }, |
| { |
| "epoch": 1.246132848043676, |
| "grad_norm": 1.3643103879770182, |
| "learning_rate": 4.272561086901773e-06, |
| "loss": 0.0532, |
| "step": 2739 |
| }, |
| { |
| "epoch": 1.2465878070973613, |
| "grad_norm": 1.1568077681351847, |
| "learning_rate": 4.272057056205544e-06, |
| "loss": 0.0344, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2470427661510464, |
| "grad_norm": 1.2641994469449778, |
| "learning_rate": 4.271552880704815e-06, |
| "loss": 0.0373, |
| "step": 2741 |
| }, |
| { |
| "epoch": 1.2474977252047315, |
| "grad_norm": 1.0475430675782684, |
| "learning_rate": 4.271048560440786e-06, |
| "loss": 0.0376, |
| "step": 2742 |
| }, |
| { |
| "epoch": 1.2479526842584168, |
| "grad_norm": 1.1284095107322705, |
| "learning_rate": 4.2705440954546665e-06, |
| "loss": 0.052, |
| "step": 2743 |
| }, |
| { |
| "epoch": 1.2484076433121019, |
| "grad_norm": 1.2553617458506297, |
| "learning_rate": 4.270039485787678e-06, |
| "loss": 0.0416, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.2488626023657872, |
| "grad_norm": 1.4058677641547184, |
| "learning_rate": 4.269534731481057e-06, |
| "loss": 0.0473, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.2493175614194723, |
| "grad_norm": 1.2142381478070348, |
| "learning_rate": 4.269029832576048e-06, |
| "loss": 0.035, |
| "step": 2746 |
| }, |
| { |
| "epoch": 1.2497725204731573, |
| "grad_norm": 1.6479152556232572, |
| "learning_rate": 4.2685247891139114e-06, |
| "loss": 0.0608, |
| "step": 2747 |
| }, |
| { |
| "epoch": 1.2502274795268427, |
| "grad_norm": 0.953403781482595, |
| "learning_rate": 4.268019601135914e-06, |
| "loss": 0.038, |
| "step": 2748 |
| }, |
| { |
| "epoch": 1.2506824385805277, |
| "grad_norm": 1.5098476212077065, |
| "learning_rate": 4.26751426868334e-06, |
| "loss": 0.0475, |
| "step": 2749 |
| }, |
| { |
| "epoch": 1.251137397634213, |
| "grad_norm": 1.2921753382761736, |
| "learning_rate": 4.2670087917974826e-06, |
| "loss": 0.0584, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2515923566878981, |
| "grad_norm": 1.350245665282995, |
| "learning_rate": 4.266503170519645e-06, |
| "loss": 0.0457, |
| "step": 2751 |
| }, |
| { |
| "epoch": 1.2520473157415832, |
| "grad_norm": 1.695246374616271, |
| "learning_rate": 4.265997404891147e-06, |
| "loss": 0.0739, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.2525022747952685, |
| "grad_norm": 1.577037929110468, |
| "learning_rate": 4.265491494953316e-06, |
| "loss": 0.0585, |
| "step": 2753 |
| }, |
| { |
| "epoch": 1.2529572338489536, |
| "grad_norm": 1.509780877559115, |
| "learning_rate": 4.2649854407474925e-06, |
| "loss": 0.0631, |
| "step": 2754 |
| }, |
| { |
| "epoch": 1.2534121929026387, |
| "grad_norm": 1.0721641088625067, |
| "learning_rate": 4.26447924231503e-06, |
| "loss": 0.0438, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.253867151956324, |
| "grad_norm": 1.249446172481293, |
| "learning_rate": 4.263972899697292e-06, |
| "loss": 0.0491, |
| "step": 2756 |
| }, |
| { |
| "epoch": 1.2543221110100091, |
| "grad_norm": 0.9978735197226813, |
| "learning_rate": 4.263466412935654e-06, |
| "loss": 0.0418, |
| "step": 2757 |
| }, |
| { |
| "epoch": 1.2547770700636942, |
| "grad_norm": 0.7093666542585404, |
| "learning_rate": 4.262959782071505e-06, |
| "loss": 0.019, |
| "step": 2758 |
| }, |
| { |
| "epoch": 1.2552320291173795, |
| "grad_norm": 1.512877707182696, |
| "learning_rate": 4.262453007146244e-06, |
| "loss": 0.0701, |
| "step": 2759 |
| }, |
| { |
| "epoch": 1.2556869881710646, |
| "grad_norm": 0.7922874291294156, |
| "learning_rate": 4.261946088201282e-06, |
| "loss": 0.0285, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2561419472247497, |
| "grad_norm": 1.0842153351128343, |
| "learning_rate": 4.261439025278044e-06, |
| "loss": 0.0539, |
| "step": 2761 |
| }, |
| { |
| "epoch": 1.256596906278435, |
| "grad_norm": 1.398531796088167, |
| "learning_rate": 4.260931818417962e-06, |
| "loss": 0.0451, |
| "step": 2762 |
| }, |
| { |
| "epoch": 1.25705186533212, |
| "grad_norm": 0.9609460193121903, |
| "learning_rate": 4.260424467662484e-06, |
| "loss": 0.0354, |
| "step": 2763 |
| }, |
| { |
| "epoch": 1.2575068243858052, |
| "grad_norm": 1.012897458323134, |
| "learning_rate": 4.259916973053069e-06, |
| "loss": 0.0501, |
| "step": 2764 |
| }, |
| { |
| "epoch": 1.2579617834394905, |
| "grad_norm": 1.2466310498333586, |
| "learning_rate": 4.2594093346311865e-06, |
| "loss": 0.0423, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.2584167424931756, |
| "grad_norm": 1.016522023435209, |
| "learning_rate": 4.258901552438319e-06, |
| "loss": 0.0386, |
| "step": 2766 |
| }, |
| { |
| "epoch": 1.2588717015468607, |
| "grad_norm": 1.0546347630517319, |
| "learning_rate": 4.25839362651596e-06, |
| "loss": 0.0373, |
| "step": 2767 |
| }, |
| { |
| "epoch": 1.259326660600546, |
| "grad_norm": 1.4450084856638563, |
| "learning_rate": 4.257885556905613e-06, |
| "loss": 0.0612, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.259781619654231, |
| "grad_norm": 8.618624596562725, |
| "learning_rate": 4.257377343648799e-06, |
| "loss": 0.0838, |
| "step": 2769 |
| }, |
| { |
| "epoch": 1.2602365787079162, |
| "grad_norm": 1.4768347208008115, |
| "learning_rate": 4.256868986787044e-06, |
| "loss": 0.0581, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2606915377616015, |
| "grad_norm": 1.5379734295458345, |
| "learning_rate": 4.256360486361889e-06, |
| "loss": 0.0519, |
| "step": 2771 |
| }, |
| { |
| "epoch": 1.2611464968152866, |
| "grad_norm": 1.175911717327085, |
| "learning_rate": 4.255851842414887e-06, |
| "loss": 0.0367, |
| "step": 2772 |
| }, |
| { |
| "epoch": 1.2616014558689717, |
| "grad_norm": 1.754505004374234, |
| "learning_rate": 4.255343054987601e-06, |
| "loss": 0.0669, |
| "step": 2773 |
| }, |
| { |
| "epoch": 1.262056414922657, |
| "grad_norm": 1.2805358650627772, |
| "learning_rate": 4.2548341241216085e-06, |
| "loss": 0.0434, |
| "step": 2774 |
| }, |
| { |
| "epoch": 1.262511373976342, |
| "grad_norm": 1.3649618043130114, |
| "learning_rate": 4.254325049858496e-06, |
| "loss": 0.0567, |
| "step": 2775 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 18262287138816.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|