| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5050045495905369, |
| "eval_steps": 500, |
| "global_step": 1110, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 2.3685307115973546, |
| "learning_rate": 5e-06, |
| "loss": 0.0587, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 3.207290006513166, |
| "learning_rate": 4.999999897855645e-06, |
| "loss": 0.0976, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 3.061584755625611, |
| "learning_rate": 4.9999995914225884e-06, |
| "loss": 0.1138, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 2.4708172493174265, |
| "learning_rate": 4.999999080700855e-06, |
| "loss": 0.102, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 2.7122863978048204, |
| "learning_rate": 4.999998365690486e-06, |
| "loss": 0.0899, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 2.1348308028500367, |
| "learning_rate": 4.999997446391542e-06, |
| "loss": 0.0589, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 1.9525029408374595, |
| "learning_rate": 4.999996322804095e-06, |
| "loss": 0.0692, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 2.4972521600201087, |
| "learning_rate": 4.999994994928239e-06, |
| "loss": 0.094, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 1.3057783939017902, |
| "learning_rate": 4.999993462764082e-06, |
| "loss": 0.0401, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 1.8178622655461494, |
| "learning_rate": 4.999991726311749e-06, |
| "loss": 0.0508, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 1.8904298363447831, |
| "learning_rate": 4.999989785571382e-06, |
| "loss": 0.0466, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 2.397431505721498, |
| "learning_rate": 4.999987640543139e-06, |
| "loss": 0.0684, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 2.121710266227225, |
| "learning_rate": 4.999985291227196e-06, |
| "loss": 0.0729, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 2.9696000985831614, |
| "learning_rate": 4.999982737623746e-06, |
| "loss": 0.0922, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 2.270433126704546, |
| "learning_rate": 4.999979979732995e-06, |
| "loss": 0.0946, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 1.9380248124362378, |
| "learning_rate": 4.999977017555171e-06, |
| "loss": 0.0578, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 2.6281882171357958, |
| "learning_rate": 4.999973851090514e-06, |
| "loss": 0.1147, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 2.40029765076707, |
| "learning_rate": 4.999970480339284e-06, |
| "loss": 0.0906, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 2.889640814144301, |
| "learning_rate": 4.9999669053017564e-06, |
| "loss": 0.0792, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 2.3110994220860883, |
| "learning_rate": 4.9999631259782235e-06, |
| "loss": 0.0751, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 2.6890244705482806, |
| "learning_rate": 4.999959142368993e-06, |
| "loss": 0.0966, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 2.2488041264680563, |
| "learning_rate": 4.999954954474391e-06, |
| "loss": 0.0714, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 2.0642223983397883, |
| "learning_rate": 4.9999505622947594e-06, |
| "loss": 0.0881, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 2.384727655713489, |
| "learning_rate": 4.999945965830458e-06, |
| "loss": 0.0992, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 2.2739375250381504, |
| "learning_rate": 4.999941165081863e-06, |
| "loss": 0.0831, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 1.6418905911049972, |
| "learning_rate": 4.999936160049364e-06, |
| "loss": 0.0662, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 2.029045596294324, |
| "learning_rate": 4.999930950733373e-06, |
| "loss": 0.097, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 2.2833378337725287, |
| "learning_rate": 4.999925537134312e-06, |
| "loss": 0.0823, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 2.611896749496796, |
| "learning_rate": 4.9999199192526286e-06, |
| "loss": 0.1115, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 2.4812612616344865, |
| "learning_rate": 4.9999140970887775e-06, |
| "loss": 0.0854, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 2.0837983680092904, |
| "learning_rate": 4.999908070643236e-06, |
| "loss": 0.0837, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 2.0812008840647827, |
| "learning_rate": 4.999901839916495e-06, |
| "loss": 0.064, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 1.5275195881020318, |
| "learning_rate": 4.999895404909067e-06, |
| "loss": 0.0582, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 2.703502541064391, |
| "learning_rate": 4.999888765621476e-06, |
| "loss": 0.1102, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 1.7231856796809104, |
| "learning_rate": 4.999881922054264e-06, |
| "loss": 0.0571, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 1.6472076658400754, |
| "learning_rate": 4.999874874207991e-06, |
| "loss": 0.0536, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 2.902300005488672, |
| "learning_rate": 4.999867622083232e-06, |
| "loss": 0.1302, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 1.9543380822482044, |
| "learning_rate": 4.99986016568058e-06, |
| "loss": 0.0983, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 1.814859572890468, |
| "learning_rate": 4.999852505000646e-06, |
| "loss": 0.0717, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 1.882630749677819, |
| "learning_rate": 4.999844640044053e-06, |
| "loss": 0.07, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.4063115131397823, |
| "learning_rate": 4.999836570811445e-06, |
| "loss": 0.0873, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 2.9701013712692035, |
| "learning_rate": 4.999828297303483e-06, |
| "loss": 0.0957, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 2.207833234895104, |
| "learning_rate": 4.9998198195208405e-06, |
| "loss": 0.0879, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 2.168760551509319, |
| "learning_rate": 4.999811137464212e-06, |
| "loss": 0.0967, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 2.12859962179133, |
| "learning_rate": 4.999802251134307e-06, |
| "loss": 0.1028, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 1.8067595132130894, |
| "learning_rate": 4.99979316053185e-06, |
| "loss": 0.0778, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 3.8815722657740594, |
| "learning_rate": 4.999783865657585e-06, |
| "loss": 0.1812, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 4.142186542548352, |
| "learning_rate": 4.999774366512272e-06, |
| "loss": 0.1981, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 2.4946427215064015, |
| "learning_rate": 4.9997646630966865e-06, |
| "loss": 0.0866, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 2.219814267860857, |
| "learning_rate": 4.999754755411621e-06, |
| "loss": 0.0767, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 1.7512451842619647, |
| "learning_rate": 4.9997446434578865e-06, |
| "loss": 0.0709, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 1.9267762038567948, |
| "learning_rate": 4.999734327236307e-06, |
| "loss": 0.0791, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 1.3192434416131813, |
| "learning_rate": 4.999723806747728e-06, |
| "loss": 0.0611, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 2.0553891309583787, |
| "learning_rate": 4.99971308199301e-06, |
| "loss": 0.0708, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 1.6809260342794263, |
| "learning_rate": 4.999702152973025e-06, |
| "loss": 0.0662, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 2.0087287549898716, |
| "learning_rate": 4.9996910196886694e-06, |
| "loss": 0.0795, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 1.3268510730840513, |
| "learning_rate": 4.999679682140852e-06, |
| "loss": 0.0422, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 2.646053521216802, |
| "learning_rate": 4.999668140330499e-06, |
| "loss": 0.1284, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 1.5857988579934552, |
| "learning_rate": 4.999656394258555e-06, |
| "loss": 0.0647, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 1.756551616255058, |
| "learning_rate": 4.999644443925978e-06, |
| "loss": 0.078, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 2.2102751228780546, |
| "learning_rate": 4.999632289333746e-06, |
| "loss": 0.0785, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 2.338156657994438, |
| "learning_rate": 4.999619930482852e-06, |
| "loss": 0.0835, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 2.0921557148636616, |
| "learning_rate": 4.999607367374304e-06, |
| "loss": 0.0974, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 1.7535396635399074, |
| "learning_rate": 4.999594600009131e-06, |
| "loss": 0.0605, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 2.2055708873696585, |
| "learning_rate": 4.999581628388375e-06, |
| "loss": 0.0946, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 2.5001955714674216, |
| "learning_rate": 4.999568452513097e-06, |
| "loss": 0.1549, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 2.417716838936908, |
| "learning_rate": 4.9995550723843726e-06, |
| "loss": 0.0953, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 1.9976883408624455, |
| "learning_rate": 4.999541488003295e-06, |
| "loss": 0.0772, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 1.9326277047503455, |
| "learning_rate": 4.999527699370975e-06, |
| "loss": 0.0764, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 2.0337761312716527, |
| "learning_rate": 4.99951370648854e-06, |
| "loss": 0.0659, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 1.895878774895592, |
| "learning_rate": 4.999499509357132e-06, |
| "loss": 0.0815, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.0909717848011313, |
| "learning_rate": 4.999485107977912e-06, |
| "loss": 0.084, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 1.5271836426577585, |
| "learning_rate": 4.999470502352057e-06, |
| "loss": 0.0645, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 2.4817155636981223, |
| "learning_rate": 4.999455692480759e-06, |
| "loss": 0.1008, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 1.6027477251164817, |
| "learning_rate": 4.999440678365229e-06, |
| "loss": 0.0722, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 2.164861284274037, |
| "learning_rate": 4.999425460006695e-06, |
| "loss": 0.0876, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 1.8147143711706584, |
| "learning_rate": 4.9994100374063995e-06, |
| "loss": 0.0739, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 2.379478288499757, |
| "learning_rate": 4.9993944105656035e-06, |
| "loss": 0.1158, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 1.7238147576191318, |
| "learning_rate": 4.999378579485582e-06, |
| "loss": 0.0749, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 2.1444185576728323, |
| "learning_rate": 4.999362544167632e-06, |
| "loss": 0.0937, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 1.18142283635082, |
| "learning_rate": 4.99934630461306e-06, |
| "loss": 0.0569, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 2.3599788407160456, |
| "learning_rate": 4.999329860823197e-06, |
| "loss": 0.0848, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 1.851574920799011, |
| "learning_rate": 4.999313212799383e-06, |
| "loss": 0.0882, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 2.144291660745484, |
| "learning_rate": 4.99929636054298e-06, |
| "loss": 0.0881, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 2.083071837291781, |
| "learning_rate": 4.999279304055366e-06, |
| "loss": 0.1109, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 2.245491182317419, |
| "learning_rate": 4.999262043337933e-06, |
| "loss": 0.0933, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 2.076902724310137, |
| "learning_rate": 4.999244578392094e-06, |
| "loss": 0.1004, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 2.213157445111281, |
| "learning_rate": 4.9992269092192736e-06, |
| "loss": 0.1048, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 1.8088256581500983, |
| "learning_rate": 4.9992090358209166e-06, |
| "loss": 0.0803, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 1.6952266837081935, |
| "learning_rate": 4.9991909581984835e-06, |
| "loss": 0.0707, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 1.2806634047624867, |
| "learning_rate": 4.999172676353451e-06, |
| "loss": 0.0405, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 1.537222164184117, |
| "learning_rate": 4.999154190287314e-06, |
| "loss": 0.0678, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 2.152654560935853, |
| "learning_rate": 4.999135500001583e-06, |
| "loss": 0.1323, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 1.7293087783358614, |
| "learning_rate": 4.9991166054977844e-06, |
| "loss": 0.0851, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 2.689089264886033, |
| "learning_rate": 4.999097506777463e-06, |
| "loss": 0.1018, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 1.8242860351920025, |
| "learning_rate": 4.999078203842179e-06, |
| "loss": 0.1063, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 1.5249963877818449, |
| "learning_rate": 4.999058696693511e-06, |
| "loss": 0.0593, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 1.668772591755926, |
| "learning_rate": 4.99903898533305e-06, |
| "loss": 0.0709, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 1.8521288885149407, |
| "learning_rate": 4.99901906976241e-06, |
| "loss": 0.0842, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 2.106435857041323, |
| "learning_rate": 4.998998949983217e-06, |
| "loss": 0.0921, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 2.104450695294598, |
| "learning_rate": 4.998978625997115e-06, |
| "loss": 0.1082, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 2.1381043167125466, |
| "learning_rate": 4.998958097805765e-06, |
| "loss": 0.0966, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 1.6962878781771613, |
| "learning_rate": 4.9989373654108445e-06, |
| "loss": 0.0721, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 26.768545049591438, |
| "learning_rate": 4.9989164288140465e-06, |
| "loss": 0.362, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 2.63813062408578, |
| "learning_rate": 4.998895288017085e-06, |
| "loss": 0.1373, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 1.828826426920959, |
| "learning_rate": 4.998873943021684e-06, |
| "loss": 0.0743, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 1.524672393516503, |
| "learning_rate": 4.998852393829589e-06, |
| "loss": 0.0693, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 3.0873114713096683, |
| "learning_rate": 4.9988306404425625e-06, |
| "loss": 0.1492, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 1.7541988764209069, |
| "learning_rate": 4.99880868286238e-06, |
| "loss": 0.0941, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 2.3475973125438103, |
| "learning_rate": 4.998786521090836e-06, |
| "loss": 0.0925, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 2.1297159392440452, |
| "learning_rate": 4.9987641551297426e-06, |
| "loss": 0.1209, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 1.8188477873711246, |
| "learning_rate": 4.998741584980926e-06, |
| "loss": 0.1191, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 2.0744703068317474, |
| "learning_rate": 4.9987188106462314e-06, |
| "loss": 0.0958, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 1.67585557445257, |
| "learning_rate": 4.99869583212752e-06, |
| "loss": 0.0759, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 2.9423649270306456, |
| "learning_rate": 4.9986726494266694e-06, |
| "loss": 0.1628, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 1.9805897541793653, |
| "learning_rate": 4.998649262545574e-06, |
| "loss": 0.0865, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 1.862673950464683, |
| "learning_rate": 4.998625671486144e-06, |
| "loss": 0.0841, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 1.6852737490573195, |
| "learning_rate": 4.998601876250308e-06, |
| "loss": 0.0801, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 1.8645780399689873, |
| "learning_rate": 4.998577876840011e-06, |
| "loss": 0.0822, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 1.7705796593126653, |
| "learning_rate": 4.9985536732572124e-06, |
| "loss": 0.0836, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 1.4380115814084553, |
| "learning_rate": 4.998529265503891e-06, |
| "loss": 0.0714, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 1.841019746353449, |
| "learning_rate": 4.9985046535820416e-06, |
| "loss": 0.0925, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 2.13633472088372, |
| "learning_rate": 4.998479837493675e-06, |
| "loss": 0.1098, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 1.6795956051728682, |
| "learning_rate": 4.9984548172408195e-06, |
| "loss": 0.0623, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 7.146738489798405, |
| "learning_rate": 4.998429592825519e-06, |
| "loss": 0.1803, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 2.17497011974541, |
| "learning_rate": 4.998404164249835e-06, |
| "loss": 0.1209, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 1.9663385354035616, |
| "learning_rate": 4.998378531515845e-06, |
| "loss": 0.0704, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 2.398444068788508, |
| "learning_rate": 4.998352694625645e-06, |
| "loss": 0.0819, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 1.5854929257305652, |
| "learning_rate": 4.998326653581343e-06, |
| "loss": 0.0775, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 1.8831317521751245, |
| "learning_rate": 4.998300408385072e-06, |
| "loss": 0.0895, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 2.624836374744882, |
| "learning_rate": 4.998273959038972e-06, |
| "loss": 0.1398, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 1.8281764860819427, |
| "learning_rate": 4.998247305545207e-06, |
| "loss": 0.0979, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 1.4175605750366638, |
| "learning_rate": 4.998220447905953e-06, |
| "loss": 0.0674, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 2.0007328792439307, |
| "learning_rate": 4.998193386123408e-06, |
| "loss": 0.1082, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 2.2534593276871355, |
| "learning_rate": 4.99816612019978e-06, |
| "loss": 0.1165, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 7.223128092677242, |
| "learning_rate": 4.998138650137298e-06, |
| "loss": 0.1547, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 2.0541187438324178, |
| "learning_rate": 4.998110975938208e-06, |
| "loss": 0.1153, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 2.900003934434033, |
| "learning_rate": 4.998083097604769e-06, |
| "loss": 0.1227, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 2.9930382656276655, |
| "learning_rate": 4.998055015139261e-06, |
| "loss": 0.0671, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 1.8183166737473904, |
| "learning_rate": 4.998026728543979e-06, |
| "loss": 0.0879, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 1.750231162848612, |
| "learning_rate": 4.997998237821233e-06, |
| "loss": 0.0973, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 1.531092755332603, |
| "learning_rate": 4.997969542973352e-06, |
| "loss": 0.0755, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 2.106588666489457, |
| "learning_rate": 4.997940644002681e-06, |
| "loss": 0.1014, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 2.4260145417995513, |
| "learning_rate": 4.997911540911581e-06, |
| "loss": 0.0992, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 1.9957158387709846, |
| "learning_rate": 4.99788223370243e-06, |
| "loss": 0.1074, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 2.7359115449729385, |
| "learning_rate": 4.9978527223776245e-06, |
| "loss": 0.1298, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 1.4774963397056595, |
| "learning_rate": 4.9978230069395735e-06, |
| "loss": 0.0725, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 2.4431671333335188, |
| "learning_rate": 4.9977930873907065e-06, |
| "loss": 0.0983, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 1.9906443670591782, |
| "learning_rate": 4.997762963733468e-06, |
| "loss": 0.1039, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 2.0201798980001517, |
| "learning_rate": 4.997732635970321e-06, |
| "loss": 0.085, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.7461931203369137, |
| "learning_rate": 4.9977021041037425e-06, |
| "loss": 0.0884, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 2.339191302020108, |
| "learning_rate": 4.9976713681362265e-06, |
| "loss": 0.1159, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 2.314166753359135, |
| "learning_rate": 4.997640428070286e-06, |
| "loss": 0.1338, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 1.5963391451568967, |
| "learning_rate": 4.99760928390845e-06, |
| "loss": 0.0575, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 1.7788915412646347, |
| "learning_rate": 4.997577935653262e-06, |
| "loss": 0.08, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 1.5840889143049688, |
| "learning_rate": 4.9975463833072835e-06, |
| "loss": 0.0709, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 2.1242834812157962, |
| "learning_rate": 4.997514626873093e-06, |
| "loss": 0.1078, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 1.7256733994251798, |
| "learning_rate": 4.997482666353287e-06, |
| "loss": 0.0678, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 2.2088750555704073, |
| "learning_rate": 4.997450501750476e-06, |
| "loss": 0.0981, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 1.817598507902073, |
| "learning_rate": 4.997418133067288e-06, |
| "loss": 0.0829, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 1.9174894618752205, |
| "learning_rate": 4.997385560306368e-06, |
| "loss": 0.0922, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 1.7975593397664607, |
| "learning_rate": 4.997352783470379e-06, |
| "loss": 0.093, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 2.1789877377155147, |
| "learning_rate": 4.997319802561997e-06, |
| "loss": 0.1044, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 1.5046722090412417, |
| "learning_rate": 4.9972866175839196e-06, |
| "loss": 0.0806, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 1.828261506678391, |
| "learning_rate": 4.9972532285388575e-06, |
| "loss": 0.1018, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 1.853289616987827, |
| "learning_rate": 4.997219635429538e-06, |
| "loss": 0.1177, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 1.9172069323651033, |
| "learning_rate": 4.997185838258709e-06, |
| "loss": 0.0817, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 1.6956924002006215, |
| "learning_rate": 4.997151837029129e-06, |
| "loss": 0.0679, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 1.8575330553269362, |
| "learning_rate": 4.997117631743579e-06, |
| "loss": 0.0855, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 1.7266908578071283, |
| "learning_rate": 4.997083222404852e-06, |
| "loss": 0.0625, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 1.6397125044179104, |
| "learning_rate": 4.997048609015762e-06, |
| "loss": 0.0751, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 1.5340896344557344, |
| "learning_rate": 4.997013791579136e-06, |
| "loss": 0.0786, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 1.9189331650587453, |
| "learning_rate": 4.996978770097819e-06, |
| "loss": 0.0953, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 1.7773721601434869, |
| "learning_rate": 4.996943544574673e-06, |
| "loss": 0.083, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 1.7663708027835396, |
| "learning_rate": 4.996908115012576e-06, |
| "loss": 0.0711, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 2.0988130747441462, |
| "learning_rate": 4.996872481414425e-06, |
| "loss": 0.1068, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 3.491649419917669, |
| "learning_rate": 4.9968366437831305e-06, |
| "loss": 0.1596, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 0.9772529604089312, |
| "learning_rate": 4.99680060212162e-06, |
| "loss": 0.0469, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 1.411497576217555, |
| "learning_rate": 4.996764356432841e-06, |
| "loss": 0.0799, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 1.9634897057091474, |
| "learning_rate": 4.996727906719754e-06, |
| "loss": 0.0818, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 1.8622777856402457, |
| "learning_rate": 4.9966912529853365e-06, |
| "loss": 0.0654, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 1.6338074095796988, |
| "learning_rate": 4.996654395232585e-06, |
| "loss": 0.0744, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 1.534919993971643, |
| "learning_rate": 4.996617333464512e-06, |
| "loss": 0.0639, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 1.7391379315757225, |
| "learning_rate": 4.996580067684145e-06, |
| "loss": 0.0715, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 1.7215093643580193, |
| "learning_rate": 4.996542597894528e-06, |
| "loss": 0.1192, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 2.041088124472192, |
| "learning_rate": 4.996504924098726e-06, |
| "loss": 0.1078, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 1.7083926900772908, |
| "learning_rate": 4.9964670462998145e-06, |
| "loss": 0.0922, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 1.9950587953196364, |
| "learning_rate": 4.99642896450089e-06, |
| "loss": 0.125, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 2.2702904646099022, |
| "learning_rate": 4.9963906787050656e-06, |
| "loss": 0.1318, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 1.5062676480402928, |
| "learning_rate": 4.996352188915467e-06, |
| "loss": 0.0621, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 2.6764229211241153, |
| "learning_rate": 4.996313495135242e-06, |
| "loss": 0.1112, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 2.276483991348045, |
| "learning_rate": 4.9962745973675505e-06, |
| "loss": 0.1219, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 1.4375762261827663, |
| "learning_rate": 4.996235495615572e-06, |
| "loss": 0.0641, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 2.3164336329931094, |
| "learning_rate": 4.996196189882503e-06, |
| "loss": 0.1176, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 2.225732764096407, |
| "learning_rate": 4.996156680171552e-06, |
| "loss": 0.1096, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 1.8464739663611849, |
| "learning_rate": 4.996116966485951e-06, |
| "loss": 0.0817, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 1.9290667932284378, |
| "learning_rate": 4.996077048828944e-06, |
| "loss": 0.1106, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 1.6322378586848272, |
| "learning_rate": 4.996036927203793e-06, |
| "loss": 0.0972, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 2.2100804969645416, |
| "learning_rate": 4.995996601613775e-06, |
| "loss": 0.0944, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 1.5641835045850314, |
| "learning_rate": 4.9959560720621875e-06, |
| "loss": 0.0896, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 2.2116837789953117, |
| "learning_rate": 4.995915338552341e-06, |
| "loss": 0.1331, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 1.8792253280188753, |
| "learning_rate": 4.995874401087565e-06, |
| "loss": 0.0967, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 2.167978668790899, |
| "learning_rate": 4.9958332596712035e-06, |
| "loss": 0.1141, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 1.8621318139110883, |
| "learning_rate": 4.99579191430662e-06, |
| "loss": 0.0972, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 1.8429430162012657, |
| "learning_rate": 4.995750364997192e-06, |
| "loss": 0.0967, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 1.5424629326591568, |
| "learning_rate": 4.995708611746314e-06, |
| "loss": 0.0814, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 2.0700985381007904, |
| "learning_rate": 4.995666654557399e-06, |
| "loss": 0.1038, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 1.8765344045928045, |
| "learning_rate": 4.995624493433876e-06, |
| "loss": 0.1075, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 1.8732891178471252, |
| "learning_rate": 4.995582128379189e-06, |
| "loss": 0.1001, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 2.1418545940903373, |
| "learning_rate": 4.9955395593968e-06, |
| "loss": 0.1463, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 1.905821465202796, |
| "learning_rate": 4.99549678649019e-06, |
| "loss": 0.0848, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 1.7581366634538098, |
| "learning_rate": 4.99545380966285e-06, |
| "loss": 0.0976, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 2.133882292644339, |
| "learning_rate": 4.995410628918294e-06, |
| "loss": 0.1036, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 1.6491455235555508, |
| "learning_rate": 4.995367244260052e-06, |
| "loss": 0.1, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 1.372315749578445, |
| "learning_rate": 4.995323655691667e-06, |
| "loss": 0.0543, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 2.2929084487384297, |
| "learning_rate": 4.995279863216702e-06, |
| "loss": 0.1005, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 1.8371182479654964, |
| "learning_rate": 4.995235866838735e-06, |
| "loss": 0.096, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 1.4189314035725125, |
| "learning_rate": 4.995191666561361e-06, |
| "loss": 0.0707, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 1.4036483642687965, |
| "learning_rate": 4.995147262388192e-06, |
| "loss": 0.0689, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 1.7382878807357938, |
| "learning_rate": 4.995102654322858e-06, |
| "loss": 0.0829, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 1.3102015447280675, |
| "learning_rate": 4.995057842369002e-06, |
| "loss": 0.0548, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 1.8490525072637034, |
| "learning_rate": 4.995012826530287e-06, |
| "loss": 0.1044, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 2.802543488000276, |
| "learning_rate": 4.99496760681039e-06, |
| "loss": 0.1393, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 2.4234245545914295, |
| "learning_rate": 4.994922183213009e-06, |
| "loss": 0.1325, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 1.1495372549504432, |
| "learning_rate": 4.9948765557418535e-06, |
| "loss": 0.0585, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 2.1666263724534267, |
| "learning_rate": 4.994830724400653e-06, |
| "loss": 0.1063, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 1.7066677970234532, |
| "learning_rate": 4.994784689193151e-06, |
| "loss": 0.1002, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 1.5304723941528642, |
| "learning_rate": 4.994738450123111e-06, |
| "loss": 0.0825, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 2.1125485884299486, |
| "learning_rate": 4.994692007194312e-06, |
| "loss": 0.1089, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 1.4297773182355138, |
| "learning_rate": 4.994645360410547e-06, |
| "loss": 0.0855, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 1.741498602747005, |
| "learning_rate": 4.99459850977563e-06, |
| "loss": 0.0884, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 1.6875366585424447, |
| "learning_rate": 4.994551455293388e-06, |
| "loss": 0.068, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 2.03347527932056, |
| "learning_rate": 4.9945041969676654e-06, |
| "loss": 0.0997, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 1.5553350034126536, |
| "learning_rate": 4.994456734802325e-06, |
| "loss": 0.0709, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 1.354348073951093, |
| "learning_rate": 4.994409068801247e-06, |
| "loss": 0.0858, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 1.6048007960766557, |
| "learning_rate": 4.994361198968323e-06, |
| "loss": 0.0891, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 2.3380973830643663, |
| "learning_rate": 4.994313125307466e-06, |
| "loss": 0.116, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 1.68606521406513, |
| "learning_rate": 4.994264847822605e-06, |
| "loss": 0.09, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 2.0274881934833715, |
| "learning_rate": 4.994216366517684e-06, |
| "loss": 0.0856, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 1.9224041067300894, |
| "learning_rate": 4.994167681396667e-06, |
| "loss": 0.1032, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 2.213562554498921, |
| "learning_rate": 4.994118792463529e-06, |
| "loss": 0.1125, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 2.396477374166045, |
| "learning_rate": 4.994069699722267e-06, |
| "loss": 0.16, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 1.6621616457271884, |
| "learning_rate": 4.994020403176893e-06, |
| "loss": 0.1088, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 2.0137991000965862, |
| "learning_rate": 4.9939709028314345e-06, |
| "loss": 0.1203, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 1.731498246221376, |
| "learning_rate": 4.993921198689935e-06, |
| "loss": 0.0779, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 1.53319841517271, |
| "learning_rate": 4.993871290756459e-06, |
| "loss": 0.0859, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 1.5738861001818754, |
| "learning_rate": 4.9938211790350835e-06, |
| "loss": 0.0822, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 1.795556137822037, |
| "learning_rate": 4.993770863529902e-06, |
| "loss": 0.1082, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 1.753136266606954, |
| "learning_rate": 4.993720344245029e-06, |
| "loss": 0.0826, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 1.724266476242851, |
| "learning_rate": 4.99366962118459e-06, |
| "loss": 0.0851, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 1.8081901179247517, |
| "learning_rate": 4.99361869435273e-06, |
| "loss": 0.0965, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 2.064401083784083, |
| "learning_rate": 4.993567563753613e-06, |
| "loss": 0.0881, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 1.6354098857617054, |
| "learning_rate": 4.993516229391414e-06, |
| "loss": 0.0933, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 1.2711881947711132, |
| "learning_rate": 4.993464691270331e-06, |
| "loss": 0.0595, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 1.5847340722430843, |
| "learning_rate": 4.993412949394572e-06, |
| "loss": 0.0812, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 1.5774467606957123, |
| "learning_rate": 4.993361003768369e-06, |
| "loss": 0.081, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 1.3573852133613107, |
| "learning_rate": 4.993308854395963e-06, |
| "loss": 0.0812, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 1.5273272920136396, |
| "learning_rate": 4.993256501281618e-06, |
| "loss": 0.0634, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 1.8382646613112785, |
| "learning_rate": 4.993203944429611e-06, |
| "loss": 0.1145, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 1.5747608705636602, |
| "learning_rate": 4.993151183844236e-06, |
| "loss": 0.0801, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 1.7065433305132354, |
| "learning_rate": 4.9930982195298065e-06, |
| "loss": 0.0742, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 1.709109441111134, |
| "learning_rate": 4.9930450514906484e-06, |
| "loss": 0.1028, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 1.6959707782927067, |
| "learning_rate": 4.9929916797311075e-06, |
| "loss": 0.0791, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 2.374639715905283, |
| "learning_rate": 4.992938104255545e-06, |
| "loss": 0.1477, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 1.6263809057131815, |
| "learning_rate": 4.992884325068339e-06, |
| "loss": 0.0916, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 1.6207164559915699, |
| "learning_rate": 4.992830342173882e-06, |
| "loss": 0.1068, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 2.0552449766971823, |
| "learning_rate": 4.992776155576589e-06, |
| "loss": 0.1145, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 1.6692049909432523, |
| "learning_rate": 4.992721765280884e-06, |
| "loss": 0.1172, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 2.456621954888186, |
| "learning_rate": 4.992667171291215e-06, |
| "loss": 0.1267, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 1.5125250812884448, |
| "learning_rate": 4.992612373612042e-06, |
| "loss": 0.0661, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 2.0952324870431553, |
| "learning_rate": 4.99255737224784e-06, |
| "loss": 0.0917, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 1.4094336450761362, |
| "learning_rate": 4.9925021672031075e-06, |
| "loss": 0.0905, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 2.239902062561175, |
| "learning_rate": 4.992446758482353e-06, |
| "loss": 0.0995, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 2.696125395972354, |
| "learning_rate": 4.992391146090106e-06, |
| "loss": 0.1613, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 1.4853155964847005, |
| "learning_rate": 4.99233533003091e-06, |
| "loss": 0.0826, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 1.5393545957542452, |
| "learning_rate": 4.992279310309326e-06, |
| "loss": 0.1128, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 2.4236941073693283, |
| "learning_rate": 4.9922230869299316e-06, |
| "loss": 0.1607, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 1.6611888199243576, |
| "learning_rate": 4.992166659897321e-06, |
| "loss": 0.1005, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 1.3896864345667146, |
| "learning_rate": 4.992110029216106e-06, |
| "loss": 0.079, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.3647278081745937, |
| "learning_rate": 4.992053194890914e-06, |
| "loss": 0.0767, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 2.0323876810575525, |
| "learning_rate": 4.991996156926388e-06, |
| "loss": 0.101, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 1.948481701516796, |
| "learning_rate": 4.9919389153271904e-06, |
| "loss": 0.106, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 1.3512588403363923, |
| "learning_rate": 4.991881470097998e-06, |
| "loss": 0.0897, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 1.4862053800013564, |
| "learning_rate": 4.991823821243505e-06, |
| "loss": 0.0898, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 2.287612016528911, |
| "learning_rate": 4.991765968768422e-06, |
| "loss": 0.1048, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 1.8190624177647585, |
| "learning_rate": 4.991707912677477e-06, |
| "loss": 0.076, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 1.4178411985180965, |
| "learning_rate": 4.991649652975414e-06, |
| "loss": 0.062, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 1.7010811854624341, |
| "learning_rate": 4.991591189666994e-06, |
| "loss": 0.0928, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 1.7824920481002249, |
| "learning_rate": 4.991532522756993e-06, |
| "loss": 0.09, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 1.12093519239752, |
| "learning_rate": 4.991473652250207e-06, |
| "loss": 0.0564, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 1.4956629959050902, |
| "learning_rate": 4.991414578151445e-06, |
| "loss": 0.0777, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 3.467748085139679, |
| "learning_rate": 4.991355300465535e-06, |
| "loss": 0.193, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 1.746518786410603, |
| "learning_rate": 4.99129581919732e-06, |
| "loss": 0.0862, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 1.3513400373127227, |
| "learning_rate": 4.9912361343516616e-06, |
| "loss": 0.0588, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 1.7841617467512154, |
| "learning_rate": 4.991176245933437e-06, |
| "loss": 0.0982, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 1.6650575824861316, |
| "learning_rate": 4.9911161539475385e-06, |
| "loss": 0.0868, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 2.0850606622795667, |
| "learning_rate": 4.991055858398879e-06, |
| "loss": 0.1087, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 2.27094495258401, |
| "learning_rate": 4.990995359292384e-06, |
| "loss": 0.1177, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 1.8175215978998918, |
| "learning_rate": 4.990934656632997e-06, |
| "loss": 0.1029, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 1.9580713421337124, |
| "learning_rate": 4.990873750425679e-06, |
| "loss": 0.0842, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 1.5378181370134305, |
| "learning_rate": 4.990812640675406e-06, |
| "loss": 0.0813, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 1.4646500614646956, |
| "learning_rate": 4.990751327387174e-06, |
| "loss": 0.0642, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 1.7132953215338962, |
| "learning_rate": 4.99068981056599e-06, |
| "loss": 0.0921, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 2.020828034549401, |
| "learning_rate": 4.990628090216885e-06, |
| "loss": 0.1164, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 1.4167009033800524, |
| "learning_rate": 4.990566166344898e-06, |
| "loss": 0.0695, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 1.743676237886539, |
| "learning_rate": 4.990504038955092e-06, |
| "loss": 0.1083, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 1.8343720931834766, |
| "learning_rate": 4.990441708052542e-06, |
| "loss": 0.0985, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 1.4113998497835858, |
| "learning_rate": 4.9903791736423435e-06, |
| "loss": 0.081, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 1.8830612535708886, |
| "learning_rate": 4.9903164357296044e-06, |
| "loss": 0.0954, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 1.4208829323408247, |
| "learning_rate": 4.990253494319453e-06, |
| "loss": 0.0919, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 1.3671067756437636, |
| "learning_rate": 4.990190349417032e-06, |
| "loss": 0.0928, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 1.965673083316737, |
| "learning_rate": 4.990127001027501e-06, |
| "loss": 0.0849, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 1.3933093475773835, |
| "learning_rate": 4.990063449156037e-06, |
| "loss": 0.0735, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.8960360183192995, |
| "learning_rate": 4.989999693807832e-06, |
| "loss": 0.1141, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 1.8316795975938271, |
| "learning_rate": 4.989935734988098e-06, |
| "loss": 0.1084, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 1.6451238367574679, |
| "learning_rate": 4.98987157270206e-06, |
| "loss": 0.0739, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 2.0644883617404854, |
| "learning_rate": 4.989807206954961e-06, |
| "loss": 0.1125, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 1.322196438354388, |
| "learning_rate": 4.9897426377520605e-06, |
| "loss": 0.0792, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 2.568915637493138, |
| "learning_rate": 4.989677865098636e-06, |
| "loss": 0.1236, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 1.1659492648591403, |
| "learning_rate": 4.989612888999978e-06, |
| "loss": 0.0624, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 1.431829324891758, |
| "learning_rate": 4.9895477094614e-06, |
| "loss": 0.0855, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 1.1704367288212936, |
| "learning_rate": 4.989482326488225e-06, |
| "loss": 0.0741, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 1.6170438514885752, |
| "learning_rate": 4.989416740085796e-06, |
| "loss": 0.1057, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 1.639627544263893, |
| "learning_rate": 4.9893509502594735e-06, |
| "loss": 0.0784, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 1.6437318926278874, |
| "learning_rate": 4.9892849570146335e-06, |
| "loss": 0.1105, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 1.6588510281862943, |
| "learning_rate": 4.989218760356668e-06, |
| "loss": 0.106, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 1.692767253326721, |
| "learning_rate": 4.989152360290987e-06, |
| "loss": 0.1068, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 2.117777475502305, |
| "learning_rate": 4.989085756823015e-06, |
| "loss": 0.1274, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 1.6877038030416243, |
| "learning_rate": 4.989018949958197e-06, |
| "loss": 0.1001, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 2.018139319167573, |
| "learning_rate": 4.98895193970199e-06, |
| "loss": 0.0726, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 1.7601822979826238, |
| "learning_rate": 4.9888847260598705e-06, |
| "loss": 0.0884, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 2.153451550499006, |
| "learning_rate": 4.98881730903733e-06, |
| "loss": 0.1263, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 1.7037846763057773, |
| "learning_rate": 4.98874968863988e-06, |
| "loss": 0.1017, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 1.6373036503866722, |
| "learning_rate": 4.988681864873044e-06, |
| "loss": 0.0936, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 1.5043938510579566, |
| "learning_rate": 4.988613837742364e-06, |
| "loss": 0.0841, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 1.9480098961832564, |
| "learning_rate": 4.9885456072534015e-06, |
| "loss": 0.093, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 2.0743334215437845, |
| "learning_rate": 4.988477173411728e-06, |
| "loss": 0.1001, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 1.3686100112269117, |
| "learning_rate": 4.988408536222939e-06, |
| "loss": 0.0706, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 1.7072624744285279, |
| "learning_rate": 4.9883396956926416e-06, |
| "loss": 0.0883, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 1.2178991309049074, |
| "learning_rate": 4.988270651826462e-06, |
| "loss": 0.066, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 1.5734145514066031, |
| "learning_rate": 4.988201404630041e-06, |
| "loss": 0.0818, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 1.4266019263972631, |
| "learning_rate": 4.988131954109038e-06, |
| "loss": 0.0835, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 2.2620036917930633, |
| "learning_rate": 4.988062300269128e-06, |
| "loss": 0.1374, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 1.4975643248719304, |
| "learning_rate": 4.987992443116003e-06, |
| "loss": 0.0817, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 1.723916950757741, |
| "learning_rate": 4.987922382655372e-06, |
| "loss": 0.086, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 2.50033376989197, |
| "learning_rate": 4.987852118892958e-06, |
| "loss": 0.1498, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 2.0776125106096934, |
| "learning_rate": 4.987781651834503e-06, |
| "loss": 0.1258, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 2.186488732885297, |
| "learning_rate": 4.987710981485768e-06, |
| "loss": 0.1203, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 2.0497982262406698, |
| "learning_rate": 4.987640107852525e-06, |
| "loss": 0.1365, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 1.394060418907116, |
| "learning_rate": 4.987569030940567e-06, |
| "loss": 0.0811, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 1.5257209721345255, |
| "learning_rate": 4.987497750755702e-06, |
| "loss": 0.0665, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 2.328076306378438, |
| "learning_rate": 4.987426267303753e-06, |
| "loss": 0.1186, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 1.8266119344469305, |
| "learning_rate": 4.987354580590563e-06, |
| "loss": 0.1011, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 1.7369452160483552, |
| "learning_rate": 4.987282690621991e-06, |
| "loss": 0.117, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 1.8346392689418392, |
| "learning_rate": 4.987210597403907e-06, |
| "loss": 0.1, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 1.9402353280122917, |
| "learning_rate": 4.987138300942208e-06, |
| "loss": 0.0949, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 1.4819316275042067, |
| "learning_rate": 4.987065801242798e-06, |
| "loss": 0.0855, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 1.8440191145455884, |
| "learning_rate": 4.986993098311601e-06, |
| "loss": 0.1057, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 1.712390016283102, |
| "learning_rate": 4.986920192154561e-06, |
| "loss": 0.0917, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 1.2697535382377623, |
| "learning_rate": 4.986847082777632e-06, |
| "loss": 0.0729, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 1.5330396115730802, |
| "learning_rate": 4.986773770186791e-06, |
| "loss": 0.0966, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 2.359233717201702, |
| "learning_rate": 4.986700254388027e-06, |
| "loss": 0.1308, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 1.330733109747955, |
| "learning_rate": 4.986626535387349e-06, |
| "loss": 0.0728, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 1.7398719883146694, |
| "learning_rate": 4.9865526131907795e-06, |
| "loss": 0.0893, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 2.018839749017437, |
| "learning_rate": 4.9864784878043595e-06, |
| "loss": 0.1268, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 2.439244123753763, |
| "learning_rate": 4.986404159234146e-06, |
| "loss": 0.1047, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 1.4077243142655576, |
| "learning_rate": 4.986329627486213e-06, |
| "loss": 0.07, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 2.0634194365835583, |
| "learning_rate": 4.986254892566652e-06, |
| "loss": 0.1199, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 1.507898380305614, |
| "learning_rate": 4.9861799544815684e-06, |
| "loss": 0.0798, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 1.5689447325912511, |
| "learning_rate": 4.986104813237086e-06, |
| "loss": 0.0872, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 1.5434828853102547, |
| "learning_rate": 4.986029468839346e-06, |
| "loss": 0.0756, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 1.9546839136865664, |
| "learning_rate": 4.985953921294505e-06, |
| "loss": 0.129, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 1.4457360634551049, |
| "learning_rate": 4.985878170608736e-06, |
| "loss": 0.0651, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 1.7053082159754585, |
| "learning_rate": 4.985802216788228e-06, |
| "loss": 0.0786, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 2.0831330601859643, |
| "learning_rate": 4.98572605983919e-06, |
| "loss": 0.1087, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 1.3106266925763963, |
| "learning_rate": 4.985649699767842e-06, |
| "loss": 0.0666, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 1.5931730936354696, |
| "learning_rate": 4.985573136580427e-06, |
| "loss": 0.1015, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 1.3398175715153904, |
| "learning_rate": 4.9854963702832e-06, |
| "loss": 0.0706, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 1.4932070031671647, |
| "learning_rate": 4.985419400882433e-06, |
| "loss": 0.1009, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 2.05809614886543, |
| "learning_rate": 4.985342228384418e-06, |
| "loss": 0.1373, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 25.314485102746445, |
| "learning_rate": 4.985264852795459e-06, |
| "loss": 0.529, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 1.3496622625056474, |
| "learning_rate": 4.98518727412188e-06, |
| "loss": 0.0792, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 2.042157493841037, |
| "learning_rate": 4.98510949237002e-06, |
| "loss": 0.1127, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 2.093747109047391, |
| "learning_rate": 4.985031507546234e-06, |
| "loss": 0.0931, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 2.620290737475778, |
| "learning_rate": 4.984953319656896e-06, |
| "loss": 0.1258, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 1.7812499192074571, |
| "learning_rate": 4.984874928708395e-06, |
| "loss": 0.0934, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 1.9861134139953058, |
| "learning_rate": 4.984796334707136e-06, |
| "loss": 0.1105, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 9.71210910528449, |
| "learning_rate": 4.984717537659542e-06, |
| "loss": 0.119, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 1.2902315877318344, |
| "learning_rate": 4.984638537572052e-06, |
| "loss": 0.0591, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 1.693249076147672, |
| "learning_rate": 4.984559334451121e-06, |
| "loss": 0.0906, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 1.7045791781932, |
| "learning_rate": 4.984479928303221e-06, |
| "loss": 0.066, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 1.588345004423415, |
| "learning_rate": 4.984400319134841e-06, |
| "loss": 0.079, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 2.8167066456613368, |
| "learning_rate": 4.984320506952487e-06, |
| "loss": 0.1743, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 1.8409665855781128, |
| "learning_rate": 4.9842404917626796e-06, |
| "loss": 0.1009, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 1.5444918002986228, |
| "learning_rate": 4.984160273571959e-06, |
| "loss": 0.0952, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 1.9718645058282944, |
| "learning_rate": 4.9840798523868785e-06, |
| "loss": 0.1217, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 1.669853882784426, |
| "learning_rate": 4.983999228214011e-06, |
| "loss": 0.083, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 1.5445667787054873, |
| "learning_rate": 4.983918401059943e-06, |
| "loss": 0.0838, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 1.8477622601816133, |
| "learning_rate": 4.983837370931282e-06, |
| "loss": 0.1199, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 2.295804335093856, |
| "learning_rate": 4.983756137834647e-06, |
| "loss": 0.1561, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 2.1902816453958933, |
| "learning_rate": 4.9836747017766765e-06, |
| "loss": 0.1014, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 1.7414949549224419, |
| "learning_rate": 4.983593062764027e-06, |
| "loss": 0.1046, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 3.529761555914209, |
| "learning_rate": 4.983511220803367e-06, |
| "loss": 0.1573, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 1.5931351386368249, |
| "learning_rate": 4.983429175901386e-06, |
| "loss": 0.0896, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 1.4617184144821196, |
| "learning_rate": 4.983346928064788e-06, |
| "loss": 0.0698, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 1.564679441746091, |
| "learning_rate": 4.9832644773002935e-06, |
| "loss": 0.0955, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 1.4077890282448986, |
| "learning_rate": 4.98318182361464e-06, |
| "loss": 0.0887, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 1.6028267121804223, |
| "learning_rate": 4.9830989670145825e-06, |
| "loss": 0.0989, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 1.8479648547197383, |
| "learning_rate": 4.9830159075068905e-06, |
| "loss": 0.1009, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 1.8145495712184487, |
| "learning_rate": 4.9829326450983514e-06, |
| "loss": 0.1125, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 1.839873930402737, |
| "learning_rate": 4.98284917979577e-06, |
| "loss": 0.0975, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 2.3433237407057863, |
| "learning_rate": 4.9827655116059656e-06, |
| "loss": 0.1061, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 1.479552769836274, |
| "learning_rate": 4.9826816405357755e-06, |
| "loss": 0.105, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 1.0380040250679141, |
| "learning_rate": 4.982597566592054e-06, |
| "loss": 0.0522, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 2.2146611071914744, |
| "learning_rate": 4.982513289781671e-06, |
| "loss": 0.1403, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 1.4265466923705232, |
| "learning_rate": 4.982428810111512e-06, |
| "loss": 0.0846, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 1.4254072959974569, |
| "learning_rate": 4.9823441275884814e-06, |
| "loss": 0.0787, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 2.353200458571576, |
| "learning_rate": 4.982259242219499e-06, |
| "loss": 0.1114, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 1.3512279730893322, |
| "learning_rate": 4.9821741540115006e-06, |
| "loss": 0.0678, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 1.728060266498106, |
| "learning_rate": 4.982088862971441e-06, |
| "loss": 0.1129, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 1.8022543001727114, |
| "learning_rate": 4.982003369106287e-06, |
| "loss": 0.1036, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 1.2312712834502222, |
| "learning_rate": 4.981917672423028e-06, |
| "loss": 0.065, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 1.6183848549336255, |
| "learning_rate": 4.981831772928664e-06, |
| "loss": 0.0934, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 2.001713262915152, |
| "learning_rate": 4.981745670630216e-06, |
| "loss": 0.1356, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 2.0057745044552995, |
| "learning_rate": 4.981659365534718e-06, |
| "loss": 0.1285, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 2.299079022869691, |
| "learning_rate": 4.981572857649225e-06, |
| "loss": 0.1195, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 1.6869951958248894, |
| "learning_rate": 4.981486146980804e-06, |
| "loss": 0.0877, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 1.9301190501764922, |
| "learning_rate": 4.9813992335365415e-06, |
| "loss": 0.0977, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 1.6227704434432904, |
| "learning_rate": 4.98131211732354e-06, |
| "loss": 0.1035, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 1.632769015838627, |
| "learning_rate": 4.981224798348917e-06, |
| "loss": 0.0833, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 2.3862639707091082, |
| "learning_rate": 4.981137276619809e-06, |
| "loss": 0.1419, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 1.2625986411158334, |
| "learning_rate": 4.9810495521433675e-06, |
| "loss": 0.078, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 2.5081068393508157, |
| "learning_rate": 4.9809616249267616e-06, |
| "loss": 0.1478, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 1.9644808854065114, |
| "learning_rate": 4.980873494977174e-06, |
| "loss": 0.121, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 1.647433915922947, |
| "learning_rate": 4.98078516230181e-06, |
| "loss": 0.0865, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 1.5774273491436515, |
| "learning_rate": 4.980696626907884e-06, |
| "loss": 0.0887, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.5604062690588907, |
| "learning_rate": 4.980607888802633e-06, |
| "loss": 0.1, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 1.548442809835796, |
| "learning_rate": 4.980518947993307e-06, |
| "loss": 0.1005, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 1.6276180373825353, |
| "learning_rate": 4.980429804487176e-06, |
| "loss": 0.1006, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 1.5718547041391637, |
| "learning_rate": 4.980340458291521e-06, |
| "loss": 0.0858, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 1.3679183632524226, |
| "learning_rate": 4.980250909413646e-06, |
| "loss": 0.0901, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 1.7491296961984788, |
| "learning_rate": 4.980161157860867e-06, |
| "loss": 0.0888, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 2.0306839493761446, |
| "learning_rate": 4.980071203640519e-06, |
| "loss": 0.0893, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 1.353153596211688, |
| "learning_rate": 4.979981046759952e-06, |
| "loss": 0.0753, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 1.969605104045741, |
| "learning_rate": 4.979890687226533e-06, |
| "loss": 0.1033, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 2.085518332646124, |
| "learning_rate": 4.979800125047647e-06, |
| "loss": 0.0979, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 1.6181669031153556, |
| "learning_rate": 4.979709360230692e-06, |
| "loss": 0.0969, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 1.6760914355637484, |
| "learning_rate": 4.979618392783087e-06, |
| "loss": 0.0883, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 1.2907730003800948, |
| "learning_rate": 4.979527222712266e-06, |
| "loss": 0.0775, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 1.241096973502198, |
| "learning_rate": 4.9794358500256765e-06, |
| "loss": 0.0599, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 1.579037640818148, |
| "learning_rate": 4.979344274730786e-06, |
| "loss": 0.0831, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 2.225915719971972, |
| "learning_rate": 4.979252496835079e-06, |
| "loss": 0.1116, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 2.3031173397129923, |
| "learning_rate": 4.979160516346054e-06, |
| "loss": 0.1536, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 27.297310781833385, |
| "learning_rate": 4.979068333271227e-06, |
| "loss": 0.9223, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 2.4041431299507607, |
| "learning_rate": 4.978975947618131e-06, |
| "loss": 0.1184, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 1.6683861662324915, |
| "learning_rate": 4.978883359394316e-06, |
| "loss": 0.1301, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 1.9056814965685545, |
| "learning_rate": 4.978790568607347e-06, |
| "loss": 0.1001, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 1.9713836323302738, |
| "learning_rate": 4.9786975752648076e-06, |
| "loss": 0.1174, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 1.598376196967646, |
| "learning_rate": 4.978604379374295e-06, |
| "loss": 0.0986, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 1.5517923833736031, |
| "learning_rate": 4.978510980943427e-06, |
| "loss": 0.0807, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 2.004418653450344, |
| "learning_rate": 4.978417379979834e-06, |
| "loss": 0.1065, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 1.7753220163198007, |
| "learning_rate": 4.978323576491165e-06, |
| "loss": 0.0987, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 1.7384737383317277, |
| "learning_rate": 4.978229570485085e-06, |
| "loss": 0.1048, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 1.5352099211420311, |
| "learning_rate": 4.978135361969276e-06, |
| "loss": 0.0983, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 1.6028799125387194, |
| "learning_rate": 4.9780409509514375e-06, |
| "loss": 0.091, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 1.9664054893168261, |
| "learning_rate": 4.977946337439282e-06, |
| "loss": 0.1495, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 1.7122667851036462, |
| "learning_rate": 4.9778515214405436e-06, |
| "loss": 0.1139, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 1.7566455248377864, |
| "learning_rate": 4.977756502962967e-06, |
| "loss": 0.1097, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 1.1350501611425003, |
| "learning_rate": 4.97766128201432e-06, |
| "loss": 0.0629, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 1.2023067292666059, |
| "learning_rate": 4.977565858602381e-06, |
| "loss": 0.0782, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 1.628252441426902, |
| "learning_rate": 4.977470232734949e-06, |
| "loss": 0.0987, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 1.724322735405813, |
| "learning_rate": 4.977374404419838e-06, |
| "loss": 0.0903, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 1.470263169494043, |
| "learning_rate": 4.977278373664877e-06, |
| "loss": 0.0882, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 2.599396527432543, |
| "learning_rate": 4.977182140477916e-06, |
| "loss": 0.1209, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 1.6800447119151198, |
| "learning_rate": 4.977085704866817e-06, |
| "loss": 0.0776, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 1.5595540666125045, |
| "learning_rate": 4.97698906683946e-06, |
| "loss": 0.103, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 2.248635180290087, |
| "learning_rate": 4.9768922264037435e-06, |
| "loss": 0.1388, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 1.1547627152960565, |
| "learning_rate": 4.976795183567579e-06, |
| "loss": 0.0624, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 1.56353757750327, |
| "learning_rate": 4.976697938338898e-06, |
| "loss": 0.0856, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 1.2335181237621284, |
| "learning_rate": 4.976600490725645e-06, |
| "loss": 0.0644, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 1.900991648340467, |
| "learning_rate": 4.976502840735785e-06, |
| "loss": 0.153, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 1.3078243371858722, |
| "learning_rate": 4.976404988377297e-06, |
| "loss": 0.0621, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 2.0047686247285923, |
| "learning_rate": 4.976306933658176e-06, |
| "loss": 0.1136, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 1.8552855878852923, |
| "learning_rate": 4.976208676586435e-06, |
| "loss": 0.1284, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 1.8525936784229493, |
| "learning_rate": 4.976110217170104e-06, |
| "loss": 0.0917, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 1.4658188242525991, |
| "learning_rate": 4.976011555417228e-06, |
| "loss": 0.0749, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 1.1511032936840262, |
| "learning_rate": 4.975912691335869e-06, |
| "loss": 0.0761, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 1.458580259230844, |
| "learning_rate": 4.975813624934106e-06, |
| "loss": 0.0768, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 1.5627508232221192, |
| "learning_rate": 4.975714356220035e-06, |
| "loss": 0.0823, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 1.075721834306004, |
| "learning_rate": 4.975614885201766e-06, |
| "loss": 0.0504, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 1.6198884733457342, |
| "learning_rate": 4.975515211887429e-06, |
| "loss": 0.1024, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 1.6346417323820548, |
| "learning_rate": 4.9754153362851684e-06, |
| "loss": 0.0851, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 2.448143027911265, |
| "learning_rate": 4.975315258403145e-06, |
| "loss": 0.1479, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 1.6016068432961146, |
| "learning_rate": 4.975214978249537e-06, |
| "loss": 0.0886, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 1.4721161321318619, |
| "learning_rate": 4.975114495832539e-06, |
| "loss": 0.0976, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 1.7625335294527533, |
| "learning_rate": 4.975013811160362e-06, |
| "loss": 0.0898, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 1.9298670425360585, |
| "learning_rate": 4.974912924241233e-06, |
| "loss": 0.1027, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 1.4996755802132458, |
| "learning_rate": 4.974811835083397e-06, |
| "loss": 0.0978, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 2.1147277125940955, |
| "learning_rate": 4.974710543695114e-06, |
| "loss": 0.1063, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 2.529920688558412, |
| "learning_rate": 4.974609050084661e-06, |
| "loss": 0.1476, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 2.14209787933433, |
| "learning_rate": 4.974507354260332e-06, |
| "loss": 0.1261, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 1.9058176611193165, |
| "learning_rate": 4.974405456230436e-06, |
| "loss": 0.1203, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 1.8980074058725056, |
| "learning_rate": 4.974303356003301e-06, |
| "loss": 0.0996, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 1.4579903539692274, |
| "learning_rate": 4.974201053587268e-06, |
| "loss": 0.0943, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 1.3940386820106656, |
| "learning_rate": 4.9740985489907005e-06, |
| "loss": 0.0663, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 2.441971054754706, |
| "learning_rate": 4.973995842221971e-06, |
| "loss": 0.1245, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 1.919620601900113, |
| "learning_rate": 4.973892933289476e-06, |
| "loss": 0.1159, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 1.672712776153676, |
| "learning_rate": 4.97378982220162e-06, |
| "loss": 0.0981, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 1.2125382683302124, |
| "learning_rate": 4.973686508966832e-06, |
| "loss": 0.0601, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 1.222443145221144, |
| "learning_rate": 4.973582993593554e-06, |
| "loss": 0.0715, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 1.5223951861259333, |
| "learning_rate": 4.973479276090244e-06, |
| "loss": 0.0795, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 1.2392582362318094, |
| "learning_rate": 4.973375356465378e-06, |
| "loss": 0.061, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 1.7285156139774616, |
| "learning_rate": 4.973271234727447e-06, |
| "loss": 0.1201, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 1.4723786585295477, |
| "learning_rate": 4.97316691088496e-06, |
| "loss": 0.0885, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 2.25192801645438, |
| "learning_rate": 4.973062384946442e-06, |
| "loss": 0.135, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 1.1373098395352674, |
| "learning_rate": 4.9729576569204345e-06, |
| "loss": 0.0728, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 1.5300830315604266, |
| "learning_rate": 4.972852726815495e-06, |
| "loss": 0.0941, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 1.8026113068627658, |
| "learning_rate": 4.972747594640197e-06, |
| "loss": 0.1247, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 1.794104737159684, |
| "learning_rate": 4.9726422604031335e-06, |
| "loss": 0.095, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 1.1504559186965777, |
| "learning_rate": 4.97253672411291e-06, |
| "loss": 0.0674, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 1.4316672986650767, |
| "learning_rate": 4.972430985778152e-06, |
| "loss": 0.0702, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 1.5328603666600327, |
| "learning_rate": 4.972325045407499e-06, |
| "loss": 0.0675, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 3.2405357176859857, |
| "learning_rate": 4.972218903009608e-06, |
| "loss": 0.1212, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 1.5109558607242208, |
| "learning_rate": 4.972112558593153e-06, |
| "loss": 0.0938, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 1.264935168060258, |
| "learning_rate": 4.972006012166823e-06, |
| "loss": 0.0742, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 1.3461924059029533, |
| "learning_rate": 4.971899263739326e-06, |
| "loss": 0.0844, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 1.7441591810954875, |
| "learning_rate": 4.971792313319384e-06, |
| "loss": 0.1139, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 1.7027600325330141, |
| "learning_rate": 4.971685160915737e-06, |
| "loss": 0.0867, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 1.6301828004618641, |
| "learning_rate": 4.971577806537139e-06, |
| "loss": 0.0943, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 1.6173281507194255, |
| "learning_rate": 4.971470250192366e-06, |
| "loss": 0.1052, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 17.712189021618492, |
| "learning_rate": 4.9713624918902045e-06, |
| "loss": 0.3191, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 2.336934606774547, |
| "learning_rate": 4.971254531639461e-06, |
| "loss": 0.1347, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 1.8922827015678323, |
| "learning_rate": 4.971146369448957e-06, |
| "loss": 0.1144, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 1.7408688040721931, |
| "learning_rate": 4.971038005327532e-06, |
| "loss": 0.1143, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 1.9327103804196282, |
| "learning_rate": 4.970929439284039e-06, |
| "loss": 0.1377, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 2.0181579320929224, |
| "learning_rate": 4.970820671327351e-06, |
| "loss": 0.1259, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 1.1056426992050885, |
| "learning_rate": 4.9707117014663565e-06, |
| "loss": 0.0633, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 1.853338129642874, |
| "learning_rate": 4.97060252970996e-06, |
| "loss": 0.1215, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 1.6843406450831364, |
| "learning_rate": 4.970493156067081e-06, |
| "loss": 0.1016, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 1.1701908663612965, |
| "learning_rate": 4.970383580546658e-06, |
| "loss": 0.0731, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 1.7890527407391215, |
| "learning_rate": 4.970273803157645e-06, |
| "loss": 0.1097, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 1.4169073671700831, |
| "learning_rate": 4.970163823909013e-06, |
| "loss": 0.0845, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 1.5828589024944335, |
| "learning_rate": 4.970053642809748e-06, |
| "loss": 0.0921, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 1.6370747251722932, |
| "learning_rate": 4.969943259868853e-06, |
| "loss": 0.1088, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 2.023470308157194, |
| "learning_rate": 4.969832675095351e-06, |
| "loss": 0.1052, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 1.7462230999429424, |
| "learning_rate": 4.969721888498275e-06, |
| "loss": 0.1141, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 1.428774250085193, |
| "learning_rate": 4.96961090008668e-06, |
| "loss": 0.0824, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 1.6447081301063733, |
| "learning_rate": 4.969499709869635e-06, |
| "loss": 0.1324, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 2.0250820847646054, |
| "learning_rate": 4.969388317856225e-06, |
| "loss": 0.1122, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 2.060820071851061, |
| "learning_rate": 4.969276724055554e-06, |
| "loss": 0.128, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 1.8421595012757042, |
| "learning_rate": 4.969164928476741e-06, |
| "loss": 0.0929, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 1.8378761522798848, |
| "learning_rate": 4.969052931128919e-06, |
| "loss": 0.1038, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 1.4559119574869848, |
| "learning_rate": 4.968940732021243e-06, |
| "loss": 0.0884, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 1.9971887851212364, |
| "learning_rate": 4.9688283311628795e-06, |
| "loss": 0.1353, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 1.7386639848323409, |
| "learning_rate": 4.968715728563014e-06, |
| "loss": 0.1025, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2529572338489536, |
| "grad_norm": 1.260155855896464, |
| "learning_rate": 4.968602924230847e-06, |
| "loss": 0.0684, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2534121929026388, |
| "grad_norm": 2.3395689748358843, |
| "learning_rate": 4.968489918175598e-06, |
| "loss": 0.1151, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2538671519563239, |
| "grad_norm": 2.0737729432038137, |
| "learning_rate": 4.9683767104065014e-06, |
| "loss": 0.107, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2543221110100091, |
| "grad_norm": 1.4554456387078378, |
| "learning_rate": 4.968263300932806e-06, |
| "loss": 0.0674, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 1.236095562563839, |
| "learning_rate": 4.968149689763781e-06, |
| "loss": 0.0771, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2552320291173794, |
| "grad_norm": 1.6261579693523964, |
| "learning_rate": 4.968035876908708e-06, |
| "loss": 0.1033, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2556869881710646, |
| "grad_norm": 1.8267174614929946, |
| "learning_rate": 4.967921862376889e-06, |
| "loss": 0.1153, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.25614194722474976, |
| "grad_norm": 1.9897704292294367, |
| "learning_rate": 4.9678076461776415e-06, |
| "loss": 0.1168, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.25659690627843496, |
| "grad_norm": 1.9727936679798233, |
| "learning_rate": 4.9676932283202965e-06, |
| "loss": 0.1389, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2570518653321201, |
| "grad_norm": 1.8484690700205213, |
| "learning_rate": 4.967578608814205e-06, |
| "loss": 0.1024, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2575068243858053, |
| "grad_norm": 1.4833575893287436, |
| "learning_rate": 4.9674637876687345e-06, |
| "loss": 0.0959, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.25796178343949044, |
| "grad_norm": 1.0731244531443167, |
| "learning_rate": 4.967348764893265e-06, |
| "loss": 0.0652, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2584167424931756, |
| "grad_norm": 1.882586364820984, |
| "learning_rate": 4.967233540497197e-06, |
| "loss": 0.0887, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2588717015468608, |
| "grad_norm": 1.5585900206462215, |
| "learning_rate": 4.967118114489946e-06, |
| "loss": 0.0705, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.25932666060054593, |
| "grad_norm": 1.4304247727655925, |
| "learning_rate": 4.967002486880944e-06, |
| "loss": 0.0689, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25978161965423113, |
| "grad_norm": 1.996611084455256, |
| "learning_rate": 4.966886657679641e-06, |
| "loss": 0.1134, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2602365787079163, |
| "grad_norm": 2.573142554440562, |
| "learning_rate": 4.966770626895499e-06, |
| "loss": 0.137, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2606915377616015, |
| "grad_norm": 1.7759211248358038, |
| "learning_rate": 4.966654394538002e-06, |
| "loss": 0.097, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2611464968152866, |
| "grad_norm": 1.3021079669208342, |
| "learning_rate": 4.966537960616646e-06, |
| "loss": 0.0774, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.26160145586897177, |
| "grad_norm": 2.328733131052364, |
| "learning_rate": 4.9664213251409486e-06, |
| "loss": 0.1105, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26205641492265697, |
| "grad_norm": 2.281267812919593, |
| "learning_rate": 4.9663044881204375e-06, |
| "loss": 0.1556, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2625113739763421, |
| "grad_norm": 1.7215892787568372, |
| "learning_rate": 4.9661874495646615e-06, |
| "loss": 0.0917, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2629663330300273, |
| "grad_norm": 1.3072003221216781, |
| "learning_rate": 4.9660702094831845e-06, |
| "loss": 0.0818, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.26342129208371245, |
| "grad_norm": 2.141135787879026, |
| "learning_rate": 4.965952767885587e-06, |
| "loss": 0.1187, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.26387625113739765, |
| "grad_norm": 2.3440295569320857, |
| "learning_rate": 4.965835124781465e-06, |
| "loss": 0.1336, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2643312101910828, |
| "grad_norm": 1.2377586425554465, |
| "learning_rate": 4.965717280180432e-06, |
| "loss": 0.0771, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.26478616924476794, |
| "grad_norm": 1.5553208083958672, |
| "learning_rate": 4.965599234092118e-06, |
| "loss": 0.0906, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.26524112829845314, |
| "grad_norm": 1.676762616981095, |
| "learning_rate": 4.96548098652617e-06, |
| "loss": 0.1091, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2656960873521383, |
| "grad_norm": 1.8329426527347645, |
| "learning_rate": 4.965362537492249e-06, |
| "loss": 0.1171, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2661510464058235, |
| "grad_norm": 1.2752855217123082, |
| "learning_rate": 4.9652438870000356e-06, |
| "loss": 0.0726, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26660600545950863, |
| "grad_norm": 1.188941544645384, |
| "learning_rate": 4.965125035059224e-06, |
| "loss": 0.0801, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.26706096451319383, |
| "grad_norm": 1.4654127807937742, |
| "learning_rate": 4.965005981679527e-06, |
| "loss": 0.0839, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.267515923566879, |
| "grad_norm": 2.0288718475884107, |
| "learning_rate": 4.964886726870673e-06, |
| "loss": 0.1239, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2679708826205642, |
| "grad_norm": 1.972686660841513, |
| "learning_rate": 4.964767270642407e-06, |
| "loss": 0.1004, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2684258416742493, |
| "grad_norm": 1.6499743360699521, |
| "learning_rate": 4.964647613004491e-06, |
| "loss": 0.0976, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26888080072793447, |
| "grad_norm": 1.5661213245685233, |
| "learning_rate": 4.964527753966702e-06, |
| "loss": 0.0818, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.26933575978161967, |
| "grad_norm": 1.387453226127614, |
| "learning_rate": 4.964407693538834e-06, |
| "loss": 0.0813, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2697907188353048, |
| "grad_norm": 1.8652006740776592, |
| "learning_rate": 4.9642874317307e-06, |
| "loss": 0.1092, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.27024567788899, |
| "grad_norm": 1.6739291749648295, |
| "learning_rate": 4.964166968552124e-06, |
| "loss": 0.1262, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.27070063694267515, |
| "grad_norm": 1.4965319066427345, |
| "learning_rate": 4.9640463040129525e-06, |
| "loss": 0.0749, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.27115559599636035, |
| "grad_norm": 1.483777185503557, |
| "learning_rate": 4.963925438123044e-06, |
| "loss": 0.075, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2716105550500455, |
| "grad_norm": 1.646106287941782, |
| "learning_rate": 4.963804370892276e-06, |
| "loss": 0.0948, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.27206551410373064, |
| "grad_norm": 1.8923424637891237, |
| "learning_rate": 4.9636831023305405e-06, |
| "loss": 0.1296, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.27252047315741584, |
| "grad_norm": 1.453967822900046, |
| "learning_rate": 4.963561632447748e-06, |
| "loss": 0.0777, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.272975432211101, |
| "grad_norm": 1.2633146266239919, |
| "learning_rate": 4.9634399612538255e-06, |
| "loss": 0.0704, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2734303912647862, |
| "grad_norm": 24.856853600017228, |
| "learning_rate": 4.963318088758714e-06, |
| "loss": 0.4372, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.27388535031847133, |
| "grad_norm": 1.6301604814034822, |
| "learning_rate": 4.963196014972371e-06, |
| "loss": 0.0879, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.27434030937215653, |
| "grad_norm": 1.556460730817159, |
| "learning_rate": 4.963073739904775e-06, |
| "loss": 0.0893, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2747952684258417, |
| "grad_norm": 1.657318032059153, |
| "learning_rate": 4.962951263565915e-06, |
| "loss": 0.0933, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2752502274795268, |
| "grad_norm": 2.273490391362205, |
| "learning_rate": 4.962828585965801e-06, |
| "loss": 0.1038, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.275705186533212, |
| "grad_norm": 1.5114052665682505, |
| "learning_rate": 4.962705707114457e-06, |
| "loss": 0.097, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.27616014558689717, |
| "grad_norm": 1.7683179621585026, |
| "learning_rate": 4.962582627021923e-06, |
| "loss": 0.1127, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.27661510464058237, |
| "grad_norm": 1.8859941959717001, |
| "learning_rate": 4.962459345698258e-06, |
| "loss": 0.1152, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2770700636942675, |
| "grad_norm": 1.9839838015935523, |
| "learning_rate": 4.962335863153537e-06, |
| "loss": 0.1198, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2775250227479527, |
| "grad_norm": 1.3671283570292578, |
| "learning_rate": 4.962212179397847e-06, |
| "loss": 0.0876, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.27797998180163785, |
| "grad_norm": 1.4623540558631782, |
| "learning_rate": 4.962088294441299e-06, |
| "loss": 0.0754, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.278434940855323, |
| "grad_norm": 2.3501285954750806, |
| "learning_rate": 4.9619642082940135e-06, |
| "loss": 0.1, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2788898999090082, |
| "grad_norm": 1.6593172768016098, |
| "learning_rate": 4.9618399209661305e-06, |
| "loss": 0.0918, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.27934485896269334, |
| "grad_norm": 1.4913746956676242, |
| "learning_rate": 4.961715432467807e-06, |
| "loss": 0.0788, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.27979981801637854, |
| "grad_norm": 1.3335438953393988, |
| "learning_rate": 4.961590742809216e-06, |
| "loss": 0.0743, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2802547770700637, |
| "grad_norm": 1.4631866469804606, |
| "learning_rate": 4.961465852000545e-06, |
| "loss": 0.0869, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2807097361237489, |
| "grad_norm": 1.8021656107937525, |
| "learning_rate": 4.961340760052001e-06, |
| "loss": 0.0906, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.28116469517743403, |
| "grad_norm": 1.74213914067233, |
| "learning_rate": 4.961215466973806e-06, |
| "loss": 0.0926, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2816196542311192, |
| "grad_norm": 2.764803909834576, |
| "learning_rate": 4.961089972776197e-06, |
| "loss": 0.1823, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2820746132848044, |
| "grad_norm": 1.3665676735119967, |
| "learning_rate": 4.9609642774694285e-06, |
| "loss": 0.0734, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2825295723384895, |
| "grad_norm": 1.9426323562959267, |
| "learning_rate": 4.960838381063774e-06, |
| "loss": 0.0972, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2829845313921747, |
| "grad_norm": 2.3374254341147322, |
| "learning_rate": 4.960712283569521e-06, |
| "loss": 0.1411, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.28343949044585987, |
| "grad_norm": 2.2747894788958543, |
| "learning_rate": 4.960585984996971e-06, |
| "loss": 0.1033, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.28389444949954507, |
| "grad_norm": 1.7445142059152803, |
| "learning_rate": 4.960459485356447e-06, |
| "loss": 0.1222, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2843494085532302, |
| "grad_norm": 1.5220008831965313, |
| "learning_rate": 4.960332784658285e-06, |
| "loss": 0.1027, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28480436760691535, |
| "grad_norm": 2.1347326062219034, |
| "learning_rate": 4.960205882912839e-06, |
| "loss": 0.1237, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.28525932666060055, |
| "grad_norm": 2.5984695620436002, |
| "learning_rate": 4.9600787801304785e-06, |
| "loss": 0.1871, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.1207792848317375, |
| "learning_rate": 4.959951476321589e-06, |
| "loss": 0.1205, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2861692447679709, |
| "grad_norm": 1.1897630810057305, |
| "learning_rate": 4.959823971496575e-06, |
| "loss": 0.0773, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.28662420382165604, |
| "grad_norm": 3.4920069239312976, |
| "learning_rate": 4.959696265665853e-06, |
| "loss": 0.1897, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28707916287534124, |
| "grad_norm": 1.425742783647833, |
| "learning_rate": 4.959568358839862e-06, |
| "loss": 0.0635, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2875341219290264, |
| "grad_norm": 1.330689822741385, |
| "learning_rate": 4.95944025102905e-06, |
| "loss": 0.0722, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.28798908098271153, |
| "grad_norm": 1.99039564333339, |
| "learning_rate": 4.959311942243888e-06, |
| "loss": 0.1158, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.28844404003639673, |
| "grad_norm": 1.593751969696495, |
| "learning_rate": 4.95918343249486e-06, |
| "loss": 0.0861, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2888989990900819, |
| "grad_norm": 1.8945402616067804, |
| "learning_rate": 4.959054721792469e-06, |
| "loss": 0.1171, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2893539581437671, |
| "grad_norm": 1.4569740573581391, |
| "learning_rate": 4.958925810147231e-06, |
| "loss": 0.0777, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2898089171974522, |
| "grad_norm": 1.7102068304451903, |
| "learning_rate": 4.958796697569679e-06, |
| "loss": 0.0872, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2902638762511374, |
| "grad_norm": 1.5378977203553044, |
| "learning_rate": 4.958667384070365e-06, |
| "loss": 0.0796, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.29071883530482256, |
| "grad_norm": 1.9723232607058794, |
| "learning_rate": 4.958537869659855e-06, |
| "loss": 0.1204, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2911737943585077, |
| "grad_norm": 1.4856408560761394, |
| "learning_rate": 4.958408154348734e-06, |
| "loss": 0.0763, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2916287534121929, |
| "grad_norm": 1.7342797592944788, |
| "learning_rate": 4.9582782381476e-06, |
| "loss": 0.1104, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.29208371246587805, |
| "grad_norm": 2.179383476129295, |
| "learning_rate": 4.958148121067071e-06, |
| "loss": 0.1694, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.29253867151956325, |
| "grad_norm": 1.8609060135735762, |
| "learning_rate": 4.9580178031177775e-06, |
| "loss": 0.1303, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2929936305732484, |
| "grad_norm": 1.4742279064065518, |
| "learning_rate": 4.9578872843103694e-06, |
| "loss": 0.1001, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2934485896269336, |
| "grad_norm": 1.7670333338462736, |
| "learning_rate": 4.957756564655513e-06, |
| "loss": 0.1022, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.29390354868061874, |
| "grad_norm": 1.6630538784639108, |
| "learning_rate": 4.957625644163888e-06, |
| "loss": 0.1055, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2943585077343039, |
| "grad_norm": 1.9118546637397547, |
| "learning_rate": 4.957494522846194e-06, |
| "loss": 0.1029, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2948134667879891, |
| "grad_norm": 1.7468783195584092, |
| "learning_rate": 4.957363200713146e-06, |
| "loss": 0.13, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.29526842584167423, |
| "grad_norm": 1.4923304655802225, |
| "learning_rate": 4.957231677775475e-06, |
| "loss": 0.0846, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.29572338489535943, |
| "grad_norm": 2.0864859163635407, |
| "learning_rate": 4.957099954043928e-06, |
| "loss": 0.1363, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2961783439490446, |
| "grad_norm": 1.467640729386297, |
| "learning_rate": 4.956968029529269e-06, |
| "loss": 0.113, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2966333030027298, |
| "grad_norm": 1.5940129351295147, |
| "learning_rate": 4.956835904242277e-06, |
| "loss": 0.1121, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2970882620564149, |
| "grad_norm": 1.305300483782713, |
| "learning_rate": 4.9567035781937516e-06, |
| "loss": 0.0569, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.29754322111010006, |
| "grad_norm": 1.8626374769697236, |
| "learning_rate": 4.9565710513945024e-06, |
| "loss": 0.095, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.29799818016378526, |
| "grad_norm": 1.9350135167075724, |
| "learning_rate": 4.956438323855362e-06, |
| "loss": 0.11, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2984531392174704, |
| "grad_norm": 1.7292500874953625, |
| "learning_rate": 4.956305395587174e-06, |
| "loss": 0.1259, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2989080982711556, |
| "grad_norm": 1.7021672274359103, |
| "learning_rate": 4.956172266600802e-06, |
| "loss": 0.0857, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.29936305732484075, |
| "grad_norm": 1.2481942065304896, |
| "learning_rate": 4.956038936907125e-06, |
| "loss": 0.0776, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.29981801637852595, |
| "grad_norm": 1.4091727470459356, |
| "learning_rate": 4.955905406517036e-06, |
| "loss": 0.0706, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3002729754322111, |
| "grad_norm": 1.8640524340898077, |
| "learning_rate": 4.95577167544145e-06, |
| "loss": 0.1176, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.30072793448589624, |
| "grad_norm": 2.0619543797721698, |
| "learning_rate": 4.955637743691291e-06, |
| "loss": 0.1148, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.30118289353958144, |
| "grad_norm": 1.9364848961200234, |
| "learning_rate": 4.955503611277506e-06, |
| "loss": 0.0964, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3016378525932666, |
| "grad_norm": 1.5509916734065172, |
| "learning_rate": 4.955369278211055e-06, |
| "loss": 0.0824, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3020928116469518, |
| "grad_norm": 1.8848317603882998, |
| "learning_rate": 4.955234744502914e-06, |
| "loss": 0.1, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.30254777070063693, |
| "grad_norm": 1.7147002197137917, |
| "learning_rate": 4.955100010164079e-06, |
| "loss": 0.1042, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30300272975432213, |
| "grad_norm": 1.8287392204283686, |
| "learning_rate": 4.954965075205557e-06, |
| "loss": 0.0894, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3034576888080073, |
| "grad_norm": 3.2978505813072765, |
| "learning_rate": 4.9548299396383755e-06, |
| "loss": 0.1555, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3039126478616925, |
| "grad_norm": 1.733214316892207, |
| "learning_rate": 4.954694603473578e-06, |
| "loss": 0.0848, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3043676069153776, |
| "grad_norm": 2.1290440022616917, |
| "learning_rate": 4.954559066722222e-06, |
| "loss": 0.1329, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.30482256596906276, |
| "grad_norm": 1.7482728884321743, |
| "learning_rate": 4.954423329395385e-06, |
| "loss": 0.1135, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30527752502274796, |
| "grad_norm": 1.8272762006745102, |
| "learning_rate": 4.954287391504156e-06, |
| "loss": 0.1233, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3057324840764331, |
| "grad_norm": 2.276356474817249, |
| "learning_rate": 4.9541512530596455e-06, |
| "loss": 0.1426, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3061874431301183, |
| "grad_norm": 1.5212465132609405, |
| "learning_rate": 4.954014914072978e-06, |
| "loss": 0.0908, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.30664240218380345, |
| "grad_norm": 1.7081770141846233, |
| "learning_rate": 4.9538783745552934e-06, |
| "loss": 0.1069, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.30709736123748865, |
| "grad_norm": 2.2065783569813755, |
| "learning_rate": 4.95374163451775e-06, |
| "loss": 0.1303, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3075523202911738, |
| "grad_norm": 1.9717809133208803, |
| "learning_rate": 4.953604693971521e-06, |
| "loss": 0.0969, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.30800727934485894, |
| "grad_norm": 1.5094990032560427, |
| "learning_rate": 4.953467552927798e-06, |
| "loss": 0.059, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.30846223839854414, |
| "grad_norm": 2.5084055121202726, |
| "learning_rate": 4.9533302113977845e-06, |
| "loss": 0.141, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3089171974522293, |
| "grad_norm": 2.1105100650062814, |
| "learning_rate": 4.9531926693927055e-06, |
| "loss": 0.1162, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3093721565059145, |
| "grad_norm": 1.9374617838160508, |
| "learning_rate": 4.953054926923801e-06, |
| "loss": 0.1119, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.30982711555959963, |
| "grad_norm": 2.266159358282095, |
| "learning_rate": 4.952916984002325e-06, |
| "loss": 0.1188, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.31028207461328483, |
| "grad_norm": 2.1490900129362243, |
| "learning_rate": 4.95277884063955e-06, |
| "loss": 0.1337, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.31073703366697, |
| "grad_norm": 1.5330806658735066, |
| "learning_rate": 4.952640496846766e-06, |
| "loss": 0.109, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3111919927206551, |
| "grad_norm": 1.41231573264733, |
| "learning_rate": 4.952501952635276e-06, |
| "loss": 0.0837, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3116469517743403, |
| "grad_norm": 1.993511064296186, |
| "learning_rate": 4.952363208016402e-06, |
| "loss": 0.1272, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.31210191082802546, |
| "grad_norm": 1.6098606771380728, |
| "learning_rate": 4.952224263001482e-06, |
| "loss": 0.0816, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.31255686988171066, |
| "grad_norm": 1.2309412681015492, |
| "learning_rate": 4.952085117601868e-06, |
| "loss": 0.0692, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3130118289353958, |
| "grad_norm": 1.7997377974129165, |
| "learning_rate": 4.951945771828933e-06, |
| "loss": 0.1322, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.313466787989081, |
| "grad_norm": 1.3223154067967124, |
| "learning_rate": 4.951806225694061e-06, |
| "loss": 0.0979, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.31392174704276615, |
| "grad_norm": 1.9747397800251965, |
| "learning_rate": 4.951666479208658e-06, |
| "loss": 0.1184, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3143767060964513, |
| "grad_norm": 1.4466542632801185, |
| "learning_rate": 4.951526532384141e-06, |
| "loss": 0.085, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3148316651501365, |
| "grad_norm": 1.8649877852775587, |
| "learning_rate": 4.951386385231946e-06, |
| "loss": 0.1011, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.31528662420382164, |
| "grad_norm": 1.2680670071467166, |
| "learning_rate": 4.951246037763528e-06, |
| "loss": 0.0748, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.31574158325750684, |
| "grad_norm": 1.5151831279551418, |
| "learning_rate": 4.9511054899903524e-06, |
| "loss": 0.0874, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.316196542311192, |
| "grad_norm": 1.6436638497099227, |
| "learning_rate": 4.950964741923905e-06, |
| "loss": 0.0982, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3166515013648772, |
| "grad_norm": 1.5379093700813176, |
| "learning_rate": 4.950823793575688e-06, |
| "loss": 0.0857, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.31710646041856233, |
| "grad_norm": 2.4063943761092452, |
| "learning_rate": 4.950682644957218e-06, |
| "loss": 0.1253, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3175614194722475, |
| "grad_norm": 2.5063143673804844, |
| "learning_rate": 4.9505412960800295e-06, |
| "loss": 0.1511, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3180163785259327, |
| "grad_norm": 1.722833309256951, |
| "learning_rate": 4.950399746955673e-06, |
| "loss": 0.0999, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 1.8190148406823232, |
| "learning_rate": 4.950257997595716e-06, |
| "loss": 0.0895, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.318926296633303, |
| "grad_norm": 1.9186747250049239, |
| "learning_rate": 4.950116048011739e-06, |
| "loss": 0.0964, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.31938125568698816, |
| "grad_norm": 1.372930302125184, |
| "learning_rate": 4.949973898215344e-06, |
| "loss": 0.0589, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.31983621474067336, |
| "grad_norm": 1.9707430002902289, |
| "learning_rate": 4.949831548218146e-06, |
| "loss": 0.1054, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3202911737943585, |
| "grad_norm": 2.0845604349239832, |
| "learning_rate": 4.949688998031777e-06, |
| "loss": 0.1105, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.32074613284804365, |
| "grad_norm": 1.4969274131429369, |
| "learning_rate": 4.949546247667886e-06, |
| "loss": 0.0814, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.32120109190172885, |
| "grad_norm": 1.9940826155791407, |
| "learning_rate": 4.949403297138137e-06, |
| "loss": 0.1064, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.321656050955414, |
| "grad_norm": 1.7246519891154302, |
| "learning_rate": 4.949260146454212e-06, |
| "loss": 0.1093, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3221110100090992, |
| "grad_norm": 1.6890948945842699, |
| "learning_rate": 4.94911679562781e-06, |
| "loss": 0.0888, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.32256596906278434, |
| "grad_norm": 2.0455963687465837, |
| "learning_rate": 4.948973244670643e-06, |
| "loss": 0.1019, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.32302092811646954, |
| "grad_norm": 1.7678121189421865, |
| "learning_rate": 4.948829493594441e-06, |
| "loss": 0.0961, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3234758871701547, |
| "grad_norm": 1.3731566726245188, |
| "learning_rate": 4.9486855424109524e-06, |
| "loss": 0.072, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.32393084622383983, |
| "grad_norm": 1.4962983653581472, |
| "learning_rate": 4.948541391131939e-06, |
| "loss": 0.0905, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.32438580527752503, |
| "grad_norm": 1.4198695601427125, |
| "learning_rate": 4.948397039769181e-06, |
| "loss": 0.0616, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3248407643312102, |
| "grad_norm": 1.131377673368795, |
| "learning_rate": 4.948252488334474e-06, |
| "loss": 0.0526, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3252957233848954, |
| "grad_norm": 1.1969683311404917, |
| "learning_rate": 4.948107736839629e-06, |
| "loss": 0.0763, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3257506824385805, |
| "grad_norm": 1.6793927846583725, |
| "learning_rate": 4.947962785296476e-06, |
| "loss": 0.1153, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3262056414922657, |
| "grad_norm": 2.070694963019659, |
| "learning_rate": 4.9478176337168594e-06, |
| "loss": 0.1153, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.32666060054595086, |
| "grad_norm": 2.7729923804058516, |
| "learning_rate": 4.9476722821126386e-06, |
| "loss": 0.171, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.327115559599636, |
| "grad_norm": 1.4442284620787837, |
| "learning_rate": 4.9475267304956945e-06, |
| "loss": 0.0997, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3275705186533212, |
| "grad_norm": 2.0979816044129413, |
| "learning_rate": 4.947380978877917e-06, |
| "loss": 0.1138, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32802547770700635, |
| "grad_norm": 1.9982881232916472, |
| "learning_rate": 4.947235027271219e-06, |
| "loss": 0.1402, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.32848043676069155, |
| "grad_norm": 1.3317844805683108, |
| "learning_rate": 4.9470888756875265e-06, |
| "loss": 0.0707, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3289353958143767, |
| "grad_norm": 1.4665146144499257, |
| "learning_rate": 4.946942524138782e-06, |
| "loss": 0.075, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3293903548680619, |
| "grad_norm": 1.6321427811402383, |
| "learning_rate": 4.946795972636944e-06, |
| "loss": 0.0971, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.32984531392174704, |
| "grad_norm": 1.9541110640157349, |
| "learning_rate": 4.94664922119399e-06, |
| "loss": 0.1347, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3303002729754322, |
| "grad_norm": 1.664760132709453, |
| "learning_rate": 4.94650226982191e-06, |
| "loss": 0.0959, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3307552320291174, |
| "grad_norm": 2.509161708357272, |
| "learning_rate": 4.9463551185327115e-06, |
| "loss": 0.1885, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.33121019108280253, |
| "grad_norm": 1.7296886670922147, |
| "learning_rate": 4.946207767338422e-06, |
| "loss": 0.0867, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.33166515013648773, |
| "grad_norm": 1.5254904811287948, |
| "learning_rate": 4.9460602162510805e-06, |
| "loss": 0.09, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3321201091901729, |
| "grad_norm": 1.3404896968358107, |
| "learning_rate": 4.945912465282744e-06, |
| "loss": 0.0782, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3325750682438581, |
| "grad_norm": 1.79952897501454, |
| "learning_rate": 4.945764514445487e-06, |
| "loss": 0.1444, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3330300272975432, |
| "grad_norm": 2.48899319031489, |
| "learning_rate": 4.9456163637513986e-06, |
| "loss": 0.1136, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.33348498635122836, |
| "grad_norm": 1.8285171425829347, |
| "learning_rate": 4.945468013212585e-06, |
| "loss": 0.1052, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.33393994540491356, |
| "grad_norm": 1.7843881981445446, |
| "learning_rate": 4.945319462841169e-06, |
| "loss": 0.1116, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3343949044585987, |
| "grad_norm": 2.181301353034186, |
| "learning_rate": 4.94517071264929e-06, |
| "loss": 0.1251, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3348498635122839, |
| "grad_norm": 1.2980326592722402, |
| "learning_rate": 4.945021762649102e-06, |
| "loss": 0.0648, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.33530482256596905, |
| "grad_norm": 1.3874782347309536, |
| "learning_rate": 4.9448726128527776e-06, |
| "loss": 0.0978, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.33575978161965425, |
| "grad_norm": 1.8955499231356112, |
| "learning_rate": 4.944723263272504e-06, |
| "loss": 0.0998, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3362147406733394, |
| "grad_norm": 1.6102418502733031, |
| "learning_rate": 4.944573713920485e-06, |
| "loss": 0.1055, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.33666969972702454, |
| "grad_norm": 3.355056116777925, |
| "learning_rate": 4.944423964808943e-06, |
| "loss": 0.1831, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33712465878070974, |
| "grad_norm": 1.507329867530008, |
| "learning_rate": 4.944274015950113e-06, |
| "loss": 0.0889, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3375796178343949, |
| "grad_norm": 1.610548678904166, |
| "learning_rate": 4.944123867356249e-06, |
| "loss": 0.0752, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3380345768880801, |
| "grad_norm": 1.918715600058829, |
| "learning_rate": 4.943973519039619e-06, |
| "loss": 0.1335, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.33848953594176523, |
| "grad_norm": 1.3921163271356483, |
| "learning_rate": 4.943822971012511e-06, |
| "loss": 0.0727, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.33894449499545043, |
| "grad_norm": 1.2023922578586952, |
| "learning_rate": 4.943672223287226e-06, |
| "loss": 0.0628, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3393994540491356, |
| "grad_norm": 2.2794421985003317, |
| "learning_rate": 4.9435212758760815e-06, |
| "loss": 0.1404, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3398544131028208, |
| "grad_norm": 1.3986125533304865, |
| "learning_rate": 4.943370128791413e-06, |
| "loss": 0.0787, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3403093721565059, |
| "grad_norm": 1.5259961799310353, |
| "learning_rate": 4.943218782045574e-06, |
| "loss": 0.1079, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.34076433121019106, |
| "grad_norm": 1.8181192019120165, |
| "learning_rate": 4.943067235650927e-06, |
| "loss": 0.1195, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.34121929026387626, |
| "grad_norm": 1.831268771798402, |
| "learning_rate": 4.942915489619859e-06, |
| "loss": 0.1065, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3416742493175614, |
| "grad_norm": 1.7306841826817951, |
| "learning_rate": 4.9427635439647704e-06, |
| "loss": 0.1232, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3421292083712466, |
| "grad_norm": 1.7076927486745839, |
| "learning_rate": 4.942611398698075e-06, |
| "loss": 0.0912, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.34258416742493175, |
| "grad_norm": 1.7425991433970283, |
| "learning_rate": 4.942459053832208e-06, |
| "loss": 0.0997, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.34303912647861695, |
| "grad_norm": 1.809200639541382, |
| "learning_rate": 4.942306509379617e-06, |
| "loss": 0.1085, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3434940855323021, |
| "grad_norm": 1.293751880354007, |
| "learning_rate": 4.942153765352767e-06, |
| "loss": 0.0966, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34394904458598724, |
| "grad_norm": 1.2918089478267207, |
| "learning_rate": 4.94200082176414e-06, |
| "loss": 0.078, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.34440400363967244, |
| "grad_norm": 1.5059276244213293, |
| "learning_rate": 4.941847678626234e-06, |
| "loss": 0.0805, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3448589626933576, |
| "grad_norm": 1.4851814064844335, |
| "learning_rate": 4.941694335951563e-06, |
| "loss": 0.0983, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3453139217470428, |
| "grad_norm": 1.8989617812022122, |
| "learning_rate": 4.9415407937526575e-06, |
| "loss": 0.1107, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.34576888080072793, |
| "grad_norm": 1.8347292963195811, |
| "learning_rate": 4.9413870520420635e-06, |
| "loss": 0.1237, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34622383985441313, |
| "grad_norm": 1.5924498433598573, |
| "learning_rate": 4.941233110832346e-06, |
| "loss": 0.0735, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3466787989080983, |
| "grad_norm": 2.3326854621993984, |
| "learning_rate": 4.941078970136082e-06, |
| "loss": 0.1295, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3471337579617834, |
| "grad_norm": 1.7112828341096407, |
| "learning_rate": 4.940924629965869e-06, |
| "loss": 0.1162, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3475887170154686, |
| "grad_norm": 1.5436956280322631, |
| "learning_rate": 4.940770090334319e-06, |
| "loss": 0.0861, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.34804367606915376, |
| "grad_norm": 1.6236751771508604, |
| "learning_rate": 4.940615351254059e-06, |
| "loss": 0.0968, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.34849863512283896, |
| "grad_norm": 1.0400997330052792, |
| "learning_rate": 4.940460412737734e-06, |
| "loss": 0.0711, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3489535941765241, |
| "grad_norm": 1.623731539624473, |
| "learning_rate": 4.940305274798005e-06, |
| "loss": 0.0929, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3494085532302093, |
| "grad_norm": 1.3764287278870393, |
| "learning_rate": 4.940149937447549e-06, |
| "loss": 0.1002, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.34986351228389445, |
| "grad_norm": 1.1571526873015439, |
| "learning_rate": 4.939994400699061e-06, |
| "loss": 0.0659, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3503184713375796, |
| "grad_norm": 1.3670356182264325, |
| "learning_rate": 4.939838664565248e-06, |
| "loss": 0.0991, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3507734303912648, |
| "grad_norm": 1.2532975621868427, |
| "learning_rate": 4.939682729058839e-06, |
| "loss": 0.0713, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.35122838944494994, |
| "grad_norm": 1.3003896066972325, |
| "learning_rate": 4.939526594192574e-06, |
| "loss": 0.0784, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.35168334849863514, |
| "grad_norm": 1.4253255736587618, |
| "learning_rate": 4.939370259979213e-06, |
| "loss": 0.0826, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3521383075523203, |
| "grad_norm": 2.0399381310170766, |
| "learning_rate": 4.9392137264315295e-06, |
| "loss": 0.1293, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3525932666060055, |
| "grad_norm": 1.938165172266556, |
| "learning_rate": 4.939056993562316e-06, |
| "loss": 0.1407, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35304822565969063, |
| "grad_norm": 1.5665447950299711, |
| "learning_rate": 4.9389000613843805e-06, |
| "loss": 0.0942, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3535031847133758, |
| "grad_norm": 1.6514430942693614, |
| "learning_rate": 4.938742929910546e-06, |
| "loss": 0.0927, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.353958143767061, |
| "grad_norm": 1.0136329941515525, |
| "learning_rate": 4.938585599153652e-06, |
| "loss": 0.0676, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3544131028207461, |
| "grad_norm": 1.6808166258098367, |
| "learning_rate": 4.938428069126555e-06, |
| "loss": 0.1029, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3548680618744313, |
| "grad_norm": 1.6649052760273926, |
| "learning_rate": 4.9382703398421285e-06, |
| "loss": 0.0952, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35532302092811646, |
| "grad_norm": 1.734423574608651, |
| "learning_rate": 4.938112411313261e-06, |
| "loss": 0.1098, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.35577797998180166, |
| "grad_norm": 1.5154424391674823, |
| "learning_rate": 4.937954283552858e-06, |
| "loss": 0.0808, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3562329390354868, |
| "grad_norm": 1.6988796126790968, |
| "learning_rate": 4.93779595657384e-06, |
| "loss": 0.1066, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.35668789808917195, |
| "grad_norm": 2.050921985283142, |
| "learning_rate": 4.937637430389145e-06, |
| "loss": 0.1184, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 1.5678672253769157, |
| "learning_rate": 4.937478705011729e-06, |
| "loss": 0.0709, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3575978161965423, |
| "grad_norm": 1.5215473079480804, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.1086, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3580527752502275, |
| "grad_norm": 1.4009067409412712, |
| "learning_rate": 4.937160656730625e-06, |
| "loss": 0.1004, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.35850773430391264, |
| "grad_norm": 1.538795370618956, |
| "learning_rate": 4.9370013338529274e-06, |
| "loss": 0.0897, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.35896269335759784, |
| "grad_norm": 1.3446100123630027, |
| "learning_rate": 4.936841811834486e-06, |
| "loss": 0.0907, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.359417652411283, |
| "grad_norm": 1.9381081676057568, |
| "learning_rate": 4.936682090688337e-06, |
| "loss": 0.1534, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35987261146496813, |
| "grad_norm": 1.787589837431021, |
| "learning_rate": 4.936522170427531e-06, |
| "loss": 0.0919, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.36032757051865333, |
| "grad_norm": 1.7189621906826116, |
| "learning_rate": 4.936362051065136e-06, |
| "loss": 0.0799, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3607825295723385, |
| "grad_norm": 1.615638183805568, |
| "learning_rate": 4.936201732614238e-06, |
| "loss": 0.0898, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3612374886260237, |
| "grad_norm": 1.899483445293266, |
| "learning_rate": 4.9360412150879355e-06, |
| "loss": 0.1086, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3616924476797088, |
| "grad_norm": 1.8831302635176637, |
| "learning_rate": 4.935880498499346e-06, |
| "loss": 0.0951, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.362147406733394, |
| "grad_norm": 2.0172166216160594, |
| "learning_rate": 4.935719582861604e-06, |
| "loss": 0.0983, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.36260236578707916, |
| "grad_norm": 1.7713001106130557, |
| "learning_rate": 4.935558468187855e-06, |
| "loss": 0.1177, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3630573248407643, |
| "grad_norm": 2.049007453668216, |
| "learning_rate": 4.935397154491268e-06, |
| "loss": 0.1349, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3635122838944495, |
| "grad_norm": 2.02340700279538, |
| "learning_rate": 4.935235641785023e-06, |
| "loss": 0.1419, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.36396724294813465, |
| "grad_norm": 1.5504094804690502, |
| "learning_rate": 4.935073930082319e-06, |
| "loss": 0.1141, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36442220200181985, |
| "grad_norm": 1.3892292745868653, |
| "learning_rate": 4.93491201939637e-06, |
| "loss": 0.0859, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.364877161055505, |
| "grad_norm": 1.636711407623354, |
| "learning_rate": 4.934749909740408e-06, |
| "loss": 0.1168, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3653321201091902, |
| "grad_norm": 1.5867549476191922, |
| "learning_rate": 4.934587601127677e-06, |
| "loss": 0.0941, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.36578707916287534, |
| "grad_norm": 1.5019646850922737, |
| "learning_rate": 4.934425093571442e-06, |
| "loss": 0.0931, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3662420382165605, |
| "grad_norm": 1.5412581659446851, |
| "learning_rate": 4.934262387084984e-06, |
| "loss": 0.0931, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3666969972702457, |
| "grad_norm": 1.3579602631174856, |
| "learning_rate": 4.934099481681595e-06, |
| "loss": 0.0745, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.36715195632393083, |
| "grad_norm": 1.800459979497766, |
| "learning_rate": 4.933936377374589e-06, |
| "loss": 0.1072, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.36760691537761603, |
| "grad_norm": 1.1946995764469395, |
| "learning_rate": 4.933773074177293e-06, |
| "loss": 0.0848, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3680618744313012, |
| "grad_norm": 1.6651644751131276, |
| "learning_rate": 4.933609572103053e-06, |
| "loss": 0.0965, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3685168334849864, |
| "grad_norm": 1.913995880200427, |
| "learning_rate": 4.933445871165229e-06, |
| "loss": 0.1315, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3689717925386715, |
| "grad_norm": 1.5517430124798408, |
| "learning_rate": 4.933281971377197e-06, |
| "loss": 0.0856, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.36942675159235666, |
| "grad_norm": 1.474632001508129, |
| "learning_rate": 4.933117872752352e-06, |
| "loss": 0.0989, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.36988171064604186, |
| "grad_norm": 1.8862093944877263, |
| "learning_rate": 4.932953575304102e-06, |
| "loss": 0.1087, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.370336669699727, |
| "grad_norm": 1.6830668966166524, |
| "learning_rate": 4.932789079045873e-06, |
| "loss": 0.1213, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3707916287534122, |
| "grad_norm": 1.7198476556190763, |
| "learning_rate": 4.932624383991106e-06, |
| "loss": 0.1215, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.37124658780709735, |
| "grad_norm": 2.109229814604393, |
| "learning_rate": 4.9324594901532605e-06, |
| "loss": 0.1337, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.37170154686078255, |
| "grad_norm": 1.4154701665481155, |
| "learning_rate": 4.93229439754581e-06, |
| "loss": 0.0944, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3721565059144677, |
| "grad_norm": 1.973608289061544, |
| "learning_rate": 4.932129106182246e-06, |
| "loss": 0.0901, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.37261146496815284, |
| "grad_norm": 1.651833939526615, |
| "learning_rate": 4.931963616076075e-06, |
| "loss": 0.0876, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.37306642402183804, |
| "grad_norm": 1.3876140677966586, |
| "learning_rate": 4.93179792724082e-06, |
| "loss": 0.0791, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3735213830755232, |
| "grad_norm": 1.4201117298181156, |
| "learning_rate": 4.9316320396900195e-06, |
| "loss": 0.0857, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3739763421292084, |
| "grad_norm": 2.158894018361071, |
| "learning_rate": 4.9314659534372305e-06, |
| "loss": 0.1499, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.37443130118289353, |
| "grad_norm": 1.2722019893377066, |
| "learning_rate": 4.931299668496024e-06, |
| "loss": 0.0626, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.37488626023657873, |
| "grad_norm": 1.5889108253283166, |
| "learning_rate": 4.931133184879988e-06, |
| "loss": 0.1003, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.37534121929026387, |
| "grad_norm": 1.133918642525753, |
| "learning_rate": 4.930966502602727e-06, |
| "loss": 0.0714, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37579617834394907, |
| "grad_norm": 2.1296168633446615, |
| "learning_rate": 4.930799621677862e-06, |
| "loss": 0.1276, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3762511373976342, |
| "grad_norm": 2.018575113751553, |
| "learning_rate": 4.93063254211903e-06, |
| "loss": 0.134, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.37670609645131936, |
| "grad_norm": 1.2247931548507431, |
| "learning_rate": 4.930465263939882e-06, |
| "loss": 0.0617, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.37716105550500456, |
| "grad_norm": 2.032637719937323, |
| "learning_rate": 4.9302977871540894e-06, |
| "loss": 0.1191, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3776160145586897, |
| "grad_norm": 1.8922514826155596, |
| "learning_rate": 4.930130111775336e-06, |
| "loss": 0.1136, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3780709736123749, |
| "grad_norm": 1.2345527477299194, |
| "learning_rate": 4.9299622378173245e-06, |
| "loss": 0.0613, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.37852593266606005, |
| "grad_norm": 2.2369584057058693, |
| "learning_rate": 4.929794165293773e-06, |
| "loss": 0.1384, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.37898089171974525, |
| "grad_norm": 1.2980952577352378, |
| "learning_rate": 4.9296258942184145e-06, |
| "loss": 0.0889, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3794358507734304, |
| "grad_norm": 2.116237658876168, |
| "learning_rate": 4.929457424605e-06, |
| "loss": 0.1156, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.37989080982711554, |
| "grad_norm": 1.820103679143319, |
| "learning_rate": 4.929288756467296e-06, |
| "loss": 0.1224, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.38034576888080074, |
| "grad_norm": 1.6658306682266317, |
| "learning_rate": 4.929119889819086e-06, |
| "loss": 0.0871, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3808007279344859, |
| "grad_norm": 2.7831412779318128, |
| "learning_rate": 4.928950824674169e-06, |
| "loss": 0.1447, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3812556869881711, |
| "grad_norm": 1.460745158832598, |
| "learning_rate": 4.928781561046359e-06, |
| "loss": 0.0902, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3817106460418562, |
| "grad_norm": 1.544649379546627, |
| "learning_rate": 4.928612098949488e-06, |
| "loss": 0.0995, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 1.583411250445995, |
| "learning_rate": 4.9284424383974026e-06, |
| "loss": 0.1007, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38262056414922657, |
| "grad_norm": 1.2960669635575661, |
| "learning_rate": 4.928272579403969e-06, |
| "loss": 0.0679, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3830755232029117, |
| "grad_norm": 1.4865280371498417, |
| "learning_rate": 4.928102521983067e-06, |
| "loss": 0.1208, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3835304822565969, |
| "grad_norm": 2.1345090660254145, |
| "learning_rate": 4.9279322661485906e-06, |
| "loss": 0.1489, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.38398544131028206, |
| "grad_norm": 1.705469805887344, |
| "learning_rate": 4.927761811914455e-06, |
| "loss": 0.1084, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.38444040036396726, |
| "grad_norm": 1.358954041720105, |
| "learning_rate": 4.927591159294587e-06, |
| "loss": 0.0827, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3848953594176524, |
| "grad_norm": 1.8335314647218843, |
| "learning_rate": 4.927420308302933e-06, |
| "loss": 0.102, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3853503184713376, |
| "grad_norm": 1.710141204765745, |
| "learning_rate": 4.927249258953454e-06, |
| "loss": 0.1091, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.38580527752502275, |
| "grad_norm": 1.7784989569871608, |
| "learning_rate": 4.927078011260126e-06, |
| "loss": 0.1094, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3862602365787079, |
| "grad_norm": 1.9072996593932403, |
| "learning_rate": 4.926906565236943e-06, |
| "loss": 0.1255, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3867151956323931, |
| "grad_norm": 1.7435526255624214, |
| "learning_rate": 4.926734920897916e-06, |
| "loss": 0.1076, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38717015468607824, |
| "grad_norm": 1.3254342460194672, |
| "learning_rate": 4.926563078257071e-06, |
| "loss": 0.099, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.38762511373976344, |
| "grad_norm": 1.0985508710385608, |
| "learning_rate": 4.926391037328448e-06, |
| "loss": 0.0848, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3880800727934486, |
| "grad_norm": 1.6344858491886853, |
| "learning_rate": 4.926218798126108e-06, |
| "loss": 0.1102, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3885350318471338, |
| "grad_norm": 1.694464350768917, |
| "learning_rate": 4.926046360664124e-06, |
| "loss": 0.0868, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3889899909008189, |
| "grad_norm": 1.865189060623283, |
| "learning_rate": 4.925873724956588e-06, |
| "loss": 0.1152, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.38944494995450407, |
| "grad_norm": 1.794490671041637, |
| "learning_rate": 4.9257008910176065e-06, |
| "loss": 0.1443, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.38989990900818927, |
| "grad_norm": 1.6294296423553156, |
| "learning_rate": 4.925527858861302e-06, |
| "loss": 0.092, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3903548680618744, |
| "grad_norm": 1.7424555145921712, |
| "learning_rate": 4.925354628501814e-06, |
| "loss": 0.1002, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3908098271155596, |
| "grad_norm": 2.309513172607415, |
| "learning_rate": 4.925181199953299e-06, |
| "loss": 0.1288, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.39126478616924476, |
| "grad_norm": 1.3668641274774587, |
| "learning_rate": 4.9250075732299285e-06, |
| "loss": 0.0903, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39171974522292996, |
| "grad_norm": 1.7785057619158235, |
| "learning_rate": 4.92483374834589e-06, |
| "loss": 0.1181, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3921747042766151, |
| "grad_norm": 1.5234971151354315, |
| "learning_rate": 4.9246597253153884e-06, |
| "loss": 0.0935, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.39262966333030025, |
| "grad_norm": 1.1791645313929775, |
| "learning_rate": 4.924485504152644e-06, |
| "loss": 0.0822, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.39308462238398545, |
| "grad_norm": 1.5983057485508323, |
| "learning_rate": 4.924311084871892e-06, |
| "loss": 0.0966, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.3935395814376706, |
| "grad_norm": 1.6634965227764558, |
| "learning_rate": 4.924136467487387e-06, |
| "loss": 0.0759, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3939945404913558, |
| "grad_norm": 1.5231170961334706, |
| "learning_rate": 4.923961652013397e-06, |
| "loss": 0.0881, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.39444949954504094, |
| "grad_norm": 1.4495990250164725, |
| "learning_rate": 4.923786638464207e-06, |
| "loss": 0.0941, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.39490445859872614, |
| "grad_norm": 1.3390712595063252, |
| "learning_rate": 4.9236114268541196e-06, |
| "loss": 0.0846, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3953594176524113, |
| "grad_norm": 1.627122973701433, |
| "learning_rate": 4.923436017197451e-06, |
| "loss": 0.0819, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3958143767060964, |
| "grad_norm": 1.3377642278691055, |
| "learning_rate": 4.923260409508535e-06, |
| "loss": 0.088, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3962693357597816, |
| "grad_norm": 1.9694748985572026, |
| "learning_rate": 4.9230846038017214e-06, |
| "loss": 0.151, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.39672429481346677, |
| "grad_norm": 1.4923965061921258, |
| "learning_rate": 4.922908600091378e-06, |
| "loss": 0.0795, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.39717925386715197, |
| "grad_norm": 1.8057120373297069, |
| "learning_rate": 4.9227323983918835e-06, |
| "loss": 0.1439, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3976342129208371, |
| "grad_norm": 1.226146313826682, |
| "learning_rate": 4.922555998717639e-06, |
| "loss": 0.0845, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3980891719745223, |
| "grad_norm": 1.4188073442884932, |
| "learning_rate": 4.922379401083058e-06, |
| "loss": 0.0723, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39854413102820746, |
| "grad_norm": 1.6044422866063657, |
| "learning_rate": 4.922202605502573e-06, |
| "loss": 0.0981, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3989990900818926, |
| "grad_norm": 1.645096377490142, |
| "learning_rate": 4.922025611990629e-06, |
| "loss": 0.0882, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3994540491355778, |
| "grad_norm": 1.4988618969542298, |
| "learning_rate": 4.92184842056169e-06, |
| "loss": 0.0914, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.39990900818926295, |
| "grad_norm": 1.4716766649704647, |
| "learning_rate": 4.921671031230235e-06, |
| "loss": 0.0843, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.40036396724294815, |
| "grad_norm": 1.8151437273817552, |
| "learning_rate": 4.921493444010759e-06, |
| "loss": 0.1115, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4008189262966333, |
| "grad_norm": 1.3841092562389385, |
| "learning_rate": 4.921315658917774e-06, |
| "loss": 0.0821, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4012738853503185, |
| "grad_norm": 1.5281014710080694, |
| "learning_rate": 4.921137675965809e-06, |
| "loss": 0.0894, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.40172884440400364, |
| "grad_norm": 1.1860457913745353, |
| "learning_rate": 4.920959495169406e-06, |
| "loss": 0.0819, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4021838034576888, |
| "grad_norm": 1.9670434695091386, |
| "learning_rate": 4.920781116543126e-06, |
| "loss": 0.1198, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.402638762511374, |
| "grad_norm": 1.4837005110977715, |
| "learning_rate": 4.920602540101546e-06, |
| "loss": 0.0871, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4030937215650591, |
| "grad_norm": 1.8269163623820734, |
| "learning_rate": 4.920423765859257e-06, |
| "loss": 0.0956, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4035486806187443, |
| "grad_norm": 1.6998774179110374, |
| "learning_rate": 4.920244793830869e-06, |
| "loss": 0.0973, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.40400363967242947, |
| "grad_norm": 1.6596471546846747, |
| "learning_rate": 4.920065624031006e-06, |
| "loss": 0.1085, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.40445859872611467, |
| "grad_norm": 1.4077908132773769, |
| "learning_rate": 4.919886256474309e-06, |
| "loss": 0.0904, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4049135577797998, |
| "grad_norm": 1.7022215596121757, |
| "learning_rate": 4.919706691175435e-06, |
| "loss": 0.091, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40536851683348496, |
| "grad_norm": 2.1232813584307455, |
| "learning_rate": 4.919526928149058e-06, |
| "loss": 0.1366, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.40582347588717016, |
| "grad_norm": 1.6341211456957871, |
| "learning_rate": 4.919346967409867e-06, |
| "loss": 0.1108, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4062784349408553, |
| "grad_norm": 1.5324489468460818, |
| "learning_rate": 4.919166808972567e-06, |
| "loss": 0.1228, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4067333939945405, |
| "grad_norm": 2.099437608372934, |
| "learning_rate": 4.918986452851881e-06, |
| "loss": 0.1245, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.40718835304822565, |
| "grad_norm": 1.3588941988828955, |
| "learning_rate": 4.918805899062545e-06, |
| "loss": 0.0621, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.40764331210191085, |
| "grad_norm": 0.8277266375645331, |
| "learning_rate": 4.9186251476193146e-06, |
| "loss": 0.0499, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.408098271155596, |
| "grad_norm": 1.7852175335240448, |
| "learning_rate": 4.918444198536959e-06, |
| "loss": 0.1206, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.40855323020928114, |
| "grad_norm": 1.5382745011065326, |
| "learning_rate": 4.918263051830267e-06, |
| "loss": 0.1081, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.40900818926296634, |
| "grad_norm": 1.621296590196374, |
| "learning_rate": 4.918081707514037e-06, |
| "loss": 0.0881, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4094631483166515, |
| "grad_norm": 2.178092466242458, |
| "learning_rate": 4.917900165603091e-06, |
| "loss": 0.1364, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4099181073703367, |
| "grad_norm": 1.5880350908655525, |
| "learning_rate": 4.9177184261122624e-06, |
| "loss": 0.1073, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4103730664240218, |
| "grad_norm": 1.8483741427612825, |
| "learning_rate": 4.917536489056402e-06, |
| "loss": 0.0972, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.410828025477707, |
| "grad_norm": 1.5893537500919641, |
| "learning_rate": 4.9173543544503775e-06, |
| "loss": 0.0851, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.41128298453139217, |
| "grad_norm": 1.144493331243443, |
| "learning_rate": 4.917172022309072e-06, |
| "loss": 0.0637, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.41173794358507737, |
| "grad_norm": 1.139422632834299, |
| "learning_rate": 4.916989492647385e-06, |
| "loss": 0.065, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4121929026387625, |
| "grad_norm": 1.2858602055549935, |
| "learning_rate": 4.916806765480231e-06, |
| "loss": 0.079, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.41264786169244766, |
| "grad_norm": 1.9716514818564959, |
| "learning_rate": 4.9166238408225416e-06, |
| "loss": 0.161, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.41310282074613286, |
| "grad_norm": 1.6206512831659239, |
| "learning_rate": 4.916440718689267e-06, |
| "loss": 0.0958, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.413557779799818, |
| "grad_norm": 1.2472167749456646, |
| "learning_rate": 4.916257399095369e-06, |
| "loss": 0.0705, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4140127388535032, |
| "grad_norm": 1.1891048303298737, |
| "learning_rate": 4.916073882055827e-06, |
| "loss": 0.0671, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41446769790718835, |
| "grad_norm": 1.9533245506572903, |
| "learning_rate": 4.91589016758564e-06, |
| "loss": 0.1203, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.41492265696087355, |
| "grad_norm": 1.7223916244259532, |
| "learning_rate": 4.915706255699817e-06, |
| "loss": 0.1171, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4153776160145587, |
| "grad_norm": 2.042050502050582, |
| "learning_rate": 4.915522146413389e-06, |
| "loss": 0.152, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.41583257506824384, |
| "grad_norm": 1.5213892799482642, |
| "learning_rate": 4.9153378397413985e-06, |
| "loss": 0.1011, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.41628753412192904, |
| "grad_norm": 1.8893914267841023, |
| "learning_rate": 4.915153335698908e-06, |
| "loss": 0.1133, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4167424931756142, |
| "grad_norm": 1.7882796521112458, |
| "learning_rate": 4.914968634300994e-06, |
| "loss": 0.1081, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4171974522292994, |
| "grad_norm": 1.186974851727905, |
| "learning_rate": 4.914783735562748e-06, |
| "loss": 0.0791, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4176524112829845, |
| "grad_norm": 1.3276822787818023, |
| "learning_rate": 4.914598639499281e-06, |
| "loss": 0.0929, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4181073703366697, |
| "grad_norm": 1.3143453344689244, |
| "learning_rate": 4.914413346125717e-06, |
| "loss": 0.0907, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.41856232939035487, |
| "grad_norm": 1.2706441279848544, |
| "learning_rate": 4.914227855457199e-06, |
| "loss": 0.0797, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41901728844404, |
| "grad_norm": 1.8437493208675002, |
| "learning_rate": 4.914042167508881e-06, |
| "loss": 0.0851, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4194722474977252, |
| "grad_norm": 1.4975873837594447, |
| "learning_rate": 4.9138562822959416e-06, |
| "loss": 0.0735, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.41992720655141036, |
| "grad_norm": 1.8590378932388973, |
| "learning_rate": 4.913670199833566e-06, |
| "loss": 0.0955, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.42038216560509556, |
| "grad_norm": 1.6110342357827778, |
| "learning_rate": 4.913483920136961e-06, |
| "loss": 0.0904, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4208371246587807, |
| "grad_norm": 1.761284240310015, |
| "learning_rate": 4.91329744322135e-06, |
| "loss": 0.0967, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4212920837124659, |
| "grad_norm": 1.3709410104557458, |
| "learning_rate": 4.913110769101971e-06, |
| "loss": 0.0872, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.42174704276615105, |
| "grad_norm": 1.6539854986144262, |
| "learning_rate": 4.912923897794077e-06, |
| "loss": 0.0982, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4222020018198362, |
| "grad_norm": 1.6465498130671066, |
| "learning_rate": 4.912736829312938e-06, |
| "loss": 0.1093, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4226569608735214, |
| "grad_norm": 1.8873864205133448, |
| "learning_rate": 4.912549563673842e-06, |
| "loss": 0.1239, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.42311191992720654, |
| "grad_norm": 1.5496708014603886, |
| "learning_rate": 4.912362100892091e-06, |
| "loss": 0.1273, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42356687898089174, |
| "grad_norm": 1.1519662533075623, |
| "learning_rate": 4.912174440983002e-06, |
| "loss": 0.0729, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4240218380345769, |
| "grad_norm": 1.6674274772885138, |
| "learning_rate": 4.911986583961912e-06, |
| "loss": 0.1107, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4244767970882621, |
| "grad_norm": 1.8943327104641587, |
| "learning_rate": 4.91179852984417e-06, |
| "loss": 0.0989, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4249317561419472, |
| "grad_norm": 1.3387420389544245, |
| "learning_rate": 4.911610278645144e-06, |
| "loss": 0.0873, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.42538671519563237, |
| "grad_norm": 1.3086866571732974, |
| "learning_rate": 4.911421830380217e-06, |
| "loss": 0.0767, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42584167424931757, |
| "grad_norm": 2.04544186641041, |
| "learning_rate": 4.911233185064788e-06, |
| "loss": 0.1285, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4262966333030027, |
| "grad_norm": 1.6906012723967403, |
| "learning_rate": 4.911044342714272e-06, |
| "loss": 0.0997, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4267515923566879, |
| "grad_norm": 1.439162135385858, |
| "learning_rate": 4.9108553033440995e-06, |
| "loss": 0.0744, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.42720655141037306, |
| "grad_norm": 1.2593154408057343, |
| "learning_rate": 4.91066606696972e-06, |
| "loss": 0.074, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.42766151046405826, |
| "grad_norm": 1.7514521824191083, |
| "learning_rate": 4.910476633606597e-06, |
| "loss": 0.0971, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4281164695177434, |
| "grad_norm": 1.5625231909908295, |
| "learning_rate": 4.9102870032702075e-06, |
| "loss": 0.0689, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.5194579023544843, |
| "learning_rate": 4.910097175976049e-06, |
| "loss": 0.0824, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.42902638762511375, |
| "grad_norm": 1.4223453649486908, |
| "learning_rate": 4.909907151739634e-06, |
| "loss": 0.0747, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4294813466787989, |
| "grad_norm": 2.2121264200483393, |
| "learning_rate": 4.909716930576489e-06, |
| "loss": 0.1463, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4299363057324841, |
| "grad_norm": 1.5012792406542972, |
| "learning_rate": 4.909526512502158e-06, |
| "loss": 0.1241, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.43039126478616924, |
| "grad_norm": 1.6714102508168673, |
| "learning_rate": 4.9093358975322025e-06, |
| "loss": 0.1045, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.43084622383985444, |
| "grad_norm": 1.5613346147429912, |
| "learning_rate": 4.909145085682198e-06, |
| "loss": 0.1105, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4313011828935396, |
| "grad_norm": 1.4864622392832871, |
| "learning_rate": 4.908954076967737e-06, |
| "loss": 0.0831, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4317561419472247, |
| "grad_norm": 1.5530391149425158, |
| "learning_rate": 4.908762871404427e-06, |
| "loss": 0.1345, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4322111010009099, |
| "grad_norm": 1.5444429676980205, |
| "learning_rate": 4.908571469007893e-06, |
| "loss": 0.0886, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43266606005459507, |
| "grad_norm": 1.8034818342216412, |
| "learning_rate": 4.908379869793776e-06, |
| "loss": 0.1046, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.43312101910828027, |
| "grad_norm": 1.3153452614362922, |
| "learning_rate": 4.908188073777732e-06, |
| "loss": 0.0715, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4335759781619654, |
| "grad_norm": 2.0825682650521857, |
| "learning_rate": 4.9079960809754334e-06, |
| "loss": 0.135, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4340309372156506, |
| "grad_norm": 1.3431541090651076, |
| "learning_rate": 4.90780389140257e-06, |
| "loss": 0.0812, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.43448589626933576, |
| "grad_norm": 2.018134282960315, |
| "learning_rate": 4.907611505074846e-06, |
| "loss": 0.1001, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4349408553230209, |
| "grad_norm": 1.8270847906398506, |
| "learning_rate": 4.907418922007983e-06, |
| "loss": 0.1054, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4353958143767061, |
| "grad_norm": 1.5502670619333374, |
| "learning_rate": 4.907226142217717e-06, |
| "loss": 0.0832, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.43585077343039125, |
| "grad_norm": 1.5099564094926066, |
| "learning_rate": 4.9070331657198015e-06, |
| "loss": 0.093, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.43630573248407645, |
| "grad_norm": 1.6580816557213998, |
| "learning_rate": 4.906839992530006e-06, |
| "loss": 0.1133, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4367606915377616, |
| "grad_norm": 1.9468112171012433, |
| "learning_rate": 4.906646622664115e-06, |
| "loss": 0.1122, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4372156505914468, |
| "grad_norm": 1.3246750710377195, |
| "learning_rate": 4.906453056137931e-06, |
| "loss": 0.0572, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.43767060964513194, |
| "grad_norm": 2.1577598041780846, |
| "learning_rate": 4.90625929296727e-06, |
| "loss": 0.1419, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4381255686988171, |
| "grad_norm": 1.3649728107391488, |
| "learning_rate": 4.9060653331679665e-06, |
| "loss": 0.1026, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4385805277525023, |
| "grad_norm": 1.7954750394301047, |
| "learning_rate": 4.90587117675587e-06, |
| "loss": 0.124, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4390354868061874, |
| "grad_norm": 1.6192897762023186, |
| "learning_rate": 4.905676823746846e-06, |
| "loss": 0.102, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4394904458598726, |
| "grad_norm": 1.183156466195084, |
| "learning_rate": 4.9054822741567745e-06, |
| "loss": 0.0741, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.43994540491355777, |
| "grad_norm": 1.791057313794206, |
| "learning_rate": 4.905287528001555e-06, |
| "loss": 0.0986, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.44040036396724297, |
| "grad_norm": 1.5587372758795195, |
| "learning_rate": 4.905092585297102e-06, |
| "loss": 0.0959, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4408553230209281, |
| "grad_norm": 1.9086814389692623, |
| "learning_rate": 4.904897446059344e-06, |
| "loss": 0.1124, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.44131028207461326, |
| "grad_norm": 1.5518685718016205, |
| "learning_rate": 4.9047021103042255e-06, |
| "loss": 0.0802, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.44176524112829846, |
| "grad_norm": 1.5626634869227398, |
| "learning_rate": 4.904506578047712e-06, |
| "loss": 0.0966, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4422202001819836, |
| "grad_norm": 1.6777151282946248, |
| "learning_rate": 4.9043108493057785e-06, |
| "loss": 0.0946, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4426751592356688, |
| "grad_norm": 1.3918546303467518, |
| "learning_rate": 4.904114924094421e-06, |
| "loss": 0.0776, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.44313011828935395, |
| "grad_norm": 1.7054781101293177, |
| "learning_rate": 4.903918802429648e-06, |
| "loss": 0.1076, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.44358507734303915, |
| "grad_norm": 0.9435161970580179, |
| "learning_rate": 4.9037224843274875e-06, |
| "loss": 0.055, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4440400363967243, |
| "grad_norm": 1.8279732096534727, |
| "learning_rate": 4.903525969803979e-06, |
| "loss": 0.144, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.44449499545040944, |
| "grad_norm": 1.5827975534285916, |
| "learning_rate": 4.903329258875184e-06, |
| "loss": 0.0876, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.44494995450409464, |
| "grad_norm": 1.5817514212508765, |
| "learning_rate": 4.903132351557175e-06, |
| "loss": 0.1003, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4454049135577798, |
| "grad_norm": 1.55794858043461, |
| "learning_rate": 4.902935247866043e-06, |
| "loss": 0.0901, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 1.7648097170403771, |
| "learning_rate": 4.9027379478178935e-06, |
| "loss": 0.1117, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4463148316651501, |
| "grad_norm": 1.4493752053158233, |
| "learning_rate": 4.90254045142885e-06, |
| "loss": 0.0824, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4467697907188353, |
| "grad_norm": 1.4618354488172722, |
| "learning_rate": 4.90234275871505e-06, |
| "loss": 0.08, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.44722474977252047, |
| "grad_norm": 2.314057245131694, |
| "learning_rate": 4.9021448696926486e-06, |
| "loss": 0.1437, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.44767970882620567, |
| "grad_norm": 1.2365214796695643, |
| "learning_rate": 4.901946784377816e-06, |
| "loss": 0.0955, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4481346678798908, |
| "grad_norm": 1.2633152164234291, |
| "learning_rate": 4.90174850278674e-06, |
| "loss": 0.0803, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.44858962693357596, |
| "grad_norm": 1.5083171008818446, |
| "learning_rate": 4.901550024935623e-06, |
| "loss": 0.0942, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.44904458598726116, |
| "grad_norm": 1.1583463791947812, |
| "learning_rate": 4.901351350840683e-06, |
| "loss": 0.0786, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4494995450409463, |
| "grad_norm": 1.343367085202188, |
| "learning_rate": 4.901152480518155e-06, |
| "loss": 0.0724, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4499545040946315, |
| "grad_norm": 1.1159650914918346, |
| "learning_rate": 4.900953413984289e-06, |
| "loss": 0.0681, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.45040946314831665, |
| "grad_norm": 2.0950998044271025, |
| "learning_rate": 4.900754151255353e-06, |
| "loss": 0.1541, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.45086442220200185, |
| "grad_norm": 1.4260341278646986, |
| "learning_rate": 4.9005546923476305e-06, |
| "loss": 0.0707, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.451319381255687, |
| "grad_norm": 1.6502415030386688, |
| "learning_rate": 4.9003550372774185e-06, |
| "loss": 0.1111, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.45177434030937214, |
| "grad_norm": 1.280806174818392, |
| "learning_rate": 4.900155186061033e-06, |
| "loss": 0.0789, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.45222929936305734, |
| "grad_norm": 1.9745186799391785, |
| "learning_rate": 4.8999551387148045e-06, |
| "loss": 0.1125, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4526842584167425, |
| "grad_norm": 1.2542781615680096, |
| "learning_rate": 4.89975489525508e-06, |
| "loss": 0.0814, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4531392174704277, |
| "grad_norm": 1.5218729573521388, |
| "learning_rate": 4.899554455698223e-06, |
| "loss": 0.0849, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4535941765241128, |
| "grad_norm": 1.4911465655176248, |
| "learning_rate": 4.899353820060612e-06, |
| "loss": 0.0887, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.454049135577798, |
| "grad_norm": 1.8552177664529743, |
| "learning_rate": 4.899152988358643e-06, |
| "loss": 0.1153, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.45450409463148317, |
| "grad_norm": 1.3462289694693903, |
| "learning_rate": 4.898951960608725e-06, |
| "loss": 0.0768, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4549590536851683, |
| "grad_norm": 1.5105165626051191, |
| "learning_rate": 4.8987507368272865e-06, |
| "loss": 0.0916, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4554140127388535, |
| "grad_norm": 1.7874012401425645, |
| "learning_rate": 4.898549317030772e-06, |
| "loss": 0.1228, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.45586897179253866, |
| "grad_norm": 1.8678564128703685, |
| "learning_rate": 4.898347701235637e-06, |
| "loss": 0.1226, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.45632393084622386, |
| "grad_norm": 1.9367180322034927, |
| "learning_rate": 4.89814588945836e-06, |
| "loss": 0.1239, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.456778889899909, |
| "grad_norm": 1.8462049373063074, |
| "learning_rate": 4.89794388171543e-06, |
| "loss": 0.1106, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4572338489535942, |
| "grad_norm": 1.7977459529642075, |
| "learning_rate": 4.897741678023356e-06, |
| "loss": 0.1137, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.45768880800727935, |
| "grad_norm": 1.4317415496884898, |
| "learning_rate": 4.897539278398659e-06, |
| "loss": 0.0835, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4581437670609645, |
| "grad_norm": 1.947224769167489, |
| "learning_rate": 4.8973366828578804e-06, |
| "loss": 0.1087, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4585987261146497, |
| "grad_norm": 1.6840082807319827, |
| "learning_rate": 4.897133891417574e-06, |
| "loss": 0.1004, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.45905368516833484, |
| "grad_norm": 1.6722996299672828, |
| "learning_rate": 4.896930904094311e-06, |
| "loss": 0.0869, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.45950864422202004, |
| "grad_norm": 2.2431321251776986, |
| "learning_rate": 4.896727720904679e-06, |
| "loss": 0.121, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4599636032757052, |
| "grad_norm": 1.2761704386307018, |
| "learning_rate": 4.896524341865282e-06, |
| "loss": 0.0736, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4604185623293904, |
| "grad_norm": 1.6413390038739506, |
| "learning_rate": 4.896320766992737e-06, |
| "loss": 0.1286, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4608735213830755, |
| "grad_norm": 1.5251335582402008, |
| "learning_rate": 4.896116996303682e-06, |
| "loss": 0.0989, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.46132848043676067, |
| "grad_norm": 1.8038369878473837, |
| "learning_rate": 4.895913029814766e-06, |
| "loss": 0.097, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.46178343949044587, |
| "grad_norm": 2.012861641550116, |
| "learning_rate": 4.895708867542658e-06, |
| "loss": 0.1111, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.462238398544131, |
| "grad_norm": 1.7366035889417508, |
| "learning_rate": 4.895504509504039e-06, |
| "loss": 0.1029, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4626933575978162, |
| "grad_norm": 1.3763665767496873, |
| "learning_rate": 4.89529995571561e-06, |
| "loss": 0.0938, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.46314831665150136, |
| "grad_norm": 1.6906151679744952, |
| "learning_rate": 4.895095206194086e-06, |
| "loss": 0.1085, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.46360327570518656, |
| "grad_norm": 1.5053749521419235, |
| "learning_rate": 4.894890260956198e-06, |
| "loss": 0.0884, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4640582347588717, |
| "grad_norm": 1.5334372638839222, |
| "learning_rate": 4.8946851200186925e-06, |
| "loss": 0.1015, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46451319381255685, |
| "grad_norm": 1.576638091265577, |
| "learning_rate": 4.894479783398334e-06, |
| "loss": 0.0903, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.46496815286624205, |
| "grad_norm": 1.7368682352331435, |
| "learning_rate": 4.8942742511119004e-06, |
| "loss": 0.1029, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4654231119199272, |
| "grad_norm": 3.9669130222003455, |
| "learning_rate": 4.894068523176187e-06, |
| "loss": 0.2383, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4658780709736124, |
| "grad_norm": 1.5974114766744798, |
| "learning_rate": 4.8938625996080056e-06, |
| "loss": 0.1116, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.46633303002729753, |
| "grad_norm": 1.1252846797063132, |
| "learning_rate": 4.893656480424184e-06, |
| "loss": 0.0673, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46678798908098273, |
| "grad_norm": 1.5329254322284862, |
| "learning_rate": 4.893450165641564e-06, |
| "loss": 0.1066, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4672429481346679, |
| "grad_norm": 1.3116647286111784, |
| "learning_rate": 4.893243655277005e-06, |
| "loss": 0.086, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.467697907188353, |
| "grad_norm": 1.5621452726926597, |
| "learning_rate": 4.893036949347383e-06, |
| "loss": 0.0937, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4681528662420382, |
| "grad_norm": 1.44299341979305, |
| "learning_rate": 4.892830047869588e-06, |
| "loss": 0.0922, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.46860782529572337, |
| "grad_norm": 1.2004173985623205, |
| "learning_rate": 4.892622950860527e-06, |
| "loss": 0.0545, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46906278434940857, |
| "grad_norm": 1.2933675353670258, |
| "learning_rate": 4.892415658337123e-06, |
| "loss": 0.0938, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4695177434030937, |
| "grad_norm": 1.3899639516557423, |
| "learning_rate": 4.892208170316317e-06, |
| "loss": 0.0807, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4699727024567789, |
| "grad_norm": 1.2103198454795117, |
| "learning_rate": 4.892000486815062e-06, |
| "loss": 0.0724, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.47042766151046406, |
| "grad_norm": 1.4625912187815495, |
| "learning_rate": 4.891792607850328e-06, |
| "loss": 0.0944, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4708826205641492, |
| "grad_norm": 2.3778377956475074, |
| "learning_rate": 4.891584533439104e-06, |
| "loss": 0.1301, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4713375796178344, |
| "grad_norm": 1.6240877825800288, |
| "learning_rate": 4.891376263598393e-06, |
| "loss": 0.1056, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.47179253867151955, |
| "grad_norm": 1.377205820937822, |
| "learning_rate": 4.891167798345213e-06, |
| "loss": 0.0879, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.47224749772520475, |
| "grad_norm": 1.918358313853146, |
| "learning_rate": 4.890959137696598e-06, |
| "loss": 0.1218, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4727024567788899, |
| "grad_norm": 1.9802948601827106, |
| "learning_rate": 4.890750281669601e-06, |
| "loss": 0.0966, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4731574158325751, |
| "grad_norm": 1.209426799273833, |
| "learning_rate": 4.890541230281287e-06, |
| "loss": 0.0687, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47361237488626023, |
| "grad_norm": 1.714672711362897, |
| "learning_rate": 4.8903319835487385e-06, |
| "loss": 0.1119, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4740673339399454, |
| "grad_norm": 1.8426958086935912, |
| "learning_rate": 4.890122541489056e-06, |
| "loss": 0.1071, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4745222929936306, |
| "grad_norm": 1.5412332450392434, |
| "learning_rate": 4.889912904119353e-06, |
| "loss": 0.1194, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4749772520473157, |
| "grad_norm": 1.5900743055736573, |
| "learning_rate": 4.88970307145676e-06, |
| "loss": 0.0905, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4754322111010009, |
| "grad_norm": 1.299438309320783, |
| "learning_rate": 4.889493043518423e-06, |
| "loss": 0.0782, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.47588717015468607, |
| "grad_norm": 1.2775434133946648, |
| "learning_rate": 4.889282820321506e-06, |
| "loss": 0.067, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.47634212920837127, |
| "grad_norm": 2.0181187729173313, |
| "learning_rate": 4.889072401883187e-06, |
| "loss": 0.1039, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4767970882620564, |
| "grad_norm": 1.3673144633984753, |
| "learning_rate": 4.88886178822066e-06, |
| "loss": 0.0871, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.47725204731574156, |
| "grad_norm": 1.5512598399498212, |
| "learning_rate": 4.888650979351136e-06, |
| "loss": 0.0936, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.47770700636942676, |
| "grad_norm": 1.8862924775266208, |
| "learning_rate": 4.888439975291841e-06, |
| "loss": 0.149, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4781619654231119, |
| "grad_norm": 1.527860807788029, |
| "learning_rate": 4.888228776060017e-06, |
| "loss": 0.0981, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4786169244767971, |
| "grad_norm": 1.635801739367282, |
| "learning_rate": 4.888017381672923e-06, |
| "loss": 0.1004, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.47907188353048225, |
| "grad_norm": 1.496869794404093, |
| "learning_rate": 4.887805792147832e-06, |
| "loss": 0.0921, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.47952684258416745, |
| "grad_norm": 1.729233289880027, |
| "learning_rate": 4.887594007502036e-06, |
| "loss": 0.089, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4799818016378526, |
| "grad_norm": 1.9599768924005974, |
| "learning_rate": 4.887382027752838e-06, |
| "loss": 0.1029, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.48043676069153773, |
| "grad_norm": 1.6584360062505734, |
| "learning_rate": 4.8871698529175636e-06, |
| "loss": 0.1173, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.48089171974522293, |
| "grad_norm": 1.631421092772313, |
| "learning_rate": 4.886957483013549e-06, |
| "loss": 0.1231, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4813466787989081, |
| "grad_norm": 2.3766899063373996, |
| "learning_rate": 4.886744918058149e-06, |
| "loss": 0.13, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4818016378525933, |
| "grad_norm": 1.7346716794855597, |
| "learning_rate": 4.886532158068732e-06, |
| "loss": 0.0938, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4822565969062784, |
| "grad_norm": 1.5214305907929453, |
| "learning_rate": 4.886319203062683e-06, |
| "loss": 0.0761, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4827115559599636, |
| "grad_norm": 1.6073102647133055, |
| "learning_rate": 4.886106053057408e-06, |
| "loss": 0.0818, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.48316651501364877, |
| "grad_norm": 1.803380712114119, |
| "learning_rate": 4.88589270807032e-06, |
| "loss": 0.1231, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.48362147406733397, |
| "grad_norm": 1.5275199982317587, |
| "learning_rate": 4.885679168118855e-06, |
| "loss": 0.1105, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4840764331210191, |
| "grad_norm": 1.8472965185652206, |
| "learning_rate": 4.8854654332204635e-06, |
| "loss": 0.1324, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.48453139217470426, |
| "grad_norm": 1.41701925154465, |
| "learning_rate": 4.885251503392607e-06, |
| "loss": 0.0767, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48498635122838946, |
| "grad_norm": 2.00437974621472, |
| "learning_rate": 4.885037378652771e-06, |
| "loss": 0.1336, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4854413102820746, |
| "grad_norm": 1.4895968911800157, |
| "learning_rate": 4.884823059018451e-06, |
| "loss": 0.0726, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4858962693357598, |
| "grad_norm": 1.5673178312119351, |
| "learning_rate": 4.88460854450716e-06, |
| "loss": 0.0843, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.48635122838944495, |
| "grad_norm": 1.1450505304026162, |
| "learning_rate": 4.884393835136427e-06, |
| "loss": 0.073, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.48680618744313015, |
| "grad_norm": 1.5223195045028948, |
| "learning_rate": 4.884178930923799e-06, |
| "loss": 0.0823, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4872611464968153, |
| "grad_norm": 1.912651615279676, |
| "learning_rate": 4.883963831886834e-06, |
| "loss": 0.0989, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.48771610555050043, |
| "grad_norm": 1.6904540179044927, |
| "learning_rate": 4.8837485380431115e-06, |
| "loss": 0.0981, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.48817106460418563, |
| "grad_norm": 1.4559744514600277, |
| "learning_rate": 4.883533049410223e-06, |
| "loss": 0.0874, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4886260236578708, |
| "grad_norm": 1.9041018278788933, |
| "learning_rate": 4.8833173660057785e-06, |
| "loss": 0.1065, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.489080982711556, |
| "grad_norm": 1.582657768337463, |
| "learning_rate": 4.8831014878474004e-06, |
| "loss": 0.0993, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4895359417652411, |
| "grad_norm": 1.487895945323618, |
| "learning_rate": 4.882885414952732e-06, |
| "loss": 0.0887, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4899909008189263, |
| "grad_norm": 1.1105199391014717, |
| "learning_rate": 4.882669147339428e-06, |
| "loss": 0.0521, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.49044585987261147, |
| "grad_norm": 1.3448385373486804, |
| "learning_rate": 4.882452685025161e-06, |
| "loss": 0.0606, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4909008189262966, |
| "grad_norm": 1.9169790386878416, |
| "learning_rate": 4.88223602802762e-06, |
| "loss": 0.1103, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4913557779799818, |
| "grad_norm": 1.4350936971881065, |
| "learning_rate": 4.882019176364509e-06, |
| "loss": 0.1052, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.49181073703366696, |
| "grad_norm": 1.9005260167330429, |
| "learning_rate": 4.881802130053548e-06, |
| "loss": 0.1217, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.49226569608735216, |
| "grad_norm": 1.4814940279383466, |
| "learning_rate": 4.881584889112473e-06, |
| "loss": 0.079, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4927206551410373, |
| "grad_norm": 1.7134074599855604, |
| "learning_rate": 4.881367453559036e-06, |
| "loss": 0.1025, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4931756141947225, |
| "grad_norm": 1.2847311247280295, |
| "learning_rate": 4.881149823411005e-06, |
| "loss": 0.0587, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.49363057324840764, |
| "grad_norm": 1.196984822353409, |
| "learning_rate": 4.880931998686162e-06, |
| "loss": 0.0779, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4940855323020928, |
| "grad_norm": 2.247552936990941, |
| "learning_rate": 4.880713979402311e-06, |
| "loss": 0.1534, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.494540491355778, |
| "grad_norm": 2.5523444538687645, |
| "learning_rate": 4.880495765577263e-06, |
| "loss": 0.146, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.49499545040946313, |
| "grad_norm": 1.7690099480339412, |
| "learning_rate": 4.880277357228852e-06, |
| "loss": 0.084, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.49545040946314833, |
| "grad_norm": 1.2117156565437108, |
| "learning_rate": 4.880058754374923e-06, |
| "loss": 0.0833, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4959053685168335, |
| "grad_norm": 1.5484757487864966, |
| "learning_rate": 4.879839957033343e-06, |
| "loss": 0.0938, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4963603275705187, |
| "grad_norm": 1.5534223234923523, |
| "learning_rate": 4.879620965221987e-06, |
| "loss": 0.09, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4968152866242038, |
| "grad_norm": 1.3405465803260945, |
| "learning_rate": 4.879401778958755e-06, |
| "loss": 0.0784, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.49727024567788897, |
| "grad_norm": 1.3343510524547628, |
| "learning_rate": 4.8791823982615525e-06, |
| "loss": 0.064, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.49772520473157417, |
| "grad_norm": 1.2315640234775116, |
| "learning_rate": 4.878962823148308e-06, |
| "loss": 0.067, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4981801637852593, |
| "grad_norm": 1.654273388728327, |
| "learning_rate": 4.878743053636968e-06, |
| "loss": 0.0964, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4986351228389445, |
| "grad_norm": 1.3344367681027707, |
| "learning_rate": 4.878523089745485e-06, |
| "loss": 0.0865, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.49909008189262966, |
| "grad_norm": 1.0737534169537484, |
| "learning_rate": 4.878302931491837e-06, |
| "loss": 0.0722, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.49954504094631486, |
| "grad_norm": 1.2217058614506033, |
| "learning_rate": 4.8780825788940145e-06, |
| "loss": 0.0531, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.765512273684173, |
| "learning_rate": 4.877862031970023e-06, |
| "loss": 0.1016, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5004549590536852, |
| "grad_norm": 2.1360497116346444, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.1095, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5009099181073703, |
| "grad_norm": 1.5928570797543171, |
| "learning_rate": 4.877420355215637e-06, |
| "loss": 0.0909, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5013648771610555, |
| "grad_norm": 1.9221830556747463, |
| "learning_rate": 4.877199225421334e-06, |
| "loss": 0.123, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5018198362147407, |
| "grad_norm": 1.967973587212139, |
| "learning_rate": 4.8769779013730454e-06, |
| "loss": 0.1535, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5022747952684259, |
| "grad_norm": 2.02512821365078, |
| "learning_rate": 4.876756383088858e-06, |
| "loss": 0.1173, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.502729754322111, |
| "grad_norm": 1.3904167109659709, |
| "learning_rate": 4.876534670586872e-06, |
| "loss": 0.0839, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5031847133757962, |
| "grad_norm": 1.4435165077122623, |
| "learning_rate": 4.8763127638852045e-06, |
| "loss": 0.0924, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5036396724294814, |
| "grad_norm": 1.7029448773247835, |
| "learning_rate": 4.87609066300199e-06, |
| "loss": 0.1076, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5040946314831665, |
| "grad_norm": 1.750067106251082, |
| "learning_rate": 4.875868367955376e-06, |
| "loss": 0.1077, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5045495905368517, |
| "grad_norm": 1.9748651822243342, |
| "learning_rate": 4.87564587876353e-06, |
| "loss": 0.1294, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5050045495905369, |
| "grad_norm": 1.7656971074259822, |
| "learning_rate": 4.87542319544463e-06, |
| "loss": 0.0974, |
| "step": 1110 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7279902056448.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
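
A minimal sketch of how a state file like the one above could be consumed, assuming it is saved under the Trainer's conventional name `trainer_state.json` in the checkpoint directory (the path and the printed summary are illustrative, not part of the original file):

```python
# Sketch: load the trainer state above and summarize the logged loss curve.
# Assumes the file is named "trainer_state.json"; adjust the path as needed.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# "log_history" holds one dict per logged optimizer step.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]

# Run geometry implied by the fields above:
# max_steps / num_train_epochs = 10990 / 5 = 2198 steps per epoch,
# so step 1099 lands at epoch 0.5, matching the logged entries.
steps_per_epoch = state["max_steps"] / state["num_train_epochs"]

print(f"logged points:    {len(logs)}")
print(f"steps per epoch:  {steps_per_epoch:.0f}")
print(f"last logged step: {steps[-1]} (loss {losses[-1]})")
```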