| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 894, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0011185682326621924, | |
| "grad_norm": 4.768556118011475, | |
| "learning_rate": 4.99998456401763e-05, | |
| "loss": 4.817, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0022371364653243847, | |
| "grad_norm": 11.73538875579834, | |
| "learning_rate": 4.9999382562611344e-05, | |
| "loss": 4.6414, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003355704697986577, | |
| "grad_norm": 4.775832176208496, | |
| "learning_rate": 4.999861077302358e-05, | |
| "loss": 4.6803, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0044742729306487695, | |
| "grad_norm": 7.621553421020508, | |
| "learning_rate": 4.9997530280943684e-05, | |
| "loss": 4.8222, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005592841163310962, | |
| "grad_norm": 4.997500896453857, | |
| "learning_rate": 4.9996141099714405e-05, | |
| "loss": 4.581, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006711409395973154, | |
| "grad_norm": 4.785826683044434, | |
| "learning_rate": 4.999444324649045e-05, | |
| "loss": 4.4922, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007829977628635347, | |
| "grad_norm": 6.647776126861572, | |
| "learning_rate": 4.999243674223826e-05, | |
| "loss": 4.8455, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008948545861297539, | |
| "grad_norm": 6.596885681152344, | |
| "learning_rate": 4.9990121611735704e-05, | |
| "loss": 4.5977, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.010067114093959731, | |
| "grad_norm": 3.9681015014648438, | |
| "learning_rate": 4.998749788357184e-05, | |
| "loss": 4.5543, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.011185682326621925, | |
| "grad_norm": 3.7588727474212646, | |
| "learning_rate": 4.998456559014653e-05, | |
| "loss": 4.5815, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012304250559284116, | |
| "grad_norm": 4.819014072418213, | |
| "learning_rate": 4.9981324767670034e-05, | |
| "loss": 4.5079, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013422818791946308, | |
| "grad_norm": 3.4477319717407227, | |
| "learning_rate": 4.997777545616258e-05, | |
| "loss": 4.299, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0145413870246085, | |
| "grad_norm": 4.113775730133057, | |
| "learning_rate": 4.997391769945385e-05, | |
| "loss": 4.3105, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.015659955257270694, | |
| "grad_norm": 3.848585844039917, | |
| "learning_rate": 4.996975154518245e-05, | |
| "loss": 4.4019, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.016778523489932886, | |
| "grad_norm": 3.787041425704956, | |
| "learning_rate": 4.996527704479535e-05, | |
| "loss": 4.4983, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.017897091722595078, | |
| "grad_norm": 4.073084354400635, | |
| "learning_rate": 4.996049425354718e-05, | |
| "loss": 4.4303, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01901565995525727, | |
| "grad_norm": 3.8887457847595215, | |
| "learning_rate": 4.99554032304996e-05, | |
| "loss": 4.3752, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.020134228187919462, | |
| "grad_norm": 4.539124011993408, | |
| "learning_rate": 4.995000403852057e-05, | |
| "loss": 4.4398, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.021252796420581657, | |
| "grad_norm": 4.554653167724609, | |
| "learning_rate": 4.994429674428356e-05, | |
| "loss": 4.5068, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02237136465324385, | |
| "grad_norm": 4.2434892654418945, | |
| "learning_rate": 4.9938281418266717e-05, | |
| "loss": 4.2128, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02348993288590604, | |
| "grad_norm": 3.5199990272521973, | |
| "learning_rate": 4.993195813475202e-05, | |
| "loss": 4.3802, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.024608501118568233, | |
| "grad_norm": 4.401162624359131, | |
| "learning_rate": 4.9925326971824345e-05, | |
| "loss": 4.3791, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.025727069351230425, | |
| "grad_norm": 3.330929756164551, | |
| "learning_rate": 4.9918388011370496e-05, | |
| "loss": 4.3586, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.026845637583892617, | |
| "grad_norm": 3.9935824871063232, | |
| "learning_rate": 4.9911141339078215e-05, | |
| "loss": 4.4506, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.02796420581655481, | |
| "grad_norm": 5.406892776489258, | |
| "learning_rate": 4.990358704443511e-05, | |
| "loss": 4.1189, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.029082774049217, | |
| "grad_norm": 3.9377341270446777, | |
| "learning_rate": 4.989572522072753e-05, | |
| "loss": 4.4841, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.030201342281879196, | |
| "grad_norm": 3.408912181854248, | |
| "learning_rate": 4.988755596503948e-05, | |
| "loss": 4.4523, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03131991051454139, | |
| "grad_norm": 3.9062108993530273, | |
| "learning_rate": 4.987907937825133e-05, | |
| "loss": 4.2521, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03243847874720358, | |
| "grad_norm": 3.1871280670166016, | |
| "learning_rate": 4.987029556503864e-05, | |
| "loss": 3.9971, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.03355704697986577, | |
| "grad_norm": 3.458665609359741, | |
| "learning_rate": 4.986120463387084e-05, | |
| "loss": 4.3549, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03467561521252797, | |
| "grad_norm": 3.2417380809783936, | |
| "learning_rate": 4.985180669700989e-05, | |
| "loss": 4.452, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.035794183445190156, | |
| "grad_norm": 2.737922430038452, | |
| "learning_rate": 4.9842101870508904e-05, | |
| "loss": 4.5525, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03691275167785235, | |
| "grad_norm": 3.5653915405273438, | |
| "learning_rate": 4.9832090274210714e-05, | |
| "loss": 4.3004, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03803131991051454, | |
| "grad_norm": 3.541865348815918, | |
| "learning_rate": 4.982177203174636e-05, | |
| "loss": 4.1722, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.039149888143176735, | |
| "grad_norm": 3.923772096633911, | |
| "learning_rate": 4.981114727053362e-05, | |
| "loss": 4.4216, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.040268456375838924, | |
| "grad_norm": 4.278195381164551, | |
| "learning_rate": 4.98002161217754e-05, | |
| "loss": 4.2207, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04138702460850112, | |
| "grad_norm": 2.4082083702087402, | |
| "learning_rate": 4.9788978720458104e-05, | |
| "loss": 4.2682, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.042505592841163314, | |
| "grad_norm": 3.5256547927856445, | |
| "learning_rate": 4.977743520535001e-05, | |
| "loss": 4.2313, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0436241610738255, | |
| "grad_norm": 3.8092055320739746, | |
| "learning_rate": 4.9765585718999495e-05, | |
| "loss": 4.3788, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0447427293064877, | |
| "grad_norm": 3.763134241104126, | |
| "learning_rate": 4.975343040773335e-05, | |
| "loss": 4.1468, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04586129753914989, | |
| "grad_norm": 4.660019874572754, | |
| "learning_rate": 4.974096942165489e-05, | |
| "loss": 4.1907, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04697986577181208, | |
| "grad_norm": 4.472564220428467, | |
| "learning_rate": 4.9728202914642183e-05, | |
| "loss": 4.5382, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04809843400447427, | |
| "grad_norm": 4.786443710327148, | |
| "learning_rate": 4.9715131044346084e-05, | |
| "loss": 4.2274, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.049217002237136466, | |
| "grad_norm": 4.188578128814697, | |
| "learning_rate": 4.970175397218832e-05, | |
| "loss": 4.0529, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.050335570469798654, | |
| "grad_norm": 3.1671926975250244, | |
| "learning_rate": 4.9688071863359484e-05, | |
| "loss": 4.1644, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05145413870246085, | |
| "grad_norm": 3.790748357772827, | |
| "learning_rate": 4.9674084886817016e-05, | |
| "loss": 3.9051, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.052572706935123045, | |
| "grad_norm": 3.404773473739624, | |
| "learning_rate": 4.965979321528309e-05, | |
| "loss": 4.5011, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.053691275167785234, | |
| "grad_norm": 3.7244088649749756, | |
| "learning_rate": 4.9645197025242506e-05, | |
| "loss": 4.3524, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05480984340044743, | |
| "grad_norm": 2.824608564376831, | |
| "learning_rate": 4.963029649694049e-05, | |
| "loss": 4.3211, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.05592841163310962, | |
| "grad_norm": 3.457947254180908, | |
| "learning_rate": 4.9615091814380465e-05, | |
| "loss": 4.3325, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05704697986577181, | |
| "grad_norm": 2.658190965652466, | |
| "learning_rate": 4.959958316532181e-05, | |
| "loss": 4.2936, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.058165548098434, | |
| "grad_norm": 3.7741482257843018, | |
| "learning_rate": 4.9583770741277505e-05, | |
| "loss": 4.184, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0592841163310962, | |
| "grad_norm": 2.5784506797790527, | |
| "learning_rate": 4.9567654737511794e-05, | |
| "loss": 4.2936, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.06040268456375839, | |
| "grad_norm": 3.018603563308716, | |
| "learning_rate": 4.955123535303776e-05, | |
| "loss": 4.1786, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06152125279642058, | |
| "grad_norm": 3.7826101779937744, | |
| "learning_rate": 4.953451279061485e-05, | |
| "loss": 4.207, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06263982102908278, | |
| "grad_norm": 4.488926887512207, | |
| "learning_rate": 4.951748725674643e-05, | |
| "loss": 4.6036, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06375838926174497, | |
| "grad_norm": 4.047707557678223, | |
| "learning_rate": 4.950015896167716e-05, | |
| "loss": 3.9239, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06487695749440715, | |
| "grad_norm": 2.5932610034942627, | |
| "learning_rate": 4.9482528119390435e-05, | |
| "loss": 4.1098, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06599552572706935, | |
| "grad_norm": 2.828861951828003, | |
| "learning_rate": 4.946459494760578e-05, | |
| "loss": 4.2837, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06711409395973154, | |
| "grad_norm": 3.5818116664886475, | |
| "learning_rate": 4.9446359667776065e-05, | |
| "loss": 4.3673, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06823266219239374, | |
| "grad_norm": 2.7391154766082764, | |
| "learning_rate": 4.9427822505084874e-05, | |
| "loss": 4.5594, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06935123042505593, | |
| "grad_norm": 2.6267666816711426, | |
| "learning_rate": 4.9408983688443654e-05, | |
| "loss": 4.3283, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07046979865771812, | |
| "grad_norm": 2.574420690536499, | |
| "learning_rate": 4.938984345048892e-05, | |
| "loss": 4.2056, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.07158836689038031, | |
| "grad_norm": 3.568028450012207, | |
| "learning_rate": 4.937040202757937e-05, | |
| "loss": 4.2298, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07270693512304251, | |
| "grad_norm": 3.778529405593872, | |
| "learning_rate": 4.9350659659792976e-05, | |
| "loss": 4.1133, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0738255033557047, | |
| "grad_norm": 2.817169427871704, | |
| "learning_rate": 4.933061659092401e-05, | |
| "loss": 4.183, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07494407158836688, | |
| "grad_norm": 3.1611924171447754, | |
| "learning_rate": 4.931027306848004e-05, | |
| "loss": 4.1715, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07606263982102908, | |
| "grad_norm": 3.813957929611206, | |
| "learning_rate": 4.9289629343678864e-05, | |
| "loss": 4.2382, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07718120805369127, | |
| "grad_norm": 2.7094550132751465, | |
| "learning_rate": 4.926868567144543e-05, | |
| "loss": 4.107, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07829977628635347, | |
| "grad_norm": 2.958317995071411, | |
| "learning_rate": 4.924744231040864e-05, | |
| "loss": 4.2052, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07941834451901567, | |
| "grad_norm": 2.8046460151672363, | |
| "learning_rate": 4.9225899522898236e-05, | |
| "loss": 4.1191, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.08053691275167785, | |
| "grad_norm": 2.8792548179626465, | |
| "learning_rate": 4.920405757494147e-05, | |
| "loss": 3.9608, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.08165548098434004, | |
| "grad_norm": 2.953927516937256, | |
| "learning_rate": 4.91819167362599e-05, | |
| "loss": 4.6505, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08277404921700224, | |
| "grad_norm": 2.8335180282592773, | |
| "learning_rate": 4.915947728026598e-05, | |
| "loss": 4.5291, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08389261744966443, | |
| "grad_norm": 2.0205912590026855, | |
| "learning_rate": 4.9136739484059766e-05, | |
| "loss": 4.1919, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08501118568232663, | |
| "grad_norm": 4.005690097808838, | |
| "learning_rate": 4.911370362842543e-05, | |
| "loss": 4.2847, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08612975391498881, | |
| "grad_norm": 2.3989546298980713, | |
| "learning_rate": 4.9090369997827826e-05, | |
| "loss": 4.2773, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.087248322147651, | |
| "grad_norm": 2.6297130584716797, | |
| "learning_rate": 4.9066738880408945e-05, | |
| "loss": 4.1242, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0883668903803132, | |
| "grad_norm": 3.310232639312744, | |
| "learning_rate": 4.904281056798441e-05, | |
| "loss": 4.4331, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.0894854586129754, | |
| "grad_norm": 1.7795904874801636, | |
| "learning_rate": 4.901858535603983e-05, | |
| "loss": 4.1656, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09060402684563758, | |
| "grad_norm": 2.2798268795013428, | |
| "learning_rate": 4.899406354372715e-05, | |
| "loss": 4.1374, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.09172259507829977, | |
| "grad_norm": 2.3790340423583984, | |
| "learning_rate": 4.896924543386099e-05, | |
| "loss": 4.1786, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09284116331096197, | |
| "grad_norm": 2.345724105834961, | |
| "learning_rate": 4.894413133291488e-05, | |
| "loss": 3.9511, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.09395973154362416, | |
| "grad_norm": 3.110872507095337, | |
| "learning_rate": 4.891872155101746e-05, | |
| "loss": 4.2832, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09507829977628636, | |
| "grad_norm": 2.6279842853546143, | |
| "learning_rate": 4.889301640194869e-05, | |
| "loss": 4.4677, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09619686800894854, | |
| "grad_norm": 2.8604133129119873, | |
| "learning_rate": 4.886701620313595e-05, | |
| "loss": 4.3158, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09731543624161074, | |
| "grad_norm": 4.6412858963012695, | |
| "learning_rate": 4.884072127565014e-05, | |
| "loss": 4.4612, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09843400447427293, | |
| "grad_norm": 2.649590492248535, | |
| "learning_rate": 4.881413194420169e-05, | |
| "loss": 3.9387, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09955257270693513, | |
| "grad_norm": 3.56644344329834, | |
| "learning_rate": 4.878724853713655e-05, | |
| "loss": 4.2099, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.10067114093959731, | |
| "grad_norm": 2.599231243133545, | |
| "learning_rate": 4.876007138643216e-05, | |
| "loss": 4.3014, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1017897091722595, | |
| "grad_norm": 2.8511576652526855, | |
| "learning_rate": 4.8732600827693344e-05, | |
| "loss": 3.8494, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.1029082774049217, | |
| "grad_norm": 2.3662049770355225, | |
| "learning_rate": 4.870483720014814e-05, | |
| "loss": 3.9008, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1040268456375839, | |
| "grad_norm": 3.010821580886841, | |
| "learning_rate": 4.8676780846643644e-05, | |
| "loss": 4.1022, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10514541387024609, | |
| "grad_norm": 3.158817768096924, | |
| "learning_rate": 4.864843211364176e-05, | |
| "loss": 4.2633, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10626398210290827, | |
| "grad_norm": 2.9291787147521973, | |
| "learning_rate": 4.861979135121493e-05, | |
| "loss": 4.0874, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10738255033557047, | |
| "grad_norm": 3.55708909034729, | |
| "learning_rate": 4.859085891304178e-05, | |
| "loss": 4.1598, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10850111856823266, | |
| "grad_norm": 2.482081651687622, | |
| "learning_rate": 4.85616351564028e-05, | |
| "loss": 4.4488, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10961968680089486, | |
| "grad_norm": 2.2329485416412354, | |
| "learning_rate": 4.853212044217591e-05, | |
| "loss": 4.1849, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.11073825503355705, | |
| "grad_norm": 2.4485504627227783, | |
| "learning_rate": 4.8502315134832e-05, | |
| "loss": 4.2842, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.11185682326621924, | |
| "grad_norm": 2.239509344100952, | |
| "learning_rate": 4.847221960243041e-05, | |
| "loss": 4.4354, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11297539149888143, | |
| "grad_norm": 2.5421688556671143, | |
| "learning_rate": 4.8441834216614454e-05, | |
| "loss": 4.4723, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.11409395973154363, | |
| "grad_norm": 3.123340129852295, | |
| "learning_rate": 4.8411159352606734e-05, | |
| "loss": 3.812, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11521252796420582, | |
| "grad_norm": 1.9788379669189453, | |
| "learning_rate": 4.838019538920458e-05, | |
| "loss": 4.2419, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.116331096196868, | |
| "grad_norm": 2.201848268508911, | |
| "learning_rate": 4.834894270877536e-05, | |
| "loss": 4.1965, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1174496644295302, | |
| "grad_norm": 2.6100194454193115, | |
| "learning_rate": 4.831740169725172e-05, | |
| "loss": 4.3533, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1185682326621924, | |
| "grad_norm": 2.866046905517578, | |
| "learning_rate": 4.8285572744126854e-05, | |
| "loss": 3.9282, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11968680089485459, | |
| "grad_norm": 2.9005253314971924, | |
| "learning_rate": 4.8253456242449704e-05, | |
| "loss": 4.2894, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.12080536912751678, | |
| "grad_norm": 2.173900842666626, | |
| "learning_rate": 4.822105258882007e-05, | |
| "loss": 4.3334, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.12192393736017897, | |
| "grad_norm": 2.949611186981201, | |
| "learning_rate": 4.818836218338373e-05, | |
| "loss": 4.2813, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.12304250559284116, | |
| "grad_norm": 3.2048230171203613, | |
| "learning_rate": 4.81553854298275e-05, | |
| "loss": 4.3333, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12416107382550336, | |
| "grad_norm": 2.2151906490325928, | |
| "learning_rate": 4.812212273537425e-05, | |
| "loss": 4.0185, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12527964205816555, | |
| "grad_norm": 3.4764933586120605, | |
| "learning_rate": 4.808857451077788e-05, | |
| "loss": 4.0535, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12639821029082773, | |
| "grad_norm": 2.3762760162353516, | |
| "learning_rate": 4.805474117031822e-05, | |
| "loss": 4.3605, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12751677852348994, | |
| "grad_norm": 2.3675270080566406, | |
| "learning_rate": 4.802062313179595e-05, | |
| "loss": 4.1901, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12863534675615212, | |
| "grad_norm": 2.8210885524749756, | |
| "learning_rate": 4.798622081652743e-05, | |
| "loss": 4.5484, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1297539149888143, | |
| "grad_norm": 2.597954511642456, | |
| "learning_rate": 4.795153464933948e-05, | |
| "loss": 4.3055, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.13087248322147652, | |
| "grad_norm": 2.793868064880371, | |
| "learning_rate": 4.7916565058564155e-05, | |
| "loss": 4.4277, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.1319910514541387, | |
| "grad_norm": 3.061800718307495, | |
| "learning_rate": 4.788131247603345e-05, | |
| "loss": 4.2027, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1331096196868009, | |
| "grad_norm": 3.1223654747009277, | |
| "learning_rate": 4.784577733707394e-05, | |
| "loss": 4.3513, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1342281879194631, | |
| "grad_norm": 2.714157819747925, | |
| "learning_rate": 4.7809960080501464e-05, | |
| "loss": 4.2208, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13534675615212527, | |
| "grad_norm": 2.218320369720459, | |
| "learning_rate": 4.777386114861565e-05, | |
| "loss": 4.2255, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.13646532438478748, | |
| "grad_norm": 2.74094557762146, | |
| "learning_rate": 4.7737480987194484e-05, | |
| "loss": 4.4531, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13758389261744966, | |
| "grad_norm": 2.0145950317382812, | |
| "learning_rate": 4.7700820045488783e-05, | |
| "loss": 4.2616, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.13870246085011187, | |
| "grad_norm": 3.6711690425872803, | |
| "learning_rate": 4.766387877621667e-05, | |
| "loss": 4.3626, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13982102908277405, | |
| "grad_norm": 2.7168869972229004, | |
| "learning_rate": 4.762665763555797e-05, | |
| "loss": 4.1299, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.14093959731543623, | |
| "grad_norm": 2.299030065536499, | |
| "learning_rate": 4.758915708314858e-05, | |
| "loss": 4.378, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.14205816554809844, | |
| "grad_norm": 2.2006888389587402, | |
| "learning_rate": 4.755137758207479e-05, | |
| "loss": 4.5967, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.14317673378076062, | |
| "grad_norm": 2.05462908744812, | |
| "learning_rate": 4.751331959886758e-05, | |
| "loss": 4.235, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14429530201342283, | |
| "grad_norm": 1.8818479776382446, | |
| "learning_rate": 4.7474983603496815e-05, | |
| "loss": 4.1797, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.14541387024608501, | |
| "grad_norm": 2.008810043334961, | |
| "learning_rate": 4.7436370069365524e-05, | |
| "loss": 4.26, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1465324384787472, | |
| "grad_norm": 2.405534029006958, | |
| "learning_rate": 4.739747947330394e-05, | |
| "loss": 3.8538, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1476510067114094, | |
| "grad_norm": 2.6731414794921875, | |
| "learning_rate": 4.7358312295563734e-05, | |
| "loss": 4.1691, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1487695749440716, | |
| "grad_norm": 3.544353723526001, | |
| "learning_rate": 4.7318869019811986e-05, | |
| "loss": 4.0219, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14988814317673377, | |
| "grad_norm": 2.278864622116089, | |
| "learning_rate": 4.727915013312526e-05, | |
| "loss": 4.3926, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.15100671140939598, | |
| "grad_norm": 2.240647554397583, | |
| "learning_rate": 4.7239156125983594e-05, | |
| "loss": 4.3835, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.15212527964205816, | |
| "grad_norm": 3.1606392860412598, | |
| "learning_rate": 4.7198887492264416e-05, | |
| "loss": 3.9971, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.15324384787472037, | |
| "grad_norm": 2.4671874046325684, | |
| "learning_rate": 4.7158344729236454e-05, | |
| "loss": 4.4052, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.15436241610738255, | |
| "grad_norm": 1.9113795757293701, | |
| "learning_rate": 4.711752833755362e-05, | |
| "loss": 4.1998, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.15548098434004473, | |
| "grad_norm": 2.9461162090301514, | |
| "learning_rate": 4.707643882124878e-05, | |
| "loss": 4.212, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.15659955257270694, | |
| "grad_norm": 3.278677463531494, | |
| "learning_rate": 4.70350766877276e-05, | |
| "loss": 4.4147, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15771812080536912, | |
| "grad_norm": 3.2217676639556885, | |
| "learning_rate": 4.699344244776218e-05, | |
| "loss": 4.2919, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15883668903803133, | |
| "grad_norm": 1.8135348558425903, | |
| "learning_rate": 4.6951536615484854e-05, | |
| "loss": 4.0982, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1599552572706935, | |
| "grad_norm": 3.3648054599761963, | |
| "learning_rate": 4.6909359708381775e-05, | |
| "loss": 4.3256, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1610738255033557, | |
| "grad_norm": 2.8032402992248535, | |
| "learning_rate": 4.686691224728652e-05, | |
| "loss": 4.3029, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1621923937360179, | |
| "grad_norm": 2.585247039794922, | |
| "learning_rate": 4.682419475637372e-05, | |
| "loss": 4.1191, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.16331096196868009, | |
| "grad_norm": 2.0398333072662354, | |
| "learning_rate": 4.678120776315251e-05, | |
| "loss": 4.2553, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1644295302013423, | |
| "grad_norm": 2.485276460647583, | |
| "learning_rate": 4.673795179846007e-05, | |
| "loss": 4.2602, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.16554809843400448, | |
| "grad_norm": 2.1474947929382324, | |
| "learning_rate": 4.669442739645506e-05, | |
| "loss": 4.2519, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 2.4320313930511475, | |
| "learning_rate": 4.665063509461097e-05, | |
| "loss": 4.2698, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.16778523489932887, | |
| "grad_norm": 3.272096872329712, | |
| "learning_rate": 4.660657543370958e-05, | |
| "loss": 4.2166, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16890380313199105, | |
| "grad_norm": 2.540706157684326, | |
| "learning_rate": 4.656224895783421e-05, | |
| "loss": 4.133, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.17002237136465326, | |
| "grad_norm": 2.285813808441162, | |
| "learning_rate": 4.651765621436303e-05, | |
| "loss": 4.4777, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.17114093959731544, | |
| "grad_norm": 3.893460273742676, | |
| "learning_rate": 4.6472797753962246e-05, | |
| "loss": 4.2023, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.17225950782997762, | |
| "grad_norm": 2.54040789604187, | |
| "learning_rate": 4.6427674130579424e-05, | |
| "loss": 4.2924, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.17337807606263983, | |
| "grad_norm": 2.005213975906372, | |
| "learning_rate": 4.6382285901436495e-05, | |
| "loss": 3.8809, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.174496644295302, | |
| "grad_norm": 3.2139828205108643, | |
| "learning_rate": 4.633663362702299e-05, | |
| "loss": 4.5209, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.1756152125279642, | |
| "grad_norm": 2.509523868560791, | |
| "learning_rate": 4.62907178710891e-05, | |
| "loss": 4.3067, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1767337807606264, | |
| "grad_norm": 2.318631649017334, | |
| "learning_rate": 4.6244539200638626e-05, | |
| "loss": 4.0036, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.17785234899328858, | |
| "grad_norm": 2.146442413330078, | |
| "learning_rate": 4.61980981859221e-05, | |
| "loss": 4.3332, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.1789709172259508, | |
| "grad_norm": 2.574519634246826, | |
| "learning_rate": 4.615139540042966e-05, | |
| "loss": 4.0637, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.18008948545861297, | |
| "grad_norm": 2.722486972808838, | |
| "learning_rate": 4.610443142088402e-05, | |
| "loss": 4.1284, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.18120805369127516, | |
| "grad_norm": 2.3555033206939697, | |
| "learning_rate": 4.60572068272333e-05, | |
| "loss": 4.2058, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.18232662192393737, | |
| "grad_norm": 2.389615297317505, | |
| "learning_rate": 4.60097222026439e-05, | |
| "loss": 4.0544, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.18344519015659955, | |
| "grad_norm": 2.4774980545043945, | |
| "learning_rate": 4.596197813349328e-05, | |
| "loss": 4.0337, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.18456375838926176, | |
| "grad_norm": 2.1499996185302734, | |
| "learning_rate": 4.591397520936271e-05, | |
| "loss": 4.1015, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.18568232662192394, | |
| "grad_norm": 2.3946421146392822, | |
| "learning_rate": 4.586571402303006e-05, | |
| "loss": 3.8225, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18680089485458612, | |
| "grad_norm": 2.3064377307891846, | |
| "learning_rate": 4.581719517046236e-05, | |
| "loss": 4.0789, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.18791946308724833, | |
| "grad_norm": 2.358966827392578, | |
| "learning_rate": 4.576841925080853e-05, | |
| "loss": 3.9453, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1890380313199105, | |
| "grad_norm": 1.9163504838943481, | |
| "learning_rate": 4.5719386866391976e-05, | |
| "loss": 3.9758, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.19015659955257272, | |
| "grad_norm": 2.147257089614868, | |
| "learning_rate": 4.56700986227031e-05, | |
| "loss": 4.0456, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1912751677852349, | |
| "grad_norm": 2.340684413909912, | |
| "learning_rate": 4.5620555128391884e-05, | |
| "loss": 3.9842, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.19239373601789708, | |
| "grad_norm": 1.9256740808486938, | |
| "learning_rate": 4.557075699526032e-05, | |
| "loss": 3.9314, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1935123042505593, | |
| "grad_norm": 2.1142566204071045, | |
| "learning_rate": 4.552070483825489e-05, | |
| "loss": 4.216, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.19463087248322147, | |
| "grad_norm": 2.934136152267456, | |
| "learning_rate": 4.5470399275458985e-05, | |
| "loss": 4.3844, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.19574944071588368, | |
| "grad_norm": 2.802530527114868, | |
| "learning_rate": 4.541984092808521e-05, | |
| "loss": 4.1614, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19686800894854586, | |
| "grad_norm": 3.0535061359405518, | |
| "learning_rate": 4.536903042046777e-05, | |
| "loss": 4.0901, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19798657718120805, | |
| "grad_norm": 3.6601295471191406, | |
| "learning_rate": 4.531796838005477e-05, | |
| "loss": 4.1447, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.19910514541387025, | |
| "grad_norm": 2.675509214401245, | |
| "learning_rate": 4.526665543740038e-05, | |
| "loss": 4.0328, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.20022371364653244, | |
| "grad_norm": 3.0935540199279785, | |
| "learning_rate": 4.5215092226157165e-05, | |
| "loss": 4.0755, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.20134228187919462, | |
| "grad_norm": 2.0524206161499023, | |
| "learning_rate": 4.516327938306818e-05, | |
| "loss": 4.2309, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.20246085011185683, | |
| "grad_norm": 2.2571775913238525, | |
| "learning_rate": 4.5111217547959114e-05, | |
| "loss": 4.1893, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.203579418344519, | |
| "grad_norm": 3.665895462036133, | |
| "learning_rate": 4.505890736373045e-05, | |
| "loss": 4.2883, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.20469798657718122, | |
| "grad_norm": 2.241145372390747, | |
| "learning_rate": 4.500634947634943e-05, | |
| "loss": 4.1803, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.2058165548098434, | |
| "grad_norm": 2.3424227237701416, | |
| "learning_rate": 4.495354453484216e-05, | |
| "loss": 3.9584, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20693512304250558, | |
| "grad_norm": 2.7996134757995605, | |
| "learning_rate": 4.4900493191285554e-05, | |
| "loss": 3.977, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2080536912751678, | |
| "grad_norm": 2.122816324234009, | |
| "learning_rate": 4.48471961007993e-05, | |
| "loss": 4.1925, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.20917225950782997, | |
| "grad_norm": 2.3053228855133057, | |
| "learning_rate": 4.479365392153776e-05, | |
| "loss": 4.3152, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.21029082774049218, | |
| "grad_norm": 2.4579198360443115, | |
| "learning_rate": 4.473986731468183e-05, | |
| "loss": 3.8768, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.21140939597315436, | |
| "grad_norm": 4.130041599273682, | |
| "learning_rate": 4.4685836944430816e-05, | |
| "loss": 3.9151, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.21252796420581654, | |
| "grad_norm": 3.137632369995117, | |
| "learning_rate": 4.4631563477994184e-05, | |
| "loss": 4.2563, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.21364653243847875, | |
| "grad_norm": 2.419900894165039, | |
| "learning_rate": 4.457704758558335e-05, | |
| "loss": 4.0084, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.21476510067114093, | |
| "grad_norm": 2.5032031536102295, | |
| "learning_rate": 4.4522289940403404e-05, | |
| "loss": 4.2155, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.21588366890380314, | |
| "grad_norm": 2.497797727584839, | |
| "learning_rate": 4.446729121864478e-05, | |
| "loss": 3.8909, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.21700223713646533, | |
| "grad_norm": 3.136139154434204, | |
| "learning_rate": 4.441205209947491e-05, | |
| "loss": 4.45, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2181208053691275, | |
| "grad_norm": 2.3615260124206543, | |
| "learning_rate": 4.435657326502986e-05, | |
| "loss": 4.3952, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21923937360178972, | |
| "grad_norm": 2.7231903076171875, | |
| "learning_rate": 4.430085540040587e-05, | |
| "loss": 4.0338, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2203579418344519, | |
| "grad_norm": 2.807499408721924, | |
| "learning_rate": 4.4244899193650933e-05, | |
| "loss": 4.4239, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2214765100671141, | |
| "grad_norm": 2.6443018913269043, | |
| "learning_rate": 4.418870533575625e-05, | |
| "loss": 4.1129, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2225950782997763, | |
| "grad_norm": 2.2121267318725586, | |
| "learning_rate": 4.4132274520647754e-05, | |
| "loss": 4.2689, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.22371364653243847, | |
| "grad_norm": 2.53283953666687, | |
| "learning_rate": 4.407560744517749e-05, | |
| "loss": 4.1621, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22483221476510068, | |
| "grad_norm": 2.109092950820923, | |
| "learning_rate": 4.401870480911504e-05, | |
| "loss": 4.2749, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.22595078299776286, | |
| "grad_norm": 2.3910768032073975, | |
| "learning_rate": 4.3961567315138885e-05, | |
| "loss": 4.2594, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22706935123042504, | |
| "grad_norm": 3.1483852863311768, | |
| "learning_rate": 4.3904195668827697e-05, | |
| "loss": 3.8371, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.22818791946308725, | |
| "grad_norm": 3.3514859676361084, | |
| "learning_rate": 4.384659057865165e-05, | |
| "loss": 4.1402, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.22930648769574943, | |
| "grad_norm": 4.020583629608154, | |
| "learning_rate": 4.378875275596367e-05, | |
| "loss": 3.8622, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.23042505592841164, | |
| "grad_norm": 2.541602611541748, | |
| "learning_rate": 4.3730682914990653e-05, | |
| "loss": 4.3944, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.23154362416107382, | |
| "grad_norm": 2.3997037410736084, | |
| "learning_rate": 4.3672381772824615e-05, | |
| "loss": 4.1781, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.232662192393736, | |
| "grad_norm": 3.2126035690307617, | |
| "learning_rate": 4.36138500494139e-05, | |
| "loss": 4.115, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.23378076062639822, | |
| "grad_norm": 2.839704990386963, | |
| "learning_rate": 4.355508846755422e-05, | |
| "loss": 3.9238, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.2348993288590604, | |
| "grad_norm": 3.05448317527771, | |
| "learning_rate": 4.3496097752879764e-05, | |
| "loss": 4.11, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2360178970917226, | |
| "grad_norm": 2.8709630966186523, | |
| "learning_rate": 4.343687863385425e-05, | |
| "loss": 3.9743, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2371364653243848, | |
| "grad_norm": 2.701570749282837, | |
| "learning_rate": 4.3377431841761875e-05, | |
| "loss": 3.8432, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.23825503355704697, | |
| "grad_norm": 2.5893900394439697, | |
| "learning_rate": 4.331775811069837e-05, | |
| "loss": 4.0372, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.23937360178970918, | |
| "grad_norm": 2.2405996322631836, | |
| "learning_rate": 4.325785817756186e-05, | |
| "loss": 3.8983, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.24049217002237136, | |
| "grad_norm": 2.6390066146850586, | |
| "learning_rate": 4.3197732782043784e-05, | |
| "loss": 4.2161, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.24161073825503357, | |
| "grad_norm": 2.5067667961120605, | |
| "learning_rate": 4.3137382666619783e-05, | |
| "loss": 4.3013, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.24272930648769575, | |
| "grad_norm": 2.50183367729187, | |
| "learning_rate": 4.307680857654052e-05, | |
| "loss": 4.1306, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.24384787472035793, | |
| "grad_norm": 2.526111364364624, | |
| "learning_rate": 4.301601125982245e-05, | |
| "loss": 3.9925, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.24496644295302014, | |
| "grad_norm": 2.189241886138916, | |
| "learning_rate": 4.2954991467238634e-05, | |
| "loss": 4.1998, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.24608501118568232, | |
| "grad_norm": 2.3332066535949707, | |
| "learning_rate": 4.289374995230942e-05, | |
| "loss": 4.1606, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24720357941834453, | |
| "grad_norm": 3.0104753971099854, | |
| "learning_rate": 4.2832287471293155e-05, | |
| "loss": 4.3294, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2483221476510067, | |
| "grad_norm": 3.3456063270568848, | |
| "learning_rate": 4.277060478317687e-05, | |
| "loss": 4.3516, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2494407158836689, | |
| "grad_norm": 1.9993607997894287, | |
| "learning_rate": 4.270870264966687e-05, | |
| "loss": 4.2042, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2505592841163311, | |
| "grad_norm": 3.882202625274658, | |
| "learning_rate": 4.264658183517935e-05, | |
| "loss": 4.1279, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.2516778523489933, | |
| "grad_norm": 2.162135124206543, | |
| "learning_rate": 4.258424310683094e-05, | |
| "loss": 4.0197, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.25279642058165547, | |
| "grad_norm": 1.9756934642791748, | |
| "learning_rate": 4.2521687234429264e-05, | |
| "loss": 4.1365, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2539149888143177, | |
| "grad_norm": 2.928903579711914, | |
| "learning_rate": 4.245891499046338e-05, | |
| "loss": 4.1895, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.2550335570469799, | |
| "grad_norm": 2.3427515029907227, | |
| "learning_rate": 4.239592715009429e-05, | |
| "loss": 4.3197, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.25615212527964204, | |
| "grad_norm": 2.0358877182006836, | |
| "learning_rate": 4.2332724491145374e-05, | |
| "loss": 4.0241, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.25727069351230425, | |
| "grad_norm": 3.108318567276001, | |
| "learning_rate": 4.226930779409271e-05, | |
| "loss": 3.8592, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25838926174496646, | |
| "grad_norm": 2.0987706184387207, | |
| "learning_rate": 4.2205677842055516e-05, | |
| "loss": 4.1741, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2595078299776286, | |
| "grad_norm": 2.86515474319458, | |
| "learning_rate": 4.214183542078646e-05, | |
| "loss": 4.0576, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2606263982102908, | |
| "grad_norm": 3.104260206222534, | |
| "learning_rate": 4.207778131866191e-05, | |
| "loss": 3.9961, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.26174496644295303, | |
| "grad_norm": 2.444136142730713, | |
| "learning_rate": 4.2013516326672273e-05, | |
| "loss": 4.0128, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.26286353467561524, | |
| "grad_norm": 2.4717466831207275, | |
| "learning_rate": 4.194904123841218e-05, | |
| "loss": 4.4341, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2639821029082774, | |
| "grad_norm": 2.2069778442382812, | |
| "learning_rate": 4.188435685007069e-05, | |
| "loss": 3.9367, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2651006711409396, | |
| "grad_norm": 2.673962354660034, | |
| "learning_rate": 4.1819463960421454e-05, | |
| "loss": 3.9492, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2662192393736018, | |
| "grad_norm": 2.869675397872925, | |
| "learning_rate": 4.175436337081289e-05, | |
| "loss": 4.2262, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.26733780760626397, | |
| "grad_norm": 2.8593602180480957, | |
| "learning_rate": 4.168905588515822e-05, | |
| "loss": 3.9327, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.2684563758389262, | |
| "grad_norm": 3.113313674926758, | |
| "learning_rate": 4.162354230992562e-05, | |
| "loss": 4.3994, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2695749440715884, | |
| "grad_norm": 3.0361926555633545, | |
| "learning_rate": 4.155782345412819e-05, | |
| "loss": 4.2951, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.27069351230425054, | |
| "grad_norm": 2.609422206878662, | |
| "learning_rate": 4.149190012931402e-05, | |
| "loss": 4.1109, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.27181208053691275, | |
| "grad_norm": 2.932523250579834, | |
| "learning_rate": 4.1425773149556134e-05, | |
| "loss": 4.1605, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.27293064876957496, | |
| "grad_norm": 2.0494723320007324, | |
| "learning_rate": 4.135944333144244e-05, | |
| "loss": 4.2708, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2740492170022371, | |
| "grad_norm": 2.5390541553497314, | |
| "learning_rate": 4.129291149406567e-05, | |
| "loss": 3.9803, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2751677852348993, | |
| "grad_norm": 2.236856460571289, | |
| "learning_rate": 4.122617845901322e-05, | |
| "loss": 4.1316, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.27628635346756153, | |
| "grad_norm": 2.3391566276550293, | |
| "learning_rate": 4.1159245050357065e-05, | |
| "loss": 4.2234, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.27740492170022374, | |
| "grad_norm": 2.809293031692505, | |
| "learning_rate": 4.1092112094643543e-05, | |
| "loss": 4.0531, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2785234899328859, | |
| "grad_norm": 2.943033456802368, | |
| "learning_rate": 4.102478042088315e-05, | |
| "loss": 3.9868, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.2796420581655481, | |
| "grad_norm": 1.9977362155914307, | |
| "learning_rate": 4.095725086054029e-05, | |
| "loss": 3.7822, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2807606263982103, | |
| "grad_norm": 2.405503988265991, | |
| "learning_rate": 4.088952424752307e-05, | |
| "loss": 4.1802, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.28187919463087246, | |
| "grad_norm": 2.7163617610931396, | |
| "learning_rate": 4.082160141817293e-05, | |
| "loss": 4.1736, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2829977628635347, | |
| "grad_norm": 2.2036311626434326, | |
| "learning_rate": 4.075348321125433e-05, | |
| "loss": 3.9607, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2841163310961969, | |
| "grad_norm": 3.2602269649505615, | |
| "learning_rate": 4.068517046794443e-05, | |
| "loss": 3.984, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.28523489932885904, | |
| "grad_norm": 3.4510889053344727, | |
| "learning_rate": 4.0616664031822684e-05, | |
| "loss": 4.1701, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.28635346756152125, | |
| "grad_norm": 2.2935943603515625, | |
| "learning_rate": 4.0547964748860386e-05, | |
| "loss": 4.1199, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.28747203579418346, | |
| "grad_norm": 3.6087756156921387, | |
| "learning_rate": 4.0479073467410286e-05, | |
| "loss": 4.0622, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.28859060402684567, | |
| "grad_norm": 3.2882027626037598, | |
| "learning_rate": 4.040999103819606e-05, | |
| "loss": 4.2735, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2897091722595078, | |
| "grad_norm": 2.9699313640594482, | |
| "learning_rate": 4.034071831430184e-05, | |
| "loss": 3.9871, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.29082774049217003, | |
| "grad_norm": 2.889160394668579, | |
| "learning_rate": 4.0271256151161664e-05, | |
| "loss": 4.0421, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.29194630872483224, | |
| "grad_norm": 2.320803165435791, | |
| "learning_rate": 4.020160540654891e-05, | |
| "loss": 4.4205, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.2930648769574944, | |
| "grad_norm": 3.7101683616638184, | |
| "learning_rate": 4.0131766940565715e-05, | |
| "loss": 3.9198, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2941834451901566, | |
| "grad_norm": 2.4428420066833496, | |
| "learning_rate": 4.006174161563233e-05, | |
| "loss": 4.1809, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2953020134228188, | |
| "grad_norm": 3.8638551235198975, | |
| "learning_rate": 3.999153029647651e-05, | |
| "loss": 3.9782, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.29642058165548096, | |
| "grad_norm": 1.7899916172027588, | |
| "learning_rate": 3.9921133850122805e-05, | |
| "loss": 4.3221, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2975391498881432, | |
| "grad_norm": 3.8810582160949707, | |
| "learning_rate": 3.9850553145881854e-05, | |
| "loss": 3.8899, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2986577181208054, | |
| "grad_norm": 2.2642791271209717, | |
| "learning_rate": 3.9779789055339656e-05, | |
| "loss": 4.0838, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.29977628635346754, | |
| "grad_norm": 2.4321177005767822, | |
| "learning_rate": 3.9708842452346836e-05, | |
| "loss": 4.2623, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.30089485458612975, | |
| "grad_norm": 2.04886794090271, | |
| "learning_rate": 3.963771421300777e-05, | |
| "loss": 4.1379, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.30201342281879195, | |
| "grad_norm": 1.9282593727111816, | |
| "learning_rate": 3.956640521566989e-05, | |
| "loss": 4.2569, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.30313199105145416, | |
| "grad_norm": 2.2058980464935303, | |
| "learning_rate": 3.949491634091272e-05, | |
| "loss": 4.7175, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3042505592841163, | |
| "grad_norm": 2.039492130279541, | |
| "learning_rate": 3.9423248471537065e-05, | |
| "loss": 4.234, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3053691275167785, | |
| "grad_norm": 1.9331101179122925, | |
| "learning_rate": 3.935140249255412e-05, | |
| "loss": 3.9734, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.30648769574944074, | |
| "grad_norm": 2.2190299034118652, | |
| "learning_rate": 3.9279379291174465e-05, | |
| "loss": 4.1752, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3076062639821029, | |
| "grad_norm": 2.8630807399749756, | |
| "learning_rate": 3.920717975679723e-05, | |
| "loss": 4.0549, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3087248322147651, | |
| "grad_norm": 2.7145142555236816, | |
| "learning_rate": 3.913480478099897e-05, | |
| "loss": 4.1861, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.3098434004474273, | |
| "grad_norm": 3.505417823791504, | |
| "learning_rate": 3.9062255257522794e-05, | |
| "loss": 4.0303, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.31096196868008946, | |
| "grad_norm": 3.250004291534424, | |
| "learning_rate": 3.8989532082267225e-05, | |
| "loss": 4.2068, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.31208053691275167, | |
| "grad_norm": 2.1549458503723145, | |
| "learning_rate": 3.891663615327518e-05, | |
| "loss": 3.921, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3131991051454139, | |
| "grad_norm": 2.008209705352783, | |
| "learning_rate": 3.884356837072288e-05, | |
| "loss": 3.8706, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3143176733780761, | |
| "grad_norm": 3.1446456909179688, | |
| "learning_rate": 3.877032963690873e-05, | |
| "loss": 4.0936, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.31543624161073824, | |
| "grad_norm": 2.9265387058258057, | |
| "learning_rate": 3.8696920856242174e-05, | |
| "loss": 4.2987, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.31655480984340045, | |
| "grad_norm": 2.246251344680786, | |
| "learning_rate": 3.8623342935232525e-05, | |
| "loss": 4.1966, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.31767337807606266, | |
| "grad_norm": 2.046858549118042, | |
| "learning_rate": 3.854959678247778e-05, | |
| "loss": 4.3037, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3187919463087248, | |
| "grad_norm": 2.070281744003296, | |
| "learning_rate": 3.847568330865338e-05, | |
| "loss": 3.8673, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.319910514541387, | |
| "grad_norm": 2.2606728076934814, | |
| "learning_rate": 3.8401603426501e-05, | |
| "loss": 4.3719, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.32102908277404923, | |
| "grad_norm": 2.7562570571899414, | |
| "learning_rate": 3.8327358050817234e-05, | |
| "loss": 3.8372, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3221476510067114, | |
| "grad_norm": 2.6188101768493652, | |
| "learning_rate": 3.8252948098442344e-05, | |
| "loss": 3.9952, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3232662192393736, | |
| "grad_norm": 3.580763101577759, | |
| "learning_rate": 3.817837448824888e-05, | |
| "loss": 4.0735, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.3243847874720358, | |
| "grad_norm": 2.4125702381134033, | |
| "learning_rate": 3.81036381411304e-05, | |
| "loss": 4.0671, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.32550335570469796, | |
| "grad_norm": 2.0095221996307373, | |
| "learning_rate": 3.8028739979990066e-05, | |
| "loss": 4.1353, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.32662192393736017, | |
| "grad_norm": 2.5612494945526123, | |
| "learning_rate": 3.7953680929729215e-05, | |
| "loss": 3.9072, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3277404921700224, | |
| "grad_norm": 2.8321776390075684, | |
| "learning_rate": 3.787846191723599e-05, | |
| "loss": 4.1067, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3288590604026846, | |
| "grad_norm": 2.400554895401001, | |
| "learning_rate": 3.780308387137387e-05, | |
| "loss": 4.1338, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.32997762863534674, | |
| "grad_norm": 2.4202167987823486, | |
| "learning_rate": 3.772754772297022e-05, | |
| "loss": 4.139, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.33109619686800895, | |
| "grad_norm": 2.7618842124938965, | |
| "learning_rate": 3.7651854404804755e-05, | |
| "loss": 3.7908, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.33221476510067116, | |
| "grad_norm": 3.0693230628967285, | |
| "learning_rate": 3.757600485159805e-05, | |
| "loss": 3.9235, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 1.86801016330719, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 4.1265, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3344519015659955, | |
| "grad_norm": 2.0775158405303955, | |
| "learning_rate": 3.742384078857824e-05, | |
| "loss": 4.0009, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.33557046979865773, | |
| "grad_norm": 2.6333956718444824, | |
| "learning_rate": 3.7347528157806586e-05, | |
| "loss": 4.161, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3366890380313199, | |
| "grad_norm": 2.197185754776001, | |
| "learning_rate": 3.727106305005336e-05, | |
| "loss": 3.9653, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.3378076062639821, | |
| "grad_norm": 2.323561191558838, | |
| "learning_rate": 3.719444640956981e-05, | |
| "loss": 4.0114, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3389261744966443, | |
| "grad_norm": 2.5178816318511963, | |
| "learning_rate": 3.7117679182478417e-05, | |
| "loss": 3.8501, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3400447427293065, | |
| "grad_norm": 2.5967633724212646, | |
| "learning_rate": 3.704076231676125e-05, | |
| "loss": 3.7106, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.34116331096196867, | |
| "grad_norm": 2.9387967586517334, | |
| "learning_rate": 3.696369676224819e-05, | |
| "loss": 3.9001, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3422818791946309, | |
| "grad_norm": 2.7642838954925537, | |
| "learning_rate": 3.688648347060529e-05, | |
| "loss": 4.0384, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3434004474272931, | |
| "grad_norm": 2.5087080001831055, | |
| "learning_rate": 3.680912339532296e-05, | |
| "loss": 4.1227, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.34451901565995524, | |
| "grad_norm": 2.084219217300415, | |
| "learning_rate": 3.67316174917042e-05, | |
| "loss": 4.0968, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.34563758389261745, | |
| "grad_norm": 3.1434361934661865, | |
| "learning_rate": 3.66539667168528e-05, | |
| "loss": 4.3648, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.34675615212527966, | |
| "grad_norm": 2.9753198623657227, | |
| "learning_rate": 3.657617202966158e-05, | |
| "loss": 3.8644, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3478747203579418, | |
| "grad_norm": 1.990665316581726, | |
| "learning_rate": 3.649823439080047e-05, | |
| "loss": 4.0785, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.348993288590604, | |
| "grad_norm": 3.1740851402282715, | |
| "learning_rate": 3.6420154762704686e-05, | |
| "loss": 4.1794, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.35011185682326623, | |
| "grad_norm": 2.5907697677612305, | |
| "learning_rate": 3.634193410956282e-05, | |
| "loss": 3.887, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3512304250559284, | |
| "grad_norm": 2.6295742988586426, | |
| "learning_rate": 3.6263573397305e-05, | |
| "loss": 4.0013, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3523489932885906, | |
| "grad_norm": 3.465045213699341, | |
| "learning_rate": 3.618507359359087e-05, | |
| "loss": 3.9178, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3534675615212528, | |
| "grad_norm": 2.5845601558685303, | |
| "learning_rate": 3.6106435667797685e-05, | |
| "loss": 3.9463, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.354586129753915, | |
| "grad_norm": 2.198699712753296, | |
| "learning_rate": 3.602766059100838e-05, | |
| "loss": 3.6981, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.35570469798657717, | |
| "grad_norm": 2.5952913761138916, | |
| "learning_rate": 3.5948749335999496e-05, | |
| "loss": 4.3058, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3568232662192394, | |
| "grad_norm": 2.6661579608917236, | |
| "learning_rate": 3.586970287722923e-05, | |
| "loss": 3.9699, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3579418344519016, | |
| "grad_norm": 2.2353670597076416, | |
| "learning_rate": 3.5790522190825365e-05, | |
| "loss": 3.9394, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.35906040268456374, | |
| "grad_norm": 1.800933599472046, | |
| "learning_rate": 3.571120825457327e-05, | |
| "loss": 3.9037, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.36017897091722595, | |
| "grad_norm": 2.2671263217926025, | |
| "learning_rate": 3.563176204790374e-05, | |
| "loss": 4.2906, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.36129753914988816, | |
| "grad_norm": 2.249171733856201, | |
| "learning_rate": 3.555218455188099e-05, | |
| "loss": 4.0766, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.3624161073825503, | |
| "grad_norm": 2.222735643386841, | |
| "learning_rate": 3.547247674919046e-05, | |
| "loss": 4.0171, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3635346756152125, | |
| "grad_norm": 2.537754774093628, | |
| "learning_rate": 3.539263962412676e-05, | |
| "loss": 3.9333, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.36465324384787473, | |
| "grad_norm": 3.2057714462280273, | |
| "learning_rate": 3.5312674162581436e-05, | |
| "loss": 4.1573, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.36577181208053694, | |
| "grad_norm": 3.4859671592712402, | |
| "learning_rate": 3.523258135203087e-05, | |
| "loss": 3.8762, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3668903803131991, | |
| "grad_norm": 3.155507802963257, | |
| "learning_rate": 3.5152362181524014e-05, | |
| "loss": 3.9051, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3680089485458613, | |
| "grad_norm": 2.1552867889404297, | |
| "learning_rate": 3.507201764167024e-05, | |
| "loss": 3.8245, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3691275167785235, | |
| "grad_norm": 2.492128372192383, | |
| "learning_rate": 3.4991548724627054e-05, | |
| "loss": 3.9806, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.37024608501118567, | |
| "grad_norm": 2.913975715637207, | |
| "learning_rate": 3.491095642408791e-05, | |
| "loss": 4.0708, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3713646532438479, | |
| "grad_norm": 1.982664942741394, | |
| "learning_rate": 3.483024173526985e-05, | |
| "loss": 3.7897, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3724832214765101, | |
| "grad_norm": 2.7133326530456543, | |
| "learning_rate": 3.4749405654901294e-05, | |
| "loss": 4.0167, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.37360178970917224, | |
| "grad_norm": 2.769604444503784, | |
| "learning_rate": 3.46684491812097e-05, | |
| "loss": 4.2449, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.37472035794183445, | |
| "grad_norm": 2.3599345684051514, | |
| "learning_rate": 3.45873733139092e-05, | |
| "loss": 4.0715, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.37583892617449666, | |
| "grad_norm": 3.5112671852111816, | |
| "learning_rate": 3.4506179054188344e-05, | |
| "loss": 3.896, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3769574944071588, | |
| "grad_norm": 2.5493431091308594, | |
| "learning_rate": 3.442486740469766e-05, | |
| "loss": 3.8645, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.378076062639821, | |
| "grad_norm": 2.665292501449585, | |
| "learning_rate": 3.434343936953729e-05, | |
| "loss": 3.9132, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.37919463087248323, | |
| "grad_norm": 2.7781031131744385, | |
| "learning_rate": 3.426189595424461e-05, | |
| "loss": 4.0039, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.38031319910514544, | |
| "grad_norm": 3.5199902057647705, | |
| "learning_rate": 3.41802381657818e-05, | |
| "loss": 4.0221, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3814317673378076, | |
| "grad_norm": 2.529172897338867, | |
| "learning_rate": 3.4098467012523404e-05, | |
| "loss": 3.9565, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3825503355704698, | |
| "grad_norm": 3.5351314544677734, | |
| "learning_rate": 3.401658350424389e-05, | |
| "loss": 4.2232, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.383668903803132, | |
| "grad_norm": 2.705423593521118, | |
| "learning_rate": 3.393458865210516e-05, | |
| "loss": 3.8003, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.38478747203579416, | |
| "grad_norm": 2.4735591411590576, | |
| "learning_rate": 3.38524834686441e-05, | |
| "loss": 3.9762, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3859060402684564, | |
| "grad_norm": 3.4464943408966064, | |
| "learning_rate": 3.377026896776003e-05, | |
| "loss": 4.2064, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3870246085011186, | |
| "grad_norm": 2.1392390727996826, | |
| "learning_rate": 3.368794616470222e-05, | |
| "loss": 3.9257, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.38814317673378074, | |
| "grad_norm": 3.5217792987823486, | |
| "learning_rate": 3.360551607605735e-05, | |
| "loss": 3.7378, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.38926174496644295, | |
| "grad_norm": 2.3355872631073, | |
| "learning_rate": 3.3522979719736926e-05, | |
| "loss": 4.3103, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.39038031319910516, | |
| "grad_norm": 1.909463882446289, | |
| "learning_rate": 3.344033811496475e-05, | |
| "loss": 3.9676, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.39149888143176736, | |
| "grad_norm": 2.3093831539154053, | |
| "learning_rate": 3.3357592282264296e-05, | |
| "loss": 3.915, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3926174496644295, | |
| "grad_norm": 2.086834192276001, | |
| "learning_rate": 3.327474324344614e-05, | |
| "loss": 3.9272, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.39373601789709173, | |
| "grad_norm": 2.3872201442718506, | |
| "learning_rate": 3.3191792021595316e-05, | |
| "loss": 3.8731, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.39485458612975394, | |
| "grad_norm": 2.4958016872406006, | |
| "learning_rate": 3.310873964105872e-05, | |
| "loss": 3.9696, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3959731543624161, | |
| "grad_norm": 2.3622846603393555, | |
| "learning_rate": 3.302558712743241e-05, | |
| "loss": 3.6666, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3970917225950783, | |
| "grad_norm": 3.684812068939209, | |
| "learning_rate": 3.2942335507548966e-05, | |
| "loss": 3.9342, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3982102908277405, | |
| "grad_norm": 2.6267335414886475, | |
| "learning_rate": 3.285898580946482e-05, | |
| "loss": 4.0503, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.39932885906040266, | |
| "grad_norm": 2.092944860458374, | |
| "learning_rate": 3.277553906244756e-05, | |
| "loss": 4.1497, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.4004474272930649, | |
| "grad_norm": 2.3737025260925293, | |
| "learning_rate": 3.2691996296963186e-05, | |
| "loss": 3.8479, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4015659955257271, | |
| "grad_norm": 2.4298672676086426, | |
| "learning_rate": 3.260835854466342e-05, | |
| "loss": 4.0085, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.40268456375838924, | |
| "grad_norm": 2.19952130317688, | |
| "learning_rate": 3.252462683837297e-05, | |
| "loss": 4.4371, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.40380313199105144, | |
| "grad_norm": 2.4244048595428467, | |
| "learning_rate": 3.244080221207674e-05, | |
| "loss": 4.2144, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.40492170022371365, | |
| "grad_norm": 2.4055869579315186, | |
| "learning_rate": 3.23568857009071e-05, | |
| "loss": 3.9106, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.40604026845637586, | |
| "grad_norm": 2.3508224487304688, | |
| "learning_rate": 3.2272878341131075e-05, | |
| "loss": 4.237, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.407158836689038, | |
| "grad_norm": 2.4762392044067383, | |
| "learning_rate": 3.218878117013756e-05, | |
| "loss": 4.0447, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4082774049217002, | |
| "grad_norm": 2.0015032291412354, | |
| "learning_rate": 3.210459522642452e-05, | |
| "loss": 4.0781, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.40939597315436244, | |
| "grad_norm": 2.745539665222168, | |
| "learning_rate": 3.2020321549586154e-05, | |
| "loss": 3.8968, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4105145413870246, | |
| "grad_norm": 2.3843181133270264, | |
| "learning_rate": 3.193596118030005e-05, | |
| "loss": 4.0991, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4116331096196868, | |
| "grad_norm": 2.248856782913208, | |
| "learning_rate": 3.185151516031434e-05, | |
| "loss": 4.034, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.412751677852349, | |
| "grad_norm": 2.3670918941497803, | |
| "learning_rate": 3.1766984532434853e-05, | |
| "loss": 4.0161, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.41387024608501116, | |
| "grad_norm": 2.298100709915161, | |
| "learning_rate": 3.1682370340512217e-05, | |
| "loss": 4.1546, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.41498881431767337, | |
| "grad_norm": 1.788293719291687, | |
| "learning_rate": 3.159767362942896e-05, | |
| "loss": 4.1429, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.4161073825503356, | |
| "grad_norm": 2.500016212463379, | |
| "learning_rate": 3.1512895445086636e-05, | |
| "loss": 3.7988, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4172259507829978, | |
| "grad_norm": 3.096670627593994, | |
| "learning_rate": 3.14280368343929e-05, | |
| "loss": 4.406, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.41834451901565994, | |
| "grad_norm": 2.44936466217041, | |
| "learning_rate": 3.134309884524856e-05, | |
| "loss": 4.1445, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.41946308724832215, | |
| "grad_norm": 2.5851917266845703, | |
| "learning_rate": 3.125808252653466e-05, | |
| "loss": 4.6243, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.42058165548098436, | |
| "grad_norm": 2.472642183303833, | |
| "learning_rate": 3.1172988928099525e-05, | |
| "loss": 4.2828, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4217002237136465, | |
| "grad_norm": 2.0510940551757812, | |
| "learning_rate": 3.108781910074578e-05, | |
| "loss": 3.7741, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4228187919463087, | |
| "grad_norm": 2.4596452713012695, | |
| "learning_rate": 3.100257409621738e-05, | |
| "loss": 3.974, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.42393736017897093, | |
| "grad_norm": 2.0561556816101074, | |
| "learning_rate": 3.0917254967186635e-05, | |
| "loss": 3.8241, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4250559284116331, | |
| "grad_norm": 1.8558775186538696, | |
| "learning_rate": 3.0831862767241205e-05, | |
| "loss": 4.1329, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4261744966442953, | |
| "grad_norm": 2.460034132003784, | |
| "learning_rate": 3.074639855087109e-05, | |
| "loss": 4.0921, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4272930648769575, | |
| "grad_norm": 2.0531368255615234, | |
| "learning_rate": 3.0660863373455595e-05, | |
| "loss": 3.8236, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.42841163310961966, | |
| "grad_norm": 2.9021363258361816, | |
| "learning_rate": 3.057525829125032e-05, | |
| "loss": 3.8496, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.42953020134228187, | |
| "grad_norm": 2.5614798069000244, | |
| "learning_rate": 3.0489584361374074e-05, | |
| "loss": 4.1064, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4306487695749441, | |
| "grad_norm": 2.2442526817321777, | |
| "learning_rate": 3.04038426417959e-05, | |
| "loss": 4.1327, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4317673378076063, | |
| "grad_norm": 2.7794744968414307, | |
| "learning_rate": 3.031803419132192e-05, | |
| "loss": 4.1708, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.43288590604026844, | |
| "grad_norm": 2.464383363723755, | |
| "learning_rate": 3.0232160069582332e-05, | |
| "loss": 4.1961, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.43400447427293065, | |
| "grad_norm": 1.990610122680664, | |
| "learning_rate": 3.014622133701826e-05, | |
| "loss": 3.8704, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.43512304250559286, | |
| "grad_norm": 2.630527973175049, | |
| "learning_rate": 3.0060219054868727e-05, | |
| "loss": 4.0868, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.436241610738255, | |
| "grad_norm": 1.9832707643508911, | |
| "learning_rate": 2.9974154285157497e-05, | |
| "loss": 4.0184, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4373601789709172, | |
| "grad_norm": 2.482058048248291, | |
| "learning_rate": 2.9888028090679982e-05, | |
| "loss": 4.0066, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.43847874720357943, | |
| "grad_norm": 2.484102487564087, | |
| "learning_rate": 2.9801841534990115e-05, | |
| "loss": 4.1228, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4395973154362416, | |
| "grad_norm": 2.7631897926330566, | |
| "learning_rate": 2.9715595682387242e-05, | |
| "loss": 3.863, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.4407158836689038, | |
| "grad_norm": 2.0470075607299805, | |
| "learning_rate": 2.9629291597902898e-05, | |
| "loss": 3.8224, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.441834451901566, | |
| "grad_norm": 1.8407243490219116, | |
| "learning_rate": 2.954293034728776e-05, | |
| "loss": 3.8581, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4429530201342282, | |
| "grad_norm": 1.9803217649459839, | |
| "learning_rate": 2.9456512996998424e-05, | |
| "loss": 3.914, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.44407158836689037, | |
| "grad_norm": 2.7368860244750977, | |
| "learning_rate": 2.9370040614184245e-05, | |
| "loss": 4.0718, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.4451901565995526, | |
| "grad_norm": 5.358017444610596, | |
| "learning_rate": 2.9283514266674168e-05, | |
| "loss": 3.7782, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4463087248322148, | |
| "grad_norm": 2.5471818447113037, | |
| "learning_rate": 2.9196935022963525e-05, | |
| "loss": 3.7342, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.44742729306487694, | |
| "grad_norm": 2.3943824768066406, | |
| "learning_rate": 2.9110303952200863e-05, | |
| "loss": 4.0393, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.44854586129753915, | |
| "grad_norm": 2.130612373352051, | |
| "learning_rate": 2.902362212417472e-05, | |
| "loss": 3.7082, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.44966442953020136, | |
| "grad_norm": 2.9800944328308105, | |
| "learning_rate": 2.893689060930045e-05, | |
| "loss": 3.8505, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4507829977628635, | |
| "grad_norm": 2.35976243019104, | |
| "learning_rate": 2.8850110478606938e-05, | |
| "loss": 4.1292, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4519015659955257, | |
| "grad_norm": 2.627814292907715, | |
| "learning_rate": 2.876328280372346e-05, | |
| "loss": 3.6394, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.45302013422818793, | |
| "grad_norm": 2.3630151748657227, | |
| "learning_rate": 2.8676408656866353e-05, | |
| "loss": 3.7281, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4541387024608501, | |
| "grad_norm": 2.5293986797332764, | |
| "learning_rate": 2.8589489110825897e-05, | |
| "loss": 3.7605, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4552572706935123, | |
| "grad_norm": 2.503148078918457, | |
| "learning_rate": 2.8502525238952916e-05, | |
| "loss": 4.1672, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4563758389261745, | |
| "grad_norm": 2.9510624408721924, | |
| "learning_rate": 2.8415518115145674e-05, | |
| "loss": 3.9833, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4574944071588367, | |
| "grad_norm": 2.3186118602752686, | |
| "learning_rate": 2.8328468813836493e-05, | |
| "loss": 4.1216, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.45861297539149887, | |
| "grad_norm": 3.173356294631958, | |
| "learning_rate": 2.824137840997858e-05, | |
| "loss": 3.9926, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4597315436241611, | |
| "grad_norm": 1.9115232229232788, | |
| "learning_rate": 2.8154247979032665e-05, | |
| "loss": 4.0426, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4608501118568233, | |
| "grad_norm": 2.5544466972351074, | |
| "learning_rate": 2.8067078596953796e-05, | |
| "loss": 3.9857, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.46196868008948544, | |
| "grad_norm": 1.8539265394210815, | |
| "learning_rate": 2.7979871340178003e-05, | |
| "loss": 3.894, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.46308724832214765, | |
| "grad_norm": 2.6608541011810303, | |
| "learning_rate": 2.7892627285609035e-05, | |
| "loss": 4.2197, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.46420581655480986, | |
| "grad_norm": 2.6225533485412598, | |
| "learning_rate": 2.780534751060504e-05, | |
| "loss": 3.8299, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.465324384787472, | |
| "grad_norm": 3.5249505043029785, | |
| "learning_rate": 2.771803309296527e-05, | |
| "loss": 3.4724, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4664429530201342, | |
| "grad_norm": 2.1255197525024414, | |
| "learning_rate": 2.7630685110916778e-05, | |
| "loss": 4.12, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.46756152125279643, | |
| "grad_norm": 2.8229820728302, | |
| "learning_rate": 2.754330464310108e-05, | |
| "loss": 4.3171, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.46868008948545864, | |
| "grad_norm": 2.2736423015594482, | |
| "learning_rate": 2.7455892768560888e-05, | |
| "loss": 3.9784, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.4697986577181208, | |
| "grad_norm": 1.8942090272903442, | |
| "learning_rate": 2.736845056672671e-05, | |
| "loss": 3.7484, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.470917225950783, | |
| "grad_norm": 3.5483438968658447, | |
| "learning_rate": 2.7280979117403575e-05, | |
| "loss": 4.1573, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4720357941834452, | |
| "grad_norm": 2.8570756912231445, | |
| "learning_rate": 2.7193479500757685e-05, | |
| "loss": 3.8323, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.47315436241610737, | |
| "grad_norm": 2.425438404083252, | |
| "learning_rate": 2.710595279730308e-05, | |
| "loss": 4.0871, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4742729306487696, | |
| "grad_norm": 2.321178674697876, | |
| "learning_rate": 2.7018400087888263e-05, | |
| "loss": 4.1138, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4753914988814318, | |
| "grad_norm": 2.9220635890960693, | |
| "learning_rate": 2.6930822453682915e-05, | |
| "loss": 3.8734, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.47651006711409394, | |
| "grad_norm": 3.5046780109405518, | |
| "learning_rate": 2.684322097616448e-05, | |
| "loss": 4.3458, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.47762863534675615, | |
| "grad_norm": 3.008169651031494, | |
| "learning_rate": 2.675559673710485e-05, | |
| "loss": 3.7159, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.47874720357941836, | |
| "grad_norm": 2.289055109024048, | |
| "learning_rate": 2.6667950818556993e-05, | |
| "loss": 4.2936, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4798657718120805, | |
| "grad_norm": 1.8744932413101196, | |
| "learning_rate": 2.658028430284159e-05, | |
| "loss": 3.9657, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4809843400447427, | |
| "grad_norm": 2.0840201377868652, | |
| "learning_rate": 2.649259827253368e-05, | |
| "loss": 4.265, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.48210290827740493, | |
| "grad_norm": 2.352440118789673, | |
| "learning_rate": 2.6404893810449272e-05, | |
| "loss": 3.7011, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.48322147651006714, | |
| "grad_norm": 2.6247146129608154, | |
| "learning_rate": 2.631717199963199e-05, | |
| "loss": 3.5364, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4843400447427293, | |
| "grad_norm": 3.915102243423462, | |
| "learning_rate": 2.6229433923339696e-05, | |
| "loss": 4.2285, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.4854586129753915, | |
| "grad_norm": 2.190736770629883, | |
| "learning_rate": 2.6141680665031116e-05, | |
| "loss": 3.9863, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4865771812080537, | |
| "grad_norm": 2.417031764984131, | |
| "learning_rate": 2.6053913308352428e-05, | |
| "loss": 3.9451, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.48769574944071586, | |
| "grad_norm": 2.4824278354644775, | |
| "learning_rate": 2.596613293712396e-05, | |
| "loss": 3.9539, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4888143176733781, | |
| "grad_norm": 2.9509193897247314, | |
| "learning_rate": 2.5878340635326686e-05, | |
| "loss": 3.8236, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4899328859060403, | |
| "grad_norm": 2.627767562866211, | |
| "learning_rate": 2.5790537487088974e-05, | |
| "loss": 4.0342, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.49105145413870244, | |
| "grad_norm": 2.270059108734131, | |
| "learning_rate": 2.5702724576673088e-05, | |
| "loss": 4.3256, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.49217002237136465, | |
| "grad_norm": 2.08921217918396, | |
| "learning_rate": 2.561490298846186e-05, | |
| "loss": 3.8144, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.49328859060402686, | |
| "grad_norm": 2.151421308517456, | |
| "learning_rate": 2.5527073806945278e-05, | |
| "loss": 3.881, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.49440715883668906, | |
| "grad_norm": 3.5347208976745605, | |
| "learning_rate": 2.5439238116707102e-05, | |
| "loss": 4.1678, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4955257270693512, | |
| "grad_norm": 1.9540199041366577, | |
| "learning_rate": 2.5351397002411477e-05, | |
| "loss": 4.028, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.4966442953020134, | |
| "grad_norm": 2.151306629180908, | |
| "learning_rate": 2.5263551548789495e-05, | |
| "loss": 3.6328, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.49776286353467564, | |
| "grad_norm": 3.1425187587738037, | |
| "learning_rate": 2.517570284062586e-05, | |
| "loss": 3.9881, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4988814317673378, | |
| "grad_norm": 2.3265583515167236, | |
| "learning_rate": 2.5087851962745468e-05, | |
| "loss": 3.9283, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.091590166091919, | |
| "learning_rate": 2.5e-05, | |
| "loss": 3.6892, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5011185682326622, | |
| "grad_norm": 2.2790603637695312, | |
| "learning_rate": 2.4912148037254535e-05, | |
| "loss": 3.7534, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5022371364653244, | |
| "grad_norm": 2.3313305377960205, | |
| "learning_rate": 2.4824297159374142e-05, | |
| "loss": 3.8916, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.5033557046979866, | |
| "grad_norm": 2.9091544151306152, | |
| "learning_rate": 2.473644845121051e-05, | |
| "loss": 4.1545, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5044742729306487, | |
| "grad_norm": 1.9648659229278564, | |
| "learning_rate": 2.464860299758854e-05, | |
| "loss": 3.9658, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5055928411633109, | |
| "grad_norm": 2.6131553649902344, | |
| "learning_rate": 2.45607618832929e-05, | |
| "loss": 3.9665, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5067114093959731, | |
| "grad_norm": 2.341771364212036, | |
| "learning_rate": 2.4472926193054728e-05, | |
| "loss": 3.9947, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5078299776286354, | |
| "grad_norm": 2.526048183441162, | |
| "learning_rate": 2.4385097011538144e-05, | |
| "loss": 4.0324, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5089485458612976, | |
| "grad_norm": 3.4752910137176514, | |
| "learning_rate": 2.4297275423326918e-05, | |
| "loss": 4.0246, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5100671140939598, | |
| "grad_norm": 2.2056233882904053, | |
| "learning_rate": 2.420946251291103e-05, | |
| "loss": 3.8516, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5111856823266219, | |
| "grad_norm": 2.7161202430725098, | |
| "learning_rate": 2.412165936467332e-05, | |
| "loss": 4.2025, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5123042505592841, | |
| "grad_norm": 2.7404301166534424, | |
| "learning_rate": 2.4033867062876052e-05, | |
| "loss": 3.7109, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5134228187919463, | |
| "grad_norm": 3.4687747955322266, | |
| "learning_rate": 2.3946086691647575e-05, | |
| "loss": 3.8982, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5145413870246085, | |
| "grad_norm": 2.116055488586426, | |
| "learning_rate": 2.3858319334968893e-05, | |
| "loss": 3.9515, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5156599552572707, | |
| "grad_norm": 2.8268518447875977, | |
| "learning_rate": 2.377056607666031e-05, | |
| "loss": 4.4184, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5167785234899329, | |
| "grad_norm": 2.4674103260040283, | |
| "learning_rate": 2.368282800036801e-05, | |
| "loss": 4.0095, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5178970917225951, | |
| "grad_norm": 2.0366992950439453, | |
| "learning_rate": 2.359510618955073e-05, | |
| "loss": 4.4928, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5190156599552572, | |
| "grad_norm": 2.1218111515045166, | |
| "learning_rate": 2.350740172746633e-05, | |
| "loss": 4.149, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5201342281879194, | |
| "grad_norm": 3.7365167140960693, | |
| "learning_rate": 2.3419715697158416e-05, | |
| "loss": 3.6304, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5212527964205816, | |
| "grad_norm": 2.4339170455932617, | |
| "learning_rate": 2.3332049181443016e-05, | |
| "loss": 3.9618, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5223713646532439, | |
| "grad_norm": 2.091418981552124, | |
| "learning_rate": 2.3244403262895153e-05, | |
| "loss": 4.3328, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5234899328859061, | |
| "grad_norm": 2.5647706985473633, | |
| "learning_rate": 2.3156779023835525e-05, | |
| "loss": 3.9168, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5246085011185683, | |
| "grad_norm": 3.0460050106048584, | |
| "learning_rate": 2.3069177546317087e-05, | |
| "loss": 3.6819, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5257270693512305, | |
| "grad_norm": 1.7986963987350464, | |
| "learning_rate": 2.2981599912111736e-05, | |
| "loss": 4.0007, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5268456375838926, | |
| "grad_norm": 2.6966090202331543, | |
| "learning_rate": 2.289404720269693e-05, | |
| "loss": 4.2807, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5279642058165548, | |
| "grad_norm": 2.5532920360565186, | |
| "learning_rate": 2.280652049924232e-05, | |
| "loss": 4.1665, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.529082774049217, | |
| "grad_norm": 2.1815285682678223, | |
| "learning_rate": 2.2719020882596427e-05, | |
| "loss": 3.6647, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5302013422818792, | |
| "grad_norm": 2.97127103805542, | |
| "learning_rate": 2.2631549433273293e-05, | |
| "loss": 3.8236, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5313199105145414, | |
| "grad_norm": 2.294546365737915, | |
| "learning_rate": 2.2544107231439114e-05, | |
| "loss": 3.6576, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5324384787472036, | |
| "grad_norm": 2.5872814655303955, | |
| "learning_rate": 2.2456695356898916e-05, | |
| "loss": 4.077, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5335570469798657, | |
| "grad_norm": 2.548797369003296, | |
| "learning_rate": 2.2369314889083235e-05, | |
| "loss": 3.7512, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5346756152125279, | |
| "grad_norm": 2.8900959491729736, | |
| "learning_rate": 2.2281966907034733e-05, | |
| "loss": 4.3499, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5357941834451901, | |
| "grad_norm": 3.7104897499084473, | |
| "learning_rate": 2.2194652489394967e-05, | |
| "loss": 3.5961, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5369127516778524, | |
| "grad_norm": 2.2205984592437744, | |
| "learning_rate": 2.2107372714390974e-05, | |
| "loss": 3.6201, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5380313199105146, | |
| "grad_norm": 1.851414442062378, | |
| "learning_rate": 2.2020128659822e-05, | |
| "loss": 4.0772, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5391498881431768, | |
| "grad_norm": 2.894435167312622, | |
| "learning_rate": 2.1932921403046207e-05, | |
| "loss": 3.6691, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.540268456375839, | |
| "grad_norm": 2.332059621810913, | |
| "learning_rate": 2.1845752020967337e-05, | |
| "loss": 3.7673, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5413870246085011, | |
| "grad_norm": 2.175400972366333, | |
| "learning_rate": 2.1758621590021426e-05, | |
| "loss": 3.8803, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5425055928411633, | |
| "grad_norm": 2.4179916381835938, | |
| "learning_rate": 2.1671531186163512e-05, | |
| "loss": 3.6459, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5436241610738255, | |
| "grad_norm": 3.060016393661499, | |
| "learning_rate": 2.158448188485433e-05, | |
| "loss": 3.8407, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5447427293064877, | |
| "grad_norm": 2.318657875061035, | |
| "learning_rate": 2.1497474761047086e-05, | |
| "loss": 3.7452, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5458612975391499, | |
| "grad_norm": 2.2398786544799805, | |
| "learning_rate": 2.141051088917411e-05, | |
| "loss": 4.078, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5469798657718121, | |
| "grad_norm": 2.4985172748565674, | |
| "learning_rate": 2.1323591343133646e-05, | |
| "loss": 3.8253, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5480984340044742, | |
| "grad_norm": 2.5000696182250977, | |
| "learning_rate": 2.1236717196276558e-05, | |
| "loss": 3.753, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5492170022371364, | |
| "grad_norm": 2.586850643157959, | |
| "learning_rate": 2.114988952139307e-05, | |
| "loss": 4.1803, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5503355704697986, | |
| "grad_norm": 2.7555367946624756, | |
| "learning_rate": 2.106310939069956e-05, | |
| "loss": 3.8567, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5514541387024608, | |
| "grad_norm": 2.633790969848633, | |
| "learning_rate": 2.0976377875825283e-05, | |
| "loss": 4.1461, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5525727069351231, | |
| "grad_norm": 3.8133838176727295, | |
| "learning_rate": 2.0889696047799143e-05, | |
| "loss": 4.0689, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5536912751677853, | |
| "grad_norm": 1.6977475881576538, | |
| "learning_rate": 2.0803064977036478e-05, | |
| "loss": 3.9074, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5548098434004475, | |
| "grad_norm": 3.245305299758911, | |
| "learning_rate": 2.071648573332583e-05, | |
| "loss": 3.7151, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5559284116331096, | |
| "grad_norm": 2.754373550415039, | |
| "learning_rate": 2.0629959385815757e-05, | |
| "loss": 4.0098, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5570469798657718, | |
| "grad_norm": 2.561891794204712, | |
| "learning_rate": 2.054348700300158e-05, | |
| "loss": 3.8599, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.558165548098434, | |
| "grad_norm": 2.573894500732422, | |
| "learning_rate": 2.0457069652712242e-05, | |
| "loss": 4.1772, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5592841163310962, | |
| "grad_norm": 2.2721850872039795, | |
| "learning_rate": 2.037070840209711e-05, | |
| "loss": 4.2124, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5604026845637584, | |
| "grad_norm": 2.0952377319335938, | |
| "learning_rate": 2.0284404317612764e-05, | |
| "loss": 3.9423, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5615212527964206, | |
| "grad_norm": 3.6689555644989014, | |
| "learning_rate": 2.019815846500988e-05, | |
| "loss": 3.8274, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5626398210290827, | |
| "grad_norm": 2.6612648963928223, | |
| "learning_rate": 2.0111971909320027e-05, | |
| "loss": 3.9185, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5637583892617449, | |
| "grad_norm": 2.889272928237915, | |
| "learning_rate": 2.0025845714842516e-05, | |
| "loss": 3.7501, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5648769574944071, | |
| "grad_norm": 3.470381736755371, | |
| "learning_rate": 1.993978094513128e-05, | |
| "loss": 3.6945, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5659955257270693, | |
| "grad_norm": 2.4324817657470703, | |
| "learning_rate": 1.9853778662981744e-05, | |
| "loss": 4.0761, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5671140939597316, | |
| "grad_norm": 2.1613693237304688, | |
| "learning_rate": 1.9767839930417673e-05, | |
| "loss": 4.3073, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5682326621923938, | |
| "grad_norm": 2.2693519592285156, | |
| "learning_rate": 1.968196580867808e-05, | |
| "loss": 4.0687, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.569351230425056, | |
| "grad_norm": 1.9982450008392334, | |
| "learning_rate": 1.9596157358204097e-05, | |
| "loss": 3.6727, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5704697986577181, | |
| "grad_norm": 3.018151044845581, | |
| "learning_rate": 1.9510415638625932e-05, | |
| "loss": 3.6253, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5715883668903803, | |
| "grad_norm": 1.8852089643478394, | |
| "learning_rate": 1.9424741708749695e-05, | |
| "loss": 4.0768, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5727069351230425, | |
| "grad_norm": 2.7679364681243896, | |
| "learning_rate": 1.9339136626544407e-05, | |
| "loss": 3.6646, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5738255033557047, | |
| "grad_norm": 2.5092828273773193, | |
| "learning_rate": 1.9253601449128914e-05, | |
| "loss": 4.0424, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5749440715883669, | |
| "grad_norm": 1.9775032997131348, | |
| "learning_rate": 1.9168137232758797e-05, | |
| "loss": 3.8255, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5760626398210291, | |
| "grad_norm": 2.3674395084381104, | |
| "learning_rate": 1.9082745032813368e-05, | |
| "loss": 3.9264, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5771812080536913, | |
| "grad_norm": 1.9700473546981812, | |
| "learning_rate": 1.8997425903782627e-05, | |
| "loss": 3.9236, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5782997762863534, | |
| "grad_norm": 2.295315742492676, | |
| "learning_rate": 1.891218089925423e-05, | |
| "loss": 3.773, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5794183445190156, | |
| "grad_norm": 1.7736364603042603, | |
| "learning_rate": 1.8827011071900474e-05, | |
| "loss": 3.8288, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5805369127516778, | |
| "grad_norm": 2.1893913745880127, | |
| "learning_rate": 1.874191747346534e-05, | |
| "loss": 3.8251, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5816554809843401, | |
| "grad_norm": 2.590500593185425, | |
| "learning_rate": 1.865690115475144e-05, | |
| "loss": 4.1849, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5827740492170023, | |
| "grad_norm": 2.320377826690674, | |
| "learning_rate": 1.8571963165607104e-05, | |
| "loss": 3.8381, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5838926174496645, | |
| "grad_norm": 2.334003210067749, | |
| "learning_rate": 1.8487104554913363e-05, | |
| "loss": 3.9728, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5850111856823266, | |
| "grad_norm": 2.939434051513672, | |
| "learning_rate": 1.8402326370571056e-05, | |
| "loss": 3.9855, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5861297539149888, | |
| "grad_norm": 2.41349720954895, | |
| "learning_rate": 1.8317629659487796e-05, | |
| "loss": 4.0445, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.587248322147651, | |
| "grad_norm": 2.5825881958007812, | |
| "learning_rate": 1.8233015467565152e-05, | |
| "loss": 3.852, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5883668903803132, | |
| "grad_norm": 2.247124671936035, | |
| "learning_rate": 1.8148484839685662e-05, | |
| "loss": 4.341, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5894854586129754, | |
| "grad_norm": 2.6871159076690674, | |
| "learning_rate": 1.806403881969996e-05, | |
| "loss": 3.9645, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5906040268456376, | |
| "grad_norm": 3.4999427795410156, | |
| "learning_rate": 1.7979678450413845e-05, | |
| "loss": 4.1412, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5917225950782998, | |
| "grad_norm": 3.4024899005889893, | |
| "learning_rate": 1.789540477357548e-05, | |
| "loss": 4.191, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5928411633109619, | |
| "grad_norm": 2.1801340579986572, | |
| "learning_rate": 1.781121882986245e-05, | |
| "loss": 3.6334, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5939597315436241, | |
| "grad_norm": 2.63584303855896, | |
| "learning_rate": 1.7727121658868934e-05, | |
| "loss": 3.7061, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5950782997762863, | |
| "grad_norm": 2.664759635925293, | |
| "learning_rate": 1.764311429909291e-05, | |
| "loss": 4.0002, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5961968680089486, | |
| "grad_norm": 3.4378206729888916, | |
| "learning_rate": 1.7559197787923263e-05, | |
| "loss": 3.8005, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5973154362416108, | |
| "grad_norm": 2.902148485183716, | |
| "learning_rate": 1.7475373161627034e-05, | |
| "loss": 4.086, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.598434004474273, | |
| "grad_norm": 1.8565155267715454, | |
| "learning_rate": 1.739164145533658e-05, | |
| "loss": 3.6199, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5995525727069351, | |
| "grad_norm": 2.2975833415985107, | |
| "learning_rate": 1.730800370303683e-05, | |
| "loss": 4.0409, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6006711409395973, | |
| "grad_norm": 2.3823060989379883, | |
| "learning_rate": 1.7224460937552446e-05, | |
| "loss": 3.9938, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6017897091722595, | |
| "grad_norm": 1.8713372945785522, | |
| "learning_rate": 1.714101419053518e-05, | |
| "loss": 3.9404, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6029082774049217, | |
| "grad_norm": 2.7200701236724854, | |
| "learning_rate": 1.7057664492451036e-05, | |
| "loss": 3.6198, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6040268456375839, | |
| "grad_norm": 2.899989604949951, | |
| "learning_rate": 1.6974412872567597e-05, | |
| "loss": 3.8505, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6051454138702461, | |
| "grad_norm": 3.0191915035247803, | |
| "learning_rate": 1.6891260358941276e-05, | |
| "loss": 3.8781, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6062639821029083, | |
| "grad_norm": 3.2697198390960693, | |
| "learning_rate": 1.6808207978404683e-05, | |
| "loss": 3.7293, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6073825503355704, | |
| "grad_norm": 4.28400182723999, | |
| "learning_rate": 1.672525675655387e-05, | |
| "loss": 4.0706, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6085011185682326, | |
| "grad_norm": 2.090829849243164, | |
| "learning_rate": 1.664240771773571e-05, | |
| "loss": 4.0724, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6096196868008948, | |
| "grad_norm": 2.9155173301696777, | |
| "learning_rate": 1.655966188503526e-05, | |
| "loss": 3.7549, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.610738255033557, | |
| "grad_norm": 2.9214680194854736, | |
| "learning_rate": 1.647702028026308e-05, | |
| "loss": 3.9448, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6118568232662193, | |
| "grad_norm": 1.5951290130615234, | |
| "learning_rate": 1.6394483923942655e-05, | |
| "loss": 4.1215, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6129753914988815, | |
| "grad_norm": 2.0903103351593018, | |
| "learning_rate": 1.6312053835297784e-05, | |
| "loss": 3.8019, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6140939597315436, | |
| "grad_norm": 2.552449941635132, | |
| "learning_rate": 1.6229731032239988e-05, | |
| "loss": 4.0168, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6152125279642058, | |
| "grad_norm": 2.0024003982543945, | |
| "learning_rate": 1.6147516531355912e-05, | |
| "loss": 3.8326, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.616331096196868, | |
| "grad_norm": 2.165592908859253, | |
| "learning_rate": 1.606541134789485e-05, | |
| "loss": 3.798, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6174496644295302, | |
| "grad_norm": 2.631091356277466, | |
| "learning_rate": 1.5983416495756116e-05, | |
| "loss": 3.8511, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6185682326621924, | |
| "grad_norm": 2.507314682006836, | |
| "learning_rate": 1.5901532987476598e-05, | |
| "loss": 4.1623, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6196868008948546, | |
| "grad_norm": 2.264253854751587, | |
| "learning_rate": 1.58197618342182e-05, | |
| "loss": 3.8904, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6208053691275168, | |
| "grad_norm": 2.0525124073028564, | |
| "learning_rate": 1.573810404575539e-05, | |
| "loss": 3.8115, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6219239373601789, | |
| "grad_norm": 2.31563401222229, | |
| "learning_rate": 1.565656063046272e-05, | |
| "loss": 3.9583, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6230425055928411, | |
| "grad_norm": 2.9089858531951904, | |
| "learning_rate": 1.5575132595302352e-05, | |
| "loss": 3.9061, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6241610738255033, | |
| "grad_norm": 2.3452372550964355, | |
| "learning_rate": 1.549382094581166e-05, | |
| "loss": 3.8342, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6252796420581656, | |
| "grad_norm": 2.0573930740356445, | |
| "learning_rate": 1.541262668609081e-05, | |
| "loss": 3.7931, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6263982102908278, | |
| "grad_norm": 2.165867328643799, | |
| "learning_rate": 1.5331550818790315e-05, | |
| "loss": 3.8872, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.62751677852349, | |
| "grad_norm": 2.885354518890381, | |
| "learning_rate": 1.525059434509871e-05, | |
| "loss": 3.9573, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6286353467561522, | |
| "grad_norm": 2.424731731414795, | |
| "learning_rate": 1.516975826473015e-05, | |
| "loss": 3.7392, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6297539149888143, | |
| "grad_norm": 1.858024001121521, | |
| "learning_rate": 1.5089043575912099e-05, | |
| "loss": 3.8223, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6308724832214765, | |
| "grad_norm": 2.57297420501709, | |
| "learning_rate": 1.5008451275372949e-05, | |
| "loss": 3.4912, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6319910514541387, | |
| "grad_norm": 2.9374403953552246, | |
| "learning_rate": 1.4927982358329768e-05, | |
| "loss": 4.142, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6331096196868009, | |
| "grad_norm": 2.8601393699645996, | |
| "learning_rate": 1.484763781847599e-05, | |
| "loss": 4.0499, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6342281879194631, | |
| "grad_norm": 2.611351251602173, | |
| "learning_rate": 1.4767418647969133e-05, | |
| "loss": 4.1721, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6353467561521253, | |
| "grad_norm": 2.2778544425964355, | |
| "learning_rate": 1.4687325837418563e-05, | |
| "loss": 4.0221, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6364653243847874, | |
| "grad_norm": 2.24289608001709, | |
| "learning_rate": 1.4607360375873246e-05, | |
| "loss": 4.0047, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6375838926174496, | |
| "grad_norm": 2.1393930912017822, | |
| "learning_rate": 1.4527523250809545e-05, | |
| "loss": 3.7402, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6387024608501118, | |
| "grad_norm": 2.4611830711364746, | |
| "learning_rate": 1.4447815448119017e-05, | |
| "loss": 4.1909, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.639821029082774, | |
| "grad_norm": 2.261955976486206, | |
| "learning_rate": 1.4368237952096258e-05, | |
| "loss": 3.6671, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6409395973154363, | |
| "grad_norm": 2.2543327808380127, | |
| "learning_rate": 1.4288791745426739e-05, | |
| "loss": 3.719, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6420581655480985, | |
| "grad_norm": 3.0173962116241455, | |
| "learning_rate": 1.4209477809174634e-05, | |
| "loss": 3.9079, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6431767337807607, | |
| "grad_norm": 2.1777987480163574, | |
| "learning_rate": 1.4130297122770774e-05, | |
| "loss": 3.6232, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6442953020134228, | |
| "grad_norm": 2.0531842708587646, | |
| "learning_rate": 1.4051250664000515e-05, | |
| "loss": 3.9533, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.645413870246085, | |
| "grad_norm": 2.7439255714416504, | |
| "learning_rate": 1.3972339408991626e-05, | |
| "loss": 3.8229, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6465324384787472, | |
| "grad_norm": 2.413195848464966, | |
| "learning_rate": 1.3893564332202319e-05, | |
| "loss": 4.346, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6476510067114094, | |
| "grad_norm": 2.382014274597168, | |
| "learning_rate": 1.381492640640914e-05, | |
| "loss": 3.771, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6487695749440716, | |
| "grad_norm": 1.874355673789978, | |
| "learning_rate": 1.3736426602694998e-05, | |
| "loss": 3.7268, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6498881431767338, | |
| "grad_norm": 1.958233118057251, | |
| "learning_rate": 1.365806589043718e-05, | |
| "loss": 3.9595, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6510067114093959, | |
| "grad_norm": 2.6319327354431152, | |
| "learning_rate": 1.357984523729533e-05, | |
| "loss": 4.2425, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6521252796420581, | |
| "grad_norm": 3.006190061569214, | |
| "learning_rate": 1.3501765609199534e-05, | |
| "loss": 3.8662, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6532438478747203, | |
| "grad_norm": 2.89542818069458, | |
| "learning_rate": 1.342382797033842e-05, | |
| "loss": 3.8635, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6543624161073825, | |
| "grad_norm": 2.149411916732788, | |
| "learning_rate": 1.3346033283147196e-05, | |
| "loss": 3.9787, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6554809843400448, | |
| "grad_norm": 2.2859368324279785, | |
| "learning_rate": 1.3268382508295812e-05, | |
| "loss": 3.735, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.656599552572707, | |
| "grad_norm": 3.07099986076355, | |
| "learning_rate": 1.3190876604677043e-05, | |
| "loss": 3.9014, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6577181208053692, | |
| "grad_norm": 2.119368553161621, | |
| "learning_rate": 1.3113516529394704e-05, | |
| "loss": 4.0083, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6588366890380313, | |
| "grad_norm": 2.585334300994873, | |
| "learning_rate": 1.3036303237751812e-05, | |
| "loss": 3.7846, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6599552572706935, | |
| "grad_norm": 2.7365801334381104, | |
| "learning_rate": 1.2959237683238767e-05, | |
| "loss": 3.6363, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6610738255033557, | |
| "grad_norm": 2.1108875274658203, | |
| "learning_rate": 1.2882320817521588e-05, | |
| "loss": 3.8154, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6621923937360179, | |
| "grad_norm": 2.750692129135132, | |
| "learning_rate": 1.2805553590430197e-05, | |
| "loss": 3.7864, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6633109619686801, | |
| "grad_norm": 1.9360623359680176, | |
| "learning_rate": 1.2728936949946638e-05, | |
| "loss": 3.5591, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6644295302013423, | |
| "grad_norm": 2.5163891315460205, | |
| "learning_rate": 1.2652471842193415e-05, | |
| "loss": 3.8985, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6655480984340044, | |
| "grad_norm": 2.350154399871826, | |
| "learning_rate": 1.2576159211421763e-05, | |
| "loss": 3.8853, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.915135622024536, | |
| "learning_rate": 1.2500000000000006e-05, | |
| "loss": 3.6496, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6677852348993288, | |
| "grad_norm": 3.425151824951172, | |
| "learning_rate": 1.2423995148401954e-05, | |
| "loss": 4.1841, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.668903803131991, | |
| "grad_norm": 2.1344897747039795, | |
| "learning_rate": 1.2348145595195245e-05, | |
| "loss": 3.6813, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6700223713646533, | |
| "grad_norm": 3.043466806411743, | |
| "learning_rate": 1.2272452277029783e-05, | |
| "loss": 3.717, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6711409395973155, | |
| "grad_norm": 2.1800496578216553, | |
| "learning_rate": 1.2196916128626126e-05, | |
| "loss": 3.7066, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6722595078299777, | |
| "grad_norm": 2.1941945552825928, | |
| "learning_rate": 1.2121538082764009e-05, | |
| "loss": 3.8536, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6733780760626398, | |
| "grad_norm": 1.9257776737213135, | |
| "learning_rate": 1.2046319070270792e-05, | |
| "loss": 3.8676, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.674496644295302, | |
| "grad_norm": 1.9963043928146362, | |
| "learning_rate": 1.1971260020009944e-05, | |
| "loss": 4.0604, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6756152125279642, | |
| "grad_norm": 4.963601112365723, | |
| "learning_rate": 1.1896361858869598e-05, | |
| "loss": 3.8178, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6767337807606264, | |
| "grad_norm": 2.1134705543518066, | |
| "learning_rate": 1.1821625511751122e-05, | |
| "loss": 4.3728, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6778523489932886, | |
| "grad_norm": 1.895908236503601, | |
| "learning_rate": 1.174705190155766e-05, | |
| "loss": 4.0292, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6789709172259508, | |
| "grad_norm": 2.871042490005493, | |
| "learning_rate": 1.1672641949182769e-05, | |
| "loss": 3.6701, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.680089485458613, | |
| "grad_norm": 3.3493497371673584, | |
| "learning_rate": 1.1598396573499005e-05, | |
| "loss": 4.0769, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6812080536912751, | |
| "grad_norm": 2.001706838607788, | |
| "learning_rate": 1.152431669134663e-05, | |
| "loss": 3.9975, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6823266219239373, | |
| "grad_norm": 2.3988122940063477, | |
| "learning_rate": 1.145040321752223e-05, | |
| "loss": 4.1463, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6834451901565995, | |
| "grad_norm": 2.896697759628296, | |
| "learning_rate": 1.1376657064767485e-05, | |
| "loss": 3.6005, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6845637583892618, | |
| "grad_norm": 2.0190067291259766, | |
| "learning_rate": 1.130307914375783e-05, | |
| "loss": 3.8035, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.685682326621924, | |
| "grad_norm": 1.734345555305481, | |
| "learning_rate": 1.122967036309127e-05, | |
| "loss": 3.9921, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6868008948545862, | |
| "grad_norm": 2.797583818435669, | |
| "learning_rate": 1.1156431629277118e-05, | |
| "loss": 3.7049, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6879194630872483, | |
| "grad_norm": 2.020956039428711, | |
| "learning_rate": 1.1083363846724822e-05, | |
| "loss": 3.9454, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6890380313199105, | |
| "grad_norm": 2.833671808242798, | |
| "learning_rate": 1.1010467917732784e-05, | |
| "loss": 3.9514, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6901565995525727, | |
| "grad_norm": 1.816365122795105, | |
| "learning_rate": 1.093774474247721e-05, | |
| "loss": 4.1637, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6912751677852349, | |
| "grad_norm": 2.2042744159698486, | |
| "learning_rate": 1.086519521900103e-05, | |
| "loss": 3.8951, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6923937360178971, | |
| "grad_norm": 3.488060712814331, | |
| "learning_rate": 1.0792820243202772e-05, | |
| "loss": 3.942, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6935123042505593, | |
| "grad_norm": 2.248445510864258, | |
| "learning_rate": 1.0720620708825536e-05, | |
| "loss": 3.8709, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6946308724832215, | |
| "grad_norm": 2.1262619495391846, | |
| "learning_rate": 1.0648597507445884e-05, | |
| "loss": 3.9484, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6957494407158836, | |
| "grad_norm": 2.552934169769287, | |
| "learning_rate": 1.0576751528462935e-05, | |
| "loss": 3.508, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6968680089485458, | |
| "grad_norm": 2.420132875442505, | |
| "learning_rate": 1.0505083659087284e-05, | |
| "loss": 3.6366, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.697986577181208, | |
| "grad_norm": 2.1934938430786133, | |
| "learning_rate": 1.043359478433012e-05, | |
| "loss": 4.1458, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6991051454138703, | |
| "grad_norm": 2.8820436000823975, | |
| "learning_rate": 1.0362285786992231e-05, | |
| "loss": 4.145, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7002237136465325, | |
| "grad_norm": 2.036956548690796, | |
| "learning_rate": 1.0291157547653172e-05, | |
| "loss": 4.0621, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7013422818791947, | |
| "grad_norm": 2.075648784637451, | |
| "learning_rate": 1.0220210944660338e-05, | |
| "loss": 4.1723, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.7024608501118568, | |
| "grad_norm": 2.007321357727051, | |
| "learning_rate": 1.0149446854118153e-05, | |
| "loss": 3.9694, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.703579418344519, | |
| "grad_norm": 2.436429977416992, | |
| "learning_rate": 1.0078866149877206e-05, | |
| "loss": 4.1056, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7046979865771812, | |
| "grad_norm": 1.8887969255447388, | |
| "learning_rate": 1.0008469703523493e-05, | |
| "loss": 3.848, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7058165548098434, | |
| "grad_norm": 2.362123489379883, | |
| "learning_rate": 9.93825838436767e-06, | |
| "loss": 4.1663, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7069351230425056, | |
| "grad_norm": 2.105339765548706, | |
| "learning_rate": 9.868233059434288e-06, | |
| "loss": 3.6117, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7080536912751678, | |
| "grad_norm": 2.77400541305542, | |
| "learning_rate": 9.798394593451091e-06, | |
| "loss": 4.035, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.70917225950783, | |
| "grad_norm": 3.4026005268096924, | |
| "learning_rate": 9.728743848838333e-06, | |
| "loss": 3.9344, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7102908277404921, | |
| "grad_norm": 3.4259018898010254, | |
| "learning_rate": 9.659281685698162e-06, | |
| "loss": 4.2997, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7114093959731543, | |
| "grad_norm": 2.261091470718384, | |
| "learning_rate": 9.590008961803943e-06, | |
| "loss": 3.7143, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7125279642058165, | |
| "grad_norm": 3.5165352821350098, | |
| "learning_rate": 9.520926532589725e-06, | |
| "loss": 3.8508, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7136465324384788, | |
| "grad_norm": 2.4055497646331787, | |
| "learning_rate": 9.45203525113962e-06, | |
| "loss": 3.6208, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.714765100671141, | |
| "grad_norm": 3.532019853591919, | |
| "learning_rate": 9.383335968177323e-06, | |
| "loss": 3.8882, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7158836689038032, | |
| "grad_norm": 2.0139145851135254, | |
| "learning_rate": 9.314829532055569e-06, | |
| "loss": 3.9722, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7170022371364653, | |
| "grad_norm": 3.198631525039673, | |
| "learning_rate": 9.24651678874568e-06, | |
| "loss": 3.7721, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7181208053691275, | |
| "grad_norm": 2.2410922050476074, | |
| "learning_rate": 9.178398581827085e-06, | |
| "loss": 3.978, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7192393736017897, | |
| "grad_norm": 2.0665810108184814, | |
| "learning_rate": 9.110475752476935e-06, | |
| "loss": 4.0286, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7203579418344519, | |
| "grad_norm": 2.212315559387207, | |
| "learning_rate": 9.04274913945971e-06, | |
| "loss": 3.92, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7214765100671141, | |
| "grad_norm": 2.330747127532959, | |
| "learning_rate": 8.975219579116863e-06, | |
| "loss": 4.0911, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7225950782997763, | |
| "grad_norm": 2.7871272563934326, | |
| "learning_rate": 8.90788790535646e-06, | |
| "loss": 3.8816, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7237136465324385, | |
| "grad_norm": 2.5167412757873535, | |
| "learning_rate": 8.840754949642935e-06, | |
| "loss": 3.8268, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7248322147651006, | |
| "grad_norm": 1.9781793355941772, | |
| "learning_rate": 8.77382154098679e-06, | |
| "loss": 3.7334, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7259507829977628, | |
| "grad_norm": 2.6232032775878906, | |
| "learning_rate": 8.70708850593434e-06, | |
| "loss": 3.7084, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.727069351230425, | |
| "grad_norm": 2.177091121673584, | |
| "learning_rate": 8.64055666855757e-06, | |
| "loss": 3.9966, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7281879194630873, | |
| "grad_norm": 2.444685220718384, | |
| "learning_rate": 8.574226850443872e-06, | |
| "loss": 3.7267, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7293064876957495, | |
| "grad_norm": 3.574202299118042, | |
| "learning_rate": 8.50809987068598e-06, | |
| "loss": 3.8466, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7304250559284117, | |
| "grad_norm": 3.1503660678863525, | |
| "learning_rate": 8.442176545871805e-06, | |
| "loss": 3.7964, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7315436241610739, | |
| "grad_norm": 1.677579641342163, | |
| "learning_rate": 8.376457690074385e-06, | |
| "loss": 3.8638, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.732662192393736, | |
| "grad_norm": 2.3635876178741455, | |
| "learning_rate": 8.310944114841786e-06, | |
| "loss": 3.8608, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7337807606263982, | |
| "grad_norm": 3.185817241668701, | |
| "learning_rate": 8.24563662918712e-06, | |
| "loss": 4.1601, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7348993288590604, | |
| "grad_norm": 3.7147934436798096, | |
| "learning_rate": 8.180536039578545e-06, | |
| "loss": 3.9529, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7360178970917226, | |
| "grad_norm": 3.5860955715179443, | |
| "learning_rate": 8.115643149929318e-06, | |
| "loss": 4.2173, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7371364653243848, | |
| "grad_norm": 2.398571252822876, | |
| "learning_rate": 8.05095876158782e-06, | |
| "loss": 3.7547, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.738255033557047, | |
| "grad_norm": 2.6575210094451904, | |
| "learning_rate": 7.986483673327724e-06, | |
| "loss": 3.9889, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7393736017897091, | |
| "grad_norm": 2.9067556858062744, | |
| "learning_rate": 7.922218681338095e-06, | |
| "loss": 3.6771, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7404921700223713, | |
| "grad_norm": 2.622535467147827, | |
| "learning_rate": 7.858164579213547e-06, | |
| "loss": 3.8512, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7416107382550335, | |
| "grad_norm": 2.1957478523254395, | |
| "learning_rate": 7.79432215794449e-06, | |
| "loss": 3.9811, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7427293064876958, | |
| "grad_norm": 2.333820343017578, | |
| "learning_rate": 7.730692205907294e-06, | |
| "loss": 3.5702, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.743847874720358, | |
| "grad_norm": 2.3756656646728516, | |
| "learning_rate": 7.667275508854627e-06, | |
| "loss": 4.0504, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7449664429530202, | |
| "grad_norm": 2.2094883918762207, | |
| "learning_rate": 7.604072849905708e-06, | |
| "loss": 3.773, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7460850111856824, | |
| "grad_norm": 2.3588247299194336, | |
| "learning_rate": 7.541085009536625e-06, | |
| "loss": 3.7015, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7472035794183445, | |
| "grad_norm": 2.534830093383789, | |
| "learning_rate": 7.4783127655707465e-06, | |
| "loss": 3.8194, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7483221476510067, | |
| "grad_norm": 2.694711208343506, | |
| "learning_rate": 7.415756893169062e-06, | |
| "loss": 3.9235, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7494407158836689, | |
| "grad_norm": 3.596769332885742, | |
| "learning_rate": 7.3534181648206555e-06, | |
| "loss": 3.8338, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7505592841163311, | |
| "grad_norm": 3.1873180866241455, | |
| "learning_rate": 7.291297350333137e-06, | |
| "loss": 3.6435, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7516778523489933, | |
| "grad_norm": 1.9209433794021606, | |
| "learning_rate": 7.2293952168231316e-06, | |
| "loss": 3.7388, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7527964205816555, | |
| "grad_norm": 2.5653648376464844, | |
| "learning_rate": 7.167712528706844e-06, | |
| "loss": 3.8961, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7539149888143176, | |
| "grad_norm": 1.9980870485305786, | |
| "learning_rate": 7.106250047690588e-06, | |
| "loss": 3.9753, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7550335570469798, | |
| "grad_norm": 2.534268379211426, | |
| "learning_rate": 7.045008532761366e-06, | |
| "loss": 3.7062, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.756152125279642, | |
| "grad_norm": 1.909271001815796, | |
| "learning_rate": 6.983988740177552e-06, | |
| "loss": 3.9121, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7572706935123042, | |
| "grad_norm": 1.9393848180770874, | |
| "learning_rate": 6.923191423459482e-06, | |
| "loss": 3.9378, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7583892617449665, | |
| "grad_norm": 3.104290723800659, | |
| "learning_rate": 6.862617333380214e-06, | |
| "loss": 3.6173, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7595078299776287, | |
| "grad_norm": 1.9617093801498413, | |
| "learning_rate": 6.802267217956221e-06, | |
| "loss": 3.8215, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7606263982102909, | |
| "grad_norm": 2.2684578895568848, | |
| "learning_rate": 6.742141822438144e-06, | |
| "loss": 3.8073, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.761744966442953, | |
| "grad_norm": 4.458706855773926, | |
| "learning_rate": 6.682241889301635e-06, | |
| "loss": 3.5522, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7628635346756152, | |
| "grad_norm": 1.9600862264633179, | |
| "learning_rate": 6.622568158238126e-06, | |
| "loss": 3.7883, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7639821029082774, | |
| "grad_norm": 2.454009771347046, | |
| "learning_rate": 6.563121366145758e-06, | |
| "loss": 3.723, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7651006711409396, | |
| "grad_norm": 1.8544883728027344, | |
| "learning_rate": 6.503902247120239e-06, | |
| "loss": 3.894, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7662192393736018, | |
| "grad_norm": 2.5093746185302734, | |
| "learning_rate": 6.444911532445782e-06, | |
| "loss": 3.9987, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.767337807606264, | |
| "grad_norm": 3.8050105571746826, | |
| "learning_rate": 6.386149950586098e-06, | |
| "loss": 3.7126, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7684563758389261, | |
| "grad_norm": 2.824566125869751, | |
| "learning_rate": 6.327618227175389e-06, | |
| "loss": 3.7332, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7695749440715883, | |
| "grad_norm": 3.873337984085083, | |
| "learning_rate": 6.269317085009363e-06, | |
| "loss": 3.6191, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7706935123042505, | |
| "grad_norm": 2.275029182434082, | |
| "learning_rate": 6.211247244036339e-06, | |
| "loss": 3.6649, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7718120805369127, | |
| "grad_norm": 3.252362012863159, | |
| "learning_rate": 6.153409421348358e-06, | |
| "loss": 3.8387, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.772930648769575, | |
| "grad_norm": 5.253241539001465, | |
| "learning_rate": 6.095804331172308e-06, | |
| "loss": 3.503, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7740492170022372, | |
| "grad_norm": 1.9518181085586548, | |
| "learning_rate": 6.0384326848611225e-06, | |
| "loss": 4.2226, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7751677852348994, | |
| "grad_norm": 2.372791051864624, | |
| "learning_rate": 5.981295190884961e-06, | |
| "loss": 4.0139, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7762863534675615, | |
| "grad_norm": 2.35370135307312, | |
| "learning_rate": 5.924392554822519e-06, | |
| "loss": 3.7916, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7774049217002237, | |
| "grad_norm": 2.5694122314453125, | |
| "learning_rate": 5.867725479352251e-06, | |
| "loss": 3.9369, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7785234899328859, | |
| "grad_norm": 2.143845558166504, | |
| "learning_rate": 5.811294664243752e-06, | |
| "loss": 4.0455, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7796420581655481, | |
| "grad_norm": 1.9714043140411377, | |
| "learning_rate": 5.755100806349076e-06, | |
| "loss": 3.9658, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7807606263982103, | |
| "grad_norm": 3.7511520385742188, | |
| "learning_rate": 5.699144599594131e-06, | |
| "loss": 3.4662, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7818791946308725, | |
| "grad_norm": 2.6238203048706055, | |
| "learning_rate": 5.643426734970139e-06, | |
| "loss": 3.7648, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7829977628635347, | |
| "grad_norm": 2.54085111618042, | |
| "learning_rate": 5.587947900525093e-06, | |
| "loss": 3.9747, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7841163310961968, | |
| "grad_norm": 2.562391519546509, | |
| "learning_rate": 5.532708781355231e-06, | |
| "loss": 3.7304, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.785234899328859, | |
| "grad_norm": 1.9780319929122925, | |
| "learning_rate": 5.4777100595965994e-06, | |
| "loss": 3.7713, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7863534675615212, | |
| "grad_norm": 2.6395468711853027, | |
| "learning_rate": 5.422952414416649e-06, | |
| "loss": 3.6651, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7874720357941835, | |
| "grad_norm": 2.2408347129821777, | |
| "learning_rate": 5.368436522005815e-06, | |
| "loss": 3.8067, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7885906040268457, | |
| "grad_norm": 2.3998045921325684, | |
| "learning_rate": 5.314163055569188e-06, | |
| "loss": 3.9425, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7897091722595079, | |
| "grad_norm": 1.973858118057251, | |
| "learning_rate": 5.26013268531817e-06, | |
| "loss": 4.0397, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.79082774049217, | |
| "grad_norm": 2.511659860610962, | |
| "learning_rate": 5.206346078462249e-06, | |
| "loss": 3.661, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7919463087248322, | |
| "grad_norm": 2.8732688426971436, | |
| "learning_rate": 5.1528038992007e-06, | |
| "loss": 3.8967, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7930648769574944, | |
| "grad_norm": 2.265322685241699, | |
| "learning_rate": 5.099506808714452e-06, | |
| "loss": 3.8421, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7941834451901566, | |
| "grad_norm": 3.5834262371063232, | |
| "learning_rate": 5.046455465157848e-06, | |
| "loss": 4.1527, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7953020134228188, | |
| "grad_norm": 3.056978225708008, | |
| "learning_rate": 4.993650523650575e-06, | |
| "loss": 3.4209, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.796420581655481, | |
| "grad_norm": 2.173541784286499, | |
| "learning_rate": 4.941092636269554e-06, | |
| "loss": 3.6813, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7975391498881432, | |
| "grad_norm": 2.102139949798584, | |
| "learning_rate": 4.888782452040885e-06, | |
| "loss": 3.7805, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7986577181208053, | |
| "grad_norm": 2.5020039081573486, | |
| "learning_rate": 4.836720616931831e-06, | |
| "loss": 3.7669, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7997762863534675, | |
| "grad_norm": 2.2521355152130127, | |
| "learning_rate": 4.784907773842839e-06, | |
| "loss": 4.0918, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.8008948545861297, | |
| "grad_norm": 2.366150379180908, | |
| "learning_rate": 4.733344562599623e-06, | |
| "loss": 4.0847, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.802013422818792, | |
| "grad_norm": 3.198983907699585, | |
| "learning_rate": 4.682031619945238e-06, | |
| "loss": 3.5284, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.8031319910514542, | |
| "grad_norm": 1.931961178779602, | |
| "learning_rate": 4.630969579532232e-06, | |
| "loss": 3.8843, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8042505592841164, | |
| "grad_norm": 2.4697344303131104, | |
| "learning_rate": 4.580159071914794e-06, | |
| "loss": 3.8388, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8053691275167785, | |
| "grad_norm": 2.202491283416748, | |
| "learning_rate": 4.529600724541022e-06, | |
| "loss": 3.8412, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8064876957494407, | |
| "grad_norm": 3.0058343410491943, | |
| "learning_rate": 4.479295161745109e-06, | |
| "loss": 4.1481, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8076062639821029, | |
| "grad_norm": 2.1180286407470703, | |
| "learning_rate": 4.4292430047396914e-06, | |
| "loss": 3.8576, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8087248322147651, | |
| "grad_norm": 2.6063430309295654, | |
| "learning_rate": 4.379444871608124e-06, | |
| "loss": 3.7986, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.8098434004474273, | |
| "grad_norm": 2.227067470550537, | |
| "learning_rate": 4.329901377296902e-06, | |
| "loss": 3.5418, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8109619686800895, | |
| "grad_norm": 2.3542473316192627, | |
| "learning_rate": 4.280613133608028e-06, | |
| "loss": 3.7732, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8120805369127517, | |
| "grad_norm": 3.143573045730591, | |
| "learning_rate": 4.231580749191474e-06, | |
| "loss": 4.1625, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8131991051454138, | |
| "grad_norm": 2.3651700019836426, | |
| "learning_rate": 4.182804829537654e-06, | |
| "loss": 4.3148, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.814317673378076, | |
| "grad_norm": 2.07985258102417, | |
| "learning_rate": 4.134285976969948e-06, | |
| "loss": 3.8726, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8154362416107382, | |
| "grad_norm": 3.1206445693969727, | |
| "learning_rate": 4.086024790637285e-06, | |
| "loss": 3.6906, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8165548098434005, | |
| "grad_norm": 2.517094850540161, | |
| "learning_rate": 4.038021866506725e-06, | |
| "loss": 4.0283, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8176733780760627, | |
| "grad_norm": 2.244453191757202, | |
| "learning_rate": 3.990277797356104e-06, | |
| "loss": 3.959, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8187919463087249, | |
| "grad_norm": 1.9631199836730957, | |
| "learning_rate": 3.942793172766699e-06, | |
| "loss": 4.2073, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.819910514541387, | |
| "grad_norm": 2.9783220291137695, | |
| "learning_rate": 3.895568579115983e-06, | |
| "loss": 3.7062, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.8210290827740492, | |
| "grad_norm": 2.1372721195220947, | |
| "learning_rate": 3.848604599570338e-06, | |
| "loss": 3.7815, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8221476510067114, | |
| "grad_norm": 2.8807451725006104, | |
| "learning_rate": 3.80190181407791e-06, | |
| "loss": 3.639, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8232662192393736, | |
| "grad_norm": 1.9709060192108154, | |
| "learning_rate": 3.755460799361382e-06, | |
| "loss": 3.8587, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8243847874720358, | |
| "grad_norm": 2.44718861579895, | |
| "learning_rate": 3.709282128910907e-06, | |
| "loss": 3.9482, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.825503355704698, | |
| "grad_norm": 2.036914110183716, | |
| "learning_rate": 3.6633663729770008e-06, | |
| "loss": 3.9106, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8266219239373602, | |
| "grad_norm": 2.3819079399108887, | |
| "learning_rate": 3.61771409856351e-06, | |
| "loss": 4.0078, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8277404921700223, | |
| "grad_norm": 2.206578493118286, | |
| "learning_rate": 3.572325869420587e-06, | |
| "loss": 3.839, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8288590604026845, | |
| "grad_norm": 2.039262294769287, | |
| "learning_rate": 3.527202246037756e-06, | |
| "loss": 3.8825, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8299776286353467, | |
| "grad_norm": 2.297976493835449, | |
| "learning_rate": 3.4823437856369794e-06, | |
| "loss": 3.9343, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.831096196868009, | |
| "grad_norm": 2.6157867908477783, | |
| "learning_rate": 3.4377510421657906e-06, | |
| "loss": 3.7783, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8322147651006712, | |
| "grad_norm": 2.9260687828063965, | |
| "learning_rate": 3.393424566290421e-06, | |
| "loss": 3.8069, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 4.025168418884277, | |
| "learning_rate": 3.3493649053890326e-06, | |
| "loss": 3.743, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8344519015659956, | |
| "grad_norm": 2.532195806503296, | |
| "learning_rate": 3.3055726035449484e-06, | |
| "loss": 3.6006, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8355704697986577, | |
| "grad_norm": 3.2442009449005127, | |
| "learning_rate": 3.2620482015399302e-06, | |
| "loss": 3.7358, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8366890380313199, | |
| "grad_norm": 4.921333312988281, | |
| "learning_rate": 3.2187922368474952e-06, | |
| "loss": 3.8525, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8378076062639821, | |
| "grad_norm": 2.6702213287353516, | |
| "learning_rate": 3.175805243626284e-06, | |
| "loss": 3.8665, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8389261744966443, | |
| "grad_norm": 4.156053066253662, | |
| "learning_rate": 3.133087752713479e-06, | |
| "loss": 4.2799, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8400447427293065, | |
| "grad_norm": 2.0885608196258545, | |
| "learning_rate": 3.0906402916182297e-06, | |
| "loss": 3.8346, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8411633109619687, | |
| "grad_norm": 2.58349871635437, | |
| "learning_rate": 3.0484633845151488e-06, | |
| "loss": 3.7313, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8422818791946308, | |
| "grad_norm": 3.2410836219787598, | |
| "learning_rate": 3.0065575522378264e-06, | |
| "loss": 3.7262, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.843400447427293, | |
| "grad_norm": 1.6323789358139038, | |
| "learning_rate": 2.9649233122724105e-06, | |
| "loss": 3.9603, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8445190156599552, | |
| "grad_norm": 1.915100336074829, | |
| "learning_rate": 2.923561178751219e-06, | |
| "loss": 4.0198, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8456375838926175, | |
| "grad_norm": 2.1250874996185303, | |
| "learning_rate": 2.88247166244639e-06, | |
| "loss": 3.9345, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8467561521252797, | |
| "grad_norm": 3.2116942405700684, | |
| "learning_rate": 2.841655270763549e-06, | |
| "loss": 3.6633, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8478747203579419, | |
| "grad_norm": 2.246950626373291, | |
| "learning_rate": 2.801112507735587e-06, | |
| "loss": 3.9842, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8489932885906041, | |
| "grad_norm": 1.9269198179244995, | |
| "learning_rate": 2.760843874016403e-06, | |
| "loss": 4.043, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8501118568232662, | |
| "grad_norm": 3.6027183532714844, | |
| "learning_rate": 2.720849866874739e-06, | |
| "loss": 3.693, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8512304250559284, | |
| "grad_norm": 2.76350474357605, | |
| "learning_rate": 2.6811309801880208e-06, | |
| "loss": 4.1343, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8523489932885906, | |
| "grad_norm": 2.693434238433838, | |
| "learning_rate": 2.6416877044362685e-06, | |
| "loss": 4.3856, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8534675615212528, | |
| "grad_norm": 2.068485736846924, | |
| "learning_rate": 2.602520526696059e-06, | |
| "loss": 4.0551, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.854586129753915, | |
| "grad_norm": 2.524249792098999, | |
| "learning_rate": 2.563629930634487e-06, | |
| "loss": 3.776, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8557046979865772, | |
| "grad_norm": 2.40083909034729, | |
| "learning_rate": 2.525016396503185e-06, | |
| "loss": 3.8733, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8568232662192393, | |
| "grad_norm": 2.9691314697265625, | |
| "learning_rate": 2.4866804011324296e-06, | |
| "loss": 3.7485, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8579418344519015, | |
| "grad_norm": 3.3835642337799072, | |
| "learning_rate": 2.448622417925214e-06, | |
| "loss": 3.7207, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8590604026845637, | |
| "grad_norm": 2.4943041801452637, | |
| "learning_rate": 2.4108429168514245e-06, | |
| "loss": 3.4865, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.860178970917226, | |
| "grad_norm": 2.2213940620422363, | |
| "learning_rate": 2.3733423644420353e-06, | |
| "loss": 3.9459, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8612975391498882, | |
| "grad_norm": 2.157849073410034, | |
| "learning_rate": 2.3361212237833356e-06, | |
| "loss": 3.8979, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8624161073825504, | |
| "grad_norm": 2.031329870223999, | |
| "learning_rate": 2.2991799545112215e-06, | |
| "loss": 3.9194, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.8635346756152126, | |
| "grad_norm": 3.01249098777771, | |
| "learning_rate": 2.2625190128055168e-06, | |
| "loss": 3.7412, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8646532438478747, | |
| "grad_norm": 2.1558024883270264, | |
| "learning_rate": 2.2261388513843515e-06, | |
| "loss": 4.12, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8657718120805369, | |
| "grad_norm": 3.150437593460083, | |
| "learning_rate": 2.190039919498543e-06, | |
| "loss": 3.8121, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8668903803131991, | |
| "grad_norm": 2.319565534591675, | |
| "learning_rate": 2.154222662926067e-06, | |
| "loss": 3.8679, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8680089485458613, | |
| "grad_norm": 2.686678171157837, | |
| "learning_rate": 2.118687523966559e-06, | |
| "loss": 4.2429, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8691275167785235, | |
| "grad_norm": 2.2821829319000244, | |
| "learning_rate": 2.0834349414358495e-06, | |
| "loss": 3.9263, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8702460850111857, | |
| "grad_norm": 2.5908377170562744, | |
| "learning_rate": 2.048465350660522e-06, | |
| "loss": 3.4955, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8713646532438478, | |
| "grad_norm": 3.3101370334625244, | |
| "learning_rate": 2.013779183472575e-06, | |
| "loss": 4.0192, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.87248322147651, | |
| "grad_norm": 2.681217908859253, | |
| "learning_rate": 1.9793768682040524e-06, | |
| "loss": 3.6952, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8736017897091722, | |
| "grad_norm": 2.5683112144470215, | |
| "learning_rate": 1.9452588296817843e-06, | |
| "loss": 3.9709, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8747203579418344, | |
| "grad_norm": 3.1038596630096436, | |
| "learning_rate": 1.911425489222127e-06, | |
| "loss": 4.0497, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8758389261744967, | |
| "grad_norm": 2.36666202545166, | |
| "learning_rate": 1.8778772646257491e-06, | |
| "loss": 4.0068, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8769574944071589, | |
| "grad_norm": 2.2028393745422363, | |
| "learning_rate": 1.8446145701724983e-06, | |
| "loss": 3.6578, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8780760626398211, | |
| "grad_norm": 4.168335437774658, | |
| "learning_rate": 1.8116378166162685e-06, | |
| "loss": 4.1479, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8791946308724832, | |
| "grad_norm": 2.6044557094573975, | |
| "learning_rate": 1.778947411179932e-06, | |
| "loss": 3.573, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8803131991051454, | |
| "grad_norm": 1.6643822193145752, | |
| "learning_rate": 1.7465437575502952e-06, | |
| "loss": 3.8277, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8814317673378076, | |
| "grad_norm": 2.9622278213500977, | |
| "learning_rate": 1.7144272558731467e-06, | |
| "loss": 3.6871, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8825503355704698, | |
| "grad_norm": 2.0143864154815674, | |
| "learning_rate": 1.6825983027482867e-06, | |
| "loss": 3.9285, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.883668903803132, | |
| "grad_norm": 2.577881097793579, | |
| "learning_rate": 1.6510572912246475e-06, | |
| "loss": 3.6799, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8847874720357942, | |
| "grad_norm": 2.5292065143585205, | |
| "learning_rate": 1.6198046107954223e-06, | |
| "loss": 3.9658, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8859060402684564, | |
| "grad_norm": 2.8256359100341797, | |
| "learning_rate": 1.5888406473932692e-06, | |
| "loss": 3.7874, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8870246085011185, | |
| "grad_norm": 4.170554161071777, | |
| "learning_rate": 1.5581657833855512e-06, | |
| "loss": 3.6907, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8881431767337807, | |
| "grad_norm": 1.9856733083724976, | |
| "learning_rate": 1.5277803975695864e-06, | |
| "loss": 3.8945, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.889261744966443, | |
| "grad_norm": 2.3850162029266357, | |
| "learning_rate": 1.497684865168006e-06, | |
| "loss": 3.7603, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8903803131991052, | |
| "grad_norm": 2.514587879180908, | |
| "learning_rate": 1.4678795578240894e-06, | |
| "loss": 3.9449, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8914988814317674, | |
| "grad_norm": 3.2767693996429443, | |
| "learning_rate": 1.4383648435972007e-06, | |
| "loss": 3.9914, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8926174496644296, | |
| "grad_norm": 2.3066697120666504, | |
| "learning_rate": 1.4091410869582267e-06, | |
| "loss": 3.7617, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8937360178970917, | |
| "grad_norm": 2.5513951778411865, | |
| "learning_rate": 1.3802086487850807e-06, | |
| "loss": 3.7506, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8948545861297539, | |
| "grad_norm": 3.390204429626465, | |
| "learning_rate": 1.3515678863582431e-06, | |
| "loss": 3.6553, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8959731543624161, | |
| "grad_norm": 1.921186923980713, | |
| "learning_rate": 1.3232191533563586e-06, | |
| "loss": 3.7809, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8970917225950783, | |
| "grad_norm": 2.1258394718170166, | |
| "learning_rate": 1.2951627998518623e-06, | |
| "loss": 3.8879, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8982102908277405, | |
| "grad_norm": 3.5940604209899902, | |
| "learning_rate": 1.267399172306663e-06, | |
| "loss": 3.5702, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8993288590604027, | |
| "grad_norm": 3.0253639221191406, | |
| "learning_rate": 1.2399286135678423e-06, | |
| "loss": 3.9817, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9004474272930649, | |
| "grad_norm": 3.926387071609497, | |
| "learning_rate": 1.212751462863454e-06, | |
| "loss": 3.449, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.901565995525727, | |
| "grad_norm": 3.5187435150146484, | |
| "learning_rate": 1.1858680557983171e-06, | |
| "loss": 3.8943, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9026845637583892, | |
| "grad_norm": 2.5346837043762207, | |
| "learning_rate": 1.1592787243498631e-06, | |
| "loss": 3.98, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.9038031319910514, | |
| "grad_norm": 2.5961804389953613, | |
| "learning_rate": 1.132983796864054e-06, | |
| "loss": 4.0311, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.9049217002237137, | |
| "grad_norm": 2.107836961746216, | |
| "learning_rate": 1.10698359805132e-06, | |
| "loss": 3.6457, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.9060402684563759, | |
| "grad_norm": 2.8412914276123047, | |
| "learning_rate": 1.0812784489825507e-06, | |
| "loss": 4.1803, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9071588366890381, | |
| "grad_norm": 2.366673469543457, | |
| "learning_rate": 1.0558686670851303e-06, | |
| "loss": 3.9735, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.9082774049217002, | |
| "grad_norm": 2.198279857635498, | |
| "learning_rate": 1.0307545661390139e-06, | |
| "loss": 3.7553, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9093959731543624, | |
| "grad_norm": 2.628816604614258, | |
| "learning_rate": 1.0059364562728518e-06, | |
| "loss": 3.7233, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.9105145413870246, | |
| "grad_norm": 2.737968921661377, | |
| "learning_rate": 9.81414643960174e-07, | |
| "loss": 3.8348, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9116331096196868, | |
| "grad_norm": 1.8730359077453613, | |
| "learning_rate": 9.5718943201559e-07, | |
| "loss": 3.946, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.912751677852349, | |
| "grad_norm": 2.630864143371582, | |
| "learning_rate": 9.332611195910584e-07, | |
| "loss": 3.9293, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9138702460850112, | |
| "grad_norm": 2.5964674949645996, | |
| "learning_rate": 9.09630002172182e-07, | |
| "loss": 3.9155, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.9149888143176734, | |
| "grad_norm": 1.8273096084594727, | |
| "learning_rate": 8.862963715745687e-07, | |
| "loss": 3.9176, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.9161073825503355, | |
| "grad_norm": 2.2656948566436768, | |
| "learning_rate": 8.632605159402341e-07, | |
| "loss": 4.1211, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9172259507829977, | |
| "grad_norm": 2.1946887969970703, | |
| "learning_rate": 8.405227197340216e-07, | |
| "loss": 4.3939, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9183445190156599, | |
| "grad_norm": 2.742736577987671, | |
| "learning_rate": 8.180832637401075e-07, | |
| "loss": 3.5518, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.9194630872483222, | |
| "grad_norm": 2.608285665512085, | |
| "learning_rate": 7.959424250585323e-07, | |
| "loss": 3.6809, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9205816554809844, | |
| "grad_norm": 2.192291498184204, | |
| "learning_rate": 7.741004771017691e-07, | |
| "loss": 4.1284, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.9217002237136466, | |
| "grad_norm": 2.3379416465759277, | |
| "learning_rate": 7.525576895913655e-07, | |
| "loss": 3.8505, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9228187919463087, | |
| "grad_norm": 2.133988618850708, | |
| "learning_rate": 7.313143285545832e-07, | |
| "loss": 3.9045, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9239373601789709, | |
| "grad_norm": 2.171081781387329, | |
| "learning_rate": 7.10370656321141e-07, | |
| "loss": 3.7649, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9250559284116331, | |
| "grad_norm": 1.7111164331436157, | |
| "learning_rate": 6.897269315199628e-07, | |
| "loss": 3.6116, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9261744966442953, | |
| "grad_norm": 2.5523173809051514, | |
| "learning_rate": 6.69383409075991e-07, | |
| "loss": 4.0026, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9272930648769575, | |
| "grad_norm": 2.1570398807525635, | |
| "learning_rate": 6.493403402070247e-07, | |
| "loss": 3.8648, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9284116331096197, | |
| "grad_norm": 1.9127180576324463, | |
| "learning_rate": 6.295979724206313e-07, | |
| "loss": 3.9427, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9295302013422819, | |
| "grad_norm": 2.699845790863037, | |
| "learning_rate": 6.101565495110817e-07, | |
| "loss": 3.5363, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.930648769574944, | |
| "grad_norm": 3.6851859092712402, | |
| "learning_rate": 5.910163115563471e-07, | |
| "loss": 3.7037, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9317673378076062, | |
| "grad_norm": 2.0086777210235596, | |
| "learning_rate": 5.721774949151298e-07, | |
| "loss": 3.8431, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9328859060402684, | |
| "grad_norm": 1.9770376682281494, | |
| "learning_rate": 5.536403322239369e-07, | |
| "loss": 3.8387, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9340044742729307, | |
| "grad_norm": 2.4417667388916016, | |
| "learning_rate": 5.35405052394225e-07, | |
| "loss": 3.8362, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9351230425055929, | |
| "grad_norm": 2.8682196140289307, | |
| "learning_rate": 5.17471880609563e-07, | |
| "loss": 3.8365, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9362416107382551, | |
| "grad_norm": 2.904750108718872, | |
| "learning_rate": 4.998410383228458e-07, | |
| "loss": 3.7828, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.9373601789709173, | |
| "grad_norm": 2.7965922355651855, | |
| "learning_rate": 4.825127432535714e-07, | |
| "loss": 3.5812, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9384787472035794, | |
| "grad_norm": 1.820050835609436, | |
| "learning_rate": 4.654872093851487e-07, | |
| "loss": 4.0985, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9395973154362416, | |
| "grad_norm": 2.777374029159546, | |
| "learning_rate": 4.487646469622464e-07, | |
| "loss": 3.5268, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9407158836689038, | |
| "grad_norm": 2.9808647632598877, | |
| "learning_rate": 4.3234526248820686e-07, | |
| "loss": 3.5723, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.941834451901566, | |
| "grad_norm": 3.188647985458374, | |
| "learning_rate": 4.162292587224947e-07, | |
| "loss": 3.3647, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9429530201342282, | |
| "grad_norm": 2.2936878204345703, | |
| "learning_rate": 4.0041683467819115e-07, | |
| "loss": 3.887, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9440715883668904, | |
| "grad_norm": 2.2591426372528076, | |
| "learning_rate": 3.8490818561953414e-07, | |
| "loss": 3.8009, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.9451901565995525, | |
| "grad_norm": 2.7897520065307617, | |
| "learning_rate": 3.697035030595125e-07, | |
| "loss": 3.556, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9463087248322147, | |
| "grad_norm": 2.164191246032715, | |
| "learning_rate": 3.548029747574927e-07, | |
| "loss": 3.7152, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9474272930648769, | |
| "grad_norm": 2.1192331314086914, | |
| "learning_rate": 3.4020678471690934e-07, | |
| "loss": 4.0369, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9485458612975392, | |
| "grad_norm": 2.3934147357940674, | |
| "learning_rate": 3.2591511318298686e-07, | |
| "loss": 4.0403, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9496644295302014, | |
| "grad_norm": 2.3561530113220215, | |
| "learning_rate": 3.119281366405213e-07, | |
| "loss": 3.7943, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9507829977628636, | |
| "grad_norm": 2.1101136207580566, | |
| "learning_rate": 2.9824602781168833e-07, | |
| "loss": 3.9526, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9519015659955258, | |
| "grad_norm": 2.3856959342956543, | |
| "learning_rate": 2.8486895565391913e-07, | |
| "loss": 3.7386, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9530201342281879, | |
| "grad_norm": 2.2807648181915283, | |
| "learning_rate": 2.7179708535781943e-07, | |
| "loss": 3.6456, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9541387024608501, | |
| "grad_norm": 2.5656046867370605, | |
| "learning_rate": 2.590305783451097e-07, | |
| "loss": 3.8164, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9552572706935123, | |
| "grad_norm": 2.58715558052063, | |
| "learning_rate": 2.4656959226665446e-07, | |
| "loss": 4.0307, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9563758389261745, | |
| "grad_norm": 2.5126988887786865, | |
| "learning_rate": 2.3441428100050566e-07, | |
| "loss": 3.7051, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9574944071588367, | |
| "grad_norm": 2.549105644226074, | |
| "learning_rate": 2.2256479464999313e-07, | |
| "loss": 3.8441, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9586129753914989, | |
| "grad_norm": 2.6297028064727783, | |
| "learning_rate": 2.110212795418953e-07, | |
| "loss": 3.4869, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.959731543624161, | |
| "grad_norm": 1.7067815065383911, | |
| "learning_rate": 1.9978387822460197e-07, | |
| "loss": 3.8855, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9608501118568232, | |
| "grad_norm": 2.1829380989074707, | |
| "learning_rate": 1.8885272946637944e-07, | |
| "loss": 3.7166, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9619686800894854, | |
| "grad_norm": 2.6756093502044678, | |
| "learning_rate": 1.782279682536414e-07, | |
| "loss": 3.706, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9630872483221476, | |
| "grad_norm": 2.74208664894104, | |
| "learning_rate": 1.679097257892892e-07, | |
| "loss": 4.2839, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9642058165548099, | |
| "grad_norm": 2.6451363563537598, | |
| "learning_rate": 1.578981294910936e-07, | |
| "loss": 3.8752, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9653243847874721, | |
| "grad_norm": 2.704803705215454, | |
| "learning_rate": 1.4819330299011004e-07, | |
| "loss": 3.9167, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9664429530201343, | |
| "grad_norm": 1.9471423625946045, | |
| "learning_rate": 1.38795366129163e-07, | |
| "loss": 3.9097, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9675615212527964, | |
| "grad_norm": 2.0253708362579346, | |
| "learning_rate": 1.2970443496136407e-07, | |
| "loss": 3.8803, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9686800894854586, | |
| "grad_norm": 1.8609451055526733, | |
| "learning_rate": 1.2092062174867413e-07, | |
| "loss": 3.8417, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9697986577181208, | |
| "grad_norm": 2.4085533618927, | |
| "learning_rate": 1.1244403496052658e-07, | |
| "loss": 3.7451, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.970917225950783, | |
| "grad_norm": 3.3671045303344727, | |
| "learning_rate": 1.042747792724702e-07, | |
| "loss": 3.7212, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9720357941834452, | |
| "grad_norm": 1.9560606479644775, | |
| "learning_rate": 9.641295556489793e-08, | |
| "loss": 3.9571, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9731543624161074, | |
| "grad_norm": 1.984646201133728, | |
| "learning_rate": 8.885866092178952e-08, | |
| "loss": 3.9128, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9742729306487695, | |
| "grad_norm": 2.1129088401794434, | |
| "learning_rate": 8.161198862950692e-08, | |
| "loss": 3.8933, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9753914988814317, | |
| "grad_norm": 2.0977556705474854, | |
| "learning_rate": 7.46730281756619e-08, | |
| "loss": 3.9295, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9765100671140939, | |
| "grad_norm": 2.7585601806640625, | |
| "learning_rate": 6.804186524798362e-08, | |
| "loss": 3.8923, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9776286353467561, | |
| "grad_norm": 3.502300977706909, | |
| "learning_rate": 6.171858173328604e-08, | |
| "loss": 3.9105, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9787472035794184, | |
| "grad_norm": 3.1645312309265137, | |
| "learning_rate": 5.5703255716446637e-08, | |
| "loss": 3.912, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9798657718120806, | |
| "grad_norm": 3.4221417903900146, | |
| "learning_rate": 4.999596147943486e-08, | |
| "loss": 3.8685, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9809843400447428, | |
| "grad_norm": 3.9628350734710693, | |
| "learning_rate": 4.4596769500407366e-08, | |
| "loss": 3.8902, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9821029082774049, | |
| "grad_norm": 2.23947811126709, | |
| "learning_rate": 3.9505746452830896e-08, | |
| "loss": 3.7735, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9832214765100671, | |
| "grad_norm": 3.1684134006500244, | |
| "learning_rate": 3.4722955204652406e-08, | |
| "loss": 3.7775, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9843400447427293, | |
| "grad_norm": 2.4224395751953125, | |
| "learning_rate": 3.02484548175469e-08, | |
| "loss": 3.8073, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9854586129753915, | |
| "grad_norm": 2.877962589263916, | |
| "learning_rate": 2.6082300546154127e-08, | |
| "loss": 3.9051, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9865771812080537, | |
| "grad_norm": 1.9865648746490479, | |
| "learning_rate": 2.2224543837423562e-08, | |
| "loss": 3.8927, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9876957494407159, | |
| "grad_norm": 2.3272900581359863, | |
| "learning_rate": 1.8675232329967708e-08, | |
| "loss": 3.9353, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9888143176733781, | |
| "grad_norm": 1.9652239084243774, | |
| "learning_rate": 1.5434409853473662e-08, | |
| "loss": 3.8841, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9899328859060402, | |
| "grad_norm": 3.6001975536346436, | |
| "learning_rate": 1.2502116428161902e-08, | |
| "loss": 4.0209, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9910514541387024, | |
| "grad_norm": 2.0110864639282227, | |
| "learning_rate": 9.878388264300541e-09, | |
| "loss": 3.9221, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9921700223713646, | |
| "grad_norm": 2.5183699131011963, | |
| "learning_rate": 7.563257761744601e-09, | |
| "loss": 3.6199, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9932885906040269, | |
| "grad_norm": 2.3704769611358643, | |
| "learning_rate": 5.55675350954743e-09, | |
| "loss": 3.603, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9944071588366891, | |
| "grad_norm": 2.055521249771118, | |
| "learning_rate": 3.8589002855971e-09, | |
| "loss": 3.906, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9955257270693513, | |
| "grad_norm": 2.1967926025390625, | |
| "learning_rate": 2.4697190563194305e-09, | |
| "loss": 3.9161, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9966442953020134, | |
| "grad_norm": 3.107943534851074, | |
| "learning_rate": 1.3892269764198551e-09, | |
| "loss": 3.9214, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.9977628635346756, | |
| "grad_norm": 2.653813123703003, | |
| "learning_rate": 6.174373886586037e-10, | |
| "loss": 3.7566, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9988814317673378, | |
| "grad_norm": 2.262310743331909, | |
| "learning_rate": 1.5435982370359813e-10, | |
| "loss": 3.6915, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.972426414489746, | |
| "learning_rate": 0.0, | |
| "loss": 4.5787, | |
| "step": 894 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 894, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4928497725151232e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |