| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.95475113122172, | |
| "eval_steps": 500, | |
| "global_step": 550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01809954751131222, | |
| "grad_norm": 1.6741957199641677, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.392, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.03619909502262444, | |
| "grad_norm": 1.526970859287005, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.3479, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.05429864253393665, | |
| "grad_norm": 1.8103690939719148, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.363, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.07239819004524888, | |
| "grad_norm": 1.568077888738942, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.3513, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.09049773755656108, | |
| "grad_norm": 1.668945098216231, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.3759, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.1085972850678733, | |
| "grad_norm": 1.3864660758192329, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3525, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.12669683257918551, | |
| "grad_norm": 1.538592504007101, | |
| "learning_rate": 4.99995831202958e-06, | |
| "loss": 0.3904, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.14479638009049775, | |
| "grad_norm": 1.2047351614977708, | |
| "learning_rate": 4.999833249508629e-06, | |
| "loss": 0.3924, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.16289592760180996, | |
| "grad_norm": 1.0640124047316322, | |
| "learning_rate": 4.999624816608027e-06, | |
| "loss": 0.375, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.18099547511312217, | |
| "grad_norm": 0.7966517341350207, | |
| "learning_rate": 4.999333020279094e-06, | |
| "loss": 0.356, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19909502262443438, | |
| "grad_norm": 0.4554353875165799, | |
| "learning_rate": 4.998957870253344e-06, | |
| "loss": 0.3598, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.2171945701357466, | |
| "grad_norm": 0.6557533564712539, | |
| "learning_rate": 4.998499379042172e-06, | |
| "loss": 0.3392, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 0.7936629840883419, | |
| "learning_rate": 4.997957561936433e-06, | |
| "loss": 0.3691, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.25339366515837103, | |
| "grad_norm": 0.7547277609627707, | |
| "learning_rate": 4.997332437005932e-06, | |
| "loss": 0.352, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.27149321266968324, | |
| "grad_norm": 0.8087501558896228, | |
| "learning_rate": 4.996624025098819e-06, | |
| "loss": 0.3449, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2895927601809955, | |
| "grad_norm": 0.7820896976667914, | |
| "learning_rate": 4.9958323498409e-06, | |
| "loss": 0.3401, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.7431270073814646, | |
| "learning_rate": 4.99495743763484e-06, | |
| "loss": 0.3567, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.3257918552036199, | |
| "grad_norm": 0.6777032410783791, | |
| "learning_rate": 4.993999317659293e-06, | |
| "loss": 0.3585, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.3438914027149321, | |
| "grad_norm": 0.6196369624534765, | |
| "learning_rate": 4.9929580218679195e-06, | |
| "loss": 0.3293, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.36199095022624433, | |
| "grad_norm": 0.5604472586874513, | |
| "learning_rate": 4.991833584988326e-06, | |
| "loss": 0.3437, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.38009049773755654, | |
| "grad_norm": 0.5137629265098744, | |
| "learning_rate": 4.990626044520905e-06, | |
| "loss": 0.3249, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.39819004524886875, | |
| "grad_norm": 0.547237003947588, | |
| "learning_rate": 4.989335440737587e-06, | |
| "loss": 0.3532, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.416289592760181, | |
| "grad_norm": 0.4164415963578454, | |
| "learning_rate": 4.987961816680493e-06, | |
| "loss": 0.3533, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.4343891402714932, | |
| "grad_norm": 0.35699522892651586, | |
| "learning_rate": 4.986505218160502e-06, | |
| "loss": 0.3268, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.45248868778280543, | |
| "grad_norm": 0.4026088211790661, | |
| "learning_rate": 4.984965693755723e-06, | |
| "loss": 0.3332, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.35057192166080064, | |
| "learning_rate": 4.983343294809875e-06, | |
| "loss": 0.3245, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.48868778280542985, | |
| "grad_norm": 0.3639947181438965, | |
| "learning_rate": 4.981638075430572e-06, | |
| "loss": 0.3199, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.5067873303167421, | |
| "grad_norm": 0.3387354723957761, | |
| "learning_rate": 4.979850092487525e-06, | |
| "loss": 0.3282, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.5248868778280543, | |
| "grad_norm": 0.3528078697583281, | |
| "learning_rate": 4.977979405610635e-06, | |
| "loss": 0.337, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.5429864253393665, | |
| "grad_norm": 0.3126032062813636, | |
| "learning_rate": 4.976026077188013e-06, | |
| "loss": 0.3265, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5610859728506787, | |
| "grad_norm": 0.3584209299955196, | |
| "learning_rate": 4.973990172363899e-06, | |
| "loss": 0.3568, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.579185520361991, | |
| "grad_norm": 0.4239503710543474, | |
| "learning_rate": 4.9718717590364855e-06, | |
| "loss": 0.3287, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.5972850678733032, | |
| "grad_norm": 0.41156579276283284, | |
| "learning_rate": 4.969670907855651e-06, | |
| "loss": 0.3267, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.33536968371087267, | |
| "learning_rate": 4.967387692220615e-06, | |
| "loss": 0.3367, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.6334841628959276, | |
| "grad_norm": 0.30272106018319034, | |
| "learning_rate": 4.965022188277474e-06, | |
| "loss": 0.3236, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.6515837104072398, | |
| "grad_norm": 0.28697723150322, | |
| "learning_rate": 4.962574474916678e-06, | |
| "loss": 0.3236, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.669683257918552, | |
| "grad_norm": 0.21062422377276369, | |
| "learning_rate": 4.960044633770387e-06, | |
| "loss": 0.3295, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.6877828054298643, | |
| "grad_norm": 0.28283155334950705, | |
| "learning_rate": 4.957432749209755e-06, | |
| "loss": 0.3453, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 0.2161778814999892, | |
| "learning_rate": 4.954738908342116e-06, | |
| "loss": 0.3645, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7239819004524887, | |
| "grad_norm": 0.2354424408008659, | |
| "learning_rate": 4.9519632010080765e-06, | |
| "loss": 0.3372, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7420814479638009, | |
| "grad_norm": 0.26054770828411217, | |
| "learning_rate": 4.9491057197785205e-06, | |
| "loss": 0.3349, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.7601809954751131, | |
| "grad_norm": 0.2596310001381547, | |
| "learning_rate": 4.946166559951523e-06, | |
| "loss": 0.3174, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.7782805429864253, | |
| "grad_norm": 0.2763815562688228, | |
| "learning_rate": 4.943145819549169e-06, | |
| "loss": 0.3464, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.7963800904977375, | |
| "grad_norm": 0.2508801820692124, | |
| "learning_rate": 4.9400435993142895e-06, | |
| "loss": 0.3277, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.8144796380090498, | |
| "grad_norm": 0.25823275674527674, | |
| "learning_rate": 4.936860002707096e-06, | |
| "loss": 0.343, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.832579185520362, | |
| "grad_norm": 0.23862916529933217, | |
| "learning_rate": 4.933595135901733e-06, | |
| "loss": 0.3425, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.8506787330316742, | |
| "grad_norm": 0.2377285409864031, | |
| "learning_rate": 4.9302491077827366e-06, | |
| "loss": 0.3345, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.8687782805429864, | |
| "grad_norm": 0.2054263655021643, | |
| "learning_rate": 4.926822029941406e-06, | |
| "loss": 0.3599, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.8868778280542986, | |
| "grad_norm": 0.21857378026560212, | |
| "learning_rate": 4.923314016672075e-06, | |
| "loss": 0.3293, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.9049773755656109, | |
| "grad_norm": 0.20834775020466292, | |
| "learning_rate": 4.919725184968307e-06, | |
| "loss": 0.3231, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.2000139484905926, | |
| "learning_rate": 4.9160556545189895e-06, | |
| "loss": 0.3248, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.24124485812118368, | |
| "learning_rate": 4.9123055477043454e-06, | |
| "loss": 0.3314, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.9592760180995475, | |
| "grad_norm": 0.26803109191751107, | |
| "learning_rate": 4.908474989591846e-06, | |
| "loss": 0.3341, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.9773755656108597, | |
| "grad_norm": 0.21490833872159623, | |
| "learning_rate": 4.904564107932048e-06, | |
| "loss": 0.3189, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.995475113122172, | |
| "grad_norm": 0.22738113980709365, | |
| "learning_rate": 4.900573033154325e-06, | |
| "loss": 0.3198, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.0135746606334841, | |
| "grad_norm": 0.1860953606536629, | |
| "learning_rate": 4.8965018983625245e-06, | |
| "loss": 0.3273, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.0316742081447963, | |
| "grad_norm": 0.2170252756734204, | |
| "learning_rate": 4.8923508393305224e-06, | |
| "loss": 0.3058, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.0497737556561086, | |
| "grad_norm": 0.19753070998453712, | |
| "learning_rate": 4.888119994497701e-06, | |
| "loss": 0.2949, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.0678733031674208, | |
| "grad_norm": 0.21040212719480175, | |
| "learning_rate": 4.883809504964325e-06, | |
| "loss": 0.298, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.085972850678733, | |
| "grad_norm": 0.20799415615187367, | |
| "learning_rate": 4.879419514486846e-06, | |
| "loss": 0.3201, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.1040723981900453, | |
| "grad_norm": 0.19784508945913667, | |
| "learning_rate": 4.874950169473097e-06, | |
| "loss": 0.3338, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.1221719457013575, | |
| "grad_norm": 0.20898074744097636, | |
| "learning_rate": 4.870401618977415e-06, | |
| "loss": 0.3053, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.1402714932126696, | |
| "grad_norm": 0.21530409824217756, | |
| "learning_rate": 4.8657740146956724e-06, | |
| "loss": 0.3346, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.1583710407239818, | |
| "grad_norm": 0.21656570481740497, | |
| "learning_rate": 4.8610675109602135e-06, | |
| "loss": 0.3175, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 0.18916438407683134, | |
| "learning_rate": 4.856282264734708e-06, | |
| "loss": 0.2973, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.1945701357466063, | |
| "grad_norm": 0.19298959302885896, | |
| "learning_rate": 4.851418435608919e-06, | |
| "loss": 0.3328, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.2126696832579185, | |
| "grad_norm": 0.19382840884955524, | |
| "learning_rate": 4.84647618579338e-06, | |
| "loss": 0.3233, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.22308099956654967, | |
| "learning_rate": 4.841455680113979e-06, | |
| "loss": 0.3401, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.248868778280543, | |
| "grad_norm": 0.1908581730308582, | |
| "learning_rate": 4.836357086006471e-06, | |
| "loss": 0.3199, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.2669683257918551, | |
| "grad_norm": 0.1900661127768816, | |
| "learning_rate": 4.83118057351089e-06, | |
| "loss": 0.3193, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2850678733031673, | |
| "grad_norm": 0.1842083788274683, | |
| "learning_rate": 4.825926315265874e-06, | |
| "loss": 0.3093, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.3031674208144797, | |
| "grad_norm": 0.19304820753044424, | |
| "learning_rate": 4.820594486502913e-06, | |
| "loss": 0.3147, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.3212669683257918, | |
| "grad_norm": 0.1865184743330753, | |
| "learning_rate": 4.815185265040504e-06, | |
| "loss": 0.3371, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.3393665158371042, | |
| "grad_norm": 0.21257371675686554, | |
| "learning_rate": 4.809698831278217e-06, | |
| "loss": 0.3556, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.3574660633484164, | |
| "grad_norm": 0.19738810108074692, | |
| "learning_rate": 4.804135368190684e-06, | |
| "loss": 0.3098, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.3755656108597285, | |
| "grad_norm": 0.20419379710110824, | |
| "learning_rate": 4.798495061321492e-06, | |
| "loss": 0.3037, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.3936651583710407, | |
| "grad_norm": 0.21182701854581448, | |
| "learning_rate": 4.792778098776997e-06, | |
| "loss": 0.3046, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 0.20966701782750055, | |
| "learning_rate": 4.786984671220053e-06, | |
| "loss": 0.3146, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.4298642533936652, | |
| "grad_norm": 0.2228994463496351, | |
| "learning_rate": 4.7811149718636475e-06, | |
| "loss": 0.3133, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.4479638009049773, | |
| "grad_norm": 0.2125517747018847, | |
| "learning_rate": 4.7751691964644655e-06, | |
| "loss": 0.3181, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.4660633484162897, | |
| "grad_norm": 0.18774294015726306, | |
| "learning_rate": 4.7691475433163515e-06, | |
| "loss": 0.3107, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.4841628959276019, | |
| "grad_norm": 0.2105655304494509, | |
| "learning_rate": 4.763050213243705e-06, | |
| "loss": 0.3193, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.502262443438914, | |
| "grad_norm": 0.2101302949838479, | |
| "learning_rate": 4.7568774095947804e-06, | |
| "loss": 0.3372, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.5203619909502262, | |
| "grad_norm": 0.1761520660073366, | |
| "learning_rate": 4.7506293382349e-06, | |
| "loss": 0.3058, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.20214706457289192, | |
| "learning_rate": 4.744306207539595e-06, | |
| "loss": 0.34, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.5565610859728507, | |
| "grad_norm": 0.21608846929666756, | |
| "learning_rate": 4.737908228387656e-06, | |
| "loss": 0.3285, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.5746606334841629, | |
| "grad_norm": 0.19692503921435273, | |
| "learning_rate": 4.731435614154094e-06, | |
| "loss": 0.3134, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.5927601809954752, | |
| "grad_norm": 0.19107736826101185, | |
| "learning_rate": 4.72488858070303e-06, | |
| "loss": 0.305, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.6108597285067874, | |
| "grad_norm": 0.19148405595657123, | |
| "learning_rate": 4.718267346380492e-06, | |
| "loss": 0.3157, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.6289592760180995, | |
| "grad_norm": 0.19180277215162053, | |
| "learning_rate": 4.711572132007139e-06, | |
| "loss": 0.3124, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 0.19539080957269014, | |
| "learning_rate": 4.704803160870888e-06, | |
| "loss": 0.3306, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.6651583710407238, | |
| "grad_norm": 0.21052797618402563, | |
| "learning_rate": 4.697960658719475e-06, | |
| "loss": 0.3061, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.6832579185520362, | |
| "grad_norm": 0.20191616959818315, | |
| "learning_rate": 4.69104485375292e-06, | |
| "loss": 0.3098, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.7013574660633484, | |
| "grad_norm": 0.2159013380308242, | |
| "learning_rate": 4.684055976615924e-06, | |
| "loss": 0.3088, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.7194570135746607, | |
| "grad_norm": 0.18904626555927467, | |
| "learning_rate": 4.676994260390168e-06, | |
| "loss": 0.2912, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.737556561085973, | |
| "grad_norm": 0.19467640291002175, | |
| "learning_rate": 4.6698599405865465e-06, | |
| "loss": 0.303, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.755656108597285, | |
| "grad_norm": 0.2880548104749461, | |
| "learning_rate": 4.662653255137308e-06, | |
| "loss": 0.3348, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.7737556561085972, | |
| "grad_norm": 0.2019155699824381, | |
| "learning_rate": 4.655374444388127e-06, | |
| "loss": 0.327, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.7918552036199094, | |
| "grad_norm": 0.2592156259533593, | |
| "learning_rate": 4.648023751090079e-06, | |
| "loss": 0.3363, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.8099547511312217, | |
| "grad_norm": 0.2180192099378802, | |
| "learning_rate": 4.640601420391554e-06, | |
| "loss": 0.3113, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.8280542986425339, | |
| "grad_norm": 0.20679493678747934, | |
| "learning_rate": 4.633107699830073e-06, | |
| "loss": 0.3148, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.2053440368213778, | |
| "learning_rate": 4.625542839324036e-06, | |
| "loss": 0.2967, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.8642533936651584, | |
| "grad_norm": 0.19200611510261656, | |
| "learning_rate": 4.617907091164389e-06, | |
| "loss": 0.3188, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 0.2302101510970096, | |
| "learning_rate": 4.610200710006206e-06, | |
| "loss": 0.3121, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.9004524886877827, | |
| "grad_norm": 0.2221804604843677, | |
| "learning_rate": 4.602423952860199e-06, | |
| "loss": 0.3146, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.9185520361990949, | |
| "grad_norm": 0.21983834708053807, | |
| "learning_rate": 4.594577079084146e-06, | |
| "loss": 0.3405, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.9366515837104072, | |
| "grad_norm": 0.21085636909889235, | |
| "learning_rate": 4.58666035037424e-06, | |
| "loss": 0.3089, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.9547511312217196, | |
| "grad_norm": 0.2016884181282795, | |
| "learning_rate": 4.578674030756364e-06, | |
| "loss": 0.3229, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.9728506787330318, | |
| "grad_norm": 0.19657023773974253, | |
| "learning_rate": 4.57061838657728e-06, | |
| "loss": 0.3237, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.990950226244344, | |
| "grad_norm": 0.20813455436587358, | |
| "learning_rate": 4.562493686495756e-06, | |
| "loss": 0.3255, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.009049773755656, | |
| "grad_norm": 0.18872409832307335, | |
| "learning_rate": 4.5543002014735955e-06, | |
| "loss": 0.2988, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 2.0271493212669682, | |
| "grad_norm": 0.19594421270270285, | |
| "learning_rate": 4.546038204766609e-06, | |
| "loss": 0.3109, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.0452488687782804, | |
| "grad_norm": 0.22355285614452686, | |
| "learning_rate": 4.537707971915495e-06, | |
| "loss": 0.3066, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.0633484162895925, | |
| "grad_norm": 0.2017792264758022, | |
| "learning_rate": 4.529309780736654e-06, | |
| "loss": 0.2939, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.081447963800905, | |
| "grad_norm": 0.20223483022494018, | |
| "learning_rate": 4.520843911312922e-06, | |
| "loss": 0.294, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.0995475113122173, | |
| "grad_norm": 0.20322098664858632, | |
| "learning_rate": 4.512310645984231e-06, | |
| "loss": 0.2984, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 0.20705072743185104, | |
| "learning_rate": 4.503710269338191e-06, | |
| "loss": 0.2694, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.1357466063348416, | |
| "grad_norm": 0.18442012590242893, | |
| "learning_rate": 4.4950430682005995e-06, | |
| "loss": 0.2979, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.2076635780792367, | |
| "learning_rate": 4.486309331625877e-06, | |
| "loss": 0.2874, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.171945701357466, | |
| "grad_norm": 0.19968964517363474, | |
| "learning_rate": 4.477509350887424e-06, | |
| "loss": 0.291, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.1900452488687785, | |
| "grad_norm": 0.18959726179400077, | |
| "learning_rate": 4.468643419467909e-06, | |
| "loss": 0.2921, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.2081447963800906, | |
| "grad_norm": 0.2388780187488927, | |
| "learning_rate": 4.459711833049485e-06, | |
| "loss": 0.3061, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.226244343891403, | |
| "grad_norm": 0.22092548916393367, | |
| "learning_rate": 4.4507148895039165e-06, | |
| "loss": 0.2765, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.244343891402715, | |
| "grad_norm": 0.21070917452514223, | |
| "learning_rate": 4.4416528888826595e-06, | |
| "loss": 0.2969, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.262443438914027, | |
| "grad_norm": 0.19807472108481627, | |
| "learning_rate": 4.432526133406843e-06, | |
| "loss": 0.3044, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.2805429864253393, | |
| "grad_norm": 0.1910225174641335, | |
| "learning_rate": 4.423334927457198e-06, | |
| "loss": 0.3132, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.2986425339366514, | |
| "grad_norm": 0.2203923882052516, | |
| "learning_rate": 4.414079577563901e-06, | |
| "loss": 0.3032, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.3167420814479636, | |
| "grad_norm": 0.21331518168793756, | |
| "learning_rate": 4.404760392396355e-06, | |
| "loss": 0.3033, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.334841628959276, | |
| "grad_norm": 0.21461268917839496, | |
| "learning_rate": 4.3953776827528925e-06, | |
| "loss": 0.3039, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 0.1862241130798519, | |
| "learning_rate": 4.385931761550411e-06, | |
| "loss": 0.2793, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.3710407239819005, | |
| "grad_norm": 0.19779667332990994, | |
| "learning_rate": 4.376422943813936e-06, | |
| "loss": 0.2849, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.3891402714932126, | |
| "grad_norm": 0.20538470648954774, | |
| "learning_rate": 4.366851546666118e-06, | |
| "loss": 0.3129, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.4072398190045248, | |
| "grad_norm": 0.20067043214876432, | |
| "learning_rate": 4.357217889316657e-06, | |
| "loss": 0.3041, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.425339366515837, | |
| "grad_norm": 0.1997136625573991, | |
| "learning_rate": 4.3475222930516484e-06, | |
| "loss": 0.2839, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.4434389140271495, | |
| "grad_norm": 0.20004099038403145, | |
| "learning_rate": 4.3377650812228765e-06, | |
| "loss": 0.3014, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.19311135694466858, | |
| "learning_rate": 4.327946579237028e-06, | |
| "loss": 0.2834, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.479638009049774, | |
| "grad_norm": 0.21078445039076968, | |
| "learning_rate": 4.318067114544838e-06, | |
| "loss": 0.2796, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 2.497737556561086, | |
| "grad_norm": 0.21975365365759061, | |
| "learning_rate": 4.308127016630176e-06, | |
| "loss": 0.2972, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 2.515837104072398, | |
| "grad_norm": 0.21203142423348517, | |
| "learning_rate": 4.2981266169990436e-06, | |
| "loss": 0.3196, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 2.5339366515837103, | |
| "grad_norm": 0.20131092451024465, | |
| "learning_rate": 4.2880662491685345e-06, | |
| "loss": 0.3003, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.5520361990950224, | |
| "grad_norm": 0.22294798360675439, | |
| "learning_rate": 4.277946248655701e-06, | |
| "loss": 0.2947, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 2.5701357466063346, | |
| "grad_norm": 0.22859386995564024, | |
| "learning_rate": 4.267766952966369e-06, | |
| "loss": 0.2958, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": 0.19567845392715985, | |
| "learning_rate": 4.257528701583882e-06, | |
| "loss": 0.2998, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.6063348416289593, | |
| "grad_norm": 0.19741413456031112, | |
| "learning_rate": 4.247231835957773e-06, | |
| "loss": 0.3408, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.6244343891402715, | |
| "grad_norm": 0.19905612890447116, | |
| "learning_rate": 4.236876699492391e-06, | |
| "loss": 0.3117, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.6425339366515836, | |
| "grad_norm": 0.1942385041095113, | |
| "learning_rate": 4.226463637535429e-06, | |
| "loss": 0.3152, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 2.660633484162896, | |
| "grad_norm": 0.22327732166814804, | |
| "learning_rate": 4.215992997366425e-06, | |
| "loss": 0.3142, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 2.6787330316742084, | |
| "grad_norm": 0.1935161282714164, | |
| "learning_rate": 4.2054651281851685e-06, | |
| "loss": 0.3081, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 2.6968325791855206, | |
| "grad_norm": 0.23957566926280122, | |
| "learning_rate": 4.1948803811000585e-06, | |
| "loss": 0.2894, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 2.7149321266968327, | |
| "grad_norm": 0.18805009890662516, | |
| "learning_rate": 4.184239109116393e-06, | |
| "loss": 0.2984, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.733031674208145, | |
| "grad_norm": 0.212580814141281, | |
| "learning_rate": 4.173541667124599e-06, | |
| "loss": 0.3097, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 2.751131221719457, | |
| "grad_norm": 0.19712271093008257, | |
| "learning_rate": 4.1627884118883925e-06, | |
| "loss": 0.3177, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.2278946892968003, | |
| "learning_rate": 4.1519797020328815e-06, | |
| "loss": 0.3101, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.7873303167420813, | |
| "grad_norm": 0.21064766645861627, | |
| "learning_rate": 4.141115898032607e-06, | |
| "loss": 0.274, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.8054298642533935, | |
| "grad_norm": 0.20995612915210915, | |
| "learning_rate": 4.130197362199521e-06, | |
| "loss": 0.2926, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 0.21633523471290103, | |
| "learning_rate": 4.119224458670905e-06, | |
| "loss": 0.2875, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.841628959276018, | |
| "grad_norm": 0.21266765467202223, | |
| "learning_rate": 4.1081975533972185e-06, | |
| "loss": 0.2947, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.8597285067873304, | |
| "grad_norm": 0.19506346084116072, | |
| "learning_rate": 4.097117014129903e-06, | |
| "loss": 0.296, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.8778280542986425, | |
| "grad_norm": 0.1986031276610744, | |
| "learning_rate": 4.085983210409114e-06, | |
| "loss": 0.2988, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.8959276018099547, | |
| "grad_norm": 0.22662474336309782, | |
| "learning_rate": 4.074796513551395e-06, | |
| "loss": 0.2952, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.914027149321267, | |
| "grad_norm": 0.21721813582738397, | |
| "learning_rate": 4.063557296637295e-06, | |
| "loss": 0.3099, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.9321266968325794, | |
| "grad_norm": 0.2133328804989817, | |
| "learning_rate": 4.052265934498929e-06, | |
| "loss": 0.2974, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.9502262443438916, | |
| "grad_norm": 0.1960218423105953, | |
| "learning_rate": 4.040922803707474e-06, | |
| "loss": 0.3065, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.9683257918552037, | |
| "grad_norm": 0.22167341080572722, | |
| "learning_rate": 4.029528282560609e-06, | |
| "loss": 0.2886, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.986425339366516, | |
| "grad_norm": 0.20386239234209946, | |
| "learning_rate": 4.018082751069904e-06, | |
| "loss": 0.3076, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 3.004524886877828, | |
| "grad_norm": 0.23748187918298697, | |
| "learning_rate": 4.006586590948141e-06, | |
| "loss": 0.2985, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 3.02262443438914, | |
| "grad_norm": 0.22617083435609797, | |
| "learning_rate": 3.995040185596588e-06, | |
| "loss": 0.2754, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 3.0407239819004523, | |
| "grad_norm": 0.23986769037952196, | |
| "learning_rate": 3.983443920092206e-06, | |
| "loss": 0.2854, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 3.0588235294117645, | |
| "grad_norm": 0.20150185345396, | |
| "learning_rate": 3.971798181174816e-06, | |
| "loss": 0.2832, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 0.20913884879987113, | |
| "learning_rate": 3.960103357234192e-06, | |
| "loss": 0.2986, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.0950226244343892, | |
| "grad_norm": 0.20477890710932672, | |
| "learning_rate": 3.948359838297115e-06, | |
| "loss": 0.2876, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 3.1131221719457014, | |
| "grad_norm": 0.2031405516319826, | |
| "learning_rate": 3.9365680160143595e-06, | |
| "loss": 0.2971, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 3.1312217194570136, | |
| "grad_norm": 0.185634722744017, | |
| "learning_rate": 3.924728283647638e-06, | |
| "loss": 0.279, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 3.1493212669683257, | |
| "grad_norm": 0.20745314489894484, | |
| "learning_rate": 3.91284103605648e-06, | |
| "loss": 0.2903, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 3.167420814479638, | |
| "grad_norm": 0.20741649089082642, | |
| "learning_rate": 3.9009066696850664e-06, | |
| "loss": 0.2964, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.1855203619909505, | |
| "grad_norm": 0.20883578071304365, | |
| "learning_rate": 3.888925582549006e-06, | |
| "loss": 0.2946, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 3.2036199095022626, | |
| "grad_norm": 0.21451927304435986, | |
| "learning_rate": 3.8768981742220646e-06, | |
| "loss": 0.2811, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 3.2217194570135748, | |
| "grad_norm": 0.21080586953456093, | |
| "learning_rate": 3.864824845822837e-06, | |
| "loss": 0.2825, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.239819004524887, | |
| "grad_norm": 0.20609867837665838, | |
| "learning_rate": 3.852706000001367e-06, | |
| "loss": 0.2903, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 3.257918552036199, | |
| "grad_norm": 0.1972989252518201, | |
| "learning_rate": 3.840542040925725e-06, | |
| "loss": 0.2626, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.276018099547511, | |
| "grad_norm": 0.21568160972522105, | |
| "learning_rate": 3.828333374268523e-06, | |
| "loss": 0.2906, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 3.2941176470588234, | |
| "grad_norm": 0.1997067887507601, | |
| "learning_rate": 3.81608040719339e-06, | |
| "loss": 0.2862, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.3122171945701355, | |
| "grad_norm": 0.2017980016690952, | |
| "learning_rate": 3.8037835483413877e-06, | |
| "loss": 0.2855, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 3.330316742081448, | |
| "grad_norm": 0.20121264738949698, | |
| "learning_rate": 3.7914432078173867e-06, | |
| "loss": 0.2795, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 3.3484162895927603, | |
| "grad_norm": 0.22611700851822947, | |
| "learning_rate": 3.7790597971763892e-06, | |
| "loss": 0.2836, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 3.3665158371040724, | |
| "grad_norm": 0.2353941218093955, | |
| "learning_rate": 3.7666337294097987e-06, | |
| "loss": 0.288, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 3.3846153846153846, | |
| "grad_norm": 0.18605988505854537, | |
| "learning_rate": 3.7541654189316525e-06, | |
| "loss": 0.275, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 3.4027149321266967, | |
| "grad_norm": 0.22628052198695675, | |
| "learning_rate": 3.741655281564796e-06, | |
| "loss": 0.2966, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 3.420814479638009, | |
| "grad_norm": 0.21236583352079183, | |
| "learning_rate": 3.72910373452702e-06, | |
| "loss": 0.2702, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.4389140271493215, | |
| "grad_norm": 0.22886628365130654, | |
| "learning_rate": 3.7165111964171407e-06, | |
| "loss": 0.2718, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.4570135746606336, | |
| "grad_norm": 0.19177205299999378, | |
| "learning_rate": 3.703878087201044e-06, | |
| "loss": 0.2785, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 3.475113122171946, | |
| "grad_norm": 0.21164213919986813, | |
| "learning_rate": 3.6912048281976764e-06, | |
| "loss": 0.2991, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.493212669683258, | |
| "grad_norm": 0.20082392739954888, | |
| "learning_rate": 3.6784918420649952e-06, | |
| "loss": 0.2814, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 3.51131221719457, | |
| "grad_norm": 0.21531730826425216, | |
| "learning_rate": 3.66573955278587e-06, | |
| "loss": 0.2719, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 3.5294117647058822, | |
| "grad_norm": 0.20053264760640085, | |
| "learning_rate": 3.6529483856539512e-06, | |
| "loss": 0.2639, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 3.5475113122171944, | |
| "grad_norm": 0.18714903727677973, | |
| "learning_rate": 3.640118767259474e-06, | |
| "loss": 0.2712, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 3.5656108597285066, | |
| "grad_norm": 0.19923843024788357, | |
| "learning_rate": 3.6272511254750403e-06, | |
| "loss": 0.2825, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 3.583710407239819, | |
| "grad_norm": 0.2016875130706868, | |
| "learning_rate": 3.6143458894413463e-06, | |
| "loss": 0.2977, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 3.6018099547511313, | |
| "grad_norm": 0.21861290041385015, | |
| "learning_rate": 3.6014034895528705e-06, | |
| "loss": 0.284, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 3.6199095022624435, | |
| "grad_norm": 0.16879798551287897, | |
| "learning_rate": 3.588424357443521e-06, | |
| "loss": 0.2782, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.6380090497737556, | |
| "grad_norm": 0.22087168375536256, | |
| "learning_rate": 3.5754089259722365e-06, | |
| "loss": 0.2902, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 3.6561085972850678, | |
| "grad_norm": 0.2219253724635141, | |
| "learning_rate": 3.5623576292085555e-06, | |
| "loss": 0.294, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 3.6742081447963804, | |
| "grad_norm": 0.19173446074813308, | |
| "learning_rate": 3.549270902418136e-06, | |
| "loss": 0.2715, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 0.20017688015075918, | |
| "learning_rate": 3.536149182048243e-06, | |
| "loss": 0.2823, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 3.7104072398190047, | |
| "grad_norm": 0.19825967232402708, | |
| "learning_rate": 3.5229929057131877e-06, | |
| "loss": 0.2881, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 3.728506787330317, | |
| "grad_norm": 0.218766824892692, | |
| "learning_rate": 3.5098025121797375e-06, | |
| "loss": 0.2999, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 3.746606334841629, | |
| "grad_norm": 0.18895340916933914, | |
| "learning_rate": 3.496578441352481e-06, | |
| "loss": 0.2687, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 3.764705882352941, | |
| "grad_norm": 0.19002028251920547, | |
| "learning_rate": 3.4833211342591565e-06, | |
| "loss": 0.2866, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 3.7828054298642533, | |
| "grad_norm": 0.20990408194136284, | |
| "learning_rate": 3.4700310330359456e-06, | |
| "loss": 0.2805, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 3.8009049773755654, | |
| "grad_norm": 0.1929810608863491, | |
| "learning_rate": 3.4567085809127247e-06, | |
| "loss": 0.2864, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 3.8190045248868776, | |
| "grad_norm": 0.2026264509793902, | |
| "learning_rate": 3.4433542221982863e-06, | |
| "loss": 0.2847, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 3.83710407239819, | |
| "grad_norm": 0.20527047718646593, | |
| "learning_rate": 3.4299684022655196e-06, | |
| "loss": 0.285, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.8552036199095023, | |
| "grad_norm": 0.2004805974514508, | |
| "learning_rate": 3.4165515675365558e-06, | |
| "loss": 0.2862, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 3.8733031674208145, | |
| "grad_norm": 0.18650100732919933, | |
| "learning_rate": 3.403104165467883e-06, | |
| "loss": 0.2748, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 3.8914027149321266, | |
| "grad_norm": 0.21391027704520638, | |
| "learning_rate": 3.3896266445354208e-06, | |
| "loss": 0.2875, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 3.909502262443439, | |
| "grad_norm": 0.19076214648617013, | |
| "learning_rate": 3.376119454219565e-06, | |
| "loss": 0.2811, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 3.9276018099547514, | |
| "grad_norm": 0.22812272390771685, | |
| "learning_rate": 3.362583044990195e-06, | |
| "loss": 0.2923, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 3.9457013574660635, | |
| "grad_norm": 0.2114461488141671, | |
| "learning_rate": 3.3490178682916534e-06, | |
| "loss": 0.2784, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 3.9638009049773757, | |
| "grad_norm": 0.20971069504695025, | |
| "learning_rate": 3.335424376527688e-06, | |
| "loss": 0.2796, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 3.981900452488688, | |
| "grad_norm": 0.20721944316747504, | |
| "learning_rate": 3.321803023046366e-06, | |
| "loss": 0.2855, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.19529166110448204, | |
| "learning_rate": 3.3081542621249503e-06, | |
| "loss": 0.2722, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 4.018099547511312, | |
| "grad_norm": 0.2181909689708081, | |
| "learning_rate": 3.2944785489547544e-06, | |
| "loss": 0.2769, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 4.036199095022624, | |
| "grad_norm": 0.2041261831519089, | |
| "learning_rate": 3.2807763396259597e-06, | |
| "loss": 0.2755, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 4.0542986425339365, | |
| "grad_norm": 0.17317381953746722, | |
| "learning_rate": 3.2670480911124045e-06, | |
| "loss": 0.2457, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 4.072398190045249, | |
| "grad_norm": 0.20985257213280492, | |
| "learning_rate": 3.2532942612563436e-06, | |
| "loss": 0.3084, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 4.090497737556561, | |
| "grad_norm": 0.1805568892682367, | |
| "learning_rate": 3.2395153087531767e-06, | |
| "loss": 0.2688, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 4.108597285067873, | |
| "grad_norm": 0.20212288478471152, | |
| "learning_rate": 3.225711693136156e-06, | |
| "loss": 0.2678, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 4.126696832579185, | |
| "grad_norm": 0.20073111869372287, | |
| "learning_rate": 3.211883874761058e-06, | |
| "loss": 0.2636, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 4.144796380090498, | |
| "grad_norm": 0.21913185170065114, | |
| "learning_rate": 3.19803231479083e-06, | |
| "loss": 0.282, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 4.16289592760181, | |
| "grad_norm": 0.20273970778843858, | |
| "learning_rate": 3.184157475180208e-06, | |
| "loss": 0.2689, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 4.180995475113122, | |
| "grad_norm": 0.17451394799617262, | |
| "learning_rate": 3.1702598186603152e-06, | |
| "loss": 0.2583, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 4.199095022624435, | |
| "grad_norm": 0.1918998406915714, | |
| "learning_rate": 3.1563398087232265e-06, | |
| "loss": 0.2795, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 4.217194570135747, | |
| "grad_norm": 0.1970262585975004, | |
| "learning_rate": 3.1423979096065134e-06, | |
| "loss": 0.2605, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 4.235294117647059, | |
| "grad_norm": 0.18769426018045784, | |
| "learning_rate": 3.1284345862777572e-06, | |
| "loss": 0.2592, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 4.253393665158371, | |
| "grad_norm": 0.18870339061291633, | |
| "learning_rate": 3.1144503044190456e-06, | |
| "loss": 0.2642, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 4.271493212669683, | |
| "grad_norm": 0.18389039727257753, | |
| "learning_rate": 3.100445530411442e-06, | |
| "loss": 0.2376, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 4.289592760180995, | |
| "grad_norm": 0.20812476367192412, | |
| "learning_rate": 3.086420731319429e-06, | |
| "loss": 0.2708, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 4.3076923076923075, | |
| "grad_norm": 0.197363611944904, | |
| "learning_rate": 3.0723763748753354e-06, | |
| "loss": 0.2844, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 4.32579185520362, | |
| "grad_norm": 0.20806148893145612, | |
| "learning_rate": 3.0583129294637342e-06, | |
| "loss": 0.2487, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 4.343891402714932, | |
| "grad_norm": 0.1903831131540733, | |
| "learning_rate": 3.044230864105821e-06, | |
| "loss": 0.256, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.361990950226244, | |
| "grad_norm": 0.19042569669796638, | |
| "learning_rate": 3.030130648443777e-06, | |
| "loss": 0.2788, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 4.380090497737557, | |
| "grad_norm": 0.18850059553704934, | |
| "learning_rate": 3.0160127527250993e-06, | |
| "loss": 0.2808, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 4.398190045248869, | |
| "grad_norm": 0.20047462331384677, | |
| "learning_rate": 3.0018776477869244e-06, | |
| "loss": 0.2654, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 4.416289592760181, | |
| "grad_norm": 0.20580882731270267, | |
| "learning_rate": 2.9877258050403214e-06, | |
| "loss": 0.2753, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 4.4343891402714934, | |
| "grad_norm": 0.18933219589587963, | |
| "learning_rate": 2.973557696454571e-06, | |
| "loss": 0.2627, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 4.452488687782806, | |
| "grad_norm": 0.2019766662866527, | |
| "learning_rate": 2.9593737945414264e-06, | |
| "loss": 0.2779, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 4.470588235294118, | |
| "grad_norm": 0.21814847735110302, | |
| "learning_rate": 2.9451745723393547e-06, | |
| "loss": 0.2747, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 4.48868778280543, | |
| "grad_norm": 0.2044800165411035, | |
| "learning_rate": 2.930960503397761e-06, | |
| "loss": 0.2726, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 4.506787330316742, | |
| "grad_norm": 0.21142216034006506, | |
| "learning_rate": 2.916732061761192e-06, | |
| "loss": 0.2646, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 4.524886877828054, | |
| "grad_norm": 0.20150189042920258, | |
| "learning_rate": 2.9024897219535326e-06, | |
| "loss": 0.279, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.542986425339366, | |
| "grad_norm": 0.19100387279656014, | |
| "learning_rate": 2.8882339589621742e-06, | |
| "loss": 0.2795, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 4.5610859728506785, | |
| "grad_norm": 0.18930829794972215, | |
| "learning_rate": 2.873965248222178e-06, | |
| "loss": 0.2672, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 4.579185520361991, | |
| "grad_norm": 0.18814066866007795, | |
| "learning_rate": 2.859684065600417e-06, | |
| "loss": 0.2478, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 4.597285067873303, | |
| "grad_norm": 0.19644782065692218, | |
| "learning_rate": 2.845390887379706e-06, | |
| "loss": 0.2639, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 0.20255200557130154, | |
| "learning_rate": 2.8310861902429176e-06, | |
| "loss": 0.2725, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 4.633484162895927, | |
| "grad_norm": 0.19849695081074425, | |
| "learning_rate": 2.816770451257085e-06, | |
| "loss": 0.2685, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 4.65158371040724, | |
| "grad_norm": 0.20106804584886076, | |
| "learning_rate": 2.80244414785749e-06, | |
| "loss": 0.2572, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 4.669683257918552, | |
| "grad_norm": 0.2021059922332257, | |
| "learning_rate": 2.7881077578317445e-06, | |
| "loss": 0.2924, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 4.6877828054298645, | |
| "grad_norm": 0.21055754243512417, | |
| "learning_rate": 2.7737617593038493e-06, | |
| "loss": 0.2714, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 0.18496100638206028, | |
| "learning_rate": 2.759406630718255e-06, | |
| "loss": 0.2609, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.723981900452489, | |
| "grad_norm": 0.18437211430441194, | |
| "learning_rate": 2.7450428508239024e-06, | |
| "loss": 0.2662, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 4.742081447963801, | |
| "grad_norm": 0.18828985621936872, | |
| "learning_rate": 2.730670898658255e-06, | |
| "loss": 0.2549, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 4.760180995475113, | |
| "grad_norm": 0.19972804362365068, | |
| "learning_rate": 2.716291253531329e-06, | |
| "loss": 0.2873, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 4.778280542986425, | |
| "grad_norm": 0.2092834898971349, | |
| "learning_rate": 2.7019043950096992e-06, | |
| "loss": 0.2674, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 4.796380090497737, | |
| "grad_norm": 0.19131496744019671, | |
| "learning_rate": 2.6875108029005113e-06, | |
| "loss": 0.2724, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 4.8144796380090495, | |
| "grad_norm": 0.21255643670178404, | |
| "learning_rate": 2.6731109572354795e-06, | |
| "loss": 0.2684, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 4.832579185520362, | |
| "grad_norm": 0.18562764869110326, | |
| "learning_rate": 2.658705338254876e-06, | |
| "loss": 0.271, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 4.850678733031674, | |
| "grad_norm": 0.21207337609644833, | |
| "learning_rate": 2.6442944263915153e-06, | |
| "loss": 0.2719, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 4.868778280542987, | |
| "grad_norm": 0.2129223213748196, | |
| "learning_rate": 2.6298787022547317e-06, | |
| "loss": 0.2666, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 4.886877828054299, | |
| "grad_norm": 0.18692841429903953, | |
| "learning_rate": 2.6154586466143495e-06, | |
| "loss": 0.2755, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 4.904977375565611, | |
| "grad_norm": 0.19199436687113453, | |
| "learning_rate": 2.6010347403846508e-06, | |
| "loss": 0.2864, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 4.923076923076923, | |
| "grad_norm": 0.19327034490069303, | |
| "learning_rate": 2.5866074646083385e-06, | |
| "loss": 0.2694, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "grad_norm": 0.26379305562686184, | |
| "learning_rate": 2.572177300440487e-06, | |
| "loss": 0.2597, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 4.959276018099548, | |
| "grad_norm": 0.1894366168665776, | |
| "learning_rate": 2.557744729132503e-06, | |
| "loss": 0.2825, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 4.97737556561086, | |
| "grad_norm": 0.19519701404452072, | |
| "learning_rate": 2.5433102320160713e-06, | |
| "loss": 0.2893, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 4.995475113122172, | |
| "grad_norm": 0.19163121413004777, | |
| "learning_rate": 2.528874290487102e-06, | |
| "loss": 0.2508, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 5.013574660633484, | |
| "grad_norm": 0.18512352279959782, | |
| "learning_rate": 2.5144373859896792e-06, | |
| "loss": 0.2589, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 5.031674208144796, | |
| "grad_norm": 0.18339390733870273, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2621, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 5.049773755656108, | |
| "grad_norm": 0.1942547479998011, | |
| "learning_rate": 2.4855626140103216e-06, | |
| "loss": 0.245, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 5.067873303167421, | |
| "grad_norm": 0.201133955927992, | |
| "learning_rate": 2.4711257095128987e-06, | |
| "loss": 0.2428, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 5.085972850678733, | |
| "grad_norm": 0.19802266448824934, | |
| "learning_rate": 2.4566897679839295e-06, | |
| "loss": 0.2756, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 5.104072398190045, | |
| "grad_norm": 0.19714188235491836, | |
| "learning_rate": 2.4422552708674977e-06, | |
| "loss": 0.2626, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 5.122171945701357, | |
| "grad_norm": 0.18710363733656865, | |
| "learning_rate": 2.427822699559514e-06, | |
| "loss": 0.2616, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 5.14027149321267, | |
| "grad_norm": 0.18029896729988643, | |
| "learning_rate": 2.413392535391663e-06, | |
| "loss": 0.2671, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 5.158371040723982, | |
| "grad_norm": 0.19353123935666788, | |
| "learning_rate": 2.3989652596153496e-06, | |
| "loss": 0.2518, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 5.176470588235294, | |
| "grad_norm": 0.1999507247304982, | |
| "learning_rate": 2.3845413533856517e-06, | |
| "loss": 0.2691, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 5.1945701357466065, | |
| "grad_norm": 0.1802458898092889, | |
| "learning_rate": 2.3701212977452683e-06, | |
| "loss": 0.2662, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 5.212669683257919, | |
| "grad_norm": 0.20005237780106283, | |
| "learning_rate": 2.3557055736084847e-06, | |
| "loss": 0.2706, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 5.230769230769231, | |
| "grad_norm": 0.20349821320072675, | |
| "learning_rate": 2.3412946617451242e-06, | |
| "loss": 0.2651, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 5.248868778280543, | |
| "grad_norm": 0.19275858047883396, | |
| "learning_rate": 2.3268890427645213e-06, | |
| "loss": 0.2809, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 5.266968325791855, | |
| "grad_norm": 0.19491454590375834, | |
| "learning_rate": 2.312489197099489e-06, | |
| "loss": 0.242, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 5.285067873303167, | |
| "grad_norm": 0.17860701410760396, | |
| "learning_rate": 2.298095604990302e-06, | |
| "loss": 0.252, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 5.3031674208144794, | |
| "grad_norm": 0.18166338870243837, | |
| "learning_rate": 2.283708746468672e-06, | |
| "loss": 0.2687, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 5.321266968325792, | |
| "grad_norm": 0.20860085100238554, | |
| "learning_rate": 2.269329101341745e-06, | |
| "loss": 0.2749, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 5.339366515837104, | |
| "grad_norm": 0.18128543910141529, | |
| "learning_rate": 2.2549571491760985e-06, | |
| "loss": 0.2423, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 5.357466063348416, | |
| "grad_norm": 0.23828035104300602, | |
| "learning_rate": 2.2405933692817458e-06, | |
| "loss": 0.2582, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 5.375565610859729, | |
| "grad_norm": 0.19867583702537983, | |
| "learning_rate": 2.226238240696151e-06, | |
| "loss": 0.2505, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 5.393665158371041, | |
| "grad_norm": 0.2238993077156904, | |
| "learning_rate": 2.2118922421682563e-06, | |
| "loss": 0.2547, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 5.411764705882353, | |
| "grad_norm": 0.18659890730168405, | |
| "learning_rate": 2.1975558521425106e-06, | |
| "loss": 0.2541, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 5.429864253393665, | |
| "grad_norm": 0.2086208336638683, | |
| "learning_rate": 2.183229548742916e-06, | |
| "loss": 0.2449, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.447963800904978, | |
| "grad_norm": 0.19744096649329249, | |
| "learning_rate": 2.1689138097570832e-06, | |
| "loss": 0.2529, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 5.46606334841629, | |
| "grad_norm": 0.1905137878945102, | |
| "learning_rate": 2.1546091126202955e-06, | |
| "loss": 0.2549, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 5.484162895927602, | |
| "grad_norm": 0.18724152511382108, | |
| "learning_rate": 2.1403159343995845e-06, | |
| "loss": 0.2544, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 5.502262443438914, | |
| "grad_norm": 0.18137306072412968, | |
| "learning_rate": 2.1260347517778223e-06, | |
| "loss": 0.2472, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 5.520361990950226, | |
| "grad_norm": 0.21137486256539126, | |
| "learning_rate": 2.111766041037826e-06, | |
| "loss": 0.2663, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 5.538461538461538, | |
| "grad_norm": 0.18969561601900994, | |
| "learning_rate": 2.0975102780464674e-06, | |
| "loss": 0.2654, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 5.5565610859728505, | |
| "grad_norm": 0.18687293378459552, | |
| "learning_rate": 2.083267938238808e-06, | |
| "loss": 0.2521, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 5.574660633484163, | |
| "grad_norm": 0.18563465250651875, | |
| "learning_rate": 2.0690394966022397e-06, | |
| "loss": 0.2599, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 5.592760180995475, | |
| "grad_norm": 0.18961353982721652, | |
| "learning_rate": 2.0548254276606457e-06, | |
| "loss": 0.253, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 5.610859728506787, | |
| "grad_norm": 0.19358594701649867, | |
| "learning_rate": 2.040626205458574e-06, | |
| "loss": 0.268, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 5.628959276018099, | |
| "grad_norm": 0.18903082550740266, | |
| "learning_rate": 2.02644230354543e-06, | |
| "loss": 0.2794, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 5.647058823529412, | |
| "grad_norm": 0.18955280198715693, | |
| "learning_rate": 2.01227419495968e-06, | |
| "loss": 0.2466, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 5.665158371040724, | |
| "grad_norm": 0.21673963839382857, | |
| "learning_rate": 1.9981223522130764e-06, | |
| "loss": 0.2646, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 5.683257918552036, | |
| "grad_norm": 0.18658355423161882, | |
| "learning_rate": 1.9839872472749016e-06, | |
| "loss": 0.2524, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 5.701357466063349, | |
| "grad_norm": 0.18351414151686257, | |
| "learning_rate": 1.9698693515562235e-06, | |
| "loss": 0.2484, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 5.719457013574661, | |
| "grad_norm": 0.19521700431845607, | |
| "learning_rate": 1.9557691358941796e-06, | |
| "loss": 0.241, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 5.737556561085973, | |
| "grad_norm": 0.18325038007655156, | |
| "learning_rate": 1.941687070536267e-06, | |
| "loss": 0.2834, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 5.755656108597285, | |
| "grad_norm": 0.2024434466335083, | |
| "learning_rate": 1.9276236251246655e-06, | |
| "loss": 0.2617, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 5.773755656108597, | |
| "grad_norm": 0.19282545684546182, | |
| "learning_rate": 1.913579268680572e-06, | |
| "loss": 0.251, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 5.791855203619909, | |
| "grad_norm": 0.1985416405665436, | |
| "learning_rate": 1.8995544695885593e-06, | |
| "loss": 0.2528, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 5.8099547511312215, | |
| "grad_norm": 0.19180458814723977, | |
| "learning_rate": 1.8855496955809546e-06, | |
| "loss": 0.2623, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 5.828054298642534, | |
| "grad_norm": 0.19714720164607588, | |
| "learning_rate": 1.8715654137222434e-06, | |
| "loss": 0.2603, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 5.846153846153846, | |
| "grad_norm": 0.17915913395978303, | |
| "learning_rate": 1.8576020903934872e-06, | |
| "loss": 0.2461, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 5.864253393665159, | |
| "grad_norm": 0.1872517611416961, | |
| "learning_rate": 1.8436601912767737e-06, | |
| "loss": 0.2443, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 5.882352941176471, | |
| "grad_norm": 0.2088265937495008, | |
| "learning_rate": 1.8297401813396854e-06, | |
| "loss": 0.2606, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 5.900452488687783, | |
| "grad_norm": 0.20072778739580704, | |
| "learning_rate": 1.8158425248197931e-06, | |
| "loss": 0.2683, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 5.918552036199095, | |
| "grad_norm": 0.20162018571475668, | |
| "learning_rate": 1.801967685209171e-06, | |
| "loss": 0.2674, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 5.9366515837104075, | |
| "grad_norm": 0.19962010438752759, | |
| "learning_rate": 1.7881161252389423e-06, | |
| "loss": 0.2518, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 5.95475113122172, | |
| "grad_norm": 0.1924016139723619, | |
| "learning_rate": 1.7742883068638447e-06, | |
| "loss": 0.2332, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 5.972850678733032, | |
| "grad_norm": 0.19688732396260147, | |
| "learning_rate": 1.7604846912468243e-06, | |
| "loss": 0.2758, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 5.990950226244344, | |
| "grad_norm": 0.21367643724553775, | |
| "learning_rate": 1.7467057387436577e-06, | |
| "loss": 0.2722, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 6.009049773755656, | |
| "grad_norm": 0.18143686535639186, | |
| "learning_rate": 1.7329519088875959e-06, | |
| "loss": 0.2505, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 6.027149321266968, | |
| "grad_norm": 0.19884601017939751, | |
| "learning_rate": 1.719223660374041e-06, | |
| "loss": 0.2406, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 6.04524886877828, | |
| "grad_norm": 0.19790104231314157, | |
| "learning_rate": 1.7055214510452462e-06, | |
| "loss": 0.2459, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 6.0633484162895925, | |
| "grad_norm": 0.21259902967676111, | |
| "learning_rate": 1.6918457378750511e-06, | |
| "loss": 0.256, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 6.081447963800905, | |
| "grad_norm": 0.213170628627418, | |
| "learning_rate": 1.6781969769536356e-06, | |
| "loss": 0.2606, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 6.099547511312217, | |
| "grad_norm": 0.18867147575952214, | |
| "learning_rate": 1.6645756234723127e-06, | |
| "loss": 0.2445, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 6.117647058823529, | |
| "grad_norm": 0.18694162673757048, | |
| "learning_rate": 1.6509821317083466e-06, | |
| "loss": 0.2346, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 6.135746606334842, | |
| "grad_norm": 0.19692152056487713, | |
| "learning_rate": 1.6374169550098052e-06, | |
| "loss": 0.2645, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 6.153846153846154, | |
| "grad_norm": 0.18900423846777845, | |
| "learning_rate": 1.6238805457804353e-06, | |
| "loss": 0.2409, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 6.171945701357466, | |
| "grad_norm": 0.19281737146761763, | |
| "learning_rate": 1.6103733554645794e-06, | |
| "loss": 0.2511, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 6.1900452488687785, | |
| "grad_norm": 0.18576535863582108, | |
| "learning_rate": 1.5968958345321178e-06, | |
| "loss": 0.2562, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 6.208144796380091, | |
| "grad_norm": 0.1937616575487202, | |
| "learning_rate": 1.5834484324634453e-06, | |
| "loss": 0.2558, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 6.226244343891403, | |
| "grad_norm": 0.20266025820130834, | |
| "learning_rate": 1.5700315977344813e-06, | |
| "loss": 0.2619, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 6.244343891402715, | |
| "grad_norm": 0.19244645126328583, | |
| "learning_rate": 1.5566457778017141e-06, | |
| "loss": 0.2357, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 6.262443438914027, | |
| "grad_norm": 0.19529354957198908, | |
| "learning_rate": 1.5432914190872757e-06, | |
| "loss": 0.2547, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 6.280542986425339, | |
| "grad_norm": 0.1977639183994923, | |
| "learning_rate": 1.529968966964055e-06, | |
| "loss": 0.253, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 6.298642533936651, | |
| "grad_norm": 0.19407931113719454, | |
| "learning_rate": 1.5166788657408441e-06, | |
| "loss": 0.2632, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 6.316742081447964, | |
| "grad_norm": 0.19011112857943221, | |
| "learning_rate": 1.5034215586475194e-06, | |
| "loss": 0.2647, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 6.334841628959276, | |
| "grad_norm": 0.2186558043805355, | |
| "learning_rate": 1.490197487820263e-06, | |
| "loss": 0.2395, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 6.352941176470588, | |
| "grad_norm": 0.18367578824384137, | |
| "learning_rate": 1.477007094286813e-06, | |
| "loss": 0.2516, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 6.371040723981901, | |
| "grad_norm": 0.18371310311269254, | |
| "learning_rate": 1.4638508179517583e-06, | |
| "loss": 0.2709, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 6.389140271493213, | |
| "grad_norm": 0.19750798322441557, | |
| "learning_rate": 1.4507290975818648e-06, | |
| "loss": 0.2497, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 6.407239819004525, | |
| "grad_norm": 0.17489326087119314, | |
| "learning_rate": 1.4376423707914462e-06, | |
| "loss": 0.2518, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 6.425339366515837, | |
| "grad_norm": 0.19109685375971255, | |
| "learning_rate": 1.4245910740277642e-06, | |
| "loss": 0.2464, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 6.4434389140271495, | |
| "grad_norm": 0.18732644035351217, | |
| "learning_rate": 1.4115756425564798e-06, | |
| "loss": 0.2554, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 6.461538461538462, | |
| "grad_norm": 0.2042904942174333, | |
| "learning_rate": 1.39859651044713e-06, | |
| "loss": 0.2677, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 6.479638009049774, | |
| "grad_norm": 0.20346012347129977, | |
| "learning_rate": 1.3856541105586545e-06, | |
| "loss": 0.2433, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 6.497737556561086, | |
| "grad_norm": 0.18096207448536866, | |
| "learning_rate": 1.372748874524961e-06, | |
| "loss": 0.248, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 6.515837104072398, | |
| "grad_norm": 0.18311281316650868, | |
| "learning_rate": 1.3598812327405274e-06, | |
| "loss": 0.2433, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.53393665158371, | |
| "grad_norm": 0.19877832010020277, | |
| "learning_rate": 1.3470516143460494e-06, | |
| "loss": 0.2419, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 6.552036199095022, | |
| "grad_norm": 0.19411009696243373, | |
| "learning_rate": 1.3342604472141296e-06, | |
| "loss": 0.2485, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 6.570135746606335, | |
| "grad_norm": 0.18775697820498174, | |
| "learning_rate": 1.3215081579350058e-06, | |
| "loss": 0.2514, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 6.588235294117647, | |
| "grad_norm": 0.1974485040630947, | |
| "learning_rate": 1.308795171802324e-06, | |
| "loss": 0.2623, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 6.606334841628959, | |
| "grad_norm": 0.20195192192796554, | |
| "learning_rate": 1.2961219127989562e-06, | |
| "loss": 0.2523, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 6.624434389140271, | |
| "grad_norm": 0.1867586520187508, | |
| "learning_rate": 1.2834888035828597e-06, | |
| "loss": 0.2434, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 6.642533936651584, | |
| "grad_norm": 0.19535767032905008, | |
| "learning_rate": 1.2708962654729812e-06, | |
| "loss": 0.2246, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 6.660633484162896, | |
| "grad_norm": 0.17951796660986621, | |
| "learning_rate": 1.258344718435205e-06, | |
| "loss": 0.2548, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 6.678733031674208, | |
| "grad_norm": 0.1838076745236157, | |
| "learning_rate": 1.2458345810683492e-06, | |
| "loss": 0.2517, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 6.6968325791855206, | |
| "grad_norm": 0.1987502629500275, | |
| "learning_rate": 1.233366270590202e-06, | |
| "loss": 0.2373, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 6.714932126696833, | |
| "grad_norm": 0.1921556070273265, | |
| "learning_rate": 1.2209402028236114e-06, | |
| "loss": 0.2444, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 6.733031674208145, | |
| "grad_norm": 0.18753751737041122, | |
| "learning_rate": 1.2085567921826128e-06, | |
| "loss": 0.2429, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 6.751131221719457, | |
| "grad_norm": 0.17267111610692507, | |
| "learning_rate": 1.1962164516586123e-06, | |
| "loss": 0.2408, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 6.769230769230769, | |
| "grad_norm": 0.1785397882614972, | |
| "learning_rate": 1.1839195928066101e-06, | |
| "loss": 0.2364, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 6.787330316742081, | |
| "grad_norm": 0.1974641160114867, | |
| "learning_rate": 1.171666625731477e-06, | |
| "loss": 0.2502, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 6.8054298642533935, | |
| "grad_norm": 0.1936200917713445, | |
| "learning_rate": 1.1594579590742758e-06, | |
| "loss": 0.2495, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 6.823529411764706, | |
| "grad_norm": 0.20474767855899034, | |
| "learning_rate": 1.1472939999986338e-06, | |
| "loss": 0.2444, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 6.841628959276018, | |
| "grad_norm": 0.21747609011178112, | |
| "learning_rate": 1.1351751541771644e-06, | |
| "loss": 0.2423, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 6.859728506787331, | |
| "grad_norm": 0.2024534108733349, | |
| "learning_rate": 1.1231018257779363e-06, | |
| "loss": 0.2641, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 6.877828054298643, | |
| "grad_norm": 0.19486585090979294, | |
| "learning_rate": 1.1110744174509952e-06, | |
| "loss": 0.2463, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 6.895927601809955, | |
| "grad_norm": 0.17849040364534344, | |
| "learning_rate": 1.0990933303149342e-06, | |
| "loss": 0.2631, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 6.914027149321267, | |
| "grad_norm": 0.19002926125887049, | |
| "learning_rate": 1.0871589639435204e-06, | |
| "loss": 0.2481, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 6.932126696832579, | |
| "grad_norm": 0.18083592050616315, | |
| "learning_rate": 1.0752717163523623e-06, | |
| "loss": 0.241, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 6.950226244343892, | |
| "grad_norm": 0.19496492930938145, | |
| "learning_rate": 1.0634319839856407e-06, | |
| "loss": 0.2527, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 6.968325791855204, | |
| "grad_norm": 0.19417699707230154, | |
| "learning_rate": 1.0516401617028863e-06, | |
| "loss": 0.2322, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 6.986425339366516, | |
| "grad_norm": 0.18003217148044237, | |
| "learning_rate": 1.0398966427658091e-06, | |
| "loss": 0.2357, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 7.004524886877828, | |
| "grad_norm": 0.18246799637458713, | |
| "learning_rate": 1.0282018188251854e-06, | |
| "loss": 0.2568, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 7.02262443438914, | |
| "grad_norm": 0.18781508356688068, | |
| "learning_rate": 1.0165560799077952e-06, | |
| "loss": 0.2387, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 7.040723981900452, | |
| "grad_norm": 0.17588577341825412, | |
| "learning_rate": 1.004959814403413e-06, | |
| "loss": 0.262, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 7.0588235294117645, | |
| "grad_norm": 0.19676767898186667, | |
| "learning_rate": 9.934134090518593e-07, | |
| "loss": 0.2374, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 7.076923076923077, | |
| "grad_norm": 0.19345676011938345, | |
| "learning_rate": 9.81917248930096e-07, | |
| "loss": 0.2162, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 7.095022624434389, | |
| "grad_norm": 0.2178742299523153, | |
| "learning_rate": 9.704717174393912e-07, | |
| "loss": 0.2495, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 7.113122171945701, | |
| "grad_norm": 0.18628703610003405, | |
| "learning_rate": 9.590771962925272e-07, | |
| "loss": 0.2596, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 7.131221719457014, | |
| "grad_norm": 0.18042019029734135, | |
| "learning_rate": 9.477340655010717e-07, | |
| "loss": 0.2465, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 7.149321266968326, | |
| "grad_norm": 0.1924619560299915, | |
| "learning_rate": 9.36442703362706e-07, | |
| "loss": 0.2395, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 7.167420814479638, | |
| "grad_norm": 0.18162050443390207, | |
| "learning_rate": 9.252034864486062e-07, | |
| "loss": 0.2425, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 7.1855203619909505, | |
| "grad_norm": 0.1725352404799184, | |
| "learning_rate": 9.140167895908867e-07, | |
| "loss": 0.2257, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 7.203619909502263, | |
| "grad_norm": 0.17850869622337964, | |
| "learning_rate": 9.028829858700974e-07, | |
| "loss": 0.2313, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 7.221719457013575, | |
| "grad_norm": 0.1896145123389741, | |
| "learning_rate": 8.918024466027822e-07, | |
| "loss": 0.2462, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 7.239819004524887, | |
| "grad_norm": 0.1878899849862918, | |
| "learning_rate": 8.807755413290953e-07, | |
| "loss": 0.2502, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 7.257918552036199, | |
| "grad_norm": 0.19070595484051797, | |
| "learning_rate": 8.698026378004787e-07, | |
| "loss": 0.2433, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 7.276018099547511, | |
| "grad_norm": 0.17359356109341043, | |
| "learning_rate": 8.588841019673938e-07, | |
| "loss": 0.2604, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 7.294117647058823, | |
| "grad_norm": 0.20358309076003017, | |
| "learning_rate": 8.480202979671201e-07, | |
| "loss": 0.2327, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 7.3122171945701355, | |
| "grad_norm": 0.1835516820557226, | |
| "learning_rate": 8.372115881116089e-07, | |
| "loss": 0.2409, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 7.330316742081448, | |
| "grad_norm": 0.18238130931189853, | |
| "learning_rate": 8.264583328754017e-07, | |
| "loss": 0.2393, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 7.34841628959276, | |
| "grad_norm": 0.17542601825119047, | |
| "learning_rate": 8.157608908836071e-07, | |
| "loss": 0.2312, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 7.366515837104072, | |
| "grad_norm": 0.18257023212771115, | |
| "learning_rate": 8.051196188999425e-07, | |
| "loss": 0.2503, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 7.384615384615385, | |
| "grad_norm": 0.1967778738312882, | |
| "learning_rate": 7.945348718148324e-07, | |
| "loss": 0.2419, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 7.402714932126697, | |
| "grad_norm": 0.18755379540882788, | |
| "learning_rate": 7.840070026335758e-07, | |
| "loss": 0.2332, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 7.420814479638009, | |
| "grad_norm": 0.1911070489817504, | |
| "learning_rate": 7.735363624645712e-07, | |
| "loss": 0.2484, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 7.4389140271493215, | |
| "grad_norm": 0.1882055636984676, | |
| "learning_rate": 7.6312330050761e-07, | |
| "loss": 0.2404, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 7.457013574660634, | |
| "grad_norm": 0.20190668623593286, | |
| "learning_rate": 7.527681640422265e-07, | |
| "loss": 0.2526, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 7.475113122171946, | |
| "grad_norm": 0.1974234563343766, | |
| "learning_rate": 7.424712984161192e-07, | |
| "loss": 0.2688, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 7.493212669683258, | |
| "grad_norm": 0.17631879313649837, | |
| "learning_rate": 7.322330470336314e-07, | |
| "loss": 0.2508, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 7.51131221719457, | |
| "grad_norm": 0.18714884817468105, | |
| "learning_rate": 7.220537513442999e-07, | |
| "loss": 0.2486, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 7.529411764705882, | |
| "grad_norm": 0.19399653562175878, | |
| "learning_rate": 7.11933750831467e-07, | |
| "loss": 0.2618, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 7.547511312217194, | |
| "grad_norm": 0.1881943799081702, | |
| "learning_rate": 7.018733830009578e-07, | |
| "loss": 0.2745, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 7.5656108597285066, | |
| "grad_norm": 0.19410422423302068, | |
| "learning_rate": 6.91872983369826e-07, | |
| "loss": 0.2575, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 7.583710407239819, | |
| "grad_norm": 0.19139908757724744, | |
| "learning_rate": 6.819328854551619e-07, | |
| "loss": 0.2431, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 7.601809954751131, | |
| "grad_norm": 0.19407692138480465, | |
| "learning_rate": 6.720534207629731e-07, | |
| "loss": 0.2612, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 7.619909502262443, | |
| "grad_norm": 0.19077609905815648, | |
| "learning_rate": 6.622349187771246e-07, | |
| "loss": 0.2363, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 7.638009049773755, | |
| "grad_norm": 0.19785590661298624, | |
| "learning_rate": 6.524777069483526e-07, | |
| "loss": 0.2165, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 7.656108597285068, | |
| "grad_norm": 0.18170589381863933, | |
| "learning_rate": 6.427821106833429e-07, | |
| "loss": 0.2518, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 7.67420814479638, | |
| "grad_norm": 0.19082550580582264, | |
| "learning_rate": 6.33148453333881e-07, | |
| "loss": 0.2497, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 7.6923076923076925, | |
| "grad_norm": 0.2010429672996338, | |
| "learning_rate": 6.235770561860646e-07, | |
| "loss": 0.2735, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 7.710407239819005, | |
| "grad_norm": 0.20631621699435826, | |
| "learning_rate": 6.140682384495902e-07, | |
| "loss": 0.2638, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 7.728506787330317, | |
| "grad_norm": 0.18857883979117615, | |
| "learning_rate": 6.046223172471083e-07, | |
| "loss": 0.2511, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 7.746606334841629, | |
| "grad_norm": 0.19438107603701976, | |
| "learning_rate": 5.952396076036457e-07, | |
| "loss": 0.2411, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 7.764705882352941, | |
| "grad_norm": 0.18435853585586434, | |
| "learning_rate": 5.85920422436099e-07, | |
| "loss": 0.2337, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 7.782805429864253, | |
| "grad_norm": 0.19759361458272545, | |
| "learning_rate": 5.766650725428027e-07, | |
| "loss": 0.2304, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 7.800904977375565, | |
| "grad_norm": 0.17820786715247264, | |
| "learning_rate": 5.674738665931575e-07, | |
| "loss": 0.2302, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 7.819004524886878, | |
| "grad_norm": 0.18336638108510472, | |
| "learning_rate": 5.583471111173414e-07, | |
| "loss": 0.2415, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 7.83710407239819, | |
| "grad_norm": 0.1861341218211825, | |
| "learning_rate": 5.492851104960839e-07, | |
| "loss": 0.2347, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 7.855203619909502, | |
| "grad_norm": 0.18671520221803245, | |
| "learning_rate": 5.402881669505164e-07, | |
| "loss": 0.2433, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 7.873303167420815, | |
| "grad_norm": 0.18470916369258913, | |
| "learning_rate": 5.313565805320914e-07, | |
| "loss": 0.2392, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 7.891402714932127, | |
| "grad_norm": 0.18145209957770228, | |
| "learning_rate": 5.224906491125778e-07, | |
| "loss": 0.2491, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 7.909502262443439, | |
| "grad_norm": 0.1841316864472566, | |
| "learning_rate": 5.13690668374125e-07, | |
| "loss": 0.2374, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 7.927601809954751, | |
| "grad_norm": 0.16991217903448427, | |
| "learning_rate": 5.049569317994013e-07, | |
| "loss": 0.2222, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 7.9457013574660635, | |
| "grad_norm": 0.18977292588230824, | |
| "learning_rate": 4.962897306618101e-07, | |
| "loss": 0.2413, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 7.963800904977376, | |
| "grad_norm": 0.2034200762540194, | |
| "learning_rate": 4.876893540157692e-07, | |
| "loss": 0.2526, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 7.981900452488688, | |
| "grad_norm": 0.18561076018112563, | |
| "learning_rate": 4.791560886870786e-07, | |
| "loss": 0.2505, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.1808509581648577, | |
| "learning_rate": 4.70690219263347e-07, | |
| "loss": 0.2397, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 8.018099547511312, | |
| "grad_norm": 0.1983786803651098, | |
| "learning_rate": 4.6229202808450587e-07, | |
| "loss": 0.2384, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 8.036199095022624, | |
| "grad_norm": 0.19613362321076386, | |
| "learning_rate": 4.539617952333913e-07, | |
| "loss": 0.2396, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 8.054298642533936, | |
| "grad_norm": 0.18104571677229486, | |
| "learning_rate": 4.4569979852640444e-07, | |
| "loss": 0.2481, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 8.072398190045249, | |
| "grad_norm": 0.18894956462902818, | |
| "learning_rate": 4.3750631350424456e-07, | |
| "loss": 0.2331, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 8.09049773755656, | |
| "grad_norm": 0.1856642703057781, | |
| "learning_rate": 4.2938161342272024e-07, | |
| "loss": 0.2398, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 8.108597285067873, | |
| "grad_norm": 0.19509279291436657, | |
| "learning_rate": 4.2132596924363666e-07, | |
| "loss": 0.2396, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 8.126696832579185, | |
| "grad_norm": 0.18583235612820456, | |
| "learning_rate": 4.1333964962575995e-07, | |
| "loss": 0.2457, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 8.144796380090497, | |
| "grad_norm": 0.19414831334323818, | |
| "learning_rate": 4.0542292091585447e-07, | |
| "loss": 0.2557, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 8.16289592760181, | |
| "grad_norm": 0.1948999434614907, | |
| "learning_rate": 3.975760471398013e-07, | |
| "loss": 0.2346, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 8.180995475113122, | |
| "grad_norm": 0.18223819061827173, | |
| "learning_rate": 3.89799289993795e-07, | |
| "loss": 0.2176, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 8.199095022624434, | |
| "grad_norm": 0.19449644313553408, | |
| "learning_rate": 3.8209290883561205e-07, | |
| "loss": 0.247, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 8.217194570135746, | |
| "grad_norm": 0.1930258214779179, | |
| "learning_rate": 3.7445716067596506e-07, | |
| "loss": 0.2298, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 8.235294117647058, | |
| "grad_norm": 0.18628969575946702, | |
| "learning_rate": 3.668923001699284e-07, | |
| "loss": 0.2385, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 8.25339366515837, | |
| "grad_norm": 0.18169941514755078, | |
| "learning_rate": 3.593985796084468e-07, | |
| "loss": 0.2519, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 8.271493212669684, | |
| "grad_norm": 0.1837119269988211, | |
| "learning_rate": 3.519762489099207e-07, | |
| "loss": 0.2602, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 8.289592760180996, | |
| "grad_norm": 0.1953248401558189, | |
| "learning_rate": 3.446255556118736e-07, | |
| "loss": 0.2567, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 8.307692307692308, | |
| "grad_norm": 0.17837155536528138, | |
| "learning_rate": 3.373467448626916e-07, | |
| "loss": 0.2332, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 8.32579185520362, | |
| "grad_norm": 0.1879124674324348, | |
| "learning_rate": 3.3014005941345406e-07, | |
| "loss": 0.2357, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 8.343891402714933, | |
| "grad_norm": 0.19669583622722217, | |
| "learning_rate": 3.230057396098321e-07, | |
| "loss": 0.2188, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 8.361990950226245, | |
| "grad_norm": 0.19436805306375338, | |
| "learning_rate": 3.1594402338407633e-07, | |
| "loss": 0.2595, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 8.380090497737557, | |
| "grad_norm": 0.1731035690780127, | |
| "learning_rate": 3.0895514624707994e-07, | |
| "loss": 0.2293, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 8.39819004524887, | |
| "grad_norm": 0.19086125694967881, | |
| "learning_rate": 3.020393412805259e-07, | |
| "loss": 0.2305, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 8.416289592760181, | |
| "grad_norm": 0.18779406733198983, | |
| "learning_rate": 2.9519683912911267e-07, | |
| "loss": 0.2596, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 8.434389140271493, | |
| "grad_norm": 0.18546808477280827, | |
| "learning_rate": 2.8842786799286204e-07, | |
| "loss": 0.2435, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 8.452488687782806, | |
| "grad_norm": 0.1896684936541315, | |
| "learning_rate": 2.8173265361950837e-07, | |
| "loss": 0.2386, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 8.470588235294118, | |
| "grad_norm": 0.17852233356583405, | |
| "learning_rate": 2.751114192969709e-07, | |
| "loss": 0.231, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 8.48868778280543, | |
| "grad_norm": 0.18399543647963754, | |
| "learning_rate": 2.685643858459064e-07, | |
| "loss": 0.2477, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 8.506787330316742, | |
| "grad_norm": 0.18054851239071437, | |
| "learning_rate": 2.620917716123444e-07, | |
| "loss": 0.2504, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 8.524886877828054, | |
| "grad_norm": 0.19308936407562874, | |
| "learning_rate": 2.55693792460405e-07, | |
| "loss": 0.2545, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 8.542986425339366, | |
| "grad_norm": 0.19847333989927235, | |
| "learning_rate": 2.4937066176510123e-07, | |
| "loss": 0.2462, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 8.561085972850679, | |
| "grad_norm": 0.20082127472743996, | |
| "learning_rate": 2.4312259040522093e-07, | |
| "loss": 0.2449, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 8.57918552036199, | |
| "grad_norm": 0.1843637491284879, | |
| "learning_rate": 2.3694978675629476e-07, | |
| "loss": 0.2422, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 8.597285067873303, | |
| "grad_norm": 0.18297796260401825, | |
| "learning_rate": 2.3085245668364897e-07, | |
| "loss": 0.2492, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 8.615384615384615, | |
| "grad_norm": 0.18214698681303781, | |
| "learning_rate": 2.2483080353553537e-07, | |
| "loss": 0.2435, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 8.633484162895927, | |
| "grad_norm": 0.1932187580551005, | |
| "learning_rate": 2.1888502813635276e-07, | |
| "loss": 0.2471, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 8.65158371040724, | |
| "grad_norm": 0.1862160611593082, | |
| "learning_rate": 2.1301532877994747e-07, | |
| "loss": 0.2367, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 8.669683257918551, | |
| "grad_norm": 0.1853161129752053, | |
| "learning_rate": 2.0722190122300311e-07, | |
| "loss": 0.2344, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 8.687782805429864, | |
| "grad_norm": 0.18442104500106515, | |
| "learning_rate": 2.0150493867850867e-07, | |
| "loss": 0.2394, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 8.705882352941176, | |
| "grad_norm": 0.1836768530394557, | |
| "learning_rate": 1.9586463180931658e-07, | |
| "loss": 0.242, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 8.723981900452488, | |
| "grad_norm": 0.18225478866484207, | |
| "learning_rate": 1.9030116872178317e-07, | |
| "loss": 0.2571, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 8.742081447963802, | |
| "grad_norm": 0.19072644512673081, | |
| "learning_rate": 1.848147349594967e-07, | |
| "loss": 0.2457, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 8.760180995475114, | |
| "grad_norm": 0.18223857901348137, | |
| "learning_rate": 1.7940551349708734e-07, | |
| "loss": 0.2351, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 8.778280542986426, | |
| "grad_norm": 0.23050285345657223, | |
| "learning_rate": 1.7407368473412678e-07, | |
| "loss": 0.2355, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 8.796380090497738, | |
| "grad_norm": 0.18880764635155572, | |
| "learning_rate": 1.6881942648911077e-07, | |
| "loss": 0.2287, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 8.81447963800905, | |
| "grad_norm": 0.1830117965150596, | |
| "learning_rate": 1.6364291399352916e-07, | |
| "loss": 0.2447, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 8.832579185520363, | |
| "grad_norm": 0.18803881671915923, | |
| "learning_rate": 1.5854431988602175e-07, | |
| "loss": 0.2431, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 8.850678733031675, | |
| "grad_norm": 0.18013778534000302, | |
| "learning_rate": 1.5352381420662144e-07, | |
| "loss": 0.2397, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 8.868778280542987, | |
| "grad_norm": 0.2003292008190993, | |
| "learning_rate": 1.4858156439108097e-07, | |
| "loss": 0.2291, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 8.886877828054299, | |
| "grad_norm": 0.1780640301175049, | |
| "learning_rate": 1.4371773526529216e-07, | |
| "loss": 0.2138, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 8.904977375565611, | |
| "grad_norm": 0.1858049004037094, | |
| "learning_rate": 1.3893248903978695e-07, | |
| "loss": 0.2248, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 8.923076923076923, | |
| "grad_norm": 0.1870658138910751, | |
| "learning_rate": 1.342259853043279e-07, | |
| "loss": 0.2628, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 8.941176470588236, | |
| "grad_norm": 0.1837618747915919, | |
| "learning_rate": 1.2959838102258537e-07, | |
| "loss": 0.2369, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 8.959276018099548, | |
| "grad_norm": 0.1825018533847707, | |
| "learning_rate": 1.2504983052690406e-07, | |
| "loss": 0.2371, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 8.97737556561086, | |
| "grad_norm": 0.18050085376698732, | |
| "learning_rate": 1.2058048551315455e-07, | |
| "loss": 0.2364, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 8.995475113122172, | |
| "grad_norm": 0.17972618184239006, | |
| "learning_rate": 1.1619049503567486e-07, | |
| "loss": 0.2473, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 9.013574660633484, | |
| "grad_norm": 0.1830792217516428, | |
| "learning_rate": 1.1188000550230005e-07, | |
| "loss": 0.2352, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 9.031674208144796, | |
| "grad_norm": 0.17879744556952354, | |
| "learning_rate": 1.0764916066947795e-07, | |
| "loss": 0.2641, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 9.049773755656108, | |
| "grad_norm": 0.18166675174635316, | |
| "learning_rate": 1.0349810163747587e-07, | |
| "loss": 0.2324, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 9.06787330316742, | |
| "grad_norm": 0.169470120760864, | |
| "learning_rate": 9.942696684567488e-08, | |
| "loss": 0.2433, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 9.085972850678733, | |
| "grad_norm": 0.18110948245786077, | |
| "learning_rate": 9.54358920679524e-08, | |
| "loss": 0.2374, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 9.104072398190045, | |
| "grad_norm": 0.18319694777040335, | |
| "learning_rate": 9.152501040815442e-08, | |
| "loss": 0.254, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 9.122171945701357, | |
| "grad_norm": 0.1915504535166829, | |
| "learning_rate": 8.769445229565549e-08, | |
| "loss": 0.2325, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 9.14027149321267, | |
| "grad_norm": 0.17665350982157665, | |
| "learning_rate": 8.394434548101099e-08, | |
| "loss": 0.2251, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 9.158371040723981, | |
| "grad_norm": 0.17427434868030764, | |
| "learning_rate": 8.027481503169371e-08, | |
| "loss": 0.2345, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 9.176470588235293, | |
| "grad_norm": 0.1787409835322033, | |
| "learning_rate": 7.66859833279257e-08, | |
| "loss": 0.2389, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 9.194570135746606, | |
| "grad_norm": 0.18100016492103735, | |
| "learning_rate": 7.317797005859467e-08, | |
| "loss": 0.2519, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 9.212669683257918, | |
| "grad_norm": 0.17821751417293089, | |
| "learning_rate": 6.97508922172635e-08, | |
| "loss": 0.2287, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 9.23076923076923, | |
| "grad_norm": 0.20843753394336795, | |
| "learning_rate": 6.640486409826785e-08, | |
| "loss": 0.2444, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 9.248868778280542, | |
| "grad_norm": 0.17620285125559612, | |
| "learning_rate": 6.313999729290476e-08, | |
| "loss": 0.2601, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 9.266968325791856, | |
| "grad_norm": 0.18672849956899618, | |
| "learning_rate": 5.99564006857109e-08, | |
| "loss": 0.2247, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 9.285067873303168, | |
| "grad_norm": 0.18049275292301087, | |
| "learning_rate": 5.685418045083102e-08, | |
| "loss": 0.2511, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 9.30316742081448, | |
| "grad_norm": 0.17415682650124498, | |
| "learning_rate": 5.383344004847774e-08, | |
| "loss": 0.2122, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 9.321266968325792, | |
| "grad_norm": 0.18556393996618256, | |
| "learning_rate": 5.0894280221479855e-08, | |
| "loss": 0.2294, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 9.339366515837105, | |
| "grad_norm": 0.1838789712871206, | |
| "learning_rate": 4.8036798991923925e-08, | |
| "loss": 0.2223, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 9.357466063348417, | |
| "grad_norm": 0.19715964425866056, | |
| "learning_rate": 4.526109165788439e-08, | |
| "loss": 0.2381, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 9.375565610859729, | |
| "grad_norm": 0.1855854696991745, | |
| "learning_rate": 4.256725079024554e-08, | |
| "loss": 0.2342, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 9.393665158371041, | |
| "grad_norm": 0.18048725239749752, | |
| "learning_rate": 3.995536622961399e-08, | |
| "loss": 0.2524, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 9.411764705882353, | |
| "grad_norm": 0.19277390554384807, | |
| "learning_rate": 3.7425525083322755e-08, | |
| "loss": 0.2488, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 9.429864253393665, | |
| "grad_norm": 0.18825292413778436, | |
| "learning_rate": 3.4977811722526065e-08, | |
| "loss": 0.2263, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 9.447963800904978, | |
| "grad_norm": 0.17855716822938666, | |
| "learning_rate": 3.261230777938607e-08, | |
| "loss": 0.2549, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 9.46606334841629, | |
| "grad_norm": 0.18271161254439716, | |
| "learning_rate": 3.032909214434887e-08, | |
| "loss": 0.2062, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 9.484162895927602, | |
| "grad_norm": 0.1985253721454189, | |
| "learning_rate": 2.8128240963515574e-08, | |
| "loss": 0.2395, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 9.502262443438914, | |
| "grad_norm": 0.17838320881574793, | |
| "learning_rate": 2.600982763610094e-08, | |
| "loss": 0.2526, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 9.520361990950226, | |
| "grad_norm": 0.18314903249677716, | |
| "learning_rate": 2.3973922811987295e-08, | |
| "loss": 0.2264, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 9.538461538461538, | |
| "grad_norm": 0.19459399624660845, | |
| "learning_rate": 2.202059438936588e-08, | |
| "loss": 0.2589, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 9.55656108597285, | |
| "grad_norm": 0.1904697767266005, | |
| "learning_rate": 2.0149907512475585e-08, | |
| "loss": 0.2515, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 9.574660633484163, | |
| "grad_norm": 0.19523143039480956, | |
| "learning_rate": 1.8361924569427204e-08, | |
| "loss": 0.2525, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 9.592760180995475, | |
| "grad_norm": 0.17856164334939217, | |
| "learning_rate": 1.6656705190125078e-08, | |
| "loss": 0.2276, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 9.610859728506787, | |
| "grad_norm": 0.18321195430667842, | |
| "learning_rate": 1.5034306244277042e-08, | |
| "loss": 0.2418, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 9.628959276018099, | |
| "grad_norm": 0.17787016407899692, | |
| "learning_rate": 1.3494781839498428e-08, | |
| "loss": 0.2342, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 9.647058823529411, | |
| "grad_norm": 0.19126723554650038, | |
| "learning_rate": 1.2038183319507957e-08, | |
| "loss": 0.2469, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 9.665158371040723, | |
| "grad_norm": 0.1892990291817674, | |
| "learning_rate": 1.0664559262413831e-08, | |
| "loss": 0.2549, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 9.683257918552036, | |
| "grad_norm": 0.1793510043645716, | |
| "learning_rate": 9.373955479095587e-09, | |
| "loss": 0.2299, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 9.701357466063348, | |
| "grad_norm": 0.18123530213186048, | |
| "learning_rate": 8.166415011675032e-09, | |
| "loss": 0.238, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 9.71945701357466, | |
| "grad_norm": 0.19155362352898522, | |
| "learning_rate": 7.041978132081295e-09, | |
| "loss": 0.2505, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 9.737556561085974, | |
| "grad_norm": 0.19166375374826475, | |
| "learning_rate": 6.00068234070772e-09, | |
| "loss": 0.2486, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 9.755656108597286, | |
| "grad_norm": 0.19649835123518228, | |
| "learning_rate": 5.042562365160375e-09, | |
| "loss": 0.2339, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 9.773755656108598, | |
| "grad_norm": 0.17975277337095447, | |
| "learning_rate": 4.167650159100922e-09, | |
| "loss": 0.2386, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 9.79185520361991, | |
| "grad_norm": 0.19853806613153782, | |
| "learning_rate": 3.375974901181356e-09, | |
| "loss": 0.2651, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 9.809954751131222, | |
| "grad_norm": 0.17741051186070012, | |
| "learning_rate": 2.6675629940689508e-09, | |
| "loss": 0.2345, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 9.828054298642535, | |
| "grad_norm": 0.21010956050591995, | |
| "learning_rate": 2.0424380635675202e-09, | |
| "loss": 0.2433, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 9.846153846153847, | |
| "grad_norm": 0.1925677170949037, | |
| "learning_rate": 1.5006209578286024e-09, | |
| "loss": 0.2442, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 9.864253393665159, | |
| "grad_norm": 0.18453673405456344, | |
| "learning_rate": 1.0421297466570169e-09, | |
| "loss": 0.2302, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 9.882352941176471, | |
| "grad_norm": 0.20600991870093216, | |
| "learning_rate": 6.669797209069018e-10, | |
| "loss": 0.2338, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 9.900452488687783, | |
| "grad_norm": 0.19783049619088353, | |
| "learning_rate": 3.7518339197267774e-10, | |
| "loss": 0.2584, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 9.918552036199095, | |
| "grad_norm": 0.2143160581746704, | |
| "learning_rate": 1.6675049137188094e-10, | |
| "loss": 0.2481, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 9.936651583710407, | |
| "grad_norm": 0.18644523978656508, | |
| "learning_rate": 4.1687970420423165e-11, | |
| "loss": 0.2456, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 9.95475113122172, | |
| "grad_norm": 0.19744141932163012, | |
| "learning_rate": 0.0, | |
| "loss": 0.2503, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 9.95475113122172, | |
| "step": 550, | |
| "total_flos": 9.907464757911224e+17, | |
| "train_loss": 0.2748682842471383, | |
| "train_runtime": 89439.6385, | |
| "train_samples_per_second": 0.395, | |
| "train_steps_per_second": 0.006 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 550, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.907464757911224e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
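
The log above follows the standard Hugging Face Trainer checkpoint schema: `log_history` is a list of per-step records (`epoch`, `grad_norm`, `learning_rate`, `loss`, `step`) followed by one summary record (`train_loss`, `train_runtime`, throughput fields), and the top-level keys describe the run (`max_steps`, `num_train_epochs`, `save_steps`, `train_batch_size`). For readers who want to inspect the loss curve programmatically, the sketch below loads the file and prints a few summary numbers. It is a minimal example, not part of the training run itself; the filename `trainer_state.json` is an assumption about where this JSON is saved, and only the Python standard library is used.

```python
import json

# Assumption: the JSON shown above has been saved locally as "trainer_state.json".
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss" key; the trailing summary entry instead
# carries aggregates such as "train_loss", so filter on the key.
steps = [e for e in state["log_history"] if "loss" in e]

print(f"logged steps : {len(steps)}")
print(f"first loss   : {steps[0]['loss']:.4f} (step {steps[0]['step']})")
print(f"final loss   : {steps[-1]['loss']:.4f} (step {steps[-1]['step']})")
print(f"min loss     : {min(e['loss'] for e in steps):.4f}")
print(f"train_loss   : {state['log_history'][-1].get('train_loss')}")
```

With the data in this file, the script would report 550 logged steps and an average `train_loss` of roughly 0.275, matching the summary record at the end of `log_history`.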