| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.012523314681588, |
| "eval_steps": 500, |
| "global_step": 950, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.057669635862112045, |
| "learning_rate": 1.0638297872340426e-07, |
| "loss": 1.3494, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.06152823567390442, |
| "learning_rate": 2.1276595744680852e-07, |
| "loss": 1.2781, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.05398479849100113, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 1.3861, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.059882670640945435, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 1.4557, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.059738870710134506, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 1.3505, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05607615038752556, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 1.4366, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05640924349427223, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 1.3647, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05529299005866051, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 1.3731, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05329303443431854, |
| "learning_rate": 9.574468085106382e-07, |
| "loss": 1.4332, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.06044170632958412, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 1.4435, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05790272355079651, |
| "learning_rate": 1.170212765957447e-06, |
| "loss": 1.3862, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.06205734983086586, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 1.4129, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.059065915644168854, |
| "learning_rate": 1.3829787234042553e-06, |
| "loss": 1.3791, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.05682244524359703, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 1.431, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.059100136160850525, |
| "learning_rate": 1.5957446808510639e-06, |
| "loss": 1.4507, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.059931278228759766, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 1.3852, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.056699033826589584, |
| "learning_rate": 1.8085106382978722e-06, |
| "loss": 1.2552, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.05666350945830345, |
| "learning_rate": 1.9148936170212763e-06, |
| "loss": 1.5264, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.05692203715443611, |
| "learning_rate": 2.021276595744681e-06, |
| "loss": 1.4234, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.06246646121144295, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 1.3362, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.056722771376371384, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 1.3446, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.05918258801102638, |
| "learning_rate": 2.340425531914894e-06, |
| "loss": 1.4613, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.05318083241581917, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 1.4447, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.0610308013856411, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 1.3705, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.0567488893866539, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 1.4548, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.06045207753777504, |
| "learning_rate": 2.7659574468085106e-06, |
| "loss": 1.389, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.05329489707946777, |
| "learning_rate": 2.872340425531915e-06, |
| "loss": 1.3524, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.07088607549667358, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 1.2576, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.07728853821754456, |
| "learning_rate": 3.0851063829787233e-06, |
| "loss": 1.4285, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.0550098791718483, |
| "learning_rate": 3.1914893617021277e-06, |
| "loss": 1.2073, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.0542742982506752, |
| "learning_rate": 3.2978723404255322e-06, |
| "loss": 1.354, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.06096404418349266, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 1.2414, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.062333572655916214, |
| "learning_rate": 3.5106382978723404e-06, |
| "loss": 1.4806, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.05783746764063835, |
| "learning_rate": 3.6170212765957445e-06, |
| "loss": 1.4405, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.095641128718853, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 1.4705, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.14163658022880554, |
| "learning_rate": 3.829787234042553e-06, |
| "loss": 1.4584, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.05600857362151146, |
| "learning_rate": 3.936170212765957e-06, |
| "loss": 1.4418, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.05181832239031792, |
| "learning_rate": 4.042553191489362e-06, |
| "loss": 1.3885, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.09394165873527527, |
| "learning_rate": 4.148936170212766e-06, |
| "loss": 1.4062, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.0698341354727745, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 1.2806, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.055212073028087616, |
| "learning_rate": 4.361702127659574e-06, |
| "loss": 1.4028, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.05782864987850189, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 1.4935, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05666356906294823, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 1.435, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05617048591375351, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 1.2891, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05038372054696083, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 1.4356, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.049356039613485336, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 1.3012, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05181947723031044, |
| "learning_rate": 4.9999999999999996e-06, |
| "loss": 1.3487, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05446089804172516, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 1.4829, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05051864683628082, |
| "learning_rate": 5.2127659574468086e-06, |
| "loss": 1.3265, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05824195221066475, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 1.3334, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05690138414502144, |
| "learning_rate": 5.4255319148936176e-06, |
| "loss": 1.4572, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.09080282598733902, |
| "learning_rate": 5.531914893617021e-06, |
| "loss": 1.4261, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.04653813689947128, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 1.262, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.044598598033189774, |
| "learning_rate": 5.74468085106383e-06, |
| "loss": 1.3706, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.04525616019964218, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 1.2848, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.05448417738080025, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 1.3936, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.04820968583226204, |
| "learning_rate": 6.063829787234042e-06, |
| "loss": 1.3226, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.052025895565748215, |
| "learning_rate": 6.1702127659574465e-06, |
| "loss": 1.3372, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.046200189739465714, |
| "learning_rate": 6.276595744680851e-06, |
| "loss": 1.4202, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.05124253034591675, |
| "learning_rate": 6.3829787234042555e-06, |
| "loss": 1.3549, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04417189210653305, |
| "learning_rate": 6.48936170212766e-06, |
| "loss": 1.4315, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.05024256929755211, |
| "learning_rate": 6.5957446808510645e-06, |
| "loss": 1.3213, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04305976629257202, |
| "learning_rate": 6.702127659574468e-06, |
| "loss": 1.1938, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04368586093187332, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 1.2782, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04419870302081108, |
| "learning_rate": 6.914893617021277e-06, |
| "loss": 1.2692, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04923289269208908, |
| "learning_rate": 7.021276595744681e-06, |
| "loss": 1.4176, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.05031334236264229, |
| "learning_rate": 7.127659574468085e-06, |
| "loss": 1.4667, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.05887551233172417, |
| "learning_rate": 7.234042553191489e-06, |
| "loss": 1.365, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.04077250510454178, |
| "learning_rate": 7.3404255319148934e-06, |
| "loss": 1.263, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.046896953135728836, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 1.2821, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.045666612684726715, |
| "learning_rate": 7.553191489361702e-06, |
| "loss": 1.4069, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.07492675632238388, |
| "learning_rate": 7.659574468085105e-06, |
| "loss": 1.2269, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.05555059388279915, |
| "learning_rate": 7.76595744680851e-06, |
| "loss": 1.4084, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.04331756755709648, |
| "learning_rate": 7.872340425531914e-06, |
| "loss": 1.3114, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.0964915007352829, |
| "learning_rate": 7.978723404255319e-06, |
| "loss": 1.3633, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.046700503677129745, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 1.3688, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.04673081636428833, |
| "learning_rate": 8.191489361702128e-06, |
| "loss": 1.2467, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.04381676763296127, |
| "learning_rate": 8.297872340425532e-06, |
| "loss": 1.2566, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.05993415787816048, |
| "learning_rate": 8.404255319148937e-06, |
| "loss": 1.3618, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.06440860033035278, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 1.5144, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.040967535227537155, |
| "learning_rate": 8.617021276595746e-06, |
| "loss": 1.2927, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.04743165895342827, |
| "learning_rate": 8.723404255319149e-06, |
| "loss": 1.3762, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.04562428966164589, |
| "learning_rate": 8.829787234042553e-06, |
| "loss": 1.377, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.051328569650650024, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 1.2928, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.05074065551161766, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 1.2354, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.04352608695626259, |
| "learning_rate": 9.148936170212767e-06, |
| "loss": 1.4465, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.04803245887160301, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 1.3891, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.08481187373399734, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 1.1974, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.0441882386803627, |
| "learning_rate": 9.46808510638298e-06, |
| "loss": 1.3792, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.04781670495867729, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 1.3, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.04570171609520912, |
| "learning_rate": 9.680851063829787e-06, |
| "loss": 1.3352, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.041407499462366104, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 1.2453, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.06641850620508194, |
| "learning_rate": 9.893617021276595e-06, |
| "loss": 1.2, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.04800207167863846, |
| "learning_rate": 9.999999999999999e-06, |
| "loss": 1.3938, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.04421789571642876, |
| "learning_rate": 1.0106382978723404e-05, |
| "loss": 1.3387, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.04799410328269005, |
| "learning_rate": 1.0212765957446808e-05, |
| "loss": 1.3857, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.0477750189602375, |
| "learning_rate": 1.0319148936170213e-05, |
| "loss": 1.3585, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.042658887803554535, |
| "learning_rate": 1.0425531914893617e-05, |
| "loss": 1.2802, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.046312738209962845, |
| "learning_rate": 1.0531914893617022e-05, |
| "loss": 1.3663, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.04404019936919212, |
| "learning_rate": 1.0638297872340426e-05, |
| "loss": 1.3354, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.055406101047992706, |
| "learning_rate": 1.074468085106383e-05, |
| "loss": 1.2967, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.045367974787950516, |
| "learning_rate": 1.0851063829787235e-05, |
| "loss": 1.2914, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.049990568310022354, |
| "learning_rate": 1.095744680851064e-05, |
| "loss": 1.1151, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.04687273129820824, |
| "learning_rate": 1.1063829787234042e-05, |
| "loss": 1.2756, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.04907204583287239, |
| "learning_rate": 1.1170212765957447e-05, |
| "loss": 1.3726, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.057853613048791885, |
| "learning_rate": 1.1276595744680851e-05, |
| "loss": 1.2599, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.0487029105424881, |
| "learning_rate": 1.1382978723404256e-05, |
| "loss": 1.2803, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.049311500042676926, |
| "learning_rate": 1.148936170212766e-05, |
| "loss": 1.3391, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.04935484379529953, |
| "learning_rate": 1.1595744680851065e-05, |
| "loss": 1.3807, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.04239289090037346, |
| "learning_rate": 1.170212765957447e-05, |
| "loss": 1.2443, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.076308473944664, |
| "learning_rate": 1.1808510638297872e-05, |
| "loss": 1.3879, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.048640284687280655, |
| "learning_rate": 1.1914893617021277e-05, |
| "loss": 1.3674, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.04677354916930199, |
| "learning_rate": 1.2021276595744681e-05, |
| "loss": 1.3282, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.04633704200387001, |
| "learning_rate": 1.2127659574468084e-05, |
| "loss": 1.3136, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.05674600228667259, |
| "learning_rate": 1.2234042553191489e-05, |
| "loss": 1.4119, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.052234843373298645, |
| "learning_rate": 1.2340425531914893e-05, |
| "loss": 1.3089, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.043746188282966614, |
| "learning_rate": 1.2446808510638298e-05, |
| "loss": 1.3544, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.044916700571775436, |
| "learning_rate": 1.2553191489361702e-05, |
| "loss": 1.2777, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.05204184353351593, |
| "learning_rate": 1.2659574468085106e-05, |
| "loss": 1.3396, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.04302500560879707, |
| "learning_rate": 1.2765957446808511e-05, |
| "loss": 1.3921, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.06135503947734833, |
| "learning_rate": 1.2872340425531915e-05, |
| "loss": 1.2857, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.05298823118209839, |
| "learning_rate": 1.297872340425532e-05, |
| "loss": 1.3892, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.09373245388269424, |
| "learning_rate": 1.3085106382978724e-05, |
| "loss": 1.3407, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.0466972291469574, |
| "learning_rate": 1.3191489361702129e-05, |
| "loss": 1.3431, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.04748416692018509, |
| "learning_rate": 1.3297872340425532e-05, |
| "loss": 1.3532, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.04710518568754196, |
| "learning_rate": 1.3404255319148936e-05, |
| "loss": 1.325, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.04562179371714592, |
| "learning_rate": 1.351063829787234e-05, |
| "loss": 1.3433, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.0475505031645298, |
| "learning_rate": 1.3617021276595745e-05, |
| "loss": 1.3037, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.045639630407094955, |
| "learning_rate": 1.372340425531915e-05, |
| "loss": 1.3, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.04853609576821327, |
| "learning_rate": 1.3829787234042554e-05, |
| "loss": 1.4097, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.04848809540271759, |
| "learning_rate": 1.3936170212765957e-05, |
| "loss": 1.1995, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.0436336025595665, |
| "learning_rate": 1.4042553191489362e-05, |
| "loss": 1.0676, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.05467860400676727, |
| "learning_rate": 1.4148936170212766e-05, |
| "loss": 1.428, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.05164318531751633, |
| "learning_rate": 1.425531914893617e-05, |
| "loss": 1.3411, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.04806946590542793, |
| "learning_rate": 1.4361702127659575e-05, |
| "loss": 1.2856, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.04568091407418251, |
| "learning_rate": 1.4468085106382978e-05, |
| "loss": 1.2643, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.051061000674963, |
| "learning_rate": 1.4574468085106382e-05, |
| "loss": 1.2959, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.043656568974256516, |
| "learning_rate": 1.4680851063829787e-05, |
| "loss": 1.2584, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.04155721887946129, |
| "learning_rate": 1.4787234042553191e-05, |
| "loss": 1.3342, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.05964464321732521, |
| "learning_rate": 1.4893617021276596e-05, |
| "loss": 1.284, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.04124300926923752, |
| "learning_rate": 1.5e-05, |
| "loss": 1.2437, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.055146049708127975, |
| "learning_rate": 1.5106382978723403e-05, |
| "loss": 1.5472, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.0521329827606678, |
| "learning_rate": 1.521276595744681e-05, |
| "loss": 1.2676, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.046129606664180756, |
| "learning_rate": 1.531914893617021e-05, |
| "loss": 1.3367, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.044848017394542694, |
| "learning_rate": 1.5425531914893617e-05, |
| "loss": 1.352, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.05782546475529671, |
| "learning_rate": 1.553191489361702e-05, |
| "loss": 1.1799, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.05274609848856926, |
| "learning_rate": 1.5638297872340426e-05, |
| "loss": 1.4095, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.047185566276311874, |
| "learning_rate": 1.574468085106383e-05, |
| "loss": 1.2185, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.048585060983896255, |
| "learning_rate": 1.5851063829787235e-05, |
| "loss": 1.3097, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.05114852264523506, |
| "learning_rate": 1.5957446808510637e-05, |
| "loss": 1.3263, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.04557744786143303, |
| "learning_rate": 1.6063829787234044e-05, |
| "loss": 1.4026, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.04844217747449875, |
| "learning_rate": 1.6170212765957446e-05, |
| "loss": 1.2965, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.06304433941841125, |
| "learning_rate": 1.627659574468085e-05, |
| "loss": 1.3728, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.11255922168493271, |
| "learning_rate": 1.6382978723404255e-05, |
| "loss": 1.2606, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.056885555386543274, |
| "learning_rate": 1.6489361702127658e-05, |
| "loss": 1.3312, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.07175802439451218, |
| "learning_rate": 1.6595744680851064e-05, |
| "loss": 1.3512, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.049801841378211975, |
| "learning_rate": 1.6702127659574467e-05, |
| "loss": 1.3083, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.05776335299015045, |
| "learning_rate": 1.6808510638297873e-05, |
| "loss": 1.3987, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.051518019288778305, |
| "learning_rate": 1.6914893617021276e-05, |
| "loss": 1.4337, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.0506494864821434, |
| "learning_rate": 1.7021276595744682e-05, |
| "loss": 1.3609, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.04913006350398064, |
| "learning_rate": 1.7127659574468085e-05, |
| "loss": 1.3788, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.0492931567132473, |
| "learning_rate": 1.723404255319149e-05, |
| "loss": 1.2924, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.05355142429471016, |
| "learning_rate": 1.7340425531914894e-05, |
| "loss": 1.3872, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.0524597205221653, |
| "learning_rate": 1.7446808510638297e-05, |
| "loss": 1.4147, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.049367666244506836, |
| "learning_rate": 1.7553191489361703e-05, |
| "loss": 1.3175, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.05368790030479431, |
| "learning_rate": 1.7659574468085106e-05, |
| "loss": 1.3939, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.047138139605522156, |
| "learning_rate": 1.7765957446808512e-05, |
| "loss": 1.3383, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.05449504777789116, |
| "learning_rate": 1.7872340425531915e-05, |
| "loss": 1.3612, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.0647950991988182, |
| "learning_rate": 1.797872340425532e-05, |
| "loss": 1.2121, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.05256028473377228, |
| "learning_rate": 1.8085106382978724e-05, |
| "loss": 1.4302, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.052294984459877014, |
| "learning_rate": 1.819148936170213e-05, |
| "loss": 1.3529, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.043925654143095016, |
| "learning_rate": 1.8297872340425533e-05, |
| "loss": 1.1719, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.04635035991668701, |
| "learning_rate": 1.840425531914894e-05, |
| "loss": 1.3229, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.07445945590734482, |
| "learning_rate": 1.8510638297872342e-05, |
| "loss": 1.2615, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.050731562077999115, |
| "learning_rate": 1.8617021276595745e-05, |
| "loss": 1.3552, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.04691868647933006, |
| "learning_rate": 1.872340425531915e-05, |
| "loss": 1.1959, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.047465287148952484, |
| "learning_rate": 1.8829787234042554e-05, |
| "loss": 1.3113, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.05117448791861534, |
| "learning_rate": 1.893617021276596e-05, |
| "loss": 1.3717, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.0473572202026844, |
| "learning_rate": 1.9042553191489363e-05, |
| "loss": 1.3947, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.05099477618932724, |
| "learning_rate": 1.914893617021277e-05, |
| "loss": 1.4049, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.04812943935394287, |
| "learning_rate": 1.9255319148936172e-05, |
| "loss": 1.3586, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.05050328001379967, |
| "learning_rate": 1.9361702127659575e-05, |
| "loss": 1.2463, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.05110020935535431, |
| "learning_rate": 1.9468085106382977e-05, |
| "loss": 1.2487, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.05224141478538513, |
| "learning_rate": 1.9574468085106384e-05, |
| "loss": 1.3602, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.05101168900728226, |
| "learning_rate": 1.9680851063829786e-05, |
| "loss": 1.429, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.09453223645687103, |
| "learning_rate": 1.978723404255319e-05, |
| "loss": 1.2619, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.060608815401792526, |
| "learning_rate": 1.9893617021276595e-05, |
| "loss": 1.3107, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.044588133692741394, |
| "learning_rate": 1.9999999999999998e-05, |
| "loss": 1.3813, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.0486648753285408, |
| "learning_rate": 2.0106382978723404e-05, |
| "loss": 1.3399, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.052965641021728516, |
| "learning_rate": 2.0212765957446807e-05, |
| "loss": 1.3557, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.059409502893686295, |
| "learning_rate": 2.0319148936170213e-05, |
| "loss": 1.3735, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.05749582126736641, |
| "learning_rate": 2.0425531914893616e-05, |
| "loss": 1.2828, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.04642318934202194, |
| "learning_rate": 2.0531914893617022e-05, |
| "loss": 1.3082, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.04926323518157005, |
| "learning_rate": 2.0638297872340425e-05, |
| "loss": 1.2715, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.05380849912762642, |
| "learning_rate": 2.074468085106383e-05, |
| "loss": 1.4502, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.0523720309138298, |
| "learning_rate": 2.0851063829787234e-05, |
| "loss": 1.4332, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.04891609400510788, |
| "learning_rate": 2.0957446808510637e-05, |
| "loss": 1.2524, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.06308029592037201, |
| "learning_rate": 2.1063829787234043e-05, |
| "loss": 1.4187, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.05856901407241821, |
| "learning_rate": 2.1170212765957446e-05, |
| "loss": 1.2604, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.048220206052064896, |
| "learning_rate": 2.1276595744680852e-05, |
| "loss": 1.3664, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.048834629356861115, |
| "learning_rate": 2.1382978723404255e-05, |
| "loss": 1.1301, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.05079879239201546, |
| "learning_rate": 2.148936170212766e-05, |
| "loss": 1.3391, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.04985832795500755, |
| "learning_rate": 2.1595744680851064e-05, |
| "loss": 1.2989, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.04496655985713005, |
| "learning_rate": 2.170212765957447e-05, |
| "loss": 1.2832, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.048497602343559265, |
| "learning_rate": 2.1808510638297873e-05, |
| "loss": 1.2237, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.050113365054130554, |
| "learning_rate": 2.191489361702128e-05, |
| "loss": 1.3618, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.04853734374046326, |
| "learning_rate": 2.2021276595744682e-05, |
| "loss": 1.2066, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.047260671854019165, |
| "learning_rate": 2.2127659574468085e-05, |
| "loss": 1.2672, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.0458863228559494, |
| "learning_rate": 2.223404255319149e-05, |
| "loss": 1.2686, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.0532243587076664, |
| "learning_rate": 2.2340425531914894e-05, |
| "loss": 1.4068, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.05112733691930771, |
| "learning_rate": 2.24468085106383e-05, |
| "loss": 1.4665, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.05383682623505592, |
| "learning_rate": 2.2553191489361703e-05, |
| "loss": 1.3952, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.05613754689693451, |
| "learning_rate": 2.265957446808511e-05, |
| "loss": 1.2767, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.04550475999712944, |
| "learning_rate": 2.2765957446808512e-05, |
| "loss": 1.237, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.04769672453403473, |
| "learning_rate": 2.2872340425531918e-05, |
| "loss": 1.4378, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.0261180400848389, |
| "learning_rate": 2.297872340425532e-05, |
| "loss": 1.2374, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.04573334380984306, |
| "learning_rate": 2.3085106382978724e-05, |
| "loss": 1.2826, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.04903516545891762, |
| "learning_rate": 2.319148936170213e-05, |
| "loss": 1.313, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.049122072756290436, |
| "learning_rate": 2.3297872340425533e-05, |
| "loss": 1.4223, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.04724901542067528, |
| "learning_rate": 2.340425531914894e-05, |
| "loss": 1.2758, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.04624621570110321, |
| "learning_rate": 2.351063829787234e-05, |
| "loss": 1.2517, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.056727565824985504, |
| "learning_rate": 2.3617021276595744e-05, |
| "loss": 1.3039, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.05122361332178116, |
| "learning_rate": 2.3723404255319147e-05, |
| "loss": 1.3883, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.05158834904432297, |
| "learning_rate": 2.3829787234042553e-05, |
| "loss": 1.4112, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.07292402535676956, |
| "learning_rate": 2.3936170212765956e-05, |
| "loss": 1.3428, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.05974160134792328, |
| "learning_rate": 2.4042553191489362e-05, |
| "loss": 1.201, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.05300895869731903, |
| "learning_rate": 2.4148936170212765e-05, |
| "loss": 1.3143, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.045893993228673935, |
| "learning_rate": 2.4255319148936168e-05, |
| "loss": 1.3017, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.051792554557323456, |
| "learning_rate": 2.4361702127659574e-05, |
| "loss": 1.382, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.10382523387670517, |
| "learning_rate": 2.4468085106382977e-05, |
| "loss": 1.2969, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.052977245301008224, |
| "learning_rate": 2.4574468085106383e-05, |
| "loss": 1.3849, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.04847870394587517, |
| "learning_rate": 2.4680851063829786e-05, |
| "loss": 1.2976, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.07622654736042023, |
| "learning_rate": 2.4787234042553192e-05, |
| "loss": 1.4746, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.051023781299591064, |
| "learning_rate": 2.4893617021276595e-05, |
| "loss": 1.3735, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.0486944243311882, |
| "learning_rate": 2.5e-05, |
| "loss": 1.3675, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.048643093556165695, |
| "learning_rate": 2.5106382978723404e-05, |
| "loss": 1.2665, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.051694802939891815, |
| "learning_rate": 2.521276595744681e-05, |
| "loss": 1.4357, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.051911093294620514, |
| "learning_rate": 2.5319148936170213e-05, |
| "loss": 1.303, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.044154971837997437, |
| "learning_rate": 2.5425531914893616e-05, |
| "loss": 1.207, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.048809949308633804, |
| "learning_rate": 2.5531914893617022e-05, |
| "loss": 1.2401, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.040964074432849884, |
| "learning_rate": 2.5638297872340425e-05, |
| "loss": 1.1855, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.0428529791533947, |
| "learning_rate": 2.574468085106383e-05, |
| "loss": 1.2966, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.04508029296994209, |
| "learning_rate": 2.5851063829787234e-05, |
| "loss": 1.2393, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.0446944423019886, |
| "learning_rate": 2.595744680851064e-05, |
| "loss": 1.249, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.0411514937877655, |
| "learning_rate": 2.6063829787234043e-05, |
| "loss": 1.2409, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.04906069114804268, |
| "learning_rate": 2.617021276595745e-05, |
| "loss": 1.4407, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.043277911841869354, |
| "learning_rate": 2.6276595744680852e-05, |
| "loss": 1.2552, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.045710548758506775, |
| "learning_rate": 2.6382978723404258e-05, |
| "loss": 1.3415, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04261719062924385, |
| "learning_rate": 2.648936170212766e-05, |
| "loss": 1.3518, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.044100042432546616, |
| "learning_rate": 2.6595744680851064e-05, |
| "loss": 1.3098, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04507607966661453, |
| "learning_rate": 2.670212765957447e-05, |
| "loss": 1.1852, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04363155737519264, |
| "learning_rate": 2.6808510638297873e-05, |
| "loss": 1.2703, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.07022202759981155, |
| "learning_rate": 2.691489361702128e-05, |
| "loss": 1.175, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.050820302218198776, |
| "learning_rate": 2.702127659574468e-05, |
| "loss": 1.3806, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04110841080546379, |
| "learning_rate": 2.7127659574468088e-05, |
| "loss": 1.1963, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04145009443163872, |
| "learning_rate": 2.723404255319149e-05, |
| "loss": 1.2588, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.04492926597595215, |
| "learning_rate": 2.7340425531914897e-05, |
| "loss": 1.1934, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.05001668259501457, |
| "learning_rate": 2.74468085106383e-05, |
| "loss": 1.3063, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.05600470304489136, |
| "learning_rate": 2.7553191489361706e-05, |
| "loss": 1.303, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.04361084848642349, |
| "learning_rate": 2.765957446808511e-05, |
| "loss": 1.2889, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.043035976588726044, |
| "learning_rate": 2.776595744680851e-05, |
| "loss": 1.2992, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.044470012187957764, |
| "learning_rate": 2.7872340425531914e-05, |
| "loss": 1.4082, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.043607208877801895, |
| "learning_rate": 2.7978723404255317e-05, |
| "loss": 1.3521, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.047062911093235016, |
| "learning_rate": 2.8085106382978723e-05, |
| "loss": 1.2293, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.051996584981679916, |
| "learning_rate": 2.8191489361702126e-05, |
| "loss": 1.21, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.055774882435798645, |
| "learning_rate": 2.8297872340425532e-05, |
| "loss": 1.3027, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.04664234817028046, |
| "learning_rate": 2.8404255319148935e-05, |
| "loss": 1.2192, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.05001696199178696, |
| "learning_rate": 2.851063829787234e-05, |
| "loss": 1.2731, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.04934161901473999, |
| "learning_rate": 2.8617021276595744e-05, |
| "loss": 1.267, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.05439780652523041, |
| "learning_rate": 2.872340425531915e-05, |
| "loss": 1.3526, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.04861316457390785, |
| "learning_rate": 2.8829787234042553e-05, |
| "loss": 1.3071, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.04321487993001938, |
| "learning_rate": 2.8936170212765956e-05, |
| "loss": 1.2382, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.07793135941028595, |
| "learning_rate": 2.9042553191489362e-05, |
| "loss": 1.2424, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.04725024476647377, |
| "learning_rate": 2.9148936170212765e-05, |
| "loss": 1.2969, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.04626401886343956, |
| "learning_rate": 2.925531914893617e-05, |
| "loss": 1.3019, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.045404914766550064, |
| "learning_rate": 2.9361702127659574e-05, |
| "loss": 1.2824, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.05153006315231323, |
| "learning_rate": 2.946808510638298e-05, |
| "loss": 1.3043, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.04886719956994057, |
| "learning_rate": 2.9574468085106383e-05, |
| "loss": 1.3016, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.045437101274728775, |
| "learning_rate": 2.968085106382979e-05, |
| "loss": 1.3027, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.044898632913827896, |
| "learning_rate": 2.9787234042553192e-05, |
| "loss": 1.2955, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.15321741998195648, |
| "learning_rate": 2.9893617021276598e-05, |
| "loss": 1.3549, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.04944868013262749, |
| "learning_rate": 3e-05, |
| "loss": 1.2074, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.04308634251356125, |
| "learning_rate": 2.9999988453946903e-05, |
| "loss": 1.2587, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.04539733752608299, |
| "learning_rate": 2.9999953815805386e-05, |
| "loss": 1.2209, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.05106598138809204, |
| "learning_rate": 2.9999896085628773e-05, |
| "loss": 1.2978, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.04330745339393616, |
| "learning_rate": 2.9999815263505937e-05, |
| "loss": 1.3341, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.49190011620521545, |
| "learning_rate": 2.999971134956131e-05, |
| "loss": 1.1967, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.12518596649169922, |
| "learning_rate": 2.9999584343954855e-05, |
| "loss": 1.3994, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.0452754944562912, |
| "learning_rate": 2.9999434246882094e-05, |
| "loss": 1.3459, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.15973900258541107, |
| "learning_rate": 2.9999261058574106e-05, |
| "loss": 1.2315, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.05226750299334526, |
| "learning_rate": 2.99990647792975e-05, |
| "loss": 1.3226, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.05565487965941429, |
| "learning_rate": 2.999884540935445e-05, |
| "loss": 1.274, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.042020637542009354, |
| "learning_rate": 2.9998602949082663e-05, |
| "loss": 1.2769, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.04224269464612007, |
| "learning_rate": 2.999833739885541e-05, |
| "loss": 1.2758, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.04438871517777443, |
| "learning_rate": 2.999804875908149e-05, |
| "loss": 1.2992, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.04211125522851944, |
| "learning_rate": 2.999773703020526e-05, |
| "loss": 1.2551, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.044215280562639236, |
| "learning_rate": 2.999740221270662e-05, |
| "loss": 1.3433, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.0452650748193264, |
| "learning_rate": 2.999704430710101e-05, |
| "loss": 1.0829, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.04780622571706772, |
| "learning_rate": 2.9996663313939412e-05, |
| "loss": 1.3217, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.042652346193790436, |
| "learning_rate": 2.999625923380837e-05, |
| "loss": 1.2241, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.038346268236637115, |
| "learning_rate": 2.9995832067329933e-05, |
| "loss": 1.2186, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.04913242533802986, |
| "learning_rate": 2.9995381815161732e-05, |
| "loss": 1.3639, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.046195752918720245, |
| "learning_rate": 2.9994908477996913e-05, |
| "loss": 1.2807, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.04986129701137543, |
| "learning_rate": 2.9994412056564157e-05, |
| "loss": 1.3036, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.053075432777404785, |
| "learning_rate": 2.9993892551627702e-05, |
| "loss": 1.3456, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04712017998099327, |
| "learning_rate": 2.9993349963987306e-05, |
| "loss": 1.4103, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.0960259810090065, |
| "learning_rate": 2.9992784294478277e-05, |
| "loss": 1.2689, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04377806931734085, |
| "learning_rate": 2.9992195543971437e-05, |
| "loss": 1.2946, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04737775772809982, |
| "learning_rate": 2.999158371337316e-05, |
| "loss": 1.3504, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04784572497010231, |
| "learning_rate": 2.9990948803625344e-05, |
| "loss": 1.217, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.046987585723400116, |
| "learning_rate": 2.999029081570541e-05, |
| "loss": 1.1544, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04943583905696869, |
| "learning_rate": 2.9989609750626313e-05, |
| "loss": 1.2561, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04215755686163902, |
| "learning_rate": 2.998890560943654e-05, |
| "loss": 1.2639, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.04213941469788551, |
| "learning_rate": 2.99881783932201e-05, |
| "loss": 1.2206, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.057730745524168015, |
| "learning_rate": 2.9987428103096507e-05, |
| "loss": 1.3025, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.04681975021958351, |
| "learning_rate": 2.9986654740220835e-05, |
| "loss": 1.3503, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.047414880245923996, |
| "learning_rate": 2.9985858305783643e-05, |
| "loss": 1.3505, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.042863670736551285, |
| "learning_rate": 2.998503880101102e-05, |
| "loss": 1.2928, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.041847921907901764, |
| "learning_rate": 2.998419622716458e-05, |
| "loss": 1.2902, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.04203863441944122, |
| "learning_rate": 2.998333058554144e-05, |
| "loss": 1.226, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.045996423810720444, |
| "learning_rate": 2.9982441877474225e-05, |
| "loss": 1.2747, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.04503284767270088, |
| "learning_rate": 2.9981530104331087e-05, |
| "loss": 1.3563, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.04348743334412575, |
| "learning_rate": 2.9980595267515677e-05, |
| "loss": 1.3476, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.05177181586623192, |
| "learning_rate": 2.9979637368467143e-05, |
| "loss": 1.2985, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04224792867898941, |
| "learning_rate": 2.9978656408660157e-05, |
| "loss": 1.263, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04062522202730179, |
| "learning_rate": 2.9977652389604867e-05, |
| "loss": 1.2936, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04481014609336853, |
| "learning_rate": 2.9976625312846952e-05, |
| "loss": 1.3739, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04340618476271629, |
| "learning_rate": 2.9975575179967552e-05, |
| "loss": 1.3147, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.038771748542785645, |
| "learning_rate": 2.9974501992583333e-05, |
| "loss": 1.1779, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04300400987267494, |
| "learning_rate": 2.9973405752346424e-05, |
| "loss": 1.32, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04594357684254646, |
| "learning_rate": 2.9972286460944477e-05, |
| "loss": 1.2634, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.041061241179704666, |
| "learning_rate": 2.997114412010059e-05, |
| "loss": 1.2628, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.04096174240112305, |
| "learning_rate": 2.9969978731573384e-05, |
| "loss": 1.2161, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.051395904272794724, |
| "learning_rate": 2.996879029715694e-05, |
| "loss": 1.3317, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04332401603460312, |
| "learning_rate": 2.9967578818680817e-05, |
| "loss": 1.1812, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04285382851958275, |
| "learning_rate": 2.9966344298010055e-05, |
| "loss": 1.3631, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04322006553411484, |
| "learning_rate": 2.996508673704517e-05, |
| "loss": 1.3598, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04882935434579849, |
| "learning_rate": 2.9963806137722145e-05, |
| "loss": 1.3121, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04319699481129646, |
| "learning_rate": 2.996250250201242e-05, |
| "loss": 1.1114, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.044990669935941696, |
| "learning_rate": 2.996117583192292e-05, |
| "loss": 1.4712, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.04074972867965698, |
| "learning_rate": 2.995982612949601e-05, |
| "loss": 1.1548, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.042052388191223145, |
| "learning_rate": 2.9958453396809524e-05, |
| "loss": 1.2864, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.04926493391394615, |
| "learning_rate": 2.995705763597675e-05, |
| "loss": 1.295, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.04231850057840347, |
| "learning_rate": 2.9955638849146422e-05, |
| "loss": 1.2497, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.04407385364174843, |
| "learning_rate": 2.9954197038502727e-05, |
| "loss": 1.3868, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.04645228013396263, |
| "learning_rate": 2.9952732206265295e-05, |
| "loss": 1.2736, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.042599406093358994, |
| "learning_rate": 2.9951244354689195e-05, |
| "loss": 1.3913, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.04068687558174133, |
| "learning_rate": 2.994973348606494e-05, |
| "loss": 1.267, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.039476945996284485, |
| "learning_rate": 2.9948199602718463e-05, |
| "loss": 1.3569, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.041935890913009644, |
| "learning_rate": 2.9946642707011144e-05, |
| "loss": 1.0696, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.042615581303834915, |
| "learning_rate": 2.9945062801339784e-05, |
| "loss": 1.2675, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.03951689228415489, |
| "learning_rate": 2.9943459888136607e-05, |
| "loss": 1.2978, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.042145539075136185, |
| "learning_rate": 2.994183396986925e-05, |
| "loss": 1.3425, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.04591543972492218, |
| "learning_rate": 2.994018504904078e-05, |
| "loss": 1.2591, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.042830970138311386, |
| "learning_rate": 2.993851312818965e-05, |
| "loss": 1.3181, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.04242956265807152, |
| "learning_rate": 2.9936818209889764e-05, |
| "loss": 1.3664, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.04106505587697029, |
| "learning_rate": 2.993510029675038e-05, |
| "loss": 1.2306, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.04205963388085365, |
| "learning_rate": 2.9933359391416197e-05, |
| "loss": 1.3389, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.043333835899829865, |
| "learning_rate": 2.9931595496567285e-05, |
| "loss": 1.3347, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.03695542365312576, |
| "learning_rate": 2.9929808614919114e-05, |
| "loss": 1.3666, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.03952499479055405, |
| "learning_rate": 2.9927998749222546e-05, |
| "loss": 1.346, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.0432700589299202, |
| "learning_rate": 2.9926165902263814e-05, |
| "loss": 1.4122, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.04118409752845764, |
| "learning_rate": 2.992431007686455e-05, |
| "loss": 1.3356, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.041710883378982544, |
| "learning_rate": 2.9922431275881736e-05, |
| "loss": 1.2396, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.040193233639001846, |
| "learning_rate": 2.9920529502207744e-05, |
| "loss": 1.3486, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.07698454707860947, |
| "learning_rate": 2.9918604758770298e-05, |
| "loss": 1.351, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.04122081398963928, |
| "learning_rate": 2.9916657048532498e-05, |
| "loss": 1.2852, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.0415961854159832, |
| "learning_rate": 2.991468637449279e-05, |
| "loss": 1.1954, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.05483356490731239, |
| "learning_rate": 2.9912692739684973e-05, |
| "loss": 1.1881, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.043232712894678116, |
| "learning_rate": 2.9910676147178194e-05, |
| "loss": 1.3717, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.04087051749229431, |
| "learning_rate": 2.990863660007695e-05, |
| "loss": 1.2881, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.0421091727912426, |
| "learning_rate": 2.9906574101521068e-05, |
| "loss": 1.3797, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.0437094122171402, |
| "learning_rate": 2.9904488654685706e-05, |
| "loss": 1.3868, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.044396668672561646, |
| "learning_rate": 2.990238026278136e-05, |
| "loss": 1.3332, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.0429987758398056, |
| "learning_rate": 2.990024892905384e-05, |
| "loss": 1.3638, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.05373203381896019, |
| "learning_rate": 2.9898094656784283e-05, |
| "loss": 1.274, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.04591381922364235, |
| "learning_rate": 2.9895917449289128e-05, |
| "loss": 1.2414, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.04005320370197296, |
| "learning_rate": 2.9893717309920134e-05, |
| "loss": 1.3568, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.042381271719932556, |
| "learning_rate": 2.989149424206436e-05, |
| "loss": 1.3165, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.04066908732056618, |
| "learning_rate": 2.9889248249144153e-05, |
| "loss": 1.1674, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.06059327349066734, |
| "learning_rate": 2.9886979334617167e-05, |
| "loss": 1.3155, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.05215556547045708, |
| "learning_rate": 2.9884687501976336e-05, |
| "loss": 1.2876, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.039729043841362, |
| "learning_rate": 2.9882372754749867e-05, |
| "loss": 1.3197, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.04095631465315819, |
| "learning_rate": 2.9880035096501265e-05, |
| "loss": 1.2877, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.04893979802727699, |
| "learning_rate": 2.9877674530829286e-05, |
| "loss": 1.2932, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.043521177023649216, |
| "learning_rate": 2.987529106136796e-05, |
| "loss": 1.359, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.04460636153817177, |
| "learning_rate": 2.9872884691786576e-05, |
| "loss": 1.3171, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.039205264300107956, |
| "learning_rate": 2.9870455425789678e-05, |
| "loss": 1.1949, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.04290665313601494, |
| "learning_rate": 2.986800326711706e-05, |
| "loss": 1.3771, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04245166853070259, |
| "learning_rate": 2.9865528219543747e-05, |
| "loss": 1.3134, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04090409353375435, |
| "learning_rate": 2.9863030286880017e-05, |
| "loss": 1.387, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04054385796189308, |
| "learning_rate": 2.986050947297137e-05, |
| "loss": 1.2733, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04850127920508385, |
| "learning_rate": 2.985796578169853e-05, |
| "loss": 1.2143, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.05087016895413399, |
| "learning_rate": 2.9855399216977453e-05, |
| "loss": 1.3101, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.044336311519145966, |
| "learning_rate": 2.9852809782759285e-05, |
| "loss": 1.2811, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04210364446043968, |
| "learning_rate": 2.9850197483030397e-05, |
| "loss": 1.2509, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.043686628341674805, |
| "learning_rate": 2.9847562321812358e-05, |
| "loss": 1.262, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.04465370252728462, |
| "learning_rate": 2.9844904303161925e-05, |
| "loss": 1.2717, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.041322916746139526, |
| "learning_rate": 2.9842223431171056e-05, |
| "loss": 1.3261, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.05307582765817642, |
| "learning_rate": 2.9839519709966875e-05, |
| "loss": 1.229, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.05377936363220215, |
| "learning_rate": 2.9836793143711692e-05, |
| "loss": 1.3681, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.06358564645051956, |
| "learning_rate": 2.9834043736602985e-05, |
| "loss": 1.3573, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.046525027602910995, |
| "learning_rate": 2.9831271492873396e-05, |
| "loss": 1.1367, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.04199456423521042, |
| "learning_rate": 2.982847641679072e-05, |
| "loss": 1.2746, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.04503254592418671, |
| "learning_rate": 2.9825658512657902e-05, |
| "loss": 1.2289, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.0457291416823864, |
| "learning_rate": 2.982281778481303e-05, |
| "loss": 1.2515, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.04370247200131416, |
| "learning_rate": 2.9819954237629333e-05, |
| "loss": 1.1892, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.04227456450462341, |
| "learning_rate": 2.9817067875515165e-05, |
| "loss": 1.3287, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.04234752431511879, |
| "learning_rate": 2.981415870291401e-05, |
| "loss": 1.3267, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.04369445517659187, |
| "learning_rate": 2.981122672430445e-05, |
| "loss": 1.2835, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.05714486539363861, |
| "learning_rate": 2.9808271944200208e-05, |
| "loss": 1.3951, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.04107912257313728, |
| "learning_rate": 2.980529436715007e-05, |
| "loss": 1.272, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.040028270334005356, |
| "learning_rate": 2.980229399773795e-05, |
| "loss": 1.2165, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.03916673734784126, |
| "learning_rate": 2.9799270840582838e-05, |
| "loss": 1.1649, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.03947027027606964, |
| "learning_rate": 2.97962249003388e-05, |
| "loss": 1.1785, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.10322597622871399, |
| "learning_rate": 2.979315618169499e-05, |
| "loss": 1.1446, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.044616565108299255, |
| "learning_rate": 2.9790064689375605e-05, |
| "loss": 1.2937, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.0792105421423912, |
| "learning_rate": 2.9786950428139926e-05, |
| "loss": 1.0531, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.042875826358795166, |
| "learning_rate": 2.978381340278228e-05, |
| "loss": 1.4458, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.042151302099227905, |
| "learning_rate": 2.9780653618132026e-05, |
| "loss": 1.4125, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.0390373058617115, |
| "learning_rate": 2.9777471079053573e-05, |
| "loss": 1.2743, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.04105671867728233, |
| "learning_rate": 2.977426579044636e-05, |
| "loss": 1.3484, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.04211370646953583, |
| "learning_rate": 2.977103775724484e-05, |
| "loss": 1.3602, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.07791785150766373, |
| "learning_rate": 2.9767786984418484e-05, |
| "loss": 1.1127, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.03947106748819351, |
| "learning_rate": 2.9764513476971783e-05, |
| "loss": 1.489, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.04182416945695877, |
| "learning_rate": 2.9761217239944202e-05, |
| "loss": 1.2798, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.0393020324409008, |
| "learning_rate": 2.9757898278410216e-05, |
| "loss": 1.2509, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.042079027742147446, |
| "learning_rate": 2.975455659747928e-05, |
| "loss": 1.2902, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.0440564788877964, |
| "learning_rate": 2.9751192202295824e-05, |
| "loss": 1.4684, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.041536420583724976, |
| "learning_rate": 2.9747805098039246e-05, |
| "loss": 1.1899, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.05359746143221855, |
| "learning_rate": 2.9744395289923903e-05, |
| "loss": 1.1661, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.037684116512537, |
| "learning_rate": 2.974096278319911e-05, |
| "loss": 1.301, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.038028784096241, |
| "learning_rate": 2.9737507583149116e-05, |
| "loss": 1.3669, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.04109985753893852, |
| "learning_rate": 2.973402969509311e-05, |
| "loss": 1.3343, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.04338208958506584, |
| "learning_rate": 2.973052912438521e-05, |
| "loss": 1.2158, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.06308567523956299, |
| "learning_rate": 2.9727005876414452e-05, |
| "loss": 1.2786, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.06322115659713745, |
| "learning_rate": 2.972345995660479e-05, |
| "loss": 1.2336, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.0424451045691967, |
| "learning_rate": 2.9719891370415072e-05, |
| "loss": 1.2459, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.044482551515102386, |
| "learning_rate": 2.9716300123339034e-05, |
| "loss": 1.2846, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.039096757769584656, |
| "learning_rate": 2.9712686220905318e-05, |
| "loss": 1.4319, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.3719871938228607, |
| "learning_rate": 2.9709049668677425e-05, |
| "loss": 1.326, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.040621835738420486, |
| "learning_rate": 2.9705390472253738e-05, |
| "loss": 1.3761, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.03975391760468483, |
| "learning_rate": 2.9701708637267487e-05, |
| "loss": 1.3475, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.04256337508559227, |
| "learning_rate": 2.9698004169386762e-05, |
| "loss": 1.3495, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.04694630578160286, |
| "learning_rate": 2.969427707431449e-05, |
| "loss": 1.3478, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04910700023174286, |
| "learning_rate": 2.9690527357788452e-05, |
| "loss": 1.2657, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04067717120051384, |
| "learning_rate": 2.9686755025581224e-05, |
| "loss": 1.3389, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04208545386791229, |
| "learning_rate": 2.9682960083500214e-05, |
| "loss": 1.3061, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04172629490494728, |
| "learning_rate": 2.9679142537387636e-05, |
| "loss": 1.252, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04157547652721405, |
| "learning_rate": 2.9675302393120506e-05, |
| "loss": 1.2367, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04133530333638191, |
| "learning_rate": 2.9671439656610622e-05, |
| "loss": 1.2163, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.03688955307006836, |
| "learning_rate": 2.966755433380457e-05, |
| "loss": 1.057, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.041086386889219284, |
| "learning_rate": 2.9663646430683695e-05, |
| "loss": 1.2647, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.04206252843141556, |
| "learning_rate": 2.9659715953264114e-05, |
| "loss": 1.2702, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.044997621327638626, |
| "learning_rate": 2.9655762907596695e-05, |
| "loss": 1.2934, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04272051900625229, |
| "learning_rate": 2.9651787299767044e-05, |
| "loss": 1.16, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04372408613562584, |
| "learning_rate": 2.9647789135895514e-05, |
| "loss": 1.3399, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04171840101480484, |
| "learning_rate": 2.9643768422137167e-05, |
| "loss": 1.1995, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04140634834766388, |
| "learning_rate": 2.963972516468179e-05, |
| "loss": 1.2467, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.041341882199048996, |
| "learning_rate": 2.9635659369753865e-05, |
| "loss": 1.2195, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.044502872973680496, |
| "learning_rate": 2.963157104361258e-05, |
| "loss": 1.1808, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04876202344894409, |
| "learning_rate": 2.9627460192551806e-05, |
| "loss": 1.3945, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.0470486581325531, |
| "learning_rate": 2.9623326822900094e-05, |
| "loss": 1.3613, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.04317512735724449, |
| "learning_rate": 2.9619170941020652e-05, |
| "loss": 1.3771, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.046641379594802856, |
| "learning_rate": 2.9614992553311356e-05, |
| "loss": 1.5062, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04240740090608597, |
| "learning_rate": 2.9610791666204715e-05, |
| "loss": 1.228, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04536912590265274, |
| "learning_rate": 2.9606568286167897e-05, |
| "loss": 1.4184, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04742661491036415, |
| "learning_rate": 2.960232241970268e-05, |
| "loss": 1.2567, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.041507788002491, |
| "learning_rate": 2.959805407334546e-05, |
| "loss": 1.2605, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04272027313709259, |
| "learning_rate": 2.959376325366725e-05, |
| "loss": 1.2577, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04529990255832672, |
| "learning_rate": 2.9589449967273647e-05, |
| "loss": 1.2425, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.044238731265068054, |
| "learning_rate": 2.9585114220804848e-05, |
| "loss": 1.3398, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.04481234773993492, |
| "learning_rate": 2.9580756020935615e-05, |
| "loss": 1.1884, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.04729278013110161, |
| "learning_rate": 2.957637537437529e-05, |
| "loss": 1.3259, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.05300765484571457, |
| "learning_rate": 2.9571972287867767e-05, |
| "loss": 1.3226, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.04174305126070976, |
| "learning_rate": 2.9567546768191463e-05, |
| "loss": 1.0939, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.043156612664461136, |
| "learning_rate": 2.956309882215937e-05, |
| "loss": 1.3715, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.0387759804725647, |
| "learning_rate": 2.955862845661897e-05, |
| "loss": 1.198, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.08106935769319534, |
| "learning_rate": 2.9554135678452284e-05, |
| "loss": 1.3499, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.04314654693007469, |
| "learning_rate": 2.9549620494575816e-05, |
| "loss": 1.1616, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.04383983090519905, |
| "learning_rate": 2.954508291194058e-05, |
| "loss": 1.3067, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.03872944414615631, |
| "learning_rate": 2.954052293753206e-05, |
| "loss": 1.2056, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.04128652438521385, |
| "learning_rate": 2.953594057837023e-05, |
| "loss": 1.164, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.043227825313806534, |
| "learning_rate": 2.9531335841509495e-05, |
| "loss": 1.3105, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.03934094309806824, |
| "learning_rate": 2.952670873403873e-05, |
| "loss": 1.2328, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.050571732223033905, |
| "learning_rate": 2.952205926308125e-05, |
| "loss": 1.1993, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.044990718364715576, |
| "learning_rate": 2.9517387435794796e-05, |
| "loss": 1.3368, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.043135274201631546, |
| "learning_rate": 2.9512693259371518e-05, |
| "loss": 1.3259, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.04202147200703621, |
| "learning_rate": 2.950797674103798e-05, |
| "loss": 1.3862, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.044921282678842545, |
| "learning_rate": 2.9503237888055136e-05, |
| "loss": 1.3632, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.041155070066452026, |
| "learning_rate": 2.9498476707718328e-05, |
| "loss": 1.304, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.0547085665166378, |
| "learning_rate": 2.9493693207357266e-05, |
| "loss": 1.3055, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.04637569189071655, |
| "learning_rate": 2.9488887394336025e-05, |
| "loss": 1.264, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.0417875237762928, |
| "learning_rate": 2.9484059276053027e-05, |
| "loss": 1.4294, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.04684137552976608, |
| "learning_rate": 2.9479208859941034e-05, |
| "loss": 1.3123, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.04218194633722305, |
| "learning_rate": 2.9474336153467135e-05, |
| "loss": 1.1791, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.037852074950933456, |
| "learning_rate": 2.946944116413273e-05, |
| "loss": 1.2582, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.039469506591558456, |
| "learning_rate": 2.946452389947353e-05, |
| "loss": 1.1058, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.043612875044345856, |
| "learning_rate": 2.9459584367059533e-05, |
| "loss": 1.2935, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.04316055774688721, |
| "learning_rate": 2.9454622574495022e-05, |
| "loss": 1.3011, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.04812432825565338, |
| "learning_rate": 2.9449638529418544e-05, |
| "loss": 1.2616, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.11042577773332596, |
| "learning_rate": 2.9444632239502906e-05, |
| "loss": 1.301, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.046350039541721344, |
| "learning_rate": 2.9439603712455163e-05, |
| "loss": 1.4199, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.04246099293231964, |
| "learning_rate": 2.943455295601659e-05, |
| "loss": 1.2141, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.041246578097343445, |
| "learning_rate": 2.9429479977962712e-05, |
| "loss": 1.3512, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.042336758226156235, |
| "learning_rate": 2.942438478610323e-05, |
| "loss": 1.3296, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.041006408631801605, |
| "learning_rate": 2.941926738828206e-05, |
| "loss": 1.3967, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.04907152056694031, |
| "learning_rate": 2.9414127792377314e-05, |
| "loss": 1.323, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.03816520795226097, |
| "learning_rate": 2.9408966006301247e-05, |
| "loss": 1.3492, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.03879634290933609, |
| "learning_rate": 2.9403782038000306e-05, |
| "loss": 1.2649, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.042063359171152115, |
| "learning_rate": 2.939857589545507e-05, |
| "loss": 1.3584, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.04285269230604172, |
| "learning_rate": 2.9393347586680255e-05, |
| "loss": 1.2769, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.12035181373357773, |
| "learning_rate": 2.938809711972471e-05, |
| "loss": 1.347, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.05274464190006256, |
| "learning_rate": 2.9382824502671392e-05, |
| "loss": 1.3312, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.10297731310129166, |
| "learning_rate": 2.937752974363736e-05, |
| "loss": 1.339, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.10403720289468765, |
| "learning_rate": 2.9372212850773742e-05, |
| "loss": 1.3299, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.0409269854426384, |
| "learning_rate": 2.9366873832265766e-05, |
| "loss": 1.2374, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.13820002973079681, |
| "learning_rate": 2.9361512696332714e-05, |
| "loss": 1.2301, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.05118661746382713, |
| "learning_rate": 2.9356129451227903e-05, |
| "loss": 1.2539, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.039746690541505814, |
| "learning_rate": 2.9350724105238703e-05, |
| "loss": 1.335, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.04060789570212364, |
| "learning_rate": 2.9345296666686505e-05, |
| "loss": 1.3324, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.03841068223118782, |
| "learning_rate": 2.9339847143926705e-05, |
| "loss": 1.1333, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.04431808739900589, |
| "learning_rate": 2.93343755453487e-05, |
| "loss": 1.279, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.03678007051348686, |
| "learning_rate": 2.932888187937587e-05, |
| "loss": 1.1657, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.06775739043951035, |
| "learning_rate": 2.9323366154465584e-05, |
| "loss": 1.3119, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.03947743773460388, |
| "learning_rate": 2.9317828379109137e-05, |
| "loss": 1.2451, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.0774097591638565, |
| "learning_rate": 2.9312268561831797e-05, |
| "loss": 1.3464, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.04033540561795235, |
| "learning_rate": 2.9306686711192752e-05, |
| "loss": 1.2757, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.04514104872941971, |
| "learning_rate": 2.9301082835785123e-05, |
| "loss": 1.286, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.04130253940820694, |
| "learning_rate": 2.9295456944235928e-05, |
| "loss": 1.3865, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.0434497706592083, |
| "learning_rate": 2.9289809045206067e-05, |
| "loss": 1.2949, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.05484382435679436, |
| "learning_rate": 2.928413914739035e-05, |
| "loss": 1.3318, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.0377834215760231, |
| "learning_rate": 2.9278447259517423e-05, |
| "loss": 1.2115, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.04128464683890343, |
| "learning_rate": 2.92727333903498e-05, |
| "loss": 1.3297, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.044725801795721054, |
| "learning_rate": 2.9266997548683838e-05, |
| "loss": 1.3444, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.04274127259850502, |
| "learning_rate": 2.9261239743349708e-05, |
| "loss": 1.4422, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.04571430757641792, |
| "learning_rate": 2.9255459983211406e-05, |
| "loss": 1.368, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.043202176690101624, |
| "learning_rate": 2.924965827716672e-05, |
| "loss": 1.3105, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.04540957510471344, |
| "learning_rate": 2.924383463414722e-05, |
| "loss": 1.2472, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.0444292277097702, |
| "learning_rate": 2.9237989063118253e-05, |
| "loss": 1.2525, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.04204078018665314, |
| "learning_rate": 2.9232121573078923e-05, |
| "loss": 1.3316, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.045940667390823364, |
| "learning_rate": 2.922623217306208e-05, |
| "loss": 1.3357, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.06495653092861176, |
| "learning_rate": 2.9220320872134298e-05, |
| "loss": 1.2616, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.04345276579260826, |
| "learning_rate": 2.9214387679395868e-05, |
| "loss": 1.3768, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.04214997962117195, |
| "learning_rate": 2.9208432603980784e-05, |
| "loss": 1.2542, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.04196178540587425, |
| "learning_rate": 2.9202455655056732e-05, |
| "loss": 1.2082, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.04009537026286125, |
| "learning_rate": 2.9196456841825064e-05, |
| "loss": 1.337, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.049930717796087265, |
| "learning_rate": 2.9190436173520797e-05, |
| "loss": 1.2476, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.04278023913502693, |
| "learning_rate": 2.9184393659412597e-05, |
| "loss": 1.2621, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.04328072443604469, |
| "learning_rate": 2.9178329308802745e-05, |
| "loss": 1.2938, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.049271196126937866, |
| "learning_rate": 2.9172243131027163e-05, |
| "loss": 1.3036, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.03971061483025551, |
| "learning_rate": 2.9166135135455348e-05, |
| "loss": 1.3597, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.03890816867351532, |
| "learning_rate": 2.916000533149041e-05, |
| "loss": 1.305, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.03783348947763443, |
| "learning_rate": 2.9153853728569013e-05, |
| "loss": 1.2693, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.0400649756193161, |
| "learning_rate": 2.9147680336161394e-05, |
| "loss": 1.3958, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.05036576837301254, |
| "learning_rate": 2.9141485163771328e-05, |
| "loss": 1.3374, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.039989981800317764, |
| "learning_rate": 2.913526822093611e-05, |
| "loss": 1.2158, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.03950633481144905, |
| "learning_rate": 2.912902951722658e-05, |
| "loss": 1.2969, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.03998475894331932, |
| "learning_rate": 2.9122769062247042e-05, |
| "loss": 1.2929, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.041236512362957, |
| "learning_rate": 2.9116486865635305e-05, |
| "loss": 1.345, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.03818591311573982, |
| "learning_rate": 2.9110182937062655e-05, |
| "loss": 1.442, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.06457041203975677, |
| "learning_rate": 2.9103857286233815e-05, |
| "loss": 1.2847, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.06327484548091888, |
| "learning_rate": 2.909750992288696e-05, |
| "loss": 1.2155, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.04206790402531624, |
| "learning_rate": 2.909114085679369e-05, |
| "loss": 1.3996, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.0427677147090435, |
| "learning_rate": 2.9084750097759013e-05, |
| "loss": 1.3078, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.043656881898641586, |
| "learning_rate": 2.9078337655621347e-05, |
| "loss": 1.3627, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.04029693827033043, |
| "learning_rate": 2.907190354025246e-05, |
| "loss": 1.2636, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.04099896177649498, |
| "learning_rate": 2.9065447761557514e-05, |
| "loss": 1.2378, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.04414224252104759, |
| "learning_rate": 2.9058970329475012e-05, |
| "loss": 1.3569, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.04171021282672882, |
| "learning_rate": 2.9052471253976782e-05, |
| "loss": 1.2692, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.03992126137018204, |
| "learning_rate": 2.904595054506799e-05, |
| "loss": 1.2591, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.04168523848056793, |
| "learning_rate": 2.9039408212787094e-05, |
| "loss": 1.3033, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.049589138478040695, |
| "learning_rate": 2.9032844267205838e-05, |
| "loss": 1.2772, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.03980257362127304, |
| "learning_rate": 2.9026258718429245e-05, |
| "loss": 1.1404, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.05382708087563515, |
| "learning_rate": 2.9019651576595597e-05, |
| "loss": 1.2493, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.04534833878278732, |
| "learning_rate": 2.9013022851876416e-05, |
| "loss": 1.429, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.03764050453901291, |
| "learning_rate": 2.9006372554476445e-05, |
| "loss": 1.0558, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.04251871630549431, |
| "learning_rate": 2.8999700694633654e-05, |
| "loss": 1.324, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.04109210520982742, |
| "learning_rate": 2.899300728261918e-05, |
| "loss": 1.3635, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.03659521043300629, |
| "learning_rate": 2.898629232873736e-05, |
| "loss": 1.2116, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.04072749614715576, |
| "learning_rate": 2.89795558433257e-05, |
| "loss": 1.3335, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.03850967437028885, |
| "learning_rate": 2.897279783675483e-05, |
| "loss": 1.2563, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.042142994701862335, |
| "learning_rate": 2.8966018319428524e-05, |
| "loss": 1.4424, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.04661838337779045, |
| "learning_rate": 2.8959217301783682e-05, |
| "loss": 1.3586, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.042438358068466187, |
| "learning_rate": 2.8952394794290284e-05, |
| "loss": 1.2736, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.03861173614859581, |
| "learning_rate": 2.8945550807451395e-05, |
| "loss": 1.2671, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.044916752725839615, |
| "learning_rate": 2.8938685351803168e-05, |
| "loss": 1.2626, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.043335918337106705, |
| "learning_rate": 2.8931798437914778e-05, |
| "loss": 1.1822, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.040797822177410126, |
| "learning_rate": 2.892489007638846e-05, |
| "loss": 1.1828, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.04138658568263054, |
| "learning_rate": 2.8917960277859442e-05, |
| "loss": 1.2201, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.042210813611745834, |
| "learning_rate": 2.891100905299598e-05, |
| "loss": 1.3068, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.03918137401342392, |
| "learning_rate": 2.8904036412499297e-05, |
| "loss": 1.2342, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.04338767006993294, |
| "learning_rate": 2.8897042367103588e-05, |
| "loss": 1.3416, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.042658790946006775, |
| "learning_rate": 2.8890026927576e-05, |
| "loss": 1.3262, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.040876757353544235, |
| "learning_rate": 2.8882990104716624e-05, |
| "loss": 1.3446, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04136212170124054, |
| "learning_rate": 2.8875931909358462e-05, |
| "loss": 1.3195, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04199006408452988, |
| "learning_rate": 2.886885235236742e-05, |
| "loss": 1.3002, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04258933663368225, |
| "learning_rate": 2.886175144464229e-05, |
| "loss": 1.2437, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.051209740340709686, |
| "learning_rate": 2.885462919711473e-05, |
| "loss": 1.3638, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.05017193779349327, |
| "learning_rate": 2.884748562074926e-05, |
| "loss": 1.3595, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04402180016040802, |
| "learning_rate": 2.8840320726543226e-05, |
| "loss": 1.2947, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04097672924399376, |
| "learning_rate": 2.883313452552679e-05, |
| "loss": 1.238, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.04349937289953232, |
| "learning_rate": 2.8825927028762923e-05, |
| "loss": 1.2293, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.03766897693276405, |
| "learning_rate": 2.881869824734738e-05, |
| "loss": 1.3303, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.041030097752809525, |
| "learning_rate": 2.8811448192408675e-05, |
| "loss": 1.3331, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.0407574400305748, |
| "learning_rate": 2.880417687510808e-05, |
| "loss": 1.1861, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.0400300994515419, |
| "learning_rate": 2.8796884306639596e-05, |
| "loss": 1.3008, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.03695922717452049, |
| "learning_rate": 2.8789570498229937e-05, |
| "loss": 1.2157, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.039951398968696594, |
| "learning_rate": 2.878223546113853e-05, |
| "loss": 1.2931, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.041708268225193024, |
| "learning_rate": 2.877487920665746e-05, |
| "loss": 1.3288, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.15182824432849884, |
| "learning_rate": 2.8767501746111494e-05, |
| "loss": 1.2683, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.042990490794181824, |
| "learning_rate": 2.876010309085804e-05, |
| "loss": 1.2458, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.04103465750813484, |
| "learning_rate": 2.8752683252287134e-05, |
| "loss": 1.2582, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.05002060532569885, |
| "learning_rate": 2.8745242241821413e-05, |
| "loss": 1.3105, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.043134111911058426, |
| "learning_rate": 2.873778007091613e-05, |
| "loss": 1.1705, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.05832618102431297, |
| "learning_rate": 2.8730296751059087e-05, |
| "loss": 1.2837, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.0403001494705677, |
| "learning_rate": 2.872279229377067e-05, |
| "loss": 1.3912, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.03862776979804039, |
| "learning_rate": 2.87152667106038e-05, |
| "loss": 1.206, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.03989758342504501, |
| "learning_rate": 2.8707720013143896e-05, |
| "loss": 1.296, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.04268000274896622, |
| "learning_rate": 2.870015221300891e-05, |
| "loss": 1.298, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.043824777007102966, |
| "learning_rate": 2.8692563321849277e-05, |
| "loss": 1.2559, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.04446446895599365, |
| "learning_rate": 2.8684953351347883e-05, |
| "loss": 1.1997, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.04489293321967125, |
| "learning_rate": 2.8677322313220093e-05, |
| "loss": 1.2014, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.03992080315947533, |
| "learning_rate": 2.8669670219213674e-05, |
| "loss": 1.3665, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04235706850886345, |
| "learning_rate": 2.866199708110884e-05, |
| "loss": 1.2692, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.0407961942255497, |
| "learning_rate": 2.8654302910718173e-05, |
| "loss": 1.3089, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04869687184691429, |
| "learning_rate": 2.8646587719886653e-05, |
| "loss": 1.4737, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.045265767723321915, |
| "learning_rate": 2.863885152049161e-05, |
| "loss": 1.2694, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04179368540644646, |
| "learning_rate": 2.863109432444272e-05, |
| "loss": 1.2708, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.044434092938899994, |
| "learning_rate": 2.862331614368199e-05, |
| "loss": 1.2315, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04100622236728668, |
| "learning_rate": 2.8615516990183715e-05, |
| "loss": 1.2529, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04165005311369896, |
| "learning_rate": 2.860769687595449e-05, |
| "loss": 1.316, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.04114099219441414, |
| "learning_rate": 2.859985581303318e-05, |
| "loss": 1.2013, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.038906700909137726, |
| "learning_rate": 2.859199381349089e-05, |
| "loss": 1.2939, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.04231356829404831, |
| "learning_rate": 2.8584110889430968e-05, |
| "loss": 1.3255, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.040223341435194016, |
| "learning_rate": 2.857620705298896e-05, |
| "loss": 1.1812, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.041090674698352814, |
| "learning_rate": 2.8568282316332623e-05, |
| "loss": 1.2755, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.0405619814991951, |
| "learning_rate": 2.8560336691661873e-05, |
| "loss": 1.2817, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.041612740606069565, |
| "learning_rate": 2.85523701912088e-05, |
| "loss": 1.4021, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.047294363379478455, |
| "learning_rate": 2.8544382827237616e-05, |
| "loss": 1.3847, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.040402382612228394, |
| "learning_rate": 2.853637461204466e-05, |
| "loss": 1.3138, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.045728690922260284, |
| "learning_rate": 2.8528345557958365e-05, |
| "loss": 1.3433, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.04862399771809578, |
| "learning_rate": 2.8520295677339256e-05, |
| "loss": 1.4913, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.04778144508600235, |
| "learning_rate": 2.851222498257991e-05, |
| "loss": 1.1898, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.054490551352500916, |
| "learning_rate": 2.850413348610495e-05, |
| "loss": 1.3287, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.04370054230093956, |
| "learning_rate": 2.8496021200371018e-05, |
| "loss": 1.3576, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.04136567935347557, |
| "learning_rate": 2.848788813786677e-05, |
| "loss": 1.3145, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.042698897421360016, |
| "learning_rate": 2.847973431111284e-05, |
| "loss": 1.2438, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.03822973743081093, |
| "learning_rate": 2.847155973266183e-05, |
| "loss": 1.2262, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.040024396032094955, |
| "learning_rate": 2.8463364415098295e-05, |
| "loss": 1.3446, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.04169702157378197, |
| "learning_rate": 2.84551483710387e-05, |
| "loss": 1.3185, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.062234602868556976, |
| "learning_rate": 2.8446911613131437e-05, |
| "loss": 1.366, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.044699717313051224, |
| "learning_rate": 2.843865415405678e-05, |
| "loss": 1.1966, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.049964308738708496, |
| "learning_rate": 2.8430376006526862e-05, |
| "loss": 1.3174, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.039814963936805725, |
| "learning_rate": 2.8422077183285686e-05, |
| "loss": 1.3091, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.03822264447808266, |
| "learning_rate": 2.841375769710906e-05, |
| "loss": 1.2779, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.04611162468791008, |
| "learning_rate": 2.8405417560804618e-05, |
| "loss": 1.2708, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.04191526770591736, |
| "learning_rate": 2.8397056787211787e-05, |
| "loss": 1.3307, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.04210788011550903, |
| "learning_rate": 2.838867538920175e-05, |
| "loss": 1.4363, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.04391501471400261, |
| "learning_rate": 2.8380273379677463e-05, |
| "loss": 1.2504, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.04508218541741371, |
| "learning_rate": 2.837185077157358e-05, |
| "loss": 1.2052, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.03969848155975342, |
| "learning_rate": 2.8363407577856498e-05, |
| "loss": 1.2945, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.04233105480670929, |
| "learning_rate": 2.835494381152429e-05, |
| "loss": 1.3685, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.0404694527387619, |
| "learning_rate": 2.83464594856067e-05, |
| "loss": 1.1128, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.04120095074176788, |
| "learning_rate": 2.8337954613165124e-05, |
| "loss": 1.4247, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.0430976077914238, |
| "learning_rate": 2.832942920729259e-05, |
| "loss": 1.2779, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.046013325452804565, |
| "learning_rate": 2.8320883281113744e-05, |
| "loss": 1.3943, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.0432153195142746, |
| "learning_rate": 2.8312316847784805e-05, |
| "loss": 1.3161, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.04044407978653908, |
| "learning_rate": 2.8303729920493578e-05, |
| "loss": 1.1947, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.04066836088895798, |
| "learning_rate": 2.8295122512459412e-05, |
| "loss": 1.307, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.039611902087926865, |
| "learning_rate": 2.8286494636933182e-05, |
| "loss": 1.2142, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.03812957555055618, |
| "learning_rate": 2.827784630719728e-05, |
| "loss": 1.2966, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.03867647051811218, |
| "learning_rate": 2.8269177536565578e-05, |
| "loss": 1.3149, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.038965556770563126, |
| "learning_rate": 2.8260488338383424e-05, |
| "loss": 1.2024, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.040611233562231064, |
| "learning_rate": 2.825177872602761e-05, |
| "loss": 1.3236, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.04156762361526489, |
| "learning_rate": 2.8243048712906356e-05, |
| "loss": 1.2006, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.04537190496921539, |
| "learning_rate": 2.8234298312459287e-05, |
| "loss": 1.2474, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.04531079903244972, |
| "learning_rate": 2.8225527538157413e-05, |
| "loss": 1.2898, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.0478559210896492, |
| "learning_rate": 2.8216736403503117e-05, |
| "loss": 1.2698, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.03838729113340378, |
| "learning_rate": 2.8207924922030116e-05, |
| "loss": 1.244, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.04377003014087677, |
| "learning_rate": 2.819909310730345e-05, |
| "loss": 1.2137, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.05953631177544594, |
| "learning_rate": 2.8190240972919474e-05, |
| "loss": 1.306, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.05924424156546593, |
| "learning_rate": 2.8181368532505812e-05, |
| "loss": 1.2539, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.04990265145897865, |
| "learning_rate": 2.8172475799721353e-05, |
| "loss": 1.23, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.05186406522989273, |
| "learning_rate": 2.816356278825623e-05, |
| "loss": 1.2383, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.04090893268585205, |
| "learning_rate": 2.8154629511831784e-05, |
| "loss": 1.2833, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.04050731286406517, |
| "learning_rate": 2.814567598420056e-05, |
| "loss": 1.206, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.044532258063554764, |
| "learning_rate": 2.8136702219146285e-05, |
| "loss": 1.4248, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.040547750890254974, |
| "learning_rate": 2.8127708230483825e-05, |
| "loss": 1.219, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.07601740211248398, |
| "learning_rate": 2.81186940320592e-05, |
| "loss": 1.2666, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.04106530919671059, |
| "learning_rate": 2.8109659637749525e-05, |
| "loss": 1.2695, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.04745073616504669, |
| "learning_rate": 2.8100605061463015e-05, |
| "loss": 1.4064, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.0383027046918869, |
| "learning_rate": 2.8091530317138953e-05, |
| "loss": 1.2877, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.04280005767941475, |
| "learning_rate": 2.808243541874767e-05, |
| "loss": 1.2139, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.0441480427980423, |
| "learning_rate": 2.807332038029052e-05, |
| "loss": 1.3107, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.04255714640021324, |
| "learning_rate": 2.806418521579987e-05, |
| "loss": 1.1937, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.040505584329366684, |
| "learning_rate": 2.8055029939339055e-05, |
| "loss": 1.2599, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.043979816138744354, |
| "learning_rate": 2.80458545650024e-05, |
| "loss": 1.2452, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.039587393403053284, |
| "learning_rate": 2.8036659106915145e-05, |
| "loss": 1.285, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.03848938271403313, |
| "learning_rate": 2.802744357923345e-05, |
| "loss": 1.3721, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.35720014572143555, |
| "learning_rate": 2.8018207996144388e-05, |
| "loss": 1.3332, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.03978364169597626, |
| "learning_rate": 2.8008952371865886e-05, |
| "loss": 1.1509, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.03576143831014633, |
| "learning_rate": 2.7999676720646744e-05, |
| "loss": 1.2826, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.040340177714824677, |
| "learning_rate": 2.7990381056766583e-05, |
| "loss": 1.3036, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.04206692427396774, |
| "learning_rate": 2.7981065394535824e-05, |
| "loss": 1.2688, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.044807758182287216, |
| "learning_rate": 2.7971729748295697e-05, |
| "loss": 1.0828, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.037925321608781815, |
| "learning_rate": 2.7962374132418176e-05, |
| "loss": 1.2166, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.0471733994781971, |
| "learning_rate": 2.795299856130599e-05, |
| "loss": 1.183, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.03962987661361694, |
| "learning_rate": 2.7943603049392578e-05, |
| "loss": 1.2063, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04328012466430664, |
| "learning_rate": 2.7934187611142093e-05, |
| "loss": 1.3816, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04300890862941742, |
| "learning_rate": 2.792475226104935e-05, |
| "loss": 1.3163, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04583253711462021, |
| "learning_rate": 2.7915297013639828e-05, |
| "loss": 1.4251, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04772812873125076, |
| "learning_rate": 2.790582188346962e-05, |
| "loss": 1.2981, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04397529363632202, |
| "learning_rate": 2.789632688512545e-05, |
| "loss": 1.2911, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04336230084300041, |
| "learning_rate": 2.7886812033224618e-05, |
| "loss": 1.3388, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.04407043755054474, |
| "learning_rate": 2.787727734241499e-05, |
| "loss": 1.1775, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.03971351310610771, |
| "learning_rate": 2.7867722827374964e-05, |
| "loss": 1.3216, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.043983425945043564, |
| "learning_rate": 2.7858148502813477e-05, |
| "loss": 1.4081, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.035951223224401474, |
| "learning_rate": 2.784855438346994e-05, |
| "loss": 1.3331, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.04295084998011589, |
| "learning_rate": 2.783894048411425e-05, |
| "loss": 1.3059, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.047317974269390106, |
| "learning_rate": 2.7829306819546756e-05, |
| "loss": 1.2763, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.03932027146220207, |
| "learning_rate": 2.781965340459823e-05, |
| "loss": 1.2098, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.03825077414512634, |
| "learning_rate": 2.780998025412985e-05, |
| "loss": 1.2286, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.04540243372321129, |
| "learning_rate": 2.780028738303318e-05, |
| "loss": 1.36, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.04947693273425102, |
| "learning_rate": 2.7790574806230137e-05, |
| "loss": 1.3462, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.040425993502140045, |
| "learning_rate": 2.7780842538672983e-05, |
| "loss": 1.2426, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.04652491956949234, |
| "learning_rate": 2.777109059534428e-05, |
| "loss": 1.2174, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.04260154813528061, |
| "learning_rate": 2.77613189912569e-05, |
| "loss": 1.3217, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.04189547896385193, |
| "learning_rate": 2.775152774145396e-05, |
| "loss": 1.3517, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.05027705430984497, |
| "learning_rate": 2.7741716861008838e-05, |
| "loss": 1.2322, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.04095868766307831, |
| "learning_rate": 2.7731886365025128e-05, |
| "loss": 1.2756, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.042236629873514175, |
| "learning_rate": 2.7722036268636613e-05, |
| "loss": 1.3117, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.03990009054541588, |
| "learning_rate": 2.771216658700727e-05, |
| "loss": 1.22, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.04218020290136337, |
| "learning_rate": 2.77022773353312e-05, |
| "loss": 1.1763, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.03973948955535889, |
| "learning_rate": 2.769236852883266e-05, |
| "loss": 1.2557, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.04438718408346176, |
| "learning_rate": 2.7682440182765987e-05, |
| "loss": 1.2702, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.04357394203543663, |
| "learning_rate": 2.767249231241562e-05, |
| "loss": 1.4582, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.03988838940858841, |
| "learning_rate": 2.766252493309603e-05, |
| "loss": 1.2441, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.040849536657333374, |
| "learning_rate": 2.7652538060151747e-05, |
| "loss": 1.1811, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.04130591079592705, |
| "learning_rate": 2.7642531708957297e-05, |
| "loss": 1.392, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.03944481164216995, |
| "learning_rate": 2.7632505894917194e-05, |
| "loss": 1.278, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.04051590338349342, |
| "learning_rate": 2.7622460633465915e-05, |
| "loss": 1.2217, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.048737745732069016, |
| "learning_rate": 2.7612395940067875e-05, |
| "loss": 1.364, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.03939497843384743, |
| "learning_rate": 2.760231183021741e-05, |
| "loss": 1.2369, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.044156353920698166, |
| "learning_rate": 2.759220831943874e-05, |
| "loss": 1.2601, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.04156497120857239, |
| "learning_rate": 2.7582085423285952e-05, |
| "loss": 1.3323, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.049551285803318024, |
| "learning_rate": 2.757194315734298e-05, |
| "loss": 1.3135, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.0678929015994072, |
| "learning_rate": 2.756178153722358e-05, |
| "loss": 1.331, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.04068392515182495, |
| "learning_rate": 2.7551600578571298e-05, |
| "loss": 1.3646, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.044063862413167953, |
| "learning_rate": 2.7541400297059452e-05, |
| "loss": 1.2513, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.040134869515895844, |
| "learning_rate": 2.75311807083911e-05, |
| "loss": 1.2778, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.04928048327565193, |
| "learning_rate": 2.7520941828299043e-05, |
| "loss": 1.4237, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.1714377999305725, |
| "learning_rate": 2.751068367254576e-05, |
| "loss": 1.1875, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.040971677750349045, |
| "learning_rate": 2.7500406256923418e-05, |
| "loss": 1.2199, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.042567458003759384, |
| "learning_rate": 2.749010959725382e-05, |
| "loss": 1.3554, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.04325272887945175, |
| "learning_rate": 2.747979370938841e-05, |
| "loss": 1.19, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.04391823336482048, |
| "learning_rate": 2.746945860920823e-05, |
| "loss": 1.3019, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.041555944830179214, |
| "learning_rate": 2.7459104312623886e-05, |
| "loss": 1.2084, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.042668092995882034, |
| "learning_rate": 2.7448730835575552e-05, |
| "loss": 1.222, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.0391099713742733, |
| "learning_rate": 2.7438338194032922e-05, |
| "loss": 1.2251, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.0442223884165287, |
| "learning_rate": 2.7427926403995193e-05, |
| "loss": 1.1863, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.04271350055932999, |
| "learning_rate": 2.7417495481491047e-05, |
| "loss": 1.362, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.04445281997323036, |
| "learning_rate": 2.7407045442578608e-05, |
| "loss": 1.3202, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.038975391536951065, |
| "learning_rate": 2.7396576303345445e-05, |
| "loss": 1.2612, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.04713955149054527, |
| "learning_rate": 2.7386088079908515e-05, |
| "loss": 1.2862, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.04461180418729782, |
| "learning_rate": 2.7375580788414172e-05, |
| "loss": 1.3267, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.039272554218769073, |
| "learning_rate": 2.7365054445038104e-05, |
| "loss": 1.1617, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.043195609003305435, |
| "learning_rate": 2.7354509065985352e-05, |
| "loss": 1.3224, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.04246910288929939, |
| "learning_rate": 2.734394466749024e-05, |
| "loss": 1.3336, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.061883531510829926, |
| "learning_rate": 2.733336126581639e-05, |
| "loss": 1.3105, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.040977708995342255, |
| "learning_rate": 2.7322758877256666e-05, |
| "loss": 1.2908, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.04068838432431221, |
| "learning_rate": 2.7312137518133164e-05, |
| "loss": 1.3416, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.05024155229330063, |
| "learning_rate": 2.73014972047972e-05, |
| "loss": 1.2781, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.038461074233055115, |
| "learning_rate": 2.7290837953629243e-05, |
| "loss": 1.1326, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.04922636225819588, |
| "learning_rate": 2.728015978103894e-05, |
| "loss": 1.2539, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.04049984738230705, |
| "learning_rate": 2.726946270346505e-05, |
| "loss": 1.1698, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.036820389330387115, |
| "learning_rate": 2.725874673737545e-05, |
| "loss": 1.2235, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.043402861803770065, |
| "learning_rate": 2.724801189926708e-05, |
| "loss": 1.3451, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.04600697383284569, |
| "learning_rate": 2.7237258205665955e-05, |
| "loss": 1.265, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.042860984802246094, |
| "learning_rate": 2.7226485673127088e-05, |
| "loss": 1.4376, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.08648369461297989, |
| "learning_rate": 2.7215694318234525e-05, |
| "loss": 1.2199, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.04271225258708, |
| "learning_rate": 2.720488415760126e-05, |
| "loss": 1.2322, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.038085825741291046, |
| "learning_rate": 2.719405520786926e-05, |
| "loss": 1.1582, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.04303895682096481, |
| "learning_rate": 2.7183207485709404e-05, |
| "loss": 1.4118, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.040101755410432816, |
| "learning_rate": 2.7172341007821485e-05, |
| "loss": 1.2151, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.041162651032209396, |
| "learning_rate": 2.716145579093415e-05, |
| "loss": 1.2997, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.05283331498503685, |
| "learning_rate": 2.7150551851804904e-05, |
| "loss": 1.3426, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.040588632225990295, |
| "learning_rate": 2.713962920722008e-05, |
| "loss": 1.1467, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.041328929364681244, |
| "learning_rate": 2.7128687873994807e-05, |
| "loss": 1.2999, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.04245878756046295, |
| "learning_rate": 2.7117727868972968e-05, |
| "loss": 1.2076, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.04110388457775116, |
| "learning_rate": 2.7106749209027216e-05, |
| "loss": 1.3715, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.04127703979611397, |
| "learning_rate": 2.70957519110589e-05, |
| "loss": 1.1791, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.03841520473361015, |
| "learning_rate": 2.7084735991998077e-05, |
| "loss": 1.2702, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.04112560302019119, |
| "learning_rate": 2.707370146880346e-05, |
| "loss": 1.2306, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.03800290822982788, |
| "learning_rate": 2.7062648358462417e-05, |
| "loss": 1.3471, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.05236433446407318, |
| "learning_rate": 2.705157667799091e-05, |
| "loss": 1.2621, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.044191982597112656, |
| "learning_rate": 2.7040486444433506e-05, |
| "loss": 1.1561, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.04605194926261902, |
| "learning_rate": 2.7029377674863332e-05, |
| "loss": 1.3653, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.04338742420077324, |
| "learning_rate": 2.7018250386382036e-05, |
| "loss": 1.104, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.04334155097603798, |
| "learning_rate": 2.70071045961198e-05, |
| "loss": 1.3779, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.03772331029176712, |
| "learning_rate": 2.699594032123527e-05, |
| "loss": 1.3273, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.05215068534016609, |
| "learning_rate": 2.6984757578915546e-05, |
| "loss": 1.3153, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04011908918619156, |
| "learning_rate": 2.6973556386376178e-05, |
| "loss": 1.337, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04127516970038414, |
| "learning_rate": 2.6962336760861107e-05, |
| "loss": 1.2966, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04114411026239395, |
| "learning_rate": 2.6951098719642643e-05, |
| "loss": 1.3454, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04113148897886276, |
| "learning_rate": 2.693984228002146e-05, |
| "loss": 1.3322, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04927441105246544, |
| "learning_rate": 2.6928567459326558e-05, |
| "loss": 1.3766, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.038145359605550766, |
| "learning_rate": 2.6917274274915215e-05, |
| "loss": 1.1307, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.04447488114237785, |
| "learning_rate": 2.6905962744173002e-05, |
| "loss": 1.1212, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.08171934634447098, |
| "learning_rate": 2.689463288451372e-05, |
| "loss": 1.3832, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.0424637608230114, |
| "learning_rate": 2.6883284713379388e-05, |
| "loss": 1.3069, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.03858843818306923, |
| "learning_rate": 2.687191824824022e-05, |
| "loss": 1.2404, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.038555946201086044, |
| "learning_rate": 2.686053350659459e-05, |
| "loss": 1.2822, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.046382464468479156, |
| "learning_rate": 2.6849130505969014e-05, |
| "loss": 1.3299, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.04165438562631607, |
| "learning_rate": 2.6837709263918102e-05, |
| "loss": 1.3089, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.05100713670253754, |
| "learning_rate": 2.6826269798024566e-05, |
| "loss": 1.2938, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.04596945270895958, |
| "learning_rate": 2.6814812125899154e-05, |
| "loss": 1.2895, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.03950990363955498, |
| "learning_rate": 2.680333626518066e-05, |
| "loss": 1.3278, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.04206983745098114, |
| "learning_rate": 2.679184223353587e-05, |
| "loss": 1.2307, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.042084578424692154, |
| "learning_rate": 2.678033004865954e-05, |
| "loss": 1.2185, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.038785211741924286, |
| "learning_rate": 2.6768799728274372e-05, |
| "loss": 1.1745, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.039074115455150604, |
| "learning_rate": 2.6757251290131002e-05, |
| "loss": 1.2186, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.05322974920272827, |
| "learning_rate": 2.6745684752007943e-05, |
| "loss": 1.313, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.1969892978668213, |
| "learning_rate": 2.673410013171157e-05, |
| "loss": 1.2945, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.03685871139168739, |
| "learning_rate": 2.6722497447076114e-05, |
| "loss": 1.263, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.042687300592660904, |
| "learning_rate": 2.671087671596359e-05, |
| "loss": 1.387, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.043946314603090286, |
| "learning_rate": 2.6699237956263817e-05, |
| "loss": 1.292, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.04511501267552376, |
| "learning_rate": 2.6687581185894363e-05, |
| "loss": 1.2041, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.04132469370961189, |
| "learning_rate": 2.6675906422800514e-05, |
| "loss": 1.2325, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.06625653058290482, |
| "learning_rate": 2.6664213684955267e-05, |
| "loss": 1.3227, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.17180198431015015, |
| "learning_rate": 2.6652502990359272e-05, |
| "loss": 1.2708, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.04059137776494026, |
| "learning_rate": 2.6640774357040846e-05, |
| "loss": 1.2888, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.045602891594171524, |
| "learning_rate": 2.6629027803055917e-05, |
| "loss": 1.1677, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.039703212678432465, |
| "learning_rate": 2.6617263346487987e-05, |
| "loss": 1.3742, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.04089050367474556, |
| "learning_rate": 2.660548100544813e-05, |
| "loss": 1.3612, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.04322800040245056, |
| "learning_rate": 2.6593680798074952e-05, |
| "loss": 1.2789, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.04172592982649803, |
| "learning_rate": 2.6581862742534563e-05, |
| "loss": 1.3865, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.04252813756465912, |
| "learning_rate": 2.657002685702055e-05, |
| "loss": 1.3192, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.03941137343645096, |
| "learning_rate": 2.6558173159753946e-05, |
| "loss": 1.3576, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.04537597671151161, |
| "learning_rate": 2.6546301668983206e-05, |
| "loss": 1.3712, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.041427433490753174, |
| "learning_rate": 2.653441240298418e-05, |
| "loss": 1.1632, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.05885668843984604, |
| "learning_rate": 2.6522505380060078e-05, |
| "loss": 1.2416, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.043777357786893845, |
| "learning_rate": 2.6510580618541458e-05, |
| "loss": 1.3483, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.039015110582113266, |
| "learning_rate": 2.6498638136786166e-05, |
| "loss": 1.2819, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.040871817618608475, |
| "learning_rate": 2.6486677953179344e-05, |
| "loss": 1.2825, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.04270527511835098, |
| "learning_rate": 2.6474700086133384e-05, |
| "loss": 1.1776, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.046877775341272354, |
| "learning_rate": 2.6462704554087894e-05, |
| "loss": 1.2799, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.037062957882881165, |
| "learning_rate": 2.645069137550968e-05, |
| "loss": 1.2094, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.04165401682257652, |
| "learning_rate": 2.643866056889272e-05, |
| "loss": 1.3647, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.04253152012825012, |
| "learning_rate": 2.6426612152758118e-05, |
| "loss": 1.3101, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.039812587201595306, |
| "learning_rate": 2.6414546145654097e-05, |
| "loss": 1.2479, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.040445294231176376, |
| "learning_rate": 2.640246256615596e-05, |
| "loss": 1.23, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.04047653079032898, |
| "learning_rate": 2.6390361432866058e-05, |
| "loss": 1.2417, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.04961085692048073, |
| "learning_rate": 2.6378242764413773e-05, |
| "loss": 1.2508, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.0414389967918396, |
| "learning_rate": 2.6366106579455468e-05, |
| "loss": 1.2624, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.03844548389315605, |
| "learning_rate": 2.635395289667449e-05, |
| "loss": 1.2981, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.04521048441529274, |
| "learning_rate": 2.6341781734781106e-05, |
| "loss": 1.2127, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.04535103589296341, |
| "learning_rate": 2.6329593112512508e-05, |
| "loss": 1.3261, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.04055513069033623, |
| "learning_rate": 2.6317387048632757e-05, |
| "loss": 1.2221, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.04123299941420555, |
| "learning_rate": 2.6305163561932773e-05, |
| "loss": 1.4202, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.053416475653648376, |
| "learning_rate": 2.629292267123028e-05, |
| "loss": 1.253, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.041178278625011444, |
| "learning_rate": 2.628066439536982e-05, |
| "loss": 1.0999, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.04176180437207222, |
| "learning_rate": 2.6268388753222677e-05, |
| "loss": 1.1518, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.04024680331349373, |
| "learning_rate": 2.6256095763686895e-05, |
| "loss": 1.2264, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.04161696508526802, |
| "learning_rate": 2.6243785445687192e-05, |
| "loss": 1.3583, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.046228162944316864, |
| "learning_rate": 2.6231457818174986e-05, |
| "loss": 1.2576, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.03873226419091225, |
| "learning_rate": 2.6219112900128337e-05, |
| "loss": 1.1708, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.04049040004611015, |
| "learning_rate": 2.6206750710551922e-05, |
| "loss": 1.2502, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.04042857140302658, |
| "learning_rate": 2.6194371268477008e-05, |
| "loss": 1.1942, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.040148042142391205, |
| "learning_rate": 2.6181974592961417e-05, |
| "loss": 1.1096, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.041213199496269226, |
| "learning_rate": 2.616956070308951e-05, |
| "loss": 1.3469, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.0426071472465992, |
| "learning_rate": 2.615712961797214e-05, |
| "loss": 1.2508, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.04406768083572388, |
| "learning_rate": 2.6144681356746647e-05, |
| "loss": 1.3422, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.039021048694849014, |
| "learning_rate": 2.6132215938576787e-05, |
| "loss": 1.248, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.04162931442260742, |
| "learning_rate": 2.6119733382652755e-05, |
| "loss": 1.3637, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.03739694878458977, |
| "learning_rate": 2.6107233708191108e-05, |
| "loss": 1.2575, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.04426296800374985, |
| "learning_rate": 2.6094716934434784e-05, |
| "loss": 1.2268, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.041838765144348145, |
| "learning_rate": 2.608218308065301e-05, |
| "loss": 1.2094, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.04208039864897728, |
| "learning_rate": 2.606963216614133e-05, |
| "loss": 1.3097, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.0421212799847126, |
| "learning_rate": 2.6057064210221556e-05, |
| "loss": 1.3256, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.04151439294219017, |
| "learning_rate": 2.6044479232241713e-05, |
| "loss": 1.2748, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.048513032495975494, |
| "learning_rate": 2.6031877251576054e-05, |
| "loss": 1.2447, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.04105342924594879, |
| "learning_rate": 2.6019258287624988e-05, |
| "loss": 1.1613, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.044454991817474365, |
| "learning_rate": 2.600662235981509e-05, |
| "loss": 1.1713, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.07957032322883606, |
| "learning_rate": 2.599396948759903e-05, |
| "loss": 1.4649, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.07526232302188873, |
| "learning_rate": 2.598129969045558e-05, |
| "loss": 1.1874, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04056404158473015, |
| "learning_rate": 2.5968612987889553e-05, |
| "loss": 1.1964, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.043638139963150024, |
| "learning_rate": 2.5955909399431798e-05, |
| "loss": 1.2819, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.041582487523555756, |
| "learning_rate": 2.594318894463916e-05, |
| "loss": 1.2305, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.041533682495355606, |
| "learning_rate": 2.5930451643094435e-05, |
| "loss": 1.2939, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.03732079640030861, |
| "learning_rate": 2.5917697514406374e-05, |
| "loss": 1.2324, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04289107024669647, |
| "learning_rate": 2.5904926578209617e-05, |
| "loss": 1.3801, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04154540225863457, |
| "learning_rate": 2.589213885416469e-05, |
| "loss": 1.2219, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04077089950442314, |
| "learning_rate": 2.5879334361957955e-05, |
| "loss": 1.1392, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04148285463452339, |
| "learning_rate": 2.5866513121301592e-05, |
| "loss": 1.3283, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.04197124391794205, |
| "learning_rate": 2.5853675151933565e-05, |
| "loss": 1.2432, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.04120796546339989, |
| "learning_rate": 2.584082047361759e-05, |
| "loss": 1.3758, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.05007264018058777, |
| "learning_rate": 2.5827949106143113e-05, |
| "loss": 1.3027, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.03969128802418709, |
| "learning_rate": 2.5815061069325252e-05, |
| "loss": 1.366, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.05790838971734047, |
| "learning_rate": 2.5802156383004817e-05, |
| "loss": 1.2677, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.057674333453178406, |
| "learning_rate": 2.5789235067048224e-05, |
| "loss": 1.3836, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.03906143456697464, |
| "learning_rate": 2.57762971413475e-05, |
| "loss": 1.2964, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.05400891602039337, |
| "learning_rate": 2.576334262582025e-05, |
| "loss": 1.261, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.04630432277917862, |
| "learning_rate": 2.57503715404096e-05, |
| "loss": 1.3345, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.039165519177913666, |
| "learning_rate": 2.5737383905084207e-05, |
| "loss": 1.2549, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.16873140633106232, |
| "learning_rate": 2.572437973983818e-05, |
| "loss": 1.365, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.03847840055823326, |
| "learning_rate": 2.5711359064691105e-05, |
| "loss": 1.2175, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.0415102019906044, |
| "learning_rate": 2.569832189968796e-05, |
| "loss": 1.2817, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.04224313050508499, |
| "learning_rate": 2.5685268264899117e-05, |
| "loss": 1.4298, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.042650867253541946, |
| "learning_rate": 2.567219818042031e-05, |
| "loss": 1.2426, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.04199182987213135, |
| "learning_rate": 2.5659111666372593e-05, |
| "loss": 1.1769, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.041494037955999374, |
| "learning_rate": 2.5646008742902305e-05, |
| "loss": 1.3261, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.03892706334590912, |
| "learning_rate": 2.5632889430181054e-05, |
| "loss": 1.1609, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.046930767595767975, |
| "learning_rate": 2.561975374840568e-05, |
| "loss": 1.2276, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.038510628044605255, |
| "learning_rate": 2.5606601717798212e-05, |
| "loss": 1.2124, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.04090854153037071, |
| "learning_rate": 2.5593433358605867e-05, |
| "loss": 1.2333, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.042670849710702896, |
| "learning_rate": 2.558024869110098e-05, |
| "loss": 1.2648, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.042589254677295685, |
| "learning_rate": 2.556704773558101e-05, |
| "loss": 1.2574, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.04135293513536453, |
| "learning_rate": 2.555383051236847e-05, |
| "loss": 1.2974, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.03972950950264931, |
| "learning_rate": 2.554059704181093e-05, |
| "loss": 1.3323, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.03985543176531792, |
| "learning_rate": 2.5527347344280977e-05, |
| "loss": 1.2946, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.04313001036643982, |
| "learning_rate": 2.5514081440176173e-05, |
| "loss": 1.3231, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.04007202759385109, |
| "learning_rate": 2.5500799349919023e-05, |
| "loss": 1.2983, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.03902252018451691, |
| "learning_rate": 2.5487501093956956e-05, |
| "loss": 1.2326, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.04481017589569092, |
| "learning_rate": 2.5474186692762294e-05, |
| "loss": 1.2874, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.04088298976421356, |
| "learning_rate": 2.5460856166832204e-05, |
| "loss": 1.2841, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.03811021149158478, |
| "learning_rate": 2.544750953668868e-05, |
| "loss": 1.214, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.03895227238535881, |
| "learning_rate": 2.543414682287851e-05, |
| "loss": 1.3366, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.04189634323120117, |
| "learning_rate": 2.542076804597324e-05, |
| "loss": 1.436, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.043176136910915375, |
| "learning_rate": 2.540737322656915e-05, |
| "loss": 1.2532, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.03896043822169304, |
| "learning_rate": 2.539396238528721e-05, |
| "loss": 1.2106, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.03971550986170769, |
| "learning_rate": 2.5380535542773052e-05, |
| "loss": 1.2685, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.04015112668275833, |
| "learning_rate": 2.5367092719696957e-05, |
| "loss": 1.2589, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.052997201681137085, |
| "learning_rate": 2.53536339367538e-05, |
| "loss": 1.25, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.03955504670739174, |
| "learning_rate": 2.5340159214663007e-05, |
| "loss": 1.3744, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0384516641497612, |
| "learning_rate": 2.532666857416858e-05, |
| "loss": 1.1435, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0435960479080677, |
| "learning_rate": 2.531316203603899e-05, |
| "loss": 1.2677, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0423857718706131, |
| "learning_rate": 2.529963962106721e-05, |
| "loss": 1.2528, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.05119800567626953, |
| "learning_rate": 2.5286101350070638e-05, |
| "loss": 1.2663, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.04668194055557251, |
| "learning_rate": 2.5272547243891076e-05, |
| "loss": 1.2601, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.04023146629333496, |
| "learning_rate": 2.525897732339473e-05, |
| "loss": 1.3211, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.04063122346997261, |
| "learning_rate": 2.524539160947213e-05, |
| "loss": 1.2688, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.04180854186415672, |
| "learning_rate": 2.523179012303813e-05, |
| "loss": 1.3655, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.04253614321351051, |
| "learning_rate": 2.5218172885031854e-05, |
| "loss": 1.1455, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.03980337083339691, |
| "learning_rate": 2.520453991641669e-05, |
| "loss": 1.1863, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.03819122910499573, |
| "learning_rate": 2.519089123818023e-05, |
| "loss": 1.2007, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.03928643837571144, |
| "learning_rate": 2.517722687133426e-05, |
| "loss": 1.084, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.04488477110862732, |
| "learning_rate": 2.5163546836914705e-05, |
| "loss": 1.2667, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.0637240782380104, |
| "learning_rate": 2.5149851155981626e-05, |
| "loss": 1.2081, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.03988213464617729, |
| "learning_rate": 2.5136139849619164e-05, |
| "loss": 1.3405, |
| "step": 950 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2814, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "total_flos": 1.9321548867807216e+19, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
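The file above appears to be a Hugging Face Trainer `trainer_state.json` checkpoint: each `log_history` entry carries `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`, emitted every `logging_steps` step. A minimal sketch of how one might summarize such a log, assuming it has been saved locally as `trainer_state.json` (the path is an assumption; Trainer normally writes it under a `checkpoint-<step>/` directory):

```python
import json
from statistics import mean

# Load the serialized TrainerState; the filename is an assumption.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only log entries that recorded a training loss.
entries = [e for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in entries]

print(f"logged steps : {len(entries)} (last step {entries[-1]['step']})")
print(f"first loss   : {losses[0]:.4f}")
print(f"last loss    : {losses[-1]:.4f}")

# Compare windowed means rather than single values: per-step losses
# in this log swing by 0.15 or more between consecutive steps, so a
# 50-step average gives a steadier read on the trend.
window = 50
print(f"mean of first {window} losses: {mean(losses[:window]):.4f}")
print(f"mean of last  {window} losses: {mean(losses[-window:]):.4f}")
```

Windowed means are used here deliberately: single steps in the log jump between roughly 1.10 and 1.46 (e.g., steps 799 and 800), so only an average over many steps reflects the underlying trajectory.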