| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 29.99965266119415, | |
| "eval_steps": 500, | |
| "global_step": 1295550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.6310589688175354e-05, | |
| "loss": 4.5055, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.262117937635071e-05, | |
| "loss": 1.4851, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00013893176906452608, | |
| "loss": 1.1808, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00018524235875270141, | |
| "loss": 1.132, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002315529484408768, | |
| "loss": 1.1349, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00027786353812905216, | |
| "loss": 1.1422, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002997558073716234, | |
| "loss": 1.1513, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0002992880054015534, | |
| "loss": 1.142, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00029882020343148334, | |
| "loss": 1.1303, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0002983524014614133, | |
| "loss": 1.1215, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002978845994913433, | |
| "loss": 1.109, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00029741679752127325, | |
| "loss": 1.1034, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0002969489955512032, | |
| "loss": 1.0998, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00029648119358113324, | |
| "loss": 1.0979, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00029601339161106316, | |
| "loss": 1.083, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002955455896409932, | |
| "loss": 1.0741, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0002950777876709231, | |
| "loss": 1.0741, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0002946099857008531, | |
| "loss": 1.0707, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00029414218373078303, | |
| "loss": 1.0689, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00029367438176071306, | |
| "loss": 1.0536, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00029320657979064297, | |
| "loss": 1.0556, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.000292738777820573, | |
| "loss": 1.0358, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0002922709758505029, | |
| "loss": 1.018, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00029180317388043293, | |
| "loss": 1.0162, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00029133537191036285, | |
| "loss": 1.017, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00029086756994029287, | |
| "loss": 1.0145, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.0002903997679702228, | |
| "loss": 1.0103, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0002899319660001528, | |
| "loss": 1.0046, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0002894641640300827, | |
| "loss": 1.01, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00028899636206001274, | |
| "loss": 1.0146, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00028852856008994266, | |
| "loss": 1.0122, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002880607581198727, | |
| "loss": 0.9961, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00028759295614980265, | |
| "loss": 0.9914, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.0002871251541797326, | |
| "loss": 0.9936, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0002866573522096626, | |
| "loss": 0.9998, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00028618955023959256, | |
| "loss": 0.9833, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.0002857217482695225, | |
| "loss": 0.9901, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.0002852539462994525, | |
| "loss": 0.9905, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00028478614432938246, | |
| "loss": 0.9845, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00028431834235931243, | |
| "loss": 0.9886, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.0002838505403892424, | |
| "loss": 0.9848, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00028338273841917237, | |
| "loss": 0.9827, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00028291493644910234, | |
| "loss": 0.9764, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 0.0002824471344790323, | |
| "loss": 0.9546, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.0002819793325089623, | |
| "loss": 0.9388, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.00028151153053889225, | |
| "loss": 0.9432, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.0002810437285688222, | |
| "loss": 0.9591, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.0002805759265987522, | |
| "loss": 0.9431, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.00028010812462868215, | |
| "loss": 0.9388, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.0002796403226586121, | |
| "loss": 0.9421, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.0002791725206885421, | |
| "loss": 0.9452, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.0002787047187184721, | |
| "loss": 0.9535, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 0.00027823691674840203, | |
| "loss": 0.9373, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.00027776911477833205, | |
| "loss": 0.9345, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.00027730131280826197, | |
| "loss": 0.9478, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.000276833510838192, | |
| "loss": 0.9445, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.0002763657088681219, | |
| "loss": 0.9297, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 0.0002758979068980519, | |
| "loss": 0.9294, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.00027543010492798184, | |
| "loss": 0.9273, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.00027496230295791186, | |
| "loss": 0.9242, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.0002744945009878418, | |
| "loss": 0.9228, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.0002740266990177718, | |
| "loss": 0.9316, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.0002735588970477017, | |
| "loss": 0.9448, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 0.00027309109507763174, | |
| "loss": 0.9253, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 0.00027262329310756165, | |
| "loss": 0.9197, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.0002721554911374917, | |
| "loss": 0.9025, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.0002716876891674216, | |
| "loss": 0.8983, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.0002712198871973516, | |
| "loss": 0.9013, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.0002707520852272816, | |
| "loss": 0.9019, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.00027028428325721155, | |
| "loss": 0.9016, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 0.0002698164812871415, | |
| "loss": 0.8979, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.0002693486793170715, | |
| "loss": 0.8961, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 0.00026888087734700146, | |
| "loss": 0.9007, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 0.00026841307537693143, | |
| "loss": 0.8951, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 0.0002679452734068614, | |
| "loss": 0.8926, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 0.00026747747143679137, | |
| "loss": 0.8924, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 0.00026700966946672134, | |
| "loss": 0.9044, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 0.0002665418674966513, | |
| "loss": 0.8952, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 0.00026607406552658127, | |
| "loss": 0.9001, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 0.00026560626355651124, | |
| "loss": 0.8898, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 0.0002651384615864412, | |
| "loss": 0.895, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 0.0002646706596163712, | |
| "loss": 0.9015, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 0.00026420285764630115, | |
| "loss": 0.892, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 0.0002637350556762311, | |
| "loss": 0.8903, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 0.0002632672537061611, | |
| "loss": 0.8916, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.00026279945173609105, | |
| "loss": 0.8941, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 0.000262331649766021, | |
| "loss": 0.8771, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 0.000261863847795951, | |
| "loss": 0.8716, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 0.00026139604582588096, | |
| "loss": 0.8632, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 0.00026092824385581093, | |
| "loss": 0.8573, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 0.0002604604418857409, | |
| "loss": 0.8642, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 0.00025999263991567087, | |
| "loss": 0.8642, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 0.00025952483794560084, | |
| "loss": 0.8617, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 0.0002590570359755308, | |
| "loss": 0.8574, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.0002585892340054608, | |
| "loss": 0.8612, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 0.00025812143203539074, | |
| "loss": 0.8706, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 0.0002576536300653207, | |
| "loss": 0.8605, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 0.0002571858280952507, | |
| "loss": 0.8703, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.00025671802612518065, | |
| "loss": 0.8691, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 0.0002562502241551106, | |
| "loss": 0.873, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 0.0002557824221850406, | |
| "loss": 0.857, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 0.00025531462021497056, | |
| "loss": 0.8686, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 0.0002548468182449005, | |
| "loss": 0.868, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 0.0002543790162748305, | |
| "loss": 0.8756, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.0002539112143047605, | |
| "loss": 0.8656, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 0.00025344341233469043, | |
| "loss": 0.8725, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 0.00025297561036462045, | |
| "loss": 0.8675, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.00025250780839455037, | |
| "loss": 0.8644, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 0.0002520400064244804, | |
| "loss": 0.8397, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 0.0002515722044544103, | |
| "loss": 0.8374, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.00025110440248434033, | |
| "loss": 0.8364, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 0.00025063660051427025, | |
| "loss": 0.8454, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 0.00025016879854420027, | |
| "loss": 0.8367, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 0.0002497009965741302, | |
| "loss": 0.8275, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 0.0002492331946040602, | |
| "loss": 0.8398, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 0.0002487653926339901, | |
| "loss": 0.8403, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.00024829759066392014, | |
| "loss": 0.8409, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 0.00024782978869385006, | |
| "loss": 0.8366, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 0.0002473619867237801, | |
| "loss": 0.8381, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 0.00024689418475371, | |
| "loss": 0.842, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 0.00024642638278364, | |
| "loss": 0.843, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 0.00024595858081357, | |
| "loss": 0.8489, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 0.00024549077884349996, | |
| "loss": 0.8313, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.0002450229768734299, | |
| "loss": 0.8468, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 0.0002445551749033599, | |
| "loss": 0.8446, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 0.00024408737293328986, | |
| "loss": 0.8329, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 0.00024361957096321983, | |
| "loss": 0.8402, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 0.0002431517689931498, | |
| "loss": 0.8502, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 0.00024268396702307977, | |
| "loss": 0.8437, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 0.00024221616505300974, | |
| "loss": 0.8341, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 0.0002417483630829397, | |
| "loss": 0.8152, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 0.00024128056111286968, | |
| "loss": 0.8189, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.00024081275914279965, | |
| "loss": 0.8193, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 0.00024034495717272961, | |
| "loss": 0.825, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 0.00023987715520265958, | |
| "loss": 0.818, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 0.00023940935323258955, | |
| "loss": 0.8204, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 0.00023894155126251952, | |
| "loss": 0.823, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 0.0002384737492924495, | |
| "loss": 0.8179, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 0.00023800594732237946, | |
| "loss": 0.8152, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 0.00023753814535230943, | |
| "loss": 0.8178, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 0.00023707034338223942, | |
| "loss": 0.8212, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 0.00023660254141216937, | |
| "loss": 0.8161, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 0.00023613473944209936, | |
| "loss": 0.8162, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 0.0002356669374720293, | |
| "loss": 0.828, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 0.0002351991355019593, | |
| "loss": 0.823, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 0.00023473133353188927, | |
| "loss": 0.8156, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 0.00023426353156181924, | |
| "loss": 0.8176, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 0.0002337957295917492, | |
| "loss": 0.8226, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 0.00023332792762167917, | |
| "loss": 0.8189, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 0.00023286012565160914, | |
| "loss": 0.8082, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 0.0002323923236815391, | |
| "loss": 0.8257, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 0.00023192452171146908, | |
| "loss": 0.7925, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 0.00023145671974139905, | |
| "loss": 0.7902, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 0.00023098891777132902, | |
| "loss": 0.7994, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 0.000230521115801259, | |
| "loss": 0.8029, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 0.00023005331383118896, | |
| "loss": 0.7983, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 0.00022958551186111893, | |
| "loss": 0.797, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 0.00022911770989104892, | |
| "loss": 0.8009, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 0.00022864990792097886, | |
| "loss": 0.7979, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 0.00022818210595090886, | |
| "loss": 0.8055, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.0002277143039808388, | |
| "loss": 0.7971, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.0002272465020107688, | |
| "loss": 0.8066, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 0.00022677870004069874, | |
| "loss": 0.7975, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 0.00022631089807062873, | |
| "loss": 0.7912, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 0.00022584309610055868, | |
| "loss": 0.7988, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 0.00022537529413048867, | |
| "loss": 0.7999, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 0.00022490749216041861, | |
| "loss": 0.8019, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 0.0002244396901903486, | |
| "loss": 0.8108, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 0.00022397188822027855, | |
| "loss": 0.8075, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 0.00022350408625020855, | |
| "loss": 0.7995, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 0.0002230362842801385, | |
| "loss": 0.802, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 0.00022256848231006848, | |
| "loss": 0.7998, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 0.00022210068033999843, | |
| "loss": 0.8023, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 0.00022163287836992842, | |
| "loss": 0.7748, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 0.00022116507639985836, | |
| "loss": 0.7839, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 0.00022069727442978836, | |
| "loss": 0.7806, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 0.00022022947245971836, | |
| "loss": 0.7775, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 0.0002197616704896483, | |
| "loss": 0.7734, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 0.0002192938685195783, | |
| "loss": 0.7728, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 0.00021882606654950824, | |
| "loss": 0.7879, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 0.00021835826457943823, | |
| "loss": 0.7891, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 0.00021789046260936817, | |
| "loss": 0.7922, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 0.00021742266063929817, | |
| "loss": 0.7837, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 0.0002169548586692281, | |
| "loss": 0.7838, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 0.0002164870566991581, | |
| "loss": 0.7797, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 0.00021601925472908805, | |
| "loss": 0.7818, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 0.00021555145275901804, | |
| "loss": 0.7838, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 0.000215083650788948, | |
| "loss": 0.7828, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 0.00021461584881887798, | |
| "loss": 0.7822, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 0.00021414804684880792, | |
| "loss": 0.7952, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 0.00021368024487873792, | |
| "loss": 0.7888, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 0.00021321244290866786, | |
| "loss": 0.7813, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 0.00021274464093859786, | |
| "loss": 0.7784, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 0.00021227683896852783, | |
| "loss": 0.776, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 0.0002118090369984578, | |
| "loss": 0.7686, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 0.0002113412350283878, | |
| "loss": 0.7576, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 0.00021087343305831773, | |
| "loss": 0.7611, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 0.00021040563108824773, | |
| "loss": 0.7698, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 0.00020993782911817767, | |
| "loss": 0.7632, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 0.00020947002714810767, | |
| "loss": 0.7725, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 0.0002090022251780376, | |
| "loss": 0.7706, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 0.0002085344232079676, | |
| "loss": 0.7709, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 0.00020806662123789755, | |
| "loss": 0.7651, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 0.00020759881926782754, | |
| "loss": 0.7657, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 0.00020713101729775748, | |
| "loss": 0.7589, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 0.00020666321532768748, | |
| "loss": 0.7683, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 0.00020619541335761742, | |
| "loss": 0.7684, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 0.00020572761138754742, | |
| "loss": 0.7756, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 0.00020525980941747736, | |
| "loss": 0.7653, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 0.00020479200744740736, | |
| "loss": 0.7718, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 0.0002043242054773373, | |
| "loss": 0.7676, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 0.0002038564035072673, | |
| "loss": 0.772, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 0.00020338860153719726, | |
| "loss": 0.766, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 0.00020292079956712723, | |
| "loss": 0.7739, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 0.0002024529975970572, | |
| "loss": 0.7743, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.00020198519562698717, | |
| "loss": 0.7719, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 0.00020151739365691714, | |
| "loss": 0.7443, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 0.0002010495916868471, | |
| "loss": 0.7573, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 0.00020058178971677708, | |
| "loss": 0.7546, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 0.00020011398774670704, | |
| "loss": 0.7516, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 0.000199646185776637, | |
| "loss": 0.7444, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 0.00019917838380656698, | |
| "loss": 0.7656, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 0.00019871058183649695, | |
| "loss": 0.7452, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 0.00019824277986642692, | |
| "loss": 0.7555, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 0.0001977749778963569, | |
| "loss": 0.7486, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 0.00019730717592628686, | |
| "loss": 0.7509, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 0.00019683937395621683, | |
| "loss": 0.7484, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 0.0001963715719861468, | |
| "loss": 0.7554, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "learning_rate": 0.00019590377001607676, | |
| "loss": 0.7557, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 0.00019543596804600676, | |
| "loss": 0.7603, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 0.00019496816607593673, | |
| "loss": 0.7577, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "learning_rate": 0.0001945003641058667, | |
| "loss": 0.7641, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "learning_rate": 0.00019403256213579667, | |
| "loss": 0.7648, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 0.00019356476016572664, | |
| "loss": 0.755, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 0.0001930969581956566, | |
| "loss": 0.7445, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "learning_rate": 0.00019262915622558657, | |
| "loss": 0.7614, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 0.00019216135425551654, | |
| "loss": 0.7526, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "learning_rate": 0.0001916935522854465, | |
| "loss": 0.7493, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 0.00019122575031537648, | |
| "loss": 0.7299, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 0.00019075794834530645, | |
| "loss": 0.7379, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "learning_rate": 0.00019029014637523642, | |
| "loss": 0.7365, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 0.00018982234440516639, | |
| "loss": 0.7402, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "learning_rate": 0.00018935454243509636, | |
| "loss": 0.7409, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "learning_rate": 0.00018888674046502632, | |
| "loss": 0.7294, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "learning_rate": 0.0001884189384949563, | |
| "loss": 0.7467, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 11.39, | |
| "learning_rate": 0.00018795113652488626, | |
| "loss": 0.7357, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "learning_rate": 0.00018748333455481623, | |
| "loss": 0.744, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "learning_rate": 0.0001870155325847462, | |
| "loss": 0.741, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 11.53, | |
| "learning_rate": 0.0001865477306146762, | |
| "loss": 0.7404, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 0.00018607992864460614, | |
| "loss": 0.749, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "learning_rate": 0.00018561212667453613, | |
| "loss": 0.7388, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 0.00018514432470446607, | |
| "loss": 0.742, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "learning_rate": 0.00018467652273439607, | |
| "loss": 0.7481, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "learning_rate": 0.000184208720764326, | |
| "loss": 0.7553, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 11.81, | |
| "learning_rate": 0.000183740918794256, | |
| "loss": 0.7457, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "learning_rate": 0.00018327311682418595, | |
| "loss": 0.7447, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "learning_rate": 0.00018280531485411595, | |
| "loss": 0.752, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 11.95, | |
| "learning_rate": 0.0001823375128840459, | |
| "loss": 0.7419, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 0.00018186971091397588, | |
| "loss": 0.7412, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 0.00018140190894390583, | |
| "loss": 0.7262, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "learning_rate": 0.00018093410697383582, | |
| "loss": 0.7299, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "learning_rate": 0.00018046630500376576, | |
| "loss": 0.7279, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 12.18, | |
| "learning_rate": 0.00017999850303369576, | |
| "loss": 0.7308, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 0.0001795307010636257, | |
| "loss": 0.7305, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 12.27, | |
| "learning_rate": 0.0001790628990935557, | |
| "loss": 0.7348, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "learning_rate": 0.0001785950971234857, | |
| "loss": 0.7312, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "learning_rate": 0.00017812729515341563, | |
| "loss": 0.7275, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "learning_rate": 0.00017765949318334563, | |
| "loss": 0.7291, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "learning_rate": 0.00017719169121327557, | |
| "loss": 0.7265, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 0.00017672388924320557, | |
| "loss": 0.7224, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 12.55, | |
| "learning_rate": 0.0001762560872731355, | |
| "loss": 0.7232, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 12.6, | |
| "learning_rate": 0.0001757882853030655, | |
| "loss": 0.7272, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "learning_rate": 0.00017532048333299545, | |
| "loss": 0.7305, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 12.69, | |
| "learning_rate": 0.00017485268136292544, | |
| "loss": 0.7375, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "learning_rate": 0.00017438487939285539, | |
| "loss": 0.7377, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "learning_rate": 0.00017391707742278538, | |
| "loss": 0.7278, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 0.00017344927545271532, | |
| "loss": 0.7369, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 12.87, | |
| "learning_rate": 0.00017298147348264532, | |
| "loss": 0.7366, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "learning_rate": 0.00017251367151257526, | |
| "loss": 0.736, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "learning_rate": 0.00017204586954250526, | |
| "loss": 0.737, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "learning_rate": 0.0001715780675724352, | |
| "loss": 0.7301, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "learning_rate": 0.0001711102656023652, | |
| "loss": 0.7003, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "learning_rate": 0.00017064246363229514, | |
| "loss": 0.7132, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "learning_rate": 0.00017017466166222513, | |
| "loss": 0.7178, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "learning_rate": 0.00016970685969215513, | |
| "loss": 0.7187, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "learning_rate": 0.00016923905772208507, | |
| "loss": 0.7239, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 13.29, | |
| "learning_rate": 0.00016877125575201507, | |
| "loss": 0.7271, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "learning_rate": 0.000168303453781945, | |
| "loss": 0.7208, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "learning_rate": 0.000167835651811875, | |
| "loss": 0.7199, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "learning_rate": 0.00016736784984180495, | |
| "loss": 0.7094, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "learning_rate": 0.00016690004787173494, | |
| "loss": 0.7114, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "learning_rate": 0.00016643224590166488, | |
| "loss": 0.7196, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 13.57, | |
| "learning_rate": 0.00016596444393159488, | |
| "loss": 0.7222, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 13.62, | |
| "learning_rate": 0.00016549664196152482, | |
| "loss": 0.7345, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 13.66, | |
| "learning_rate": 0.00016502883999145482, | |
| "loss": 0.7208, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 13.71, | |
| "learning_rate": 0.00016456103802138476, | |
| "loss": 0.7298, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "learning_rate": 0.00016409323605131475, | |
| "loss": 0.7324, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 0.0001636254340812447, | |
| "loss": 0.7243, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "learning_rate": 0.0001631576321111747, | |
| "loss": 0.7215, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "learning_rate": 0.00016268983014110463, | |
| "loss": 0.7246, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "learning_rate": 0.00016222202817103463, | |
| "loss": 0.7219, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001617542262009646, | |
| "loss": 0.7248, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "learning_rate": 0.00016128642423089457, | |
| "loss": 0.7139, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 0.00016081862226082454, | |
| "loss": 0.7026, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "learning_rate": 0.0001603508202907545, | |
| "loss": 0.7107, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "learning_rate": 0.00015988301832068447, | |
| "loss": 0.7037, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "learning_rate": 0.00015941521635061444, | |
| "loss": 0.7145, | |
| "step": 614000 | |
| }, | |
| { | |
| "epoch": 14.26, | |
| "learning_rate": 0.0001589474143805444, | |
| "loss": 0.7181, | |
| "step": 616000 | |
| }, | |
| { | |
| "epoch": 14.31, | |
| "learning_rate": 0.00015847961241047438, | |
| "loss": 0.7026, | |
| "step": 618000 | |
| }, | |
| { | |
| "epoch": 14.36, | |
| "learning_rate": 0.00015801181044040435, | |
| "loss": 0.7142, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 0.00015754400847033432, | |
| "loss": 0.7087, | |
| "step": 622000 | |
| }, | |
| { | |
| "epoch": 14.45, | |
| "learning_rate": 0.0001570762065002643, | |
| "loss": 0.7109, | |
| "step": 624000 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 0.00015660840453019426, | |
| "loss": 0.7031, | |
| "step": 626000 | |
| }, | |
| { | |
| "epoch": 14.54, | |
| "learning_rate": 0.00015614060256012425, | |
| "loss": 0.7101, | |
| "step": 628000 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "learning_rate": 0.0001556728005900542, | |
| "loss": 0.7152, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 14.63, | |
| "learning_rate": 0.0001552049986199842, | |
| "loss": 0.7147, | |
| "step": 632000 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "learning_rate": 0.00015473719664991413, | |
| "loss": 0.7144, | |
| "step": 634000 | |
| }, | |
| { | |
| "epoch": 14.73, | |
| "learning_rate": 0.00015426939467984413, | |
| "loss": 0.7113, | |
| "step": 636000 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "learning_rate": 0.00015380159270977407, | |
| "loss": 0.7071, | |
| "step": 638000 | |
| }, | |
| { | |
| "epoch": 14.82, | |
| "learning_rate": 0.00015333379073970407, | |
| "loss": 0.7118, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "learning_rate": 0.00015286598876963403, | |
| "loss": 0.7098, | |
| "step": 642000 | |
| }, | |
| { | |
| "epoch": 14.91, | |
| "learning_rate": 0.000152398186799564, | |
| "loss": 0.706, | |
| "step": 644000 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "learning_rate": 0.00015193038482949397, | |
| "loss": 0.709, | |
| "step": 646000 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "learning_rate": 0.00015146258285942394, | |
| "loss": 0.7087, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 0.0001509947808893539, | |
| "loss": 0.6983, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "learning_rate": 0.00015052697891928388, | |
| "loss": 0.693, | |
| "step": 652000 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "learning_rate": 0.00015005917694921385, | |
| "loss": 0.6953, | |
| "step": 654000 | |
| }, | |
| { | |
| "epoch": 15.19, | |
| "learning_rate": 0.00014959137497914382, | |
| "loss": 0.6994, | |
| "step": 656000 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 0.00014912357300907379, | |
| "loss": 0.6975, | |
| "step": 658000 | |
| }, | |
| { | |
| "epoch": 15.28, | |
| "learning_rate": 0.00014865577103900375, | |
| "loss": 0.7047, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 0.00014818796906893372, | |
| "loss": 0.6975, | |
| "step": 662000 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 0.0001477201670988637, | |
| "loss": 0.704, | |
| "step": 664000 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "learning_rate": 0.00014725236512879366, | |
| "loss": 0.7042, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 15.47, | |
| "learning_rate": 0.00014678456315872363, | |
| "loss": 0.6917, | |
| "step": 668000 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "learning_rate": 0.0001463167611886536, | |
| "loss": 0.6914, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 15.56, | |
| "learning_rate": 0.00014584895921858357, | |
| "loss": 0.7018, | |
| "step": 672000 | |
| }, | |
| { | |
| "epoch": 15.61, | |
| "learning_rate": 0.00014538115724851354, | |
| "loss": 0.7016, | |
| "step": 674000 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 0.0001449133552784435, | |
| "loss": 0.7078, | |
| "step": 676000 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 0.00014444555330837347, | |
| "loss": 0.6932, | |
| "step": 678000 | |
| }, | |
| { | |
| "epoch": 15.75, | |
| "learning_rate": 0.00014397775133830344, | |
| "loss": 0.6964, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "learning_rate": 0.0001435099493682334, | |
| "loss": 0.6997, | |
| "step": 682000 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "learning_rate": 0.00014304214739816338, | |
| "loss": 0.7065, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "learning_rate": 0.00014257434542809335, | |
| "loss": 0.7047, | |
| "step": 686000 | |
| }, | |
| { | |
| "epoch": 15.93, | |
| "learning_rate": 0.00014210654345802332, | |
| "loss": 0.7154, | |
| "step": 688000 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 0.0001416387414879533, | |
| "loss": 0.6993, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "learning_rate": 0.00014117093951788326, | |
| "loss": 0.6969, | |
| "step": 692000 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "learning_rate": 0.00014070313754781325, | |
| "loss": 0.689, | |
| "step": 694000 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "learning_rate": 0.00014023533557774322, | |
| "loss": 0.6888, | |
| "step": 696000 | |
| }, | |
| { | |
| "epoch": 16.16, | |
| "learning_rate": 0.0001397675336076732, | |
| "loss": 0.6818, | |
| "step": 698000 | |
| }, | |
| { | |
| "epoch": 16.21, | |
| "learning_rate": 0.00013929973163760316, | |
| "loss": 0.693, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 16.26, | |
| "learning_rate": 0.00013883192966753313, | |
| "loss": 0.6909, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "learning_rate": 0.0001383641276974631, | |
| "loss": 0.6873, | |
| "step": 704000 | |
| }, | |
| { | |
| "epoch": 16.35, | |
| "learning_rate": 0.00013789632572739307, | |
| "loss": 0.6906, | |
| "step": 706000 | |
| }, | |
| { | |
| "epoch": 16.39, | |
| "learning_rate": 0.00013742852375732303, | |
| "loss": 0.6866, | |
| "step": 708000 | |
| }, | |
| { | |
| "epoch": 16.44, | |
| "learning_rate": 0.000136960721787253, | |
| "loss": 0.701, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "learning_rate": 0.00013649291981718297, | |
| "loss": 0.6937, | |
| "step": 712000 | |
| }, | |
| { | |
| "epoch": 16.53, | |
| "learning_rate": 0.00013602511784711294, | |
| "loss": 0.6907, | |
| "step": 714000 | |
| }, | |
| { | |
| "epoch": 16.58, | |
| "learning_rate": 0.0001355573158770429, | |
| "loss": 0.6897, | |
| "step": 716000 | |
| }, | |
| { | |
| "epoch": 16.63, | |
| "learning_rate": 0.00013508951390697288, | |
| "loss": 0.6952, | |
| "step": 718000 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "learning_rate": 0.00013462171193690285, | |
| "loss": 0.6865, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 16.72, | |
| "learning_rate": 0.00013415390996683282, | |
| "loss": 0.6935, | |
| "step": 722000 | |
| }, | |
| { | |
| "epoch": 16.76, | |
| "learning_rate": 0.00013368610799676278, | |
| "loss": 0.6919, | |
| "step": 724000 | |
| }, | |
| { | |
| "epoch": 16.81, | |
| "learning_rate": 0.00013321830602669275, | |
| "loss": 0.6904, | |
| "step": 726000 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "learning_rate": 0.00013275050405662272, | |
| "loss": 0.6964, | |
| "step": 728000 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "learning_rate": 0.00013228270208655272, | |
| "loss": 0.6943, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 16.95, | |
| "learning_rate": 0.0001318149001164827, | |
| "loss": 0.6949, | |
| "step": 732000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 0.00013134709814641266, | |
| "loss": 0.6943, | |
| "step": 734000 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "learning_rate": 0.00013087929617634263, | |
| "loss": 0.6851, | |
| "step": 736000 | |
| }, | |
| { | |
| "epoch": 17.09, | |
| "learning_rate": 0.0001304114942062726, | |
| "loss": 0.6802, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 0.00012994369223620256, | |
| "loss": 0.6801, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 17.18, | |
| "learning_rate": 0.00012947589026613253, | |
| "loss": 0.6756, | |
| "step": 742000 | |
| }, | |
| { | |
| "epoch": 17.23, | |
| "learning_rate": 0.0001290080882960625, | |
| "loss": 0.6824, | |
| "step": 744000 | |
| }, | |
| { | |
| "epoch": 17.27, | |
| "learning_rate": 0.00012854028632599247, | |
| "loss": 0.6894, | |
| "step": 746000 | |
| }, | |
| { | |
| "epoch": 17.32, | |
| "learning_rate": 0.00012807248435592244, | |
| "loss": 0.682, | |
| "step": 748000 | |
| }, | |
| { | |
| "epoch": 17.37, | |
| "learning_rate": 0.0001276046823858524, | |
| "loss": 0.6814, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 17.41, | |
| "learning_rate": 0.00012713688041578238, | |
| "loss": 0.6737, | |
| "step": 752000 | |
| }, | |
| { | |
| "epoch": 17.46, | |
| "learning_rate": 0.00012666907844571234, | |
| "loss": 0.6874, | |
| "step": 754000 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "learning_rate": 0.00012620127647564231, | |
| "loss": 0.6842, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 17.55, | |
| "learning_rate": 0.00012573347450557228, | |
| "loss": 0.6871, | |
| "step": 758000 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "learning_rate": 0.00012526567253550225, | |
| "loss": 0.6833, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "learning_rate": 0.00012479787056543222, | |
| "loss": 0.6818, | |
| "step": 762000 | |
| }, | |
| { | |
| "epoch": 17.69, | |
| "learning_rate": 0.0001243300685953622, | |
| "loss": 0.6824, | |
| "step": 764000 | |
| }, | |
| { | |
| "epoch": 17.74, | |
| "learning_rate": 0.00012386226662529219, | |
| "loss": 0.684, | |
| "step": 766000 | |
| }, | |
| { | |
| "epoch": 17.78, | |
| "learning_rate": 0.00012339446465522215, | |
| "loss": 0.6822, | |
| "step": 768000 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "learning_rate": 0.00012292666268515212, | |
| "loss": 0.68, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 17.88, | |
| "learning_rate": 0.0001224588607150821, | |
| "loss": 0.6842, | |
| "step": 772000 | |
| }, | |
| { | |
| "epoch": 17.92, | |
| "learning_rate": 0.00012199105874501206, | |
| "loss": 0.6827, | |
| "step": 774000 | |
| }, | |
| { | |
| "epoch": 17.97, | |
| "learning_rate": 0.00012152325677494203, | |
| "loss": 0.6901, | |
| "step": 776000 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "learning_rate": 0.000121055454804872, | |
| "loss": 0.6781, | |
| "step": 778000 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "learning_rate": 0.00012058765283480197, | |
| "loss": 0.6768, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 18.11, | |
| "learning_rate": 0.00012011985086473194, | |
| "loss": 0.6704, | |
| "step": 782000 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "learning_rate": 0.0001196520488946619, | |
| "loss": 0.6767, | |
| "step": 784000 | |
| }, | |
| { | |
| "epoch": 18.2, | |
| "learning_rate": 0.00011918424692459187, | |
| "loss": 0.6696, | |
| "step": 786000 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "learning_rate": 0.00011871644495452184, | |
| "loss": 0.6717, | |
| "step": 788000 | |
| }, | |
| { | |
| "epoch": 18.29, | |
| "learning_rate": 0.00011824864298445181, | |
| "loss": 0.6666, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 18.34, | |
| "learning_rate": 0.00011778084101438178, | |
| "loss": 0.6681, | |
| "step": 792000 | |
| }, | |
| { | |
| "epoch": 18.39, | |
| "learning_rate": 0.00011731303904431175, | |
| "loss": 0.6688, | |
| "step": 794000 | |
| }, | |
| { | |
| "epoch": 18.43, | |
| "learning_rate": 0.00011684523707424172, | |
| "loss": 0.6809, | |
| "step": 796000 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "learning_rate": 0.00011637743510417169, | |
| "loss": 0.6704, | |
| "step": 798000 | |
| }, | |
| { | |
| "epoch": 18.52, | |
| "learning_rate": 0.00011590963313410166, | |
| "loss": 0.6732, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 18.57, | |
| "learning_rate": 0.00011544183116403164, | |
| "loss": 0.6688, | |
| "step": 802000 | |
| }, | |
| { | |
| "epoch": 18.62, | |
| "learning_rate": 0.00011497402919396161, | |
| "loss": 0.6767, | |
| "step": 804000 | |
| }, | |
| { | |
| "epoch": 18.66, | |
| "learning_rate": 0.00011450622722389158, | |
| "loss": 0.6721, | |
| "step": 806000 | |
| }, | |
| { | |
| "epoch": 18.71, | |
| "learning_rate": 0.00011403842525382154, | |
| "loss": 0.6716, | |
| "step": 808000 | |
| }, | |
| { | |
| "epoch": 18.76, | |
| "learning_rate": 0.00011357062328375151, | |
| "loss": 0.673, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "learning_rate": 0.00011310282131368148, | |
| "loss": 0.6717, | |
| "step": 812000 | |
| }, | |
| { | |
| "epoch": 18.85, | |
| "learning_rate": 0.00011263501934361145, | |
| "loss": 0.6607, | |
| "step": 814000 | |
| }, | |
| { | |
| "epoch": 18.9, | |
| "learning_rate": 0.00011216721737354142, | |
| "loss": 0.6732, | |
| "step": 816000 | |
| }, | |
| { | |
| "epoch": 18.94, | |
| "learning_rate": 0.00011169941540347139, | |
| "loss": 0.6715, | |
| "step": 818000 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 0.00011123161343340136, | |
| "loss": 0.678, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "learning_rate": 0.00011076381146333133, | |
| "loss": 0.6618, | |
| "step": 822000 | |
| }, | |
| { | |
| "epoch": 19.08, | |
| "learning_rate": 0.0001102960094932613, | |
| "loss": 0.6589, | |
| "step": 824000 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "learning_rate": 0.00010982820752319126, | |
| "loss": 0.6624, | |
| "step": 826000 | |
| }, | |
| { | |
| "epoch": 19.17, | |
| "learning_rate": 0.00010936040555312123, | |
| "loss": 0.6618, | |
| "step": 828000 | |
| }, | |
| { | |
| "epoch": 19.22, | |
| "learning_rate": 0.0001088926035830512, | |
| "loss": 0.6666, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 19.27, | |
| "learning_rate": 0.00010842480161298117, | |
| "loss": 0.6645, | |
| "step": 832000 | |
| }, | |
| { | |
| "epoch": 19.31, | |
| "learning_rate": 0.00010795699964291114, | |
| "loss": 0.6667, | |
| "step": 834000 | |
| }, | |
| { | |
| "epoch": 19.36, | |
| "learning_rate": 0.00010748919767284111, | |
| "loss": 0.6649, | |
| "step": 836000 | |
| }, | |
| { | |
| "epoch": 19.4, | |
| "learning_rate": 0.0001070213957027711, | |
| "loss": 0.659, | |
| "step": 838000 | |
| }, | |
| { | |
| "epoch": 19.45, | |
| "learning_rate": 0.00010655359373270107, | |
| "loss": 0.6611, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 19.5, | |
| "learning_rate": 0.00010608579176263104, | |
| "loss": 0.6565, | |
| "step": 842000 | |
| }, | |
| { | |
| "epoch": 19.54, | |
| "learning_rate": 0.00010561798979256101, | |
| "loss": 0.6631, | |
| "step": 844000 | |
| }, | |
| { | |
| "epoch": 19.59, | |
| "learning_rate": 0.00010515018782249098, | |
| "loss": 0.6593, | |
| "step": 846000 | |
| }, | |
| { | |
| "epoch": 19.64, | |
| "learning_rate": 0.00010468238585242095, | |
| "loss": 0.6654, | |
| "step": 848000 | |
| }, | |
| { | |
| "epoch": 19.68, | |
| "learning_rate": 0.00010421458388235092, | |
| "loss": 0.6621, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 19.73, | |
| "learning_rate": 0.00010374678191228089, | |
| "loss": 0.661, | |
| "step": 852000 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 0.00010327897994221086, | |
| "loss": 0.6515, | |
| "step": 854000 | |
| }, | |
| { | |
| "epoch": 19.82, | |
| "learning_rate": 0.00010281117797214082, | |
| "loss": 0.6614, | |
| "step": 856000 | |
| }, | |
| { | |
| "epoch": 19.87, | |
| "learning_rate": 0.0001023433760020708, | |
| "loss": 0.6616, | |
| "step": 858000 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 0.00010187557403200076, | |
| "loss": 0.6598, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 19.96, | |
| "learning_rate": 0.00010140777206193073, | |
| "loss": 0.6616, | |
| "step": 862000 | |
| }, | |
| { | |
| "epoch": 20.01, | |
| "learning_rate": 0.0001009399700918607, | |
| "loss": 0.6679, | |
| "step": 864000 | |
| }, | |
| { | |
| "epoch": 20.05, | |
| "learning_rate": 0.00010047216812179067, | |
| "loss": 0.6518, | |
| "step": 866000 | |
| }, | |
| { | |
| "epoch": 20.1, | |
| "learning_rate": 0.00010000436615172064, | |
| "loss": 0.6463, | |
| "step": 868000 | |
| }, | |
| { | |
| "epoch": 20.15, | |
| "learning_rate": 9.95365641816506e-05, | |
| "loss": 0.6529, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 9.906876221158058e-05, | |
| "loss": 0.6463, | |
| "step": 872000 | |
| }, | |
| { | |
| "epoch": 20.24, | |
| "learning_rate": 9.860096024151056e-05, | |
| "loss": 0.6545, | |
| "step": 874000 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 9.813315827144053e-05, | |
| "loss": 0.6531, | |
| "step": 876000 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "learning_rate": 9.76653563013705e-05, | |
| "loss": 0.6442, | |
| "step": 878000 | |
| }, | |
| { | |
| "epoch": 20.38, | |
| "learning_rate": 9.719755433130046e-05, | |
| "loss": 0.65, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "learning_rate": 9.672975236123043e-05, | |
| "loss": 0.6518, | |
| "step": 882000 | |
| }, | |
| { | |
| "epoch": 20.47, | |
| "learning_rate": 9.62619503911604e-05, | |
| "loss": 0.6546, | |
| "step": 884000 | |
| }, | |
| { | |
| "epoch": 20.52, | |
| "learning_rate": 9.579414842109037e-05, | |
| "loss": 0.6494, | |
| "step": 886000 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "learning_rate": 9.532634645102035e-05, | |
| "loss": 0.654, | |
| "step": 888000 | |
| }, | |
| { | |
| "epoch": 20.61, | |
| "learning_rate": 9.485854448095032e-05, | |
| "loss": 0.6536, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 20.66, | |
| "learning_rate": 9.439074251088029e-05, | |
| "loss": 0.6547, | |
| "step": 892000 | |
| }, | |
| { | |
| "epoch": 20.7, | |
| "learning_rate": 9.392294054081026e-05, | |
| "loss": 0.6421, | |
| "step": 894000 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "learning_rate": 9.345513857074023e-05, | |
| "loss": 0.6506, | |
| "step": 896000 | |
| }, | |
| { | |
| "epoch": 20.79, | |
| "learning_rate": 9.29873366006702e-05, | |
| "loss": 0.6551, | |
| "step": 898000 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "learning_rate": 9.251953463060017e-05, | |
| "loss": 0.6542, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 20.89, | |
| "learning_rate": 9.205173266053014e-05, | |
| "loss": 0.6398, | |
| "step": 902000 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "learning_rate": 9.15839306904601e-05, | |
| "loss": 0.653, | |
| "step": 904000 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "learning_rate": 9.111612872039007e-05, | |
| "loss": 0.6476, | |
| "step": 906000 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "learning_rate": 9.064832675032004e-05, | |
| "loss": 0.6378, | |
| "step": 908000 | |
| }, | |
| { | |
| "epoch": 21.07, | |
| "learning_rate": 9.018052478025002e-05, | |
| "loss": 0.6413, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "learning_rate": 8.971272281017999e-05, | |
| "loss": 0.6368, | |
| "step": 912000 | |
| }, | |
| { | |
| "epoch": 21.16, | |
| "learning_rate": 8.924492084010996e-05, | |
| "loss": 0.6366, | |
| "step": 914000 | |
| }, | |
| { | |
| "epoch": 21.21, | |
| "learning_rate": 8.877711887003993e-05, | |
| "loss": 0.6455, | |
| "step": 916000 | |
| }, | |
| { | |
| "epoch": 21.26, | |
| "learning_rate": 8.83093168999699e-05, | |
| "loss": 0.6448, | |
| "step": 918000 | |
| }, | |
| { | |
| "epoch": 21.3, | |
| "learning_rate": 8.784151492989987e-05, | |
| "loss": 0.6371, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 21.35, | |
| "learning_rate": 8.737371295982984e-05, | |
| "loss": 0.6457, | |
| "step": 922000 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "learning_rate": 8.69059109897598e-05, | |
| "loss": 0.6399, | |
| "step": 924000 | |
| }, | |
| { | |
| "epoch": 21.44, | |
| "learning_rate": 8.643810901968978e-05, | |
| "loss": 0.6389, | |
| "step": 926000 | |
| }, | |
| { | |
| "epoch": 21.49, | |
| "learning_rate": 8.597030704961974e-05, | |
| "loss": 0.6444, | |
| "step": 928000 | |
| }, | |
| { | |
| "epoch": 21.54, | |
| "learning_rate": 8.550250507954971e-05, | |
| "loss": 0.6346, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 21.58, | |
| "learning_rate": 8.503470310947968e-05, | |
| "loss": 0.6394, | |
| "step": 932000 | |
| }, | |
| { | |
| "epoch": 21.63, | |
| "learning_rate": 8.456690113940965e-05, | |
| "loss": 0.6397, | |
| "step": 934000 | |
| }, | |
| { | |
| "epoch": 21.67, | |
| "learning_rate": 8.409909916933962e-05, | |
| "loss": 0.6411, | |
| "step": 936000 | |
| }, | |
| { | |
| "epoch": 21.72, | |
| "learning_rate": 8.363129719926959e-05, | |
| "loss": 0.6383, | |
| "step": 938000 | |
| }, | |
| { | |
| "epoch": 21.77, | |
| "learning_rate": 8.316349522919956e-05, | |
| "loss": 0.6416, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 21.81, | |
| "learning_rate": 8.269569325912953e-05, | |
| "loss": 0.635, | |
| "step": 942000 | |
| }, | |
| { | |
| "epoch": 21.86, | |
| "learning_rate": 8.22278912890595e-05, | |
| "loss": 0.6371, | |
| "step": 944000 | |
| }, | |
| { | |
| "epoch": 21.91, | |
| "learning_rate": 8.176008931898949e-05, | |
| "loss": 0.6412, | |
| "step": 946000 | |
| }, | |
| { | |
| "epoch": 21.95, | |
| "learning_rate": 8.129228734891946e-05, | |
| "loss": 0.6414, | |
| "step": 948000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 8.082448537884943e-05, | |
| "loss": 0.6285, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 22.04, | |
| "learning_rate": 8.03566834087794e-05, | |
| "loss": 0.6285, | |
| "step": 952000 | |
| }, | |
| { | |
| "epoch": 22.09, | |
| "learning_rate": 7.988888143870937e-05, | |
| "loss": 0.6268, | |
| "step": 954000 | |
| }, | |
| { | |
| "epoch": 22.14, | |
| "learning_rate": 7.942107946863934e-05, | |
| "loss": 0.6251, | |
| "step": 956000 | |
| }, | |
| { | |
| "epoch": 22.18, | |
| "learning_rate": 7.89532774985693e-05, | |
| "loss": 0.6306, | |
| "step": 958000 | |
| }, | |
| { | |
| "epoch": 22.23, | |
| "learning_rate": 7.848547552849927e-05, | |
| "loss": 0.6283, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 22.28, | |
| "learning_rate": 7.801767355842924e-05, | |
| "loss": 0.6264, | |
| "step": 962000 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "learning_rate": 7.754987158835921e-05, | |
| "loss": 0.6279, | |
| "step": 964000 | |
| }, | |
| { | |
| "epoch": 22.37, | |
| "learning_rate": 7.708206961828918e-05, | |
| "loss": 0.6272, | |
| "step": 966000 | |
| }, | |
| { | |
| "epoch": 22.41, | |
| "learning_rate": 7.661426764821915e-05, | |
| "loss": 0.6355, | |
| "step": 968000 | |
| }, | |
| { | |
| "epoch": 22.46, | |
| "learning_rate": 7.614646567814912e-05, | |
| "loss": 0.6349, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 22.51, | |
| "learning_rate": 7.567866370807909e-05, | |
| "loss": 0.6281, | |
| "step": 972000 | |
| }, | |
| { | |
| "epoch": 22.55, | |
| "learning_rate": 7.521086173800905e-05, | |
| "loss": 0.6269, | |
| "step": 974000 | |
| }, | |
| { | |
| "epoch": 22.6, | |
| "learning_rate": 7.474305976793904e-05, | |
| "loss": 0.6221, | |
| "step": 976000 | |
| }, | |
| { | |
| "epoch": 22.65, | |
| "learning_rate": 7.4275257797869e-05, | |
| "loss": 0.6295, | |
| "step": 978000 | |
| }, | |
| { | |
| "epoch": 22.69, | |
| "learning_rate": 7.380745582779897e-05, | |
| "loss": 0.6265, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 22.74, | |
| "learning_rate": 7.333965385772894e-05, | |
| "loss": 0.6203, | |
| "step": 982000 | |
| }, | |
| { | |
| "epoch": 22.79, | |
| "learning_rate": 7.287185188765891e-05, | |
| "loss": 0.6306, | |
| "step": 984000 | |
| }, | |
| { | |
| "epoch": 22.83, | |
| "learning_rate": 7.240404991758888e-05, | |
| "loss": 0.6319, | |
| "step": 986000 | |
| }, | |
| { | |
| "epoch": 22.88, | |
| "learning_rate": 7.193624794751885e-05, | |
| "loss": 0.6211, | |
| "step": 988000 | |
| }, | |
| { | |
| "epoch": 22.92, | |
| "learning_rate": 7.146844597744882e-05, | |
| "loss": 0.6244, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 22.97, | |
| "learning_rate": 7.100064400737879e-05, | |
| "loss": 0.6262, | |
| "step": 992000 | |
| }, | |
| { | |
| "epoch": 23.02, | |
| "learning_rate": 7.053284203730876e-05, | |
| "loss": 0.6166, | |
| "step": 994000 | |
| }, | |
| { | |
| "epoch": 23.06, | |
| "learning_rate": 7.006504006723873e-05, | |
| "loss": 0.6166, | |
| "step": 996000 | |
| }, | |
| { | |
| "epoch": 23.11, | |
| "learning_rate": 6.95972380971687e-05, | |
| "loss": 0.6175, | |
| "step": 998000 | |
| }, | |
| { | |
| "epoch": 23.16, | |
| "learning_rate": 6.912943612709866e-05, | |
| "loss": 0.6151, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "learning_rate": 6.866163415702863e-05, | |
| "loss": 0.6153, | |
| "step": 1002000 | |
| }, | |
| { | |
| "epoch": 23.25, | |
| "learning_rate": 6.81938321869586e-05, | |
| "loss": 0.6212, | |
| "step": 1004000 | |
| }, | |
| { | |
| "epoch": 23.29, | |
| "learning_rate": 6.772603021688858e-05, | |
| "loss": 0.6161, | |
| "step": 1006000 | |
| }, | |
| { | |
| "epoch": 23.34, | |
| "learning_rate": 6.725822824681855e-05, | |
| "loss": 0.6158, | |
| "step": 1008000 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "learning_rate": 6.679042627674852e-05, | |
| "loss": 0.6089, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 23.43, | |
| "learning_rate": 6.632262430667849e-05, | |
| "loss": 0.6166, | |
| "step": 1012000 | |
| }, | |
| { | |
| "epoch": 23.48, | |
| "learning_rate": 6.585482233660846e-05, | |
| "loss": 0.6134, | |
| "step": 1014000 | |
| }, | |
| { | |
| "epoch": 23.53, | |
| "learning_rate": 6.538702036653843e-05, | |
| "loss": 0.6171, | |
| "step": 1016000 | |
| }, | |
| { | |
| "epoch": 23.57, | |
| "learning_rate": 6.49192183964684e-05, | |
| "loss": 0.6122, | |
| "step": 1018000 | |
| }, | |
| { | |
| "epoch": 23.62, | |
| "learning_rate": 6.445141642639837e-05, | |
| "loss": 0.6176, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 23.67, | |
| "learning_rate": 6.398361445632833e-05, | |
| "loss": 0.6146, | |
| "step": 1022000 | |
| }, | |
| { | |
| "epoch": 23.71, | |
| "learning_rate": 6.35158124862583e-05, | |
| "loss": 0.6069, | |
| "step": 1024000 | |
| }, | |
| { | |
| "epoch": 23.76, | |
| "learning_rate": 6.304801051618829e-05, | |
| "loss": 0.6169, | |
| "step": 1026000 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "learning_rate": 6.258020854611825e-05, | |
| "loss": 0.6222, | |
| "step": 1028000 | |
| }, | |
| { | |
| "epoch": 23.85, | |
| "learning_rate": 6.211240657604822e-05, | |
| "loss": 0.6152, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 23.9, | |
| "learning_rate": 6.164460460597819e-05, | |
| "loss": 0.6181, | |
| "step": 1032000 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "learning_rate": 6.117680263590816e-05, | |
| "loss": 0.6123, | |
| "step": 1034000 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "learning_rate": 6.070900066583813e-05, | |
| "loss": 0.619, | |
| "step": 1036000 | |
| }, | |
| { | |
| "epoch": 24.04, | |
| "learning_rate": 6.02411986957681e-05, | |
| "loss": 0.6099, | |
| "step": 1038000 | |
| }, | |
| { | |
| "epoch": 24.08, | |
| "learning_rate": 5.977339672569807e-05, | |
| "loss": 0.6098, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 24.13, | |
| "learning_rate": 5.930559475562804e-05, | |
| "loss": 0.5965, | |
| "step": 1042000 | |
| }, | |
| { | |
| "epoch": 24.17, | |
| "learning_rate": 5.883779278555802e-05, | |
| "loss": 0.6059, | |
| "step": 1044000 | |
| }, | |
| { | |
| "epoch": 24.22, | |
| "learning_rate": 5.836999081548799e-05, | |
| "loss": 0.6021, | |
| "step": 1046000 | |
| }, | |
| { | |
| "epoch": 24.27, | |
| "learning_rate": 5.790218884541796e-05, | |
| "loss": 0.6093, | |
| "step": 1048000 | |
| }, | |
| { | |
| "epoch": 24.31, | |
| "learning_rate": 5.7434386875347926e-05, | |
| "loss": 0.6031, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 24.36, | |
| "learning_rate": 5.6966584905277895e-05, | |
| "loss": 0.6053, | |
| "step": 1052000 | |
| }, | |
| { | |
| "epoch": 24.41, | |
| "learning_rate": 5.6498782935207863e-05, | |
| "loss": 0.6036, | |
| "step": 1054000 | |
| }, | |
| { | |
| "epoch": 24.45, | |
| "learning_rate": 5.603098096513783e-05, | |
| "loss": 0.6011, | |
| "step": 1056000 | |
| }, | |
| { | |
| "epoch": 24.5, | |
| "learning_rate": 5.55631789950678e-05, | |
| "loss": 0.6035, | |
| "step": 1058000 | |
| }, | |
| { | |
| "epoch": 24.55, | |
| "learning_rate": 5.509537702499777e-05, | |
| "loss": 0.6066, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 24.59, | |
| "learning_rate": 5.4627575054927746e-05, | |
| "loss": 0.6061, | |
| "step": 1062000 | |
| }, | |
| { | |
| "epoch": 24.64, | |
| "learning_rate": 5.4159773084857714e-05, | |
| "loss": 0.6027, | |
| "step": 1064000 | |
| }, | |
| { | |
| "epoch": 24.68, | |
| "learning_rate": 5.369197111478768e-05, | |
| "loss": 0.6, | |
| "step": 1066000 | |
| }, | |
| { | |
| "epoch": 24.73, | |
| "learning_rate": 5.322416914471765e-05, | |
| "loss": 0.6062, | |
| "step": 1068000 | |
| }, | |
| { | |
| "epoch": 24.78, | |
| "learning_rate": 5.275636717464762e-05, | |
| "loss": 0.6003, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "learning_rate": 5.228856520457759e-05, | |
| "loss": 0.5988, | |
| "step": 1072000 | |
| }, | |
| { | |
| "epoch": 24.87, | |
| "learning_rate": 5.182076323450756e-05, | |
| "loss": 0.6096, | |
| "step": 1074000 | |
| }, | |
| { | |
| "epoch": 24.92, | |
| "learning_rate": 5.135296126443753e-05, | |
| "loss": 0.5988, | |
| "step": 1076000 | |
| }, | |
| { | |
| "epoch": 24.96, | |
| "learning_rate": 5.08851592943675e-05, | |
| "loss": 0.6086, | |
| "step": 1078000 | |
| }, | |
| { | |
| "epoch": 25.01, | |
| "learning_rate": 5.041735732429748e-05, | |
| "loss": 0.5942, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "learning_rate": 4.994955535422745e-05, | |
| "loss": 0.5954, | |
| "step": 1082000 | |
| }, | |
| { | |
| "epoch": 25.1, | |
| "learning_rate": 4.9481753384157417e-05, | |
| "loss": 0.5948, | |
| "step": 1084000 | |
| }, | |
| { | |
| "epoch": 25.15, | |
| "learning_rate": 4.9013951414087385e-05, | |
| "loss": 0.5946, | |
| "step": 1086000 | |
| }, | |
| { | |
| "epoch": 25.19, | |
| "learning_rate": 4.8546149444017354e-05, | |
| "loss": 0.5938, | |
| "step": 1088000 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "learning_rate": 4.807834747394732e-05, | |
| "loss": 0.5961, | |
| "step": 1090000 | |
| }, | |
| { | |
| "epoch": 25.29, | |
| "learning_rate": 4.761054550387729e-05, | |
| "loss": 0.5947, | |
| "step": 1092000 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "learning_rate": 4.714274353380726e-05, | |
| "loss": 0.6019, | |
| "step": 1094000 | |
| }, | |
| { | |
| "epoch": 25.38, | |
| "learning_rate": 4.667494156373723e-05, | |
| "loss": 0.5927, | |
| "step": 1096000 | |
| }, | |
| { | |
| "epoch": 25.43, | |
| "learning_rate": 4.6207139593667205e-05, | |
| "loss": 0.5921, | |
| "step": 1098000 | |
| }, | |
| { | |
| "epoch": 25.47, | |
| "learning_rate": 4.5739337623597174e-05, | |
| "loss": 0.5954, | |
| "step": 1100000 | |
| }, | |
| { | |
| "epoch": 25.52, | |
| "learning_rate": 4.527153565352715e-05, | |
| "loss": 0.5926, | |
| "step": 1102000 | |
| }, | |
| { | |
| "epoch": 25.56, | |
| "learning_rate": 4.480373368345712e-05, | |
| "loss": 0.5963, | |
| "step": 1104000 | |
| }, | |
| { | |
| "epoch": 25.61, | |
| "learning_rate": 4.433593171338709e-05, | |
| "loss": 0.5902, | |
| "step": 1106000 | |
| }, | |
| { | |
| "epoch": 25.66, | |
| "learning_rate": 4.3868129743317056e-05, | |
| "loss": 0.5952, | |
| "step": 1108000 | |
| }, | |
| { | |
| "epoch": 25.7, | |
| "learning_rate": 4.3400327773247025e-05, | |
| "loss": 0.5878, | |
| "step": 1110000 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "learning_rate": 4.2932525803176994e-05, | |
| "loss": 0.5926, | |
| "step": 1112000 | |
| }, | |
| { | |
| "epoch": 25.8, | |
| "learning_rate": 4.246472383310696e-05, | |
| "loss": 0.5854, | |
| "step": 1114000 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "learning_rate": 4.199692186303694e-05, | |
| "loss": 0.5916, | |
| "step": 1116000 | |
| }, | |
| { | |
| "epoch": 25.89, | |
| "learning_rate": 4.152911989296691e-05, | |
| "loss": 0.5869, | |
| "step": 1118000 | |
| }, | |
| { | |
| "epoch": 25.93, | |
| "learning_rate": 4.1061317922896876e-05, | |
| "loss": 0.5913, | |
| "step": 1120000 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "learning_rate": 4.0593515952826845e-05, | |
| "loss": 0.5822, | |
| "step": 1122000 | |
| }, | |
| { | |
| "epoch": 26.03, | |
| "learning_rate": 4.0125713982756814e-05, | |
| "loss": 0.5831, | |
| "step": 1124000 | |
| }, | |
| { | |
| "epoch": 26.07, | |
| "learning_rate": 3.965791201268678e-05, | |
| "loss": 0.5847, | |
| "step": 1126000 | |
| }, | |
| { | |
| "epoch": 26.12, | |
| "learning_rate": 3.919011004261675e-05, | |
| "loss": 0.5828, | |
| "step": 1128000 | |
| }, | |
| { | |
| "epoch": 26.17, | |
| "learning_rate": 3.872230807254672e-05, | |
| "loss": 0.5825, | |
| "step": 1130000 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "learning_rate": 3.825450610247669e-05, | |
| "loss": 0.5848, | |
| "step": 1132000 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "learning_rate": 3.778670413240667e-05, | |
| "loss": 0.5866, | |
| "step": 1134000 | |
| }, | |
| { | |
| "epoch": 26.31, | |
| "learning_rate": 3.7318902162336634e-05, | |
| "loss": 0.5832, | |
| "step": 1136000 | |
| }, | |
| { | |
| "epoch": 26.35, | |
| "learning_rate": 3.685110019226661e-05, | |
| "loss": 0.58, | |
| "step": 1138000 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "learning_rate": 3.638329822219658e-05, | |
| "loss": 0.5767, | |
| "step": 1140000 | |
| }, | |
| { | |
| "epoch": 26.44, | |
| "learning_rate": 3.591549625212655e-05, | |
| "loss": 0.5792, | |
| "step": 1142000 | |
| }, | |
| { | |
| "epoch": 26.49, | |
| "learning_rate": 3.5447694282056516e-05, | |
| "loss": 0.5764, | |
| "step": 1144000 | |
| }, | |
| { | |
| "epoch": 26.54, | |
| "learning_rate": 3.4979892311986485e-05, | |
| "loss": 0.5794, | |
| "step": 1146000 | |
| }, | |
| { | |
| "epoch": 26.58, | |
| "learning_rate": 3.451209034191646e-05, | |
| "loss": 0.5738, | |
| "step": 1148000 | |
| }, | |
| { | |
| "epoch": 26.63, | |
| "learning_rate": 3.404428837184643e-05, | |
| "loss": 0.5822, | |
| "step": 1150000 | |
| }, | |
| { | |
| "epoch": 26.68, | |
| "learning_rate": 3.35764864017764e-05, | |
| "loss": 0.5734, | |
| "step": 1152000 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "learning_rate": 3.310868443170637e-05, | |
| "loss": 0.5794, | |
| "step": 1154000 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "learning_rate": 3.2640882461636336e-05, | |
| "loss": 0.5853, | |
| "step": 1156000 | |
| }, | |
| { | |
| "epoch": 26.81, | |
| "learning_rate": 3.2173080491566305e-05, | |
| "loss": 0.5842, | |
| "step": 1158000 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "learning_rate": 3.170527852149628e-05, | |
| "loss": 0.5847, | |
| "step": 1160000 | |
| }, | |
| { | |
| "epoch": 26.91, | |
| "learning_rate": 3.123747655142625e-05, | |
| "loss": 0.5786, | |
| "step": 1162000 | |
| }, | |
| { | |
| "epoch": 26.95, | |
| "learning_rate": 3.076967458135622e-05, | |
| "loss": 0.5818, | |
| "step": 1164000 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 3.030187261128619e-05, | |
| "loss": 0.5722, | |
| "step": 1166000 | |
| }, | |
| { | |
| "epoch": 27.05, | |
| "learning_rate": 2.983407064121616e-05, | |
| "loss": 0.5726, | |
| "step": 1168000 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "learning_rate": 2.936626867114613e-05, | |
| "loss": 0.5745, | |
| "step": 1170000 | |
| }, | |
| { | |
| "epoch": 27.14, | |
| "learning_rate": 2.8898466701076097e-05, | |
| "loss": 0.5655, | |
| "step": 1172000 | |
| }, | |
| { | |
| "epoch": 27.19, | |
| "learning_rate": 2.843066473100607e-05, | |
| "loss": 0.5747, | |
| "step": 1174000 | |
| }, | |
| { | |
| "epoch": 27.23, | |
| "learning_rate": 2.7962862760936038e-05, | |
| "loss": 0.5734, | |
| "step": 1176000 | |
| }, | |
| { | |
| "epoch": 27.28, | |
| "learning_rate": 2.7495060790866007e-05, | |
| "loss": 0.5752, | |
| "step": 1178000 | |
| }, | |
| { | |
| "epoch": 27.32, | |
| "learning_rate": 2.7027258820795976e-05, | |
| "loss": 0.5784, | |
| "step": 1180000 | |
| }, | |
| { | |
| "epoch": 27.37, | |
| "learning_rate": 2.6559456850725945e-05, | |
| "loss": 0.5667, | |
| "step": 1182000 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "learning_rate": 2.609165488065592e-05, | |
| "loss": 0.5748, | |
| "step": 1184000 | |
| }, | |
| { | |
| "epoch": 27.46, | |
| "learning_rate": 2.562385291058589e-05, | |
| "loss": 0.5762, | |
| "step": 1186000 | |
| }, | |
| { | |
| "epoch": 27.51, | |
| "learning_rate": 2.5156050940515858e-05, | |
| "loss": 0.5783, | |
| "step": 1188000 | |
| }, | |
| { | |
| "epoch": 27.56, | |
| "learning_rate": 2.4688248970445827e-05, | |
| "loss": 0.5668, | |
| "step": 1190000 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "learning_rate": 2.42204470003758e-05, | |
| "loss": 0.5671, | |
| "step": 1192000 | |
| }, | |
| { | |
| "epoch": 27.65, | |
| "learning_rate": 2.3752645030305768e-05, | |
| "loss": 0.5688, | |
| "step": 1194000 | |
| }, | |
| { | |
| "epoch": 27.69, | |
| "learning_rate": 2.328484306023574e-05, | |
| "loss": 0.5643, | |
| "step": 1196000 | |
| }, | |
| { | |
| "epoch": 27.74, | |
| "learning_rate": 2.281704109016571e-05, | |
| "loss": 0.5688, | |
| "step": 1198000 | |
| }, | |
| { | |
| "epoch": 27.79, | |
| "learning_rate": 2.2349239120095678e-05, | |
| "loss": 0.5651, | |
| "step": 1200000 | |
| }, | |
| { | |
| "epoch": 27.83, | |
| "learning_rate": 2.188143715002565e-05, | |
| "loss": 0.5705, | |
| "step": 1202000 | |
| }, | |
| { | |
| "epoch": 27.88, | |
| "learning_rate": 2.141363517995562e-05, | |
| "loss": 0.5684, | |
| "step": 1204000 | |
| }, | |
| { | |
| "epoch": 27.93, | |
| "learning_rate": 2.0945833209885588e-05, | |
| "loss": 0.567, | |
| "step": 1206000 | |
| }, | |
| { | |
| "epoch": 27.97, | |
| "learning_rate": 2.0478031239815557e-05, | |
| "loss": 0.5711, | |
| "step": 1208000 | |
| }, | |
| { | |
| "epoch": 28.02, | |
| "learning_rate": 2.0010229269745533e-05, | |
| "loss": 0.5684, | |
| "step": 1210000 | |
| }, | |
| { | |
| "epoch": 28.06, | |
| "learning_rate": 1.95424272996755e-05, | |
| "loss": 0.5604, | |
| "step": 1212000 | |
| }, | |
| { | |
| "epoch": 28.11, | |
| "learning_rate": 1.907462532960547e-05, | |
| "loss": 0.5615, | |
| "step": 1214000 | |
| }, | |
| { | |
| "epoch": 28.16, | |
| "learning_rate": 1.860682335953544e-05, | |
| "loss": 0.5679, | |
| "step": 1216000 | |
| }, | |
| { | |
| "epoch": 28.2, | |
| "learning_rate": 1.813902138946541e-05, | |
| "loss": 0.5644, | |
| "step": 1218000 | |
| }, | |
| { | |
| "epoch": 28.25, | |
| "learning_rate": 1.767121941939538e-05, | |
| "loss": 0.5663, | |
| "step": 1220000 | |
| }, | |
| { | |
| "epoch": 28.3, | |
| "learning_rate": 1.720341744932535e-05, | |
| "loss": 0.5584, | |
| "step": 1222000 | |
| }, | |
| { | |
| "epoch": 28.34, | |
| "learning_rate": 1.6735615479255318e-05, | |
| "loss": 0.558, | |
| "step": 1224000 | |
| }, | |
| { | |
| "epoch": 28.39, | |
| "learning_rate": 1.626781350918529e-05, | |
| "loss": 0.5575, | |
| "step": 1226000 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "learning_rate": 1.580001153911526e-05, | |
| "loss": 0.5728, | |
| "step": 1228000 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "learning_rate": 1.533220956904523e-05, | |
| "loss": 0.5653, | |
| "step": 1230000 | |
| }, | |
| { | |
| "epoch": 28.53, | |
| "learning_rate": 1.48644075989752e-05, | |
| "loss": 0.5603, | |
| "step": 1232000 | |
| }, | |
| { | |
| "epoch": 28.57, | |
| "learning_rate": 1.4396605628905172e-05, | |
| "loss": 0.5613, | |
| "step": 1234000 | |
| }, | |
| { | |
| "epoch": 28.62, | |
| "learning_rate": 1.3928803658835141e-05, | |
| "loss": 0.5563, | |
| "step": 1236000 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "learning_rate": 1.346100168876511e-05, | |
| "loss": 0.5705, | |
| "step": 1238000 | |
| }, | |
| { | |
| "epoch": 28.71, | |
| "learning_rate": 1.299319971869508e-05, | |
| "loss": 0.5568, | |
| "step": 1240000 | |
| }, | |
| { | |
| "epoch": 28.76, | |
| "learning_rate": 1.252539774862505e-05, | |
| "loss": 0.5517, | |
| "step": 1242000 | |
| }, | |
| { | |
| "epoch": 28.81, | |
| "learning_rate": 1.2057595778555022e-05, | |
| "loss": 0.5647, | |
| "step": 1244000 | |
| }, | |
| { | |
| "epoch": 28.85, | |
| "learning_rate": 1.158979380848499e-05, | |
| "loss": 0.5551, | |
| "step": 1246000 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "learning_rate": 1.1121991838414961e-05, | |
| "loss": 0.5598, | |
| "step": 1248000 | |
| }, | |
| { | |
| "epoch": 28.94, | |
| "learning_rate": 1.0654189868344932e-05, | |
| "loss": 0.562, | |
| "step": 1250000 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "learning_rate": 1.0186387898274902e-05, | |
| "loss": 0.5563, | |
| "step": 1252000 | |
| }, | |
| { | |
| "epoch": 29.04, | |
| "learning_rate": 9.718585928204871e-06, | |
| "loss": 0.5606, | |
| "step": 1254000 | |
| }, | |
| { | |
| "epoch": 29.08, | |
| "learning_rate": 9.250783958134842e-06, | |
| "loss": 0.5509, | |
| "step": 1256000 | |
| }, | |
| { | |
| "epoch": 29.13, | |
| "learning_rate": 8.782981988064812e-06, | |
| "loss": 0.551, | |
| "step": 1258000 | |
| }, | |
| { | |
| "epoch": 29.18, | |
| "learning_rate": 8.315180017994783e-06, | |
| "loss": 0.5548, | |
| "step": 1260000 | |
| }, | |
| { | |
| "epoch": 29.22, | |
| "learning_rate": 7.847378047924752e-06, | |
| "loss": 0.5562, | |
| "step": 1262000 | |
| }, | |
| { | |
| "epoch": 29.27, | |
| "learning_rate": 7.3795760778547214e-06, | |
| "loss": 0.5563, | |
| "step": 1264000 | |
| }, | |
| { | |
| "epoch": 29.32, | |
| "learning_rate": 6.911774107784692e-06, | |
| "loss": 0.5551, | |
| "step": 1266000 | |
| }, | |
| { | |
| "epoch": 29.36, | |
| "learning_rate": 6.443972137714662e-06, | |
| "loss": 0.555, | |
| "step": 1268000 | |
| }, | |
| { | |
| "epoch": 29.41, | |
| "learning_rate": 5.976170167644632e-06, | |
| "loss": 0.554, | |
| "step": 1270000 | |
| }, | |
| { | |
| "epoch": 29.45, | |
| "learning_rate": 5.508368197574602e-06, | |
| "loss": 0.5522, | |
| "step": 1272000 | |
| }, | |
| { | |
| "epoch": 29.5, | |
| "learning_rate": 5.0405662275045725e-06, | |
| "loss": 0.5522, | |
| "step": 1274000 | |
| }, | |
| { | |
| "epoch": 29.55, | |
| "learning_rate": 4.572764257434542e-06, | |
| "loss": 0.5601, | |
| "step": 1276000 | |
| }, | |
| { | |
| "epoch": 29.59, | |
| "learning_rate": 4.104962287364513e-06, | |
| "loss": 0.5578, | |
| "step": 1278000 | |
| }, | |
| { | |
| "epoch": 29.64, | |
| "learning_rate": 3.6371603172944825e-06, | |
| "loss": 0.5602, | |
| "step": 1280000 | |
| }, | |
| { | |
| "epoch": 29.69, | |
| "learning_rate": 3.1693583472244526e-06, | |
| "loss": 0.5517, | |
| "step": 1282000 | |
| }, | |
| { | |
| "epoch": 29.73, | |
| "learning_rate": 2.701556377154423e-06, | |
| "loss": 0.5538, | |
| "step": 1284000 | |
| }, | |
| { | |
| "epoch": 29.78, | |
| "learning_rate": 2.233754407084393e-06, | |
| "loss": 0.549, | |
| "step": 1286000 | |
| }, | |
| { | |
| "epoch": 29.82, | |
| "learning_rate": 1.765952437014363e-06, | |
| "loss": 0.5595, | |
| "step": 1288000 | |
| }, | |
| { | |
| "epoch": 29.87, | |
| "learning_rate": 1.2981504669443331e-06, | |
| "loss": 0.5546, | |
| "step": 1290000 | |
| }, | |
| { | |
| "epoch": 29.92, | |
| "learning_rate": 8.303484968743031e-07, | |
| "loss": 0.5547, | |
| "step": 1292000 | |
| }, | |
| { | |
| "epoch": 29.96, | |
| "learning_rate": 3.6254652680427316e-07, | |
| "loss": 0.5494, | |
| "step": 1294000 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 1295550, | |
| "total_flos": 2.6480449905256835e+21, | |
| "train_loss": 0.7350748663054241, | |
| "train_runtime": 658563.1153, | |
| "train_samples_per_second": 31.476, | |
| "train_steps_per_second": 1.967 | |
| } | |
| ], | |
| "logging_steps": 2000, | |
| "max_steps": 1295550, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "total_flos": 2.6480449905256835e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |