diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -26457,3 +26457,1009 @@ Use FP16 precision: False 02/25/2022 11:15:45 - INFO - codeparrot_training - Step 25998: {'lr': 0.0002500327249233815, 'samples': 13311488, 'steps': 25998, 'loss/train': 1.2565412521362305} 02/25/2022 11:15:48 - INFO - codeparrot_training - Step 25999: {'lr': 0.0002500163624617258, 'samples': 13312000, 'steps': 25999, 'loss/train': 1.7097930908203125} 02/25/2022 11:15:48 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 11:16:06 - WARNING - huggingface_hub.repository - Several commits (26) will be pushed upstream. +02/25/2022 11:16:06 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 11:16:40 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 5549929..86c4c54 floral-grass-11 -> floral-grass-11 + +02/25/2022 11:16:47 - INFO - codeparrot_training - Step 26000: {'lr': 0.00025, 'samples': 13312512, 'steps': 26000, 'loss/train': 0.37809640169143677} +02/25/2022 11:16:51 - INFO - codeparrot_training - Step 26001: {'lr': 0.00024998363753827424, 'samples': 13313024, 'steps': 26001, 'loss/train': 1.5100133419036865} +02/25/2022 11:16:56 - INFO - codeparrot_training - Step 26002: {'lr': 0.0002499672750766185, 'samples': 13313536, 'steps': 26002, 'loss/train': 2.3396451473236084} +02/25/2022 11:17:00 - INFO - codeparrot_training - Step 26003: {'lr': 0.0002499509126151031, 'samples': 13314048, 'steps': 26003, 'loss/train': 1.9778226613998413} +02/25/2022 11:17:05 - INFO - codeparrot_training - Step 26004: {'lr': 0.0002499345501537979, 'samples': 13314560, 'steps': 26004, 'loss/train': 0.13023696839809418} +02/25/2022 11:17:09 - INFO - codeparrot_training - Step 26005: {'lr': 0.000249918187692773, 'samples': 13315072, 'steps': 26005, 'loss/train': 2.3946428298950195} +02/25/2022 11:17:14 - INFO - codeparrot_training - Step 26006: {'lr': 0.0002499018252320986, 'samples': 13315584, 'steps': 26006, 'loss/train': 1.8516818284988403} +02/25/2022 11:17:18 - INFO - codeparrot_training - Step 26007: {'lr': 0.0002498854627718448, 'samples': 13316096, 'steps': 26007, 'loss/train': 2.227653980255127} +02/25/2022 11:17:24 - INFO - codeparrot_training - Step 26008: {'lr': 0.0002498691003120816, 'samples': 13316608, 'steps': 26008, 'loss/train': 0.6200409531593323} +02/25/2022 11:17:27 - INFO - codeparrot_training - Step 26009: {'lr': 0.0002498527378528791, 'samples': 13317120, 'steps': 26009, 'loss/train': 1.5642789602279663} +02/25/2022 11:17:33 - INFO - codeparrot_training - Step 26010: {'lr': 0.0002498363753943074, 'samples': 13317632, 'steps': 26010, 'loss/train': 1.069700002670288} +02/25/2022 11:17:36 - INFO - codeparrot_training - Step 26011: {'lr': 0.0002498200129364368, 'samples': 13318144, 'steps': 26011, 'loss/train': 1.6406219005584717} +02/25/2022 11:17:43 - INFO - codeparrot_training - Step 26012: {'lr': 0.00024980365047933705, 'samples': 13318656, 'steps': 26012, 'loss/train': 1.312057614326477} +02/25/2022 11:17:47 - INFO - codeparrot_training - Step 26013: {'lr': 0.0002497872880230784, 'samples': 13319168, 'steps': 26013, 'loss/train': 2.089871406555176} +02/25/2022 11:17:52 - INFO - codeparrot_training - Step 26014: {'lr': 0.000249770925567731, 'samples': 13319680, 'steps': 26014, 'loss/train': 1.568368911743164} +02/25/2022 11:17:56 - INFO - codeparrot_training - Step 26015: {'lr': 0.00024975456311336484, 'samples': 13320192, 'steps': 26015, 'loss/train': 2.7935009002685547} +02/25/2022 11:18:01 - INFO - codeparrot_training - Step 26016: {'lr': 0.00024973820066005005, 'samples': 13320704, 'steps': 26016, 'loss/train': 2.1883440017700195} +02/25/2022 11:18:05 - INFO - codeparrot_training - Step 26017: {'lr': 0.00024972183820785675, 'samples': 13321216, 'steps': 26017, 'loss/train': 2.4874684810638428} +02/25/2022 11:18:10 - INFO - codeparrot_training - Step 26018: {'lr': 0.00024970547575685494, 'samples': 13321728, 'steps': 26018, 'loss/train': 1.6686476469039917} +02/25/2022 11:18:14 - INFO - codeparrot_training - Step 26019: {'lr': 0.00024968911330711487, 'samples': 13322240, 'steps': 26019, 'loss/train': 1.7602189779281616} +02/25/2022 11:18:19 - INFO - codeparrot_training - Step 26020: {'lr': 0.00024967275085870653, 'samples': 13322752, 'steps': 26020, 'loss/train': 1.928260087966919} +02/25/2022 11:18:23 - INFO - codeparrot_training - Step 26021: {'lr': 0.0002496563884117, 'samples': 13323264, 'steps': 26021, 'loss/train': 2.0191547870635986} +02/25/2022 11:18:30 - INFO - codeparrot_training - Step 26022: {'lr': 0.00024964002596616544, 'samples': 13323776, 'steps': 26022, 'loss/train': 1.2096259593963623} +02/25/2022 11:18:34 - INFO - codeparrot_training - Step 26023: {'lr': 0.0002496236635221728, 'samples': 13324288, 'steps': 26023, 'loss/train': 1.2350517511367798} +02/25/2022 11:18:39 - INFO - codeparrot_training - Step 26024: {'lr': 0.00024960730107979233, 'samples': 13324800, 'steps': 26024, 'loss/train': 3.2793495655059814} +02/25/2022 11:18:43 - INFO - codeparrot_training - Step 26025: {'lr': 0.000249590938639094, 'samples': 13325312, 'steps': 26025, 'loss/train': 2.6286778450012207} +02/25/2022 11:18:48 - INFO - codeparrot_training - Step 26026: {'lr': 0.00024957457620014805, 'samples': 13325824, 'steps': 26026, 'loss/train': 2.493715524673462} +02/25/2022 11:18:52 - INFO - codeparrot_training - Step 26027: {'lr': 0.00024955821376302436, 'samples': 13326336, 'steps': 26027, 'loss/train': 1.7127689123153687} +02/25/2022 11:18:57 - INFO - codeparrot_training - Step 26028: {'lr': 0.0002495418513277932, 'samples': 13326848, 'steps': 26028, 'loss/train': 2.8942880630493164} +02/25/2022 11:19:01 - INFO - codeparrot_training - Step 26029: {'lr': 0.0002495254888945247, 'samples': 13327360, 'steps': 26029, 'loss/train': 1.8060437440872192} +02/25/2022 11:19:06 - INFO - codeparrot_training - Step 26030: {'lr': 0.00024950912646328876, 'samples': 13327872, 'steps': 26030, 'loss/train': 0.9384188055992126} +02/25/2022 11:19:10 - INFO - codeparrot_training - Step 26031: {'lr': 0.0002494927640341555, 'samples': 13328384, 'steps': 26031, 'loss/train': 1.7117457389831543} +02/25/2022 11:19:17 - INFO - codeparrot_training - Step 26032: {'lr': 0.00024947640160719514, 'samples': 13328896, 'steps': 26032, 'loss/train': 1.9165449142456055} +02/25/2022 11:19:20 - INFO - codeparrot_training - Step 26033: {'lr': 0.0002494600391824778, 'samples': 13329408, 'steps': 26033, 'loss/train': 1.6713306903839111} +02/25/2022 11:19:26 - INFO - codeparrot_training - Step 26034: {'lr': 0.0002494436767600734, 'samples': 13329920, 'steps': 26034, 'loss/train': 3.214975118637085} +02/25/2022 11:19:30 - INFO - codeparrot_training - Step 26035: {'lr': 0.00024942731434005207, 'samples': 13330432, 'steps': 26035, 'loss/train': 0.9205017685890198} +02/25/2022 11:19:35 - INFO - codeparrot_training - Step 26036: {'lr': 0.00024941095192248397, 'samples': 13330944, 'steps': 26036, 'loss/train': 1.7402057647705078} +02/25/2022 11:19:39 - INFO - codeparrot_training - Step 26037: {'lr': 0.0002493945895074391, 'samples': 13331456, 'steps': 26037, 'loss/train': 2.598801851272583} +02/25/2022 11:19:44 - INFO - codeparrot_training - Step 26038: {'lr': 0.00024937822709498786, 'samples': 13331968, 'steps': 26038, 'loss/train': 2.4083430767059326} +02/25/2022 11:19:48 - INFO - codeparrot_training - Step 26039: {'lr': 0.0002493618646851999, 'samples': 13332480, 'steps': 26039, 'loss/train': 1.244691014289856} +02/25/2022 11:19:53 - INFO - codeparrot_training - Step 26040: {'lr': 0.0002493455022781455, 'samples': 13332992, 'steps': 26040, 'loss/train': 3.324190139770508} +02/25/2022 11:19:57 - INFO - codeparrot_training - Step 26041: {'lr': 0.00024932913987389476, 'samples': 13333504, 'steps': 26041, 'loss/train': 1.1800880432128906} +02/25/2022 11:20:04 - INFO - codeparrot_training - Step 26042: {'lr': 0.0002493127774725179, 'samples': 13334016, 'steps': 26042, 'loss/train': 0.3366134762763977} +02/25/2022 11:20:07 - INFO - codeparrot_training - Step 26043: {'lr': 0.0002492964150740848, 'samples': 13334528, 'steps': 26043, 'loss/train': 1.7161277532577515} +02/25/2022 11:20:13 - INFO - codeparrot_training - Step 26044: {'lr': 0.00024928005267866563, 'samples': 13335040, 'steps': 26044, 'loss/train': 1.6405366659164429} +02/25/2022 11:20:16 - INFO - codeparrot_training - Step 26045: {'lr': 0.00024926369028633043, 'samples': 13335552, 'steps': 26045, 'loss/train': 2.173854351043701} +02/25/2022 11:20:22 - INFO - codeparrot_training - Step 26046: {'lr': 0.0002492473278971495, 'samples': 13336064, 'steps': 26046, 'loss/train': 2.255094051361084} +02/25/2022 11:20:26 - INFO - codeparrot_training - Step 26047: {'lr': 0.00024923096551119267, 'samples': 13336576, 'steps': 26047, 'loss/train': 2.2586750984191895} +02/25/2022 11:20:31 - INFO - codeparrot_training - Step 26048: {'lr': 0.0002492146031285301, 'samples': 13337088, 'steps': 26048, 'loss/train': 2.7552316188812256} +02/25/2022 11:20:35 - INFO - codeparrot_training - Step 26049: {'lr': 0.000249198240749232, 'samples': 13337600, 'steps': 26049, 'loss/train': 2.1905179023742676} +02/25/2022 11:20:40 - INFO - codeparrot_training - Step 26050: {'lr': 0.0002491818783733683, 'samples': 13338112, 'steps': 26050, 'loss/train': 2.317699909210205} +02/25/2022 11:20:44 - INFO - codeparrot_training - Step 26051: {'lr': 0.0002491655160010093, 'samples': 13338624, 'steps': 26051, 'loss/train': 1.9859191179275513} +02/25/2022 11:20:49 - INFO - codeparrot_training - Step 26052: {'lr': 0.0002491491536322249, 'samples': 13339136, 'steps': 26052, 'loss/train': 1.8399955034255981} +02/25/2022 11:20:53 - INFO - codeparrot_training - Step 26053: {'lr': 0.0002491327912670852, 'samples': 13339648, 'steps': 26053, 'loss/train': 1.77420175075531} +02/25/2022 11:20:59 - INFO - codeparrot_training - Step 26054: {'lr': 0.0002491164289056604, 'samples': 13340160, 'steps': 26054, 'loss/train': 2.473858118057251} +02/25/2022 11:21:02 - INFO - codeparrot_training - Step 26055: {'lr': 0.0002491000665480206, 'samples': 13340672, 'steps': 26055, 'loss/train': 2.050698757171631} +02/25/2022 11:21:07 - INFO - codeparrot_training - Step 26056: {'lr': 0.00024908370419423573, 'samples': 13341184, 'steps': 26056, 'loss/train': 1.9810444116592407} +02/25/2022 11:21:11 - INFO - codeparrot_training - Step 26057: {'lr': 0.000249067341844376, 'samples': 13341696, 'steps': 26057, 'loss/train': 1.2977389097213745} +02/25/2022 11:21:18 - INFO - codeparrot_training - Step 26058: {'lr': 0.00024905097949851144, 'samples': 13342208, 'steps': 26058, 'loss/train': 2.452275037765503} +02/25/2022 11:21:22 - INFO - codeparrot_training - Step 26059: {'lr': 0.0002490346171567124, 'samples': 13342720, 'steps': 26059, 'loss/train': 2.565781831741333} +02/25/2022 11:21:28 - INFO - codeparrot_training - Step 26060: {'lr': 0.0002490182548190485, 'samples': 13343232, 'steps': 26060, 'loss/train': 2.2402310371398926} +02/25/2022 11:21:31 - INFO - codeparrot_training - Step 26061: {'lr': 0.0002490018924855902, 'samples': 13343744, 'steps': 26061, 'loss/train': 1.701701283454895} +02/25/2022 11:21:37 - INFO - codeparrot_training - Step 26062: {'lr': 0.00024898553015640745, 'samples': 13344256, 'steps': 26062, 'loss/train': 1.6444568634033203} +02/25/2022 11:21:40 - INFO - codeparrot_training - Step 26063: {'lr': 0.00024896916783157035, 'samples': 13344768, 'steps': 26063, 'loss/train': 1.571385145187378} +02/25/2022 11:21:46 - INFO - codeparrot_training - Step 26064: {'lr': 0.0002489528055111491, 'samples': 13345280, 'steps': 26064, 'loss/train': 1.5612801313400269} +02/25/2022 11:21:49 - INFO - codeparrot_training - Step 26065: {'lr': 0.00024893644319521355, 'samples': 13345792, 'steps': 26065, 'loss/train': 1.6536020040512085} +02/25/2022 11:21:55 - INFO - codeparrot_training - Step 26066: {'lr': 0.00024892008088383405, 'samples': 13346304, 'steps': 26066, 'loss/train': 0.6262559294700623} +02/25/2022 11:21:58 - INFO - codeparrot_training - Step 26067: {'lr': 0.0002489037185770805, 'samples': 13346816, 'steps': 26067, 'loss/train': 1.5273154973983765} +02/25/2022 11:22:05 - INFO - codeparrot_training - Step 26068: {'lr': 0.0002488873562750232, 'samples': 13347328, 'steps': 26068, 'loss/train': 1.6312168836593628} +02/25/2022 11:22:09 - INFO - codeparrot_training - Step 26069: {'lr': 0.00024887099397773204, 'samples': 13347840, 'steps': 26069, 'loss/train': 1.3209587335586548} +02/25/2022 11:22:14 - INFO - codeparrot_training - Step 26070: {'lr': 0.0002488546316852771, 'samples': 13348352, 'steps': 26070, 'loss/train': 2.204545497894287} +02/25/2022 11:22:18 - INFO - codeparrot_training - Step 26071: {'lr': 0.00024883826939772866, 'samples': 13348864, 'steps': 26071, 'loss/train': 1.6728625297546387} +02/25/2022 11:22:23 - INFO - codeparrot_training - Step 26072: {'lr': 0.0002488219071151567, 'samples': 13349376, 'steps': 26072, 'loss/train': 1.9980379343032837} +02/25/2022 11:22:27 - INFO - codeparrot_training - Step 26073: {'lr': 0.0002488055448376313, 'samples': 13349888, 'steps': 26073, 'loss/train': 2.60790753364563} +02/25/2022 11:22:32 - INFO - codeparrot_training - Step 26074: {'lr': 0.00024878918256522256, 'samples': 13350400, 'steps': 26074, 'loss/train': 1.8955438137054443} +02/25/2022 11:22:36 - INFO - codeparrot_training - Step 26075: {'lr': 0.0002487728202980005, 'samples': 13350912, 'steps': 26075, 'loss/train': 1.9609507322311401} +02/25/2022 11:22:41 - INFO - codeparrot_training - Step 26076: {'lr': 0.00024875645803603536, 'samples': 13351424, 'steps': 26076, 'loss/train': 2.889387845993042} +02/25/2022 11:22:45 - INFO - codeparrot_training - Step 26077: {'lr': 0.0002487400957793972, 'samples': 13351936, 'steps': 26077, 'loss/train': 1.2577725648880005} +02/25/2022 11:22:52 - INFO - codeparrot_training - Step 26078: {'lr': 0.00024872373352815603, 'samples': 13352448, 'steps': 26078, 'loss/train': 1.705277919769287} +02/25/2022 11:22:56 - INFO - codeparrot_training - Step 26079: {'lr': 0.000248707371282382, 'samples': 13352960, 'steps': 26079, 'loss/train': 1.1163451671600342} +02/25/2022 11:23:01 - INFO - codeparrot_training - Step 26080: {'lr': 0.00024869100904214507, 'samples': 13353472, 'steps': 26080, 'loss/train': 1.8644014596939087} +02/25/2022 11:23:04 - INFO - codeparrot_training - Step 26081: {'lr': 0.00024867464680751564, 'samples': 13353984, 'steps': 26081, 'loss/train': 1.5341618061065674} +02/25/2022 11:23:10 - INFO - codeparrot_training - Step 26082: {'lr': 0.0002486582845785635, 'samples': 13354496, 'steps': 26082, 'loss/train': 2.293531656265259} +02/25/2022 11:23:13 - INFO - codeparrot_training - Step 26083: {'lr': 0.0002486419223553588, 'samples': 13355008, 'steps': 26083, 'loss/train': 1.3669458627700806} +02/25/2022 11:23:19 - INFO - codeparrot_training - Step 26084: {'lr': 0.00024862556013797164, 'samples': 13355520, 'steps': 26084, 'loss/train': 0.8123820424079895} +02/25/2022 11:23:22 - INFO - codeparrot_training - Step 26085: {'lr': 0.0002486091979264722, 'samples': 13356032, 'steps': 26085, 'loss/train': 0.5838306546211243} +02/25/2022 11:23:28 - INFO - codeparrot_training - Step 26086: {'lr': 0.0002485928357209306, 'samples': 13356544, 'steps': 26086, 'loss/train': 1.9058812856674194} +02/25/2022 11:23:31 - INFO - codeparrot_training - Step 26087: {'lr': 0.00024857647352141677, 'samples': 13357056, 'steps': 26087, 'loss/train': 1.8455145359039307} +02/25/2022 11:23:39 - INFO - codeparrot_training - Step 26088: {'lr': 0.00024856011132800085, 'samples': 13357568, 'steps': 26088, 'loss/train': 0.8172327280044556} +02/25/2022 11:23:42 - INFO - codeparrot_training - Step 26089: {'lr': 0.00024854374914075295, 'samples': 13358080, 'steps': 26089, 'loss/train': 0.4047224223613739} +02/25/2022 11:23:48 - INFO - codeparrot_training - Step 26090: {'lr': 0.0002485273869597433, 'samples': 13358592, 'steps': 26090, 'loss/train': 0.8914444446563721} +02/25/2022 11:23:51 - INFO - codeparrot_training - Step 26091: {'lr': 0.00024851102478504173, 'samples': 13359104, 'steps': 26091, 'loss/train': 2.2594966888427734} +02/25/2022 11:23:57 - INFO - codeparrot_training - Step 26092: {'lr': 0.0002484946626167185, 'samples': 13359616, 'steps': 26092, 'loss/train': 1.4258530139923096} +02/25/2022 11:24:00 - INFO - codeparrot_training - Step 26093: {'lr': 0.00024847830045484357, 'samples': 13360128, 'steps': 26093, 'loss/train': 2.382063150405884} +02/25/2022 11:24:06 - INFO - codeparrot_training - Step 26094: {'lr': 0.0002484619382994873, 'samples': 13360640, 'steps': 26094, 'loss/train': 2.10003924369812} +02/25/2022 11:24:09 - INFO - codeparrot_training - Step 26095: {'lr': 0.00024844557615071944, 'samples': 13361152, 'steps': 26095, 'loss/train': 1.9866585731506348} +02/25/2022 11:24:15 - INFO - codeparrot_training - Step 26096: {'lr': 0.00024842921400861025, 'samples': 13361664, 'steps': 26096, 'loss/train': 2.330418109893799} +02/25/2022 11:24:18 - INFO - codeparrot_training - Step 26097: {'lr': 0.0002484128518732298, 'samples': 13362176, 'steps': 26097, 'loss/train': 2.1247408390045166} +02/25/2022 11:24:26 - INFO - codeparrot_training - Step 26098: {'lr': 0.0002483964897446482, 'samples': 13362688, 'steps': 26098, 'loss/train': 0.8035333156585693} +02/25/2022 11:24:29 - INFO - codeparrot_training - Step 26099: {'lr': 0.0002483801276229357, 'samples': 13363200, 'steps': 26099, 'loss/train': 0.8666520118713379} +02/25/2022 11:24:35 - INFO - codeparrot_training - Step 26100: {'lr': 0.00024836376550816205, 'samples': 13363712, 'steps': 26100, 'loss/train': 0.7444463968276978} +02/25/2022 11:24:38 - INFO - codeparrot_training - Step 26101: {'lr': 0.0002483474034003975, 'samples': 13364224, 'steps': 26101, 'loss/train': 0.4754897654056549} +02/25/2022 11:24:44 - INFO - codeparrot_training - Step 26102: {'lr': 0.00024833104129971226, 'samples': 13364736, 'steps': 26102, 'loss/train': 1.634103775024414} +02/25/2022 11:24:47 - INFO - codeparrot_training - Step 26103: {'lr': 0.00024831467920617624, 'samples': 13365248, 'steps': 26103, 'loss/train': 2.524019241333008} +02/25/2022 11:24:53 - INFO - codeparrot_training - Step 26104: {'lr': 0.00024829831711985955, 'samples': 13365760, 'steps': 26104, 'loss/train': 1.8770508766174316} +02/25/2022 11:24:56 - INFO - codeparrot_training - Step 26105: {'lr': 0.0002482819550408324, 'samples': 13366272, 'steps': 26105, 'loss/train': 2.1426119804382324} +02/25/2022 11:25:02 - INFO - codeparrot_training - Step 26106: {'lr': 0.0002482655929691648, 'samples': 13366784, 'steps': 26106, 'loss/train': 2.0250260829925537} +02/25/2022 11:25:05 - INFO - codeparrot_training - Step 26107: {'lr': 0.0002482492309049268, 'samples': 13367296, 'steps': 26107, 'loss/train': 0.9565497636795044} +02/25/2022 11:25:11 - INFO - codeparrot_training - Step 26108: {'lr': 0.0002482328688481886, 'samples': 13367808, 'steps': 26108, 'loss/train': 1.8798900842666626} +02/25/2022 11:25:14 - INFO - codeparrot_training - Step 26109: {'lr': 0.0002482165067990202, 'samples': 13368320, 'steps': 26109, 'loss/train': 2.214495897293091} +02/25/2022 11:25:20 - INFO - codeparrot_training - Step 26110: {'lr': 0.0002482001447574917, 'samples': 13368832, 'steps': 26110, 'loss/train': 1.5836482048034668} +02/25/2022 11:25:23 - INFO - codeparrot_training - Step 26111: {'lr': 0.0002481837827236732, 'samples': 13369344, 'steps': 26111, 'loss/train': 1.7975934743881226} +02/25/2022 11:25:29 - INFO - codeparrot_training - Step 26112: {'lr': 0.00024816742069763486, 'samples': 13369856, 'steps': 26112, 'loss/train': 1.5957379341125488} +02/25/2022 11:25:32 - INFO - codeparrot_training - Step 26113: {'lr': 0.0002481510586794467, 'samples': 13370368, 'steps': 26113, 'loss/train': 2.000986099243164} +02/25/2022 11:25:39 - INFO - codeparrot_training - Step 26114: {'lr': 0.0002481346966691788, 'samples': 13370880, 'steps': 26114, 'loss/train': 2.064115047454834} +02/25/2022 11:25:43 - INFO - codeparrot_training - Step 26115: {'lr': 0.0002481183346669012, 'samples': 13371392, 'steps': 26115, 'loss/train': 3.854135513305664} +02/25/2022 11:25:48 - INFO - codeparrot_training - Step 26116: {'lr': 0.0002481019726726842, 'samples': 13371904, 'steps': 26116, 'loss/train': 2.0776100158691406} +02/25/2022 11:25:52 - INFO - codeparrot_training - Step 26117: {'lr': 0.00024808561068659764, 'samples': 13372416, 'steps': 26117, 'loss/train': 0.347175657749176} +02/25/2022 11:25:57 - INFO - codeparrot_training - Step 26118: {'lr': 0.00024806924870871173, 'samples': 13372928, 'steps': 26118, 'loss/train': 2.3343749046325684} +02/25/2022 11:26:01 - INFO - codeparrot_training - Step 26119: {'lr': 0.00024805288673909656, 'samples': 13373440, 'steps': 26119, 'loss/train': 1.73350989818573} +02/25/2022 11:26:06 - INFO - codeparrot_training - Step 26120: {'lr': 0.00024803652477782225, 'samples': 13373952, 'steps': 26120, 'loss/train': 1.7164617776870728} +02/25/2022 11:26:10 - INFO - codeparrot_training - Step 26121: {'lr': 0.00024802016282495876, 'samples': 13374464, 'steps': 26121, 'loss/train': 1.5382933616638184} +02/25/2022 11:26:15 - INFO - codeparrot_training - Step 26122: {'lr': 0.00024800380088057627, 'samples': 13374976, 'steps': 26122, 'loss/train': 2.393277645111084} +02/25/2022 11:26:19 - INFO - codeparrot_training - Step 26123: {'lr': 0.0002479874389447449, 'samples': 13375488, 'steps': 26123, 'loss/train': 2.6691489219665527} +02/25/2022 11:26:26 - INFO - codeparrot_training - Step 26124: {'lr': 0.00024797107701753464, 'samples': 13376000, 'steps': 26124, 'loss/train': 1.599758267402649} +02/25/2022 11:26:30 - INFO - codeparrot_training - Step 26125: {'lr': 0.0002479547150990158, 'samples': 13376512, 'steps': 26125, 'loss/train': 2.0483505725860596} +02/25/2022 11:26:35 - INFO - codeparrot_training - Step 26126: {'lr': 0.0002479383531892582, 'samples': 13377024, 'steps': 26126, 'loss/train': 1.5155377388000488} +02/25/2022 11:26:38 - INFO - codeparrot_training - Step 26127: {'lr': 0.000247921991288332, 'samples': 13377536, 'steps': 26127, 'loss/train': 2.8289942741394043} +02/25/2022 11:26:44 - INFO - codeparrot_training - Step 26128: {'lr': 0.00024790562939630735, 'samples': 13378048, 'steps': 26128, 'loss/train': 1.9080466032028198} +02/25/2022 11:26:48 - INFO - codeparrot_training - Step 26129: {'lr': 0.00024788926751325444, 'samples': 13378560, 'steps': 26129, 'loss/train': 2.094698667526245} +02/25/2022 11:26:53 - INFO - codeparrot_training - Step 26130: {'lr': 0.00024787290563924307, 'samples': 13379072, 'steps': 26130, 'loss/train': 1.41063392162323} +02/25/2022 11:26:57 - INFO - codeparrot_training - Step 26131: {'lr': 0.00024785654377434355, 'samples': 13379584, 'steps': 26131, 'loss/train': 2.8320565223693848} +02/25/2022 11:27:02 - INFO - codeparrot_training - Step 26132: {'lr': 0.00024784018191862593, 'samples': 13380096, 'steps': 26132, 'loss/train': 1.170541524887085} +02/25/2022 11:27:06 - INFO - codeparrot_training - Step 26133: {'lr': 0.00024782382007216034, 'samples': 13380608, 'steps': 26133, 'loss/train': 1.952217936515808} +02/25/2022 11:27:11 - INFO - codeparrot_training - Step 26134: {'lr': 0.0002478074582350168, 'samples': 13381120, 'steps': 26134, 'loss/train': 1.4973058700561523} +02/25/2022 11:27:15 - INFO - codeparrot_training - Step 26135: {'lr': 0.0002477910964072653, 'samples': 13381632, 'steps': 26135, 'loss/train': 1.983148455619812} +02/25/2022 11:27:22 - INFO - codeparrot_training - Step 26136: {'lr': 0.0002477747345889761, 'samples': 13382144, 'steps': 26136, 'loss/train': 1.711746096611023} +02/25/2022 11:27:25 - INFO - codeparrot_training - Step 26137: {'lr': 0.00024775837278021923, 'samples': 13382656, 'steps': 26137, 'loss/train': 1.007331132888794} +02/25/2022 11:27:31 - INFO - codeparrot_training - Step 26138: {'lr': 0.00024774201098106487, 'samples': 13383168, 'steps': 26138, 'loss/train': 2.1550087928771973} +02/25/2022 11:27:34 - INFO - codeparrot_training - Step 26139: {'lr': 0.0002477256491915829, 'samples': 13383680, 'steps': 26139, 'loss/train': 2.03678035736084} +02/25/2022 11:27:40 - INFO - codeparrot_training - Step 26140: {'lr': 0.00024770928741184355, 'samples': 13384192, 'steps': 26140, 'loss/train': 1.0919735431671143} +02/25/2022 11:27:44 - INFO - codeparrot_training - Step 26141: {'lr': 0.0002476929256419169, 'samples': 13384704, 'steps': 26141, 'loss/train': 1.9188615083694458} +02/25/2022 11:27:49 - INFO - codeparrot_training - Step 26142: {'lr': 0.0002476765638818731, 'samples': 13385216, 'steps': 26142, 'loss/train': 2.8201305866241455} +02/25/2022 11:27:53 - INFO - codeparrot_training - Step 26143: {'lr': 0.0002476602021317821, 'samples': 13385728, 'steps': 26143, 'loss/train': 2.4097628593444824} +02/25/2022 11:27:58 - INFO - codeparrot_training - Step 26144: {'lr': 0.000247643840391714, 'samples': 13386240, 'steps': 26144, 'loss/train': 1.867837905883789} +02/25/2022 11:28:01 - INFO - codeparrot_training - Step 26145: {'lr': 0.000247627478661739, 'samples': 13386752, 'steps': 26145, 'loss/train': 1.6900721788406372} +02/25/2022 11:28:09 - INFO - codeparrot_training - Step 26146: {'lr': 0.0002476111169419271, 'samples': 13387264, 'steps': 26146, 'loss/train': 1.1938014030456543} +02/25/2022 11:28:12 - INFO - codeparrot_training - Step 26147: {'lr': 0.00024759475523234846, 'samples': 13387776, 'steps': 26147, 'loss/train': 2.093402862548828} +02/25/2022 11:28:18 - INFO - codeparrot_training - Step 26148: {'lr': 0.0002475783935330731, 'samples': 13388288, 'steps': 26148, 'loss/train': 1.6534773111343384} +02/25/2022 11:28:21 - INFO - codeparrot_training - Step 26149: {'lr': 0.0002475620318441711, 'samples': 13388800, 'steps': 26149, 'loss/train': 2.011162281036377} +02/25/2022 11:28:27 - INFO - codeparrot_training - Step 26150: {'lr': 0.0002475456701657126, 'samples': 13389312, 'steps': 26150, 'loss/train': 1.3796783685684204} +02/25/2022 11:28:30 - INFO - codeparrot_training - Step 26151: {'lr': 0.0002475293084977677, 'samples': 13389824, 'steps': 26151, 'loss/train': 2.144446849822998} +02/25/2022 11:28:36 - INFO - codeparrot_training - Step 26152: {'lr': 0.00024751294684040645, 'samples': 13390336, 'steps': 26152, 'loss/train': 2.7847137451171875} +02/25/2022 11:28:39 - INFO - codeparrot_training - Step 26153: {'lr': 0.00024749658519369894, 'samples': 13390848, 'steps': 26153, 'loss/train': 2.33437180519104} +02/25/2022 11:28:43 - INFO - codeparrot_training - Step 26154: {'lr': 0.0002474802235577152, 'samples': 13391360, 'steps': 26154, 'loss/train': 1.6049247980117798} +02/25/2022 11:28:48 - INFO - codeparrot_training - Step 26155: {'lr': 0.0002474638619325255, 'samples': 13391872, 'steps': 26155, 'loss/train': 1.2048485279083252} +02/25/2022 11:28:52 - INFO - codeparrot_training - Step 26156: {'lr': 0.0002474475003181997, 'samples': 13392384, 'steps': 26156, 'loss/train': 0.9827079176902771} +02/25/2022 11:28:58 - INFO - codeparrot_training - Step 26157: {'lr': 0.0002474311387148081, 'samples': 13392896, 'steps': 26157, 'loss/train': 2.077143430709839} +02/25/2022 11:29:01 - INFO - codeparrot_training - Step 26158: {'lr': 0.00024741477712242056, 'samples': 13393408, 'steps': 26158, 'loss/train': 1.3191636800765991} +02/25/2022 11:29:06 - INFO - codeparrot_training - Step 26159: {'lr': 0.00024739841554110735, 'samples': 13393920, 'steps': 26159, 'loss/train': 1.5726323127746582} +02/25/2022 11:29:10 - INFO - codeparrot_training - Step 26160: {'lr': 0.00024738205397093865, 'samples': 13394432, 'steps': 26160, 'loss/train': 1.984808087348938} +02/25/2022 11:29:17 - INFO - codeparrot_training - Step 26161: {'lr': 0.00024736569241198424, 'samples': 13394944, 'steps': 26161, 'loss/train': 1.4291894435882568} +02/25/2022 11:29:23 - INFO - codeparrot_training - Step 26162: {'lr': 0.00024734933086431436, 'samples': 13395456, 'steps': 26162, 'loss/train': 1.5518579483032227} +02/25/2022 11:29:26 - INFO - codeparrot_training - Step 26163: {'lr': 0.0002473329693279991, 'samples': 13395968, 'steps': 26163, 'loss/train': 2.2656655311584473} +02/25/2022 11:29:32 - INFO - codeparrot_training - Step 26164: {'lr': 0.00024731660780310865, 'samples': 13396480, 'steps': 26164, 'loss/train': 2.4494128227233887} +02/25/2022 11:29:35 - INFO - codeparrot_training - Step 26165: {'lr': 0.0002473002462897129, 'samples': 13396992, 'steps': 26165, 'loss/train': 2.3789594173431396} +02/25/2022 11:29:41 - INFO - codeparrot_training - Step 26166: {'lr': 0.0002472838847878821, 'samples': 13397504, 'steps': 26166, 'loss/train': 2.240251302719116} +02/25/2022 11:29:44 - INFO - codeparrot_training - Step 26167: {'lr': 0.00024726752329768623, 'samples': 13398016, 'steps': 26167, 'loss/train': 3.5440189838409424} +02/25/2022 11:29:50 - INFO - codeparrot_training - Step 26168: {'lr': 0.0002472511618191955, 'samples': 13398528, 'steps': 26168, 'loss/train': 0.8982509970664978} +02/25/2022 11:29:53 - INFO - codeparrot_training - Step 26169: {'lr': 0.00024723480035247986, 'samples': 13399040, 'steps': 26169, 'loss/train': 1.172585129737854} +02/25/2022 11:29:59 - INFO - codeparrot_training - Step 26170: {'lr': 0.00024721843889760945, 'samples': 13399552, 'steps': 26170, 'loss/train': 2.0878639221191406} +02/25/2022 11:30:02 - INFO - codeparrot_training - Step 26171: {'lr': 0.0002472020774546543, 'samples': 13400064, 'steps': 26171, 'loss/train': 3.1603128910064697} +02/25/2022 11:30:10 - INFO - codeparrot_training - Step 26172: {'lr': 0.00024718571602368465, 'samples': 13400576, 'steps': 26172, 'loss/train': 0.9676345586776733} +02/25/2022 11:30:13 - INFO - codeparrot_training - Step 26173: {'lr': 0.00024716935460477056, 'samples': 13401088, 'steps': 26173, 'loss/train': 2.520599126815796} +02/25/2022 11:30:19 - INFO - codeparrot_training - Step 26174: {'lr': 0.00024715299319798197, 'samples': 13401600, 'steps': 26174, 'loss/train': 3.573441505432129} +02/25/2022 11:30:22 - INFO - codeparrot_training - Step 26175: {'lr': 0.000247136631803389, 'samples': 13402112, 'steps': 26175, 'loss/train': 1.077135443687439} +02/25/2022 11:30:28 - INFO - codeparrot_training - Step 26176: {'lr': 0.0002471202704210619, 'samples': 13402624, 'steps': 26176, 'loss/train': 1.184415578842163} +02/25/2022 11:30:31 - INFO - codeparrot_training - Step 26177: {'lr': 0.0002471039090510707, 'samples': 13403136, 'steps': 26177, 'loss/train': 1.4721198081970215} +02/25/2022 11:30:37 - INFO - codeparrot_training - Step 26178: {'lr': 0.0002470875476934853, 'samples': 13403648, 'steps': 26178, 'loss/train': 1.9805235862731934} +02/25/2022 11:30:40 - INFO - codeparrot_training - Step 26179: {'lr': 0.00024707118634837593, 'samples': 13404160, 'steps': 26179, 'loss/train': 2.18442440032959} +02/25/2022 11:30:46 - INFO - codeparrot_training - Step 26180: {'lr': 0.0002470548250158127, 'samples': 13404672, 'steps': 26180, 'loss/train': 0.30659595131874084} +02/25/2022 11:30:49 - INFO - codeparrot_training - Step 26181: {'lr': 0.0002470384636958657, 'samples': 13405184, 'steps': 26181, 'loss/train': 1.3039799928665161} +02/25/2022 11:30:57 - INFO - codeparrot_training - Step 26182: {'lr': 0.0002470221023886049, 'samples': 13405696, 'steps': 26182, 'loss/train': 1.2231236696243286} +02/25/2022 11:31:00 - INFO - codeparrot_training - Step 26183: {'lr': 0.00024700574109410054, 'samples': 13406208, 'steps': 26183, 'loss/train': 1.3229402303695679} +02/25/2022 11:31:06 - INFO - codeparrot_training - Step 26184: {'lr': 0.00024698937981242254, 'samples': 13406720, 'steps': 26184, 'loss/train': 2.06085467338562} +02/25/2022 11:31:10 - INFO - codeparrot_training - Step 26185: {'lr': 0.00024697301854364117, 'samples': 13407232, 'steps': 26185, 'loss/train': 1.0619232654571533} +02/25/2022 11:31:15 - INFO - codeparrot_training - Step 26186: {'lr': 0.00024695665728782643, 'samples': 13407744, 'steps': 26186, 'loss/train': 1.3313326835632324} +02/25/2022 11:31:18 - INFO - codeparrot_training - Step 26187: {'lr': 0.0002469402960450484, 'samples': 13408256, 'steps': 26187, 'loss/train': 1.696230411529541} +02/25/2022 11:31:24 - INFO - codeparrot_training - Step 26188: {'lr': 0.0002469239348153771, 'samples': 13408768, 'steps': 26188, 'loss/train': 1.9529732465744019} +02/25/2022 11:31:27 - INFO - codeparrot_training - Step 26189: {'lr': 0.00024690757359888273, 'samples': 13409280, 'steps': 26189, 'loss/train': 1.9038735628128052} +02/25/2022 11:31:33 - INFO - codeparrot_training - Step 26190: {'lr': 0.0002468912123956354, 'samples': 13409792, 'steps': 26190, 'loss/train': 1.583982229232788} +02/25/2022 11:31:36 - INFO - codeparrot_training - Step 26191: {'lr': 0.00024687485120570505, 'samples': 13410304, 'steps': 26191, 'loss/train': 1.7699180841445923} +02/25/2022 11:31:42 - INFO - codeparrot_training - Step 26192: {'lr': 0.0002468584900291618, 'samples': 13410816, 'steps': 26192, 'loss/train': 0.6914160251617432} +02/25/2022 11:31:45 - INFO - codeparrot_training - Step 26193: {'lr': 0.0002468421288660759, 'samples': 13411328, 'steps': 26193, 'loss/train': 0.5209804773330688} +02/25/2022 11:31:51 - INFO - codeparrot_training - Step 26194: {'lr': 0.00024682576771651725, 'samples': 13411840, 'steps': 26194, 'loss/train': 1.2797808647155762} +02/25/2022 11:31:54 - INFO - codeparrot_training - Step 26195: {'lr': 0.000246809406580556, 'samples': 13412352, 'steps': 26195, 'loss/train': 1.3434840440750122} +02/25/2022 11:32:02 - INFO - codeparrot_training - Step 26196: {'lr': 0.00024679304545826224, 'samples': 13412864, 'steps': 26196, 'loss/train': 2.2019340991973877} +02/25/2022 11:32:06 - INFO - codeparrot_training - Step 26197: {'lr': 0.0002467766843497061, 'samples': 13413376, 'steps': 26197, 'loss/train': 1.9646128416061401} +02/25/2022 11:32:11 - INFO - codeparrot_training - Step 26198: {'lr': 0.0002467603232549576, 'samples': 13413888, 'steps': 26198, 'loss/train': 1.7637488842010498} +02/25/2022 11:32:15 - INFO - codeparrot_training - Step 26199: {'lr': 0.00024674396217408683, 'samples': 13414400, 'steps': 26199, 'loss/train': 0.3719959855079651} +02/25/2022 11:32:20 - INFO - codeparrot_training - Step 26200: {'lr': 0.00024672760110716396, 'samples': 13414912, 'steps': 26200, 'loss/train': 2.594597101211548} +02/25/2022 11:32:24 - INFO - codeparrot_training - Step 26201: {'lr': 0.0002467112400542589, 'samples': 13415424, 'steps': 26201, 'loss/train': 0.5499547123908997} +02/25/2022 11:32:29 - INFO - codeparrot_training - Step 26202: {'lr': 0.00024669487901544186, 'samples': 13415936, 'steps': 26202, 'loss/train': 2.2180166244506836} +02/25/2022 11:32:33 - INFO - codeparrot_training - Step 26203: {'lr': 0.000246678517990783, 'samples': 13416448, 'steps': 26203, 'loss/train': 1.4721049070358276} +02/25/2022 11:32:38 - INFO - codeparrot_training - Step 26204: {'lr': 0.00024666215698035225, 'samples': 13416960, 'steps': 26204, 'loss/train': 0.854203462600708} +02/25/2022 11:32:42 - INFO - codeparrot_training - Step 26205: {'lr': 0.00024664579598421976, 'samples': 13417472, 'steps': 26205, 'loss/train': 2.214334487915039} +02/25/2022 11:32:47 - INFO - codeparrot_training - Step 26206: {'lr': 0.00024662943500245555, 'samples': 13417984, 'steps': 26206, 'loss/train': 2.644735813140869} +02/25/2022 11:32:51 - INFO - codeparrot_training - Step 26207: {'lr': 0.0002466130740351298, 'samples': 13418496, 'steps': 26207, 'loss/train': 1.8945960998535156} +02/25/2022 11:32:58 - INFO - codeparrot_training - Step 26208: {'lr': 0.00024659671308231273, 'samples': 13419008, 'steps': 26208, 'loss/train': 0.8003060221672058} +02/25/2022 11:33:02 - INFO - codeparrot_training - Step 26209: {'lr': 0.0002465803521440741, 'samples': 13419520, 'steps': 26209, 'loss/train': 2.819276809692383} +02/25/2022 11:33:07 - INFO - codeparrot_training - Step 26210: {'lr': 0.00024656399122048415, 'samples': 13420032, 'steps': 26210, 'loss/train': 1.4604949951171875} +02/25/2022 11:33:11 - INFO - codeparrot_training - Step 26211: {'lr': 0.00024654763031161297, 'samples': 13420544, 'steps': 26211, 'loss/train': 1.8633092641830444} +02/25/2022 11:33:16 - INFO - codeparrot_training - Step 26212: {'lr': 0.0002465312694175308, 'samples': 13421056, 'steps': 26212, 'loss/train': 2.3561911582946777} +02/25/2022 11:33:20 - INFO - codeparrot_training - Step 26213: {'lr': 0.0002465149085383074, 'samples': 13421568, 'steps': 26213, 'loss/train': 1.8537622690200806} +02/25/2022 11:33:26 - INFO - codeparrot_training - Step 26214: {'lr': 0.00024649854767401307, 'samples': 13422080, 'steps': 26214, 'loss/train': 1.671692967414856} +02/25/2022 11:33:29 - INFO - codeparrot_training - Step 26215: {'lr': 0.0002464821868247178, 'samples': 13422592, 'steps': 26215, 'loss/train': 3.491947889328003} +02/25/2022 11:33:35 - INFO - codeparrot_training - Step 26216: {'lr': 0.0002464658259904919, 'samples': 13423104, 'steps': 26216, 'loss/train': 2.350614547729492} +02/25/2022 11:33:38 - INFO - codeparrot_training - Step 26217: {'lr': 0.0002464494651714051, 'samples': 13423616, 'steps': 26217, 'loss/train': 1.8202143907546997} +02/25/2022 11:33:46 - INFO - codeparrot_training - Step 26218: {'lr': 0.0002464331043675277, 'samples': 13424128, 'steps': 26218, 'loss/train': 1.8527570962905884} +02/25/2022 11:33:49 - INFO - codeparrot_training - Step 26219: {'lr': 0.0002464167435789298, 'samples': 13424640, 'steps': 26219, 'loss/train': 1.8715065717697144} +02/25/2022 11:33:55 - INFO - codeparrot_training - Step 26220: {'lr': 0.0002464003828056814, 'samples': 13425152, 'steps': 26220, 'loss/train': 2.158860445022583} +02/25/2022 11:33:58 - INFO - codeparrot_training - Step 26221: {'lr': 0.00024638402204785263, 'samples': 13425664, 'steps': 26221, 'loss/train': 1.5496877431869507} +02/25/2022 11:34:03 - INFO - codeparrot_training - Step 26222: {'lr': 0.00024636766130551354, 'samples': 13426176, 'steps': 26222, 'loss/train': 1.6209758520126343} +02/25/2022 11:34:07 - INFO - codeparrot_training - Step 26223: {'lr': 0.0002463513005787343, 'samples': 13426688, 'steps': 26223, 'loss/train': 1.8798103332519531} +02/25/2022 11:34:12 - INFO - codeparrot_training - Step 26224: {'lr': 0.0002463349398675848, 'samples': 13427200, 'steps': 26224, 'loss/train': 2.4048264026641846} +02/25/2022 11:34:16 - INFO - codeparrot_training - Step 26225: {'lr': 0.0002463185791721354, 'samples': 13427712, 'steps': 26225, 'loss/train': 1.4891506433486938} +02/25/2022 11:34:22 - INFO - codeparrot_training - Step 26226: {'lr': 0.00024630221849245595, 'samples': 13428224, 'steps': 26226, 'loss/train': 2.109239339828491} +02/25/2022 11:34:25 - INFO - codeparrot_training - Step 26227: {'lr': 0.00024628585782861663, 'samples': 13428736, 'steps': 26227, 'loss/train': 1.5284104347229004} +02/25/2022 11:34:32 - INFO - codeparrot_training - Step 26228: {'lr': 0.0002462694971806875, 'samples': 13429248, 'steps': 26228, 'loss/train': 0.9382879734039307} +02/25/2022 11:34:36 - INFO - codeparrot_training - Step 26229: {'lr': 0.0002462531365487388, 'samples': 13429760, 'steps': 26229, 'loss/train': 2.3886308670043945} +02/25/2022 11:34:41 - INFO - codeparrot_training - Step 26230: {'lr': 0.0002462367759328403, 'samples': 13430272, 'steps': 26230, 'loss/train': 1.0029067993164062} +02/25/2022 11:34:45 - INFO - codeparrot_training - Step 26231: {'lr': 0.00024622041533306233, 'samples': 13430784, 'steps': 26231, 'loss/train': 2.5881235599517822} +02/25/2022 11:34:50 - INFO - codeparrot_training - Step 26232: {'lr': 0.00024620405474947484, 'samples': 13431296, 'steps': 26232, 'loss/train': 1.576241135597229} +02/25/2022 11:34:54 - INFO - codeparrot_training - Step 26233: {'lr': 0.00024618769418214805, 'samples': 13431808, 'steps': 26233, 'loss/train': 2.874534845352173} +02/25/2022 11:34:59 - INFO - codeparrot_training - Step 26234: {'lr': 0.000246171333631152, 'samples': 13432320, 'steps': 26234, 'loss/train': 1.5983017683029175} +02/25/2022 11:35:03 - INFO - codeparrot_training - Step 26235: {'lr': 0.0002461549730965567, 'samples': 13432832, 'steps': 26235, 'loss/train': 1.9189375638961792} +02/25/2022 11:35:08 - INFO - codeparrot_training - Step 26236: {'lr': 0.0002461386125784322, 'samples': 13433344, 'steps': 26236, 'loss/train': 1.0371625423431396} +02/25/2022 11:35:12 - INFO - codeparrot_training - Step 26237: {'lr': 0.0002461222520768487, 'samples': 13433856, 'steps': 26237, 'loss/train': 1.8823490142822266} +02/25/2022 11:35:19 - INFO - codeparrot_training - Step 26238: {'lr': 0.00024610589159187627, 'samples': 13434368, 'steps': 26238, 'loss/train': 1.2497329711914062} +02/25/2022 11:35:23 - INFO - codeparrot_training - Step 26239: {'lr': 0.00024608953112358495, 'samples': 13434880, 'steps': 26239, 'loss/train': 0.6016479730606079} +02/25/2022 11:35:28 - INFO - codeparrot_training - Step 26240: {'lr': 0.0002460731706720449, 'samples': 13435392, 'steps': 26240, 'loss/train': 1.3934191465377808} +02/25/2022 11:35:32 - INFO - codeparrot_training - Step 26241: {'lr': 0.000246056810237326, 'samples': 13435904, 'steps': 26241, 'loss/train': 2.158846855163574} +02/25/2022 11:35:37 - INFO - codeparrot_training - Step 26242: {'lr': 0.00024604044981949854, 'samples': 13436416, 'steps': 26242, 'loss/train': 2.3282394409179688} +02/25/2022 11:35:41 - INFO - codeparrot_training - Step 26243: {'lr': 0.00024602408941863256, 'samples': 13436928, 'steps': 26243, 'loss/train': 2.113534688949585} +02/25/2022 11:35:46 - INFO - codeparrot_training - Step 26244: {'lr': 0.00024600772903479815, 'samples': 13437440, 'steps': 26244, 'loss/train': 2.697965621948242} +02/25/2022 11:35:50 - INFO - codeparrot_training - Step 26245: {'lr': 0.0002459913686680653, 'samples': 13437952, 'steps': 26245, 'loss/train': 1.2481744289398193} +02/25/2022 11:35:55 - INFO - codeparrot_training - Step 26246: {'lr': 0.00024597500831850415, 'samples': 13438464, 'steps': 26246, 'loss/train': 1.79423987865448} +02/25/2022 11:35:59 - INFO - codeparrot_training - Step 26247: {'lr': 0.00024595864798618484, 'samples': 13438976, 'steps': 26247, 'loss/train': 1.2637900114059448} +02/25/2022 11:36:04 - INFO - codeparrot_training - Step 26248: {'lr': 0.0002459422876711774, 'samples': 13439488, 'steps': 26248, 'loss/train': 2.9381160736083984} +02/25/2022 11:36:08 - INFO - codeparrot_training - Step 26249: {'lr': 0.00024592592737355184, 'samples': 13440000, 'steps': 26249, 'loss/train': 1.3678526878356934} +02/25/2022 11:36:13 - INFO - codeparrot_training - Step 26250: {'lr': 0.0002459095670933783, 'samples': 13440512, 'steps': 26250, 'loss/train': 2.3651723861694336} +02/25/2022 11:36:17 - INFO - codeparrot_training - Step 26251: {'lr': 0.00024589320683072704, 'samples': 13441024, 'steps': 26251, 'loss/train': 2.900285482406616} +02/25/2022 11:36:22 - INFO - codeparrot_training - Step 26252: {'lr': 0.0002458768465856678, 'samples': 13441536, 'steps': 26252, 'loss/train': 0.2583363354206085} +02/25/2022 11:36:26 - INFO - codeparrot_training - Step 26253: {'lr': 0.00024586048635827086, 'samples': 13442048, 'steps': 26253, 'loss/train': 2.3059117794036865} +02/25/2022 11:36:33 - INFO - codeparrot_training - Step 26254: {'lr': 0.0002458441261486063, 'samples': 13442560, 'steps': 26254, 'loss/train': 1.6535823345184326} +02/25/2022 11:36:36 - INFO - codeparrot_training - Step 26255: {'lr': 0.0002458277659567442, 'samples': 13443072, 'steps': 26255, 'loss/train': 1.945255160331726} +02/25/2022 11:36:42 - INFO - codeparrot_training - Step 26256: {'lr': 0.0002458114057827547, 'samples': 13443584, 'steps': 26256, 'loss/train': 2.845904588699341} +02/25/2022 11:36:45 - INFO - codeparrot_training - Step 26257: {'lr': 0.00024579504562670775, 'samples': 13444096, 'steps': 26257, 'loss/train': 2.2146105766296387} +02/25/2022 11:36:51 - INFO - codeparrot_training - Step 26258: {'lr': 0.0002457786854886734, 'samples': 13444608, 'steps': 26258, 'loss/train': 2.3608477115631104} +02/25/2022 11:36:54 - INFO - codeparrot_training - Step 26259: {'lr': 0.00024576232536872194, 'samples': 13445120, 'steps': 26259, 'loss/train': 2.943692684173584} +02/25/2022 11:37:00 - INFO - codeparrot_training - Step 26260: {'lr': 0.00024574596526692337, 'samples': 13445632, 'steps': 26260, 'loss/train': 3.278921365737915} +02/25/2022 11:37:03 - INFO - codeparrot_training - Step 26261: {'lr': 0.0002457296051833476, 'samples': 13446144, 'steps': 26261, 'loss/train': 1.5675383806228638} +02/25/2022 11:37:09 - INFO - codeparrot_training - Step 26262: {'lr': 0.0002457132451180649, 'samples': 13446656, 'steps': 26262, 'loss/train': 1.985438346862793} +02/25/2022 11:37:12 - INFO - codeparrot_training - Step 26263: {'lr': 0.0002456968850711453, 'samples': 13447168, 'steps': 26263, 'loss/train': 1.7740424871444702} +02/25/2022 11:37:19 - INFO - codeparrot_training - Step 26264: {'lr': 0.00024568052504265895, 'samples': 13447680, 'steps': 26264, 'loss/train': 0.7943164110183716} +02/25/2022 11:37:23 - INFO - codeparrot_training - Step 26265: {'lr': 0.00024566416503267577, 'samples': 13448192, 'steps': 26265, 'loss/train': 1.2856733798980713} +02/25/2022 11:37:28 - INFO - codeparrot_training - Step 26266: {'lr': 0.0002456478050412659, 'samples': 13448704, 'steps': 26266, 'loss/train': 0.706490695476532} +02/25/2022 11:37:32 - INFO - codeparrot_training - Step 26267: {'lr': 0.0002456314450684995, 'samples': 13449216, 'steps': 26267, 'loss/train': 1.5952187776565552} +02/25/2022 11:37:37 - INFO - codeparrot_training - Step 26268: {'lr': 0.00024561508511444655, 'samples': 13449728, 'steps': 26268, 'loss/train': 1.5375702381134033} +02/25/2022 11:37:41 - INFO - codeparrot_training - Step 26269: {'lr': 0.0002455987251791773, 'samples': 13450240, 'steps': 26269, 'loss/train': 1.4487030506134033} +02/25/2022 11:37:47 - INFO - codeparrot_training - Step 26270: {'lr': 0.0002455823652627616, 'samples': 13450752, 'steps': 26270, 'loss/train': 2.676496982574463} +02/25/2022 11:37:50 - INFO - codeparrot_training - Step 26271: {'lr': 0.0002455660053652697, 'samples': 13451264, 'steps': 26271, 'loss/train': 2.1709210872650146} +02/25/2022 11:37:56 - INFO - codeparrot_training - Step 26272: {'lr': 0.0002455496454867716, 'samples': 13451776, 'steps': 26272, 'loss/train': 1.0727542638778687} +02/25/2022 11:37:59 - INFO - codeparrot_training - Step 26273: {'lr': 0.00024553328562733746, 'samples': 13452288, 'steps': 26273, 'loss/train': 1.678377389907837} +02/25/2022 11:38:06 - INFO - codeparrot_training - Step 26274: {'lr': 0.0002455169257870373, 'samples': 13452800, 'steps': 26274, 'loss/train': 1.5258363485336304} +02/25/2022 11:38:10 - INFO - codeparrot_training - Step 26275: {'lr': 0.0002455005659659411, 'samples': 13453312, 'steps': 26275, 'loss/train': 0.6886319518089294} +02/25/2022 11:38:16 - INFO - codeparrot_training - Step 26276: {'lr': 0.0002454842061641191, 'samples': 13453824, 'steps': 26276, 'loss/train': 0.9576643109321594} +02/25/2022 11:38:19 - INFO - codeparrot_training - Step 26277: {'lr': 0.00024546784638164145, 'samples': 13454336, 'steps': 26277, 'loss/train': 2.579251289367676} +02/25/2022 11:38:25 - INFO - codeparrot_training - Step 26278: {'lr': 0.00024545148661857794, 'samples': 13454848, 'steps': 26278, 'loss/train': 2.191575288772583} +02/25/2022 11:38:28 - INFO - codeparrot_training - Step 26279: {'lr': 0.0002454351268749989, 'samples': 13455360, 'steps': 26279, 'loss/train': 1.9742505550384521} +02/25/2022 11:38:34 - INFO - codeparrot_training - Step 26280: {'lr': 0.0002454187671509743, 'samples': 13455872, 'steps': 26280, 'loss/train': 2.0906577110290527} +02/25/2022 11:38:37 - INFO - codeparrot_training - Step 26281: {'lr': 0.0002454024074465743, 'samples': 13456384, 'steps': 26281, 'loss/train': 2.5209059715270996} +02/25/2022 11:38:41 - INFO - codeparrot_training - Step 26282: {'lr': 0.0002453860477618689, 'samples': 13456896, 'steps': 26282, 'loss/train': 1.6772773265838623} +02/25/2022 11:38:46 - INFO - codeparrot_training - Step 26283: {'lr': 0.0002453696880969281, 'samples': 13457408, 'steps': 26283, 'loss/train': 3.2000346183776855} +02/25/2022 11:38:50 - INFO - codeparrot_training - Step 26284: {'lr': 0.00024535332845182224, 'samples': 13457920, 'steps': 26284, 'loss/train': 1.639349102973938} +02/25/2022 11:38:57 - INFO - codeparrot_training - Step 26285: {'lr': 0.00024533696882662117, 'samples': 13458432, 'steps': 26285, 'loss/train': 0.9806617498397827} +02/25/2022 11:39:00 - INFO - codeparrot_training - Step 26286: {'lr': 0.00024532060922139505, 'samples': 13458944, 'steps': 26286, 'loss/train': 2.0610392093658447} +02/25/2022 11:39:06 - INFO - codeparrot_training - Step 26287: {'lr': 0.000245304249636214, 'samples': 13459456, 'steps': 26287, 'loss/train': 1.7213953733444214} +02/25/2022 11:39:09 - INFO - codeparrot_training - Step 26288: {'lr': 0.000245287890071148, 'samples': 13459968, 'steps': 26288, 'loss/train': 1.8167425394058228} +02/25/2022 11:39:15 - INFO - codeparrot_training - Step 26289: {'lr': 0.00024527153052626715, 'samples': 13460480, 'steps': 26289, 'loss/train': 1.2684626579284668} +02/25/2022 11:39:18 - INFO - codeparrot_training - Step 26290: {'lr': 0.00024525517100164166, 'samples': 13460992, 'steps': 26290, 'loss/train': 0.6238476037979126} +02/25/2022 11:39:24 - INFO - codeparrot_training - Step 26291: {'lr': 0.0002452388114973415, 'samples': 13461504, 'steps': 26291, 'loss/train': 0.7544644474983215} +02/25/2022 11:39:27 - INFO - codeparrot_training - Step 26292: {'lr': 0.0002452224520134367, 'samples': 13462016, 'steps': 26292, 'loss/train': 2.487161159515381} +02/25/2022 11:39:33 - INFO - codeparrot_training - Step 26293: {'lr': 0.0002452060925499973, 'samples': 13462528, 'steps': 26293, 'loss/train': 0.49551865458488464} +02/25/2022 11:39:36 - INFO - codeparrot_training - Step 26294: {'lr': 0.0002451897331070936, 'samples': 13463040, 'steps': 26294, 'loss/train': 1.293086290359497} +02/25/2022 11:39:42 - INFO - codeparrot_training - Step 26295: {'lr': 0.0002451733736847957, 'samples': 13463552, 'steps': 26295, 'loss/train': 2.158167839050293} +02/25/2022 11:39:45 - INFO - codeparrot_training - Step 26296: {'lr': 0.00024515701428317336, 'samples': 13464064, 'steps': 26296, 'loss/train': 1.394509196281433} +02/25/2022 11:39:51 - INFO - codeparrot_training - Step 26297: {'lr': 0.0002451406549022968, 'samples': 13464576, 'steps': 26297, 'loss/train': 3.0340778827667236} +02/25/2022 11:39:54 - INFO - codeparrot_training - Step 26298: {'lr': 0.00024512429554223613, 'samples': 13465088, 'steps': 26298, 'loss/train': 2.00824236869812} +02/25/2022 11:40:00 - INFO - codeparrot_training - Step 26299: {'lr': 0.0002451079362030616, 'samples': 13465600, 'steps': 26299, 'loss/train': 1.4786911010742188} +02/25/2022 11:40:03 - INFO - codeparrot_training - Step 26300: {'lr': 0.00024509157688484297, 'samples': 13466112, 'steps': 26300, 'loss/train': 1.0193793773651123} +02/25/2022 11:40:11 - INFO - codeparrot_training - Step 26301: {'lr': 0.00024507521758765046, 'samples': 13466624, 'steps': 26301, 'loss/train': 1.8714008331298828} +02/25/2022 11:40:14 - INFO - codeparrot_training - Step 26302: {'lr': 0.0002450588583115542, 'samples': 13467136, 'steps': 26302, 'loss/train': 2.2302093505859375} +02/25/2022 11:40:20 - INFO - codeparrot_training - Step 26303: {'lr': 0.00024504249905662415, 'samples': 13467648, 'steps': 26303, 'loss/train': 2.8510708808898926} +02/25/2022 11:40:24 - INFO - codeparrot_training - Step 26304: {'lr': 0.00024502613982293065, 'samples': 13468160, 'steps': 26304, 'loss/train': 2.325540781021118} +02/25/2022 11:40:29 - INFO - codeparrot_training - Step 26305: {'lr': 0.00024500978061054346, 'samples': 13468672, 'steps': 26305, 'loss/train': 1.4643923044204712} +02/25/2022 11:40:33 - INFO - codeparrot_training - Step 26306: {'lr': 0.0002449934214195327, 'samples': 13469184, 'steps': 26306, 'loss/train': 1.4209997653961182} +02/25/2022 11:40:38 - INFO - codeparrot_training - Step 26307: {'lr': 0.00024497706224996864, 'samples': 13469696, 'steps': 26307, 'loss/train': 2.1823487281799316} +02/25/2022 11:40:42 - INFO - codeparrot_training - Step 26308: {'lr': 0.0002449607031019213, 'samples': 13470208, 'steps': 26308, 'loss/train': 1.8023405075073242} +02/25/2022 11:40:47 - INFO - codeparrot_training - Step 26309: {'lr': 0.00024494434397546067, 'samples': 13470720, 'steps': 26309, 'loss/train': 0.4026634991168976} +02/25/2022 11:40:51 - INFO - codeparrot_training - Step 26310: {'lr': 0.00024492798487065674, 'samples': 13471232, 'steps': 26310, 'loss/train': 1.7011061906814575} +02/25/2022 11:40:58 - INFO - codeparrot_training - Step 26311: {'lr': 0.0002449116257875798, 'samples': 13471744, 'steps': 26311, 'loss/train': 2.4335412979125977} +02/25/2022 11:41:01 - INFO - codeparrot_training - Step 26312: {'lr': 0.0002448952667262999, 'samples': 13472256, 'steps': 26312, 'loss/train': 2.1463046073913574} +02/25/2022 11:41:07 - INFO - codeparrot_training - Step 26313: {'lr': 0.00024487890768688705, 'samples': 13472768, 'steps': 26313, 'loss/train': 1.31080961227417} +02/25/2022 11:41:10 - INFO - codeparrot_training - Step 26314: {'lr': 0.0002448625486694112, 'samples': 13473280, 'steps': 26314, 'loss/train': 1.2081104516983032} +02/25/2022 11:41:16 - INFO - codeparrot_training - Step 26315: {'lr': 0.00024484618967394263, 'samples': 13473792, 'steps': 26315, 'loss/train': 0.958557665348053} +02/25/2022 11:41:20 - INFO - codeparrot_training - Step 26316: {'lr': 0.0002448298307005514, 'samples': 13474304, 'steps': 26316, 'loss/train': 0.6188236474990845} +02/25/2022 11:41:25 - INFO - codeparrot_training - Step 26317: {'lr': 0.00024481347174930756, 'samples': 13474816, 'steps': 26317, 'loss/train': 0.2930773198604584} +02/25/2022 11:41:28 - INFO - codeparrot_training - Step 26318: {'lr': 0.00024479711282028105, 'samples': 13475328, 'steps': 26318, 'loss/train': 1.8447990417480469} +02/25/2022 11:41:34 - INFO - codeparrot_training - Step 26319: {'lr': 0.0002447807539135421, 'samples': 13475840, 'steps': 26319, 'loss/train': 2.0253796577453613} +02/25/2022 11:41:37 - INFO - codeparrot_training - Step 26320: {'lr': 0.00024476439502916077, 'samples': 13476352, 'steps': 26320, 'loss/train': 2.498218297958374} +02/25/2022 11:41:45 - INFO - codeparrot_training - Step 26321: {'lr': 0.0002447480361672071, 'samples': 13476864, 'steps': 26321, 'loss/train': 1.2911486625671387} +02/25/2022 11:41:48 - INFO - codeparrot_training - Step 26322: {'lr': 0.00024473167732775116, 'samples': 13477376, 'steps': 26322, 'loss/train': 1.895642638206482} +02/25/2022 11:41:54 - INFO - codeparrot_training - Step 26323: {'lr': 0.00024471531851086303, 'samples': 13477888, 'steps': 26323, 'loss/train': 1.9397436380386353} +02/25/2022 11:41:57 - INFO - codeparrot_training - Step 26324: {'lr': 0.00024469895971661283, 'samples': 13478400, 'steps': 26324, 'loss/train': 0.09864596277475357} +02/25/2022 11:42:03 - INFO - codeparrot_training - Step 26325: {'lr': 0.0002446826009450706, 'samples': 13478912, 'steps': 26325, 'loss/train': 2.038851022720337} +02/25/2022 11:42:06 - INFO - codeparrot_training - Step 26326: {'lr': 0.0002446662421963064, 'samples': 13479424, 'steps': 26326, 'loss/train': 2.157456874847412} +02/25/2022 11:42:12 - INFO - codeparrot_training - Step 26327: {'lr': 0.00024464988347039037, 'samples': 13479936, 'steps': 26327, 'loss/train': 1.946167230606079} +02/25/2022 11:42:15 - INFO - codeparrot_training - Step 26328: {'lr': 0.0002446335247673925, 'samples': 13480448, 'steps': 26328, 'loss/train': 1.6931893825531006} +02/25/2022 11:42:21 - INFO - codeparrot_training - Step 26329: {'lr': 0.0002446171660873828, 'samples': 13480960, 'steps': 26329, 'loss/train': 1.933598518371582} +02/25/2022 11:42:24 - INFO - codeparrot_training - Step 26330: {'lr': 0.00024460080743043163, 'samples': 13481472, 'steps': 26330, 'loss/train': 0.47926944494247437} +02/25/2022 11:42:31 - INFO - codeparrot_training - Step 26331: {'lr': 0.0002445844487966088, 'samples': 13481984, 'steps': 26331, 'loss/train': 2.051633834838867} +02/25/2022 11:42:35 - INFO - codeparrot_training - Step 26332: {'lr': 0.0002445680901859845, 'samples': 13482496, 'steps': 26332, 'loss/train': 0.803810179233551} +02/25/2022 11:42:40 - INFO - codeparrot_training - Step 26333: {'lr': 0.0002445517315986287, 'samples': 13483008, 'steps': 26333, 'loss/train': 1.4540663957595825} +02/25/2022 11:42:44 - INFO - codeparrot_training - Step 26334: {'lr': 0.00024453537303461176, 'samples': 13483520, 'steps': 26334, 'loss/train': 1.1829906702041626} +02/25/2022 11:42:49 - INFO - codeparrot_training - Step 26335: {'lr': 0.00024451901449400334, 'samples': 13484032, 'steps': 26335, 'loss/train': 0.08518210053443909} +02/25/2022 11:42:53 - INFO - codeparrot_training - Step 26336: {'lr': 0.00024450265597687374, 'samples': 13484544, 'steps': 26336, 'loss/train': 0.9057542085647583} +02/25/2022 11:42:58 - INFO - codeparrot_training - Step 26337: {'lr': 0.000244486297483293, 'samples': 13485056, 'steps': 26337, 'loss/train': 2.377490997314453} +02/25/2022 11:43:02 - INFO - codeparrot_training - Step 26338: {'lr': 0.00024446993901333137, 'samples': 13485568, 'steps': 26338, 'loss/train': 1.8935253620147705} +02/25/2022 11:43:08 - INFO - codeparrot_training - Step 26339: {'lr': 0.0002444535805670587, 'samples': 13486080, 'steps': 26339, 'loss/train': 1.525955080986023} +02/25/2022 11:43:11 - INFO - codeparrot_training - Step 26340: {'lr': 0.000244437222144545, 'samples': 13486592, 'steps': 26340, 'loss/train': 3.2384912967681885} +02/25/2022 11:43:16 - INFO - codeparrot_training - Step 26341: {'lr': 0.00024442086374586056, 'samples': 13487104, 'steps': 26341, 'loss/train': 2.4522252082824707} +02/25/2022 11:43:20 - INFO - codeparrot_training - Step 26342: {'lr': 0.0002444045053710754, 'samples': 13487616, 'steps': 26342, 'loss/train': 1.6107395887374878} +02/25/2022 11:43:27 - INFO - codeparrot_training - Step 26343: {'lr': 0.0002443881470202596, 'samples': 13488128, 'steps': 26343, 'loss/train': 1.1788272857666016} +02/25/2022 11:43:31 - INFO - codeparrot_training - Step 26344: {'lr': 0.0002443717886934831, 'samples': 13488640, 'steps': 26344, 'loss/train': 3.0571553707122803} +02/25/2022 11:43:36 - INFO - codeparrot_training - Step 26345: {'lr': 0.00024435543039081606, 'samples': 13489152, 'steps': 26345, 'loss/train': 2.716193199157715} +02/25/2022 11:43:40 - INFO - codeparrot_training - Step 26346: {'lr': 0.0002443390721123286, 'samples': 13489664, 'steps': 26346, 'loss/train': 2.6670377254486084} +02/25/2022 11:43:45 - INFO - codeparrot_training - Step 26347: {'lr': 0.00024432271385809085, 'samples': 13490176, 'steps': 26347, 'loss/train': 1.6349900960922241} +02/25/2022 11:43:49 - INFO - codeparrot_training - Step 26348: {'lr': 0.0002443063556281727, 'samples': 13490688, 'steps': 26348, 'loss/train': 0.5903484225273132} +02/25/2022 11:43:55 - INFO - codeparrot_training - Step 26349: {'lr': 0.0002442899974226443, 'samples': 13491200, 'steps': 26349, 'loss/train': 2.429945945739746} +02/25/2022 11:43:58 - INFO - codeparrot_training - Step 26350: {'lr': 0.00024427363924157567, 'samples': 13491712, 'steps': 26350, 'loss/train': 1.7602263689041138} +02/25/2022 11:44:02 - INFO - codeparrot_training - Step 26351: {'lr': 0.0002442572810850371, 'samples': 13492224, 'steps': 26351, 'loss/train': 7.8003716468811035} +02/25/2022 11:44:09 - INFO - codeparrot_training - Step 26352: {'lr': 0.0002442409229530985, 'samples': 13492736, 'steps': 26352, 'loss/train': 2.0946173667907715} +02/25/2022 11:44:13 - INFO - codeparrot_training - Step 26353: {'lr': 0.00024422456484582986, 'samples': 13493248, 'steps': 26353, 'loss/train': 1.4148906469345093} +02/25/2022 11:44:18 - INFO - codeparrot_training - Step 26354: {'lr': 0.0002442082067633014, 'samples': 13493760, 'steps': 26354, 'loss/train': 1.2416362762451172} +02/25/2022 11:44:22 - INFO - codeparrot_training - Step 26355: {'lr': 0.00024419184870558313, 'samples': 13494272, 'steps': 26355, 'loss/train': 2.064422607421875} +02/25/2022 11:44:27 - INFO - codeparrot_training - Step 26356: {'lr': 0.00024417549067274523, 'samples': 13494784, 'steps': 26356, 'loss/train': 1.3636423349380493} +02/25/2022 11:44:31 - INFO - codeparrot_training - Step 26357: {'lr': 0.00024415913266485754, 'samples': 13495296, 'steps': 26357, 'loss/train': 1.8285813331604004} +02/25/2022 11:44:36 - INFO - codeparrot_training - Step 26358: {'lr': 0.0002441427746819903, 'samples': 13495808, 'steps': 26358, 'loss/train': 0.4868795573711395} +02/25/2022 11:44:40 - INFO - codeparrot_training - Step 26359: {'lr': 0.00024412641672421357, 'samples': 13496320, 'steps': 26359, 'loss/train': 1.3161351680755615} +02/25/2022 11:44:45 - INFO - codeparrot_training - Step 26360: {'lr': 0.0002441100587915975, 'samples': 13496832, 'steps': 26360, 'loss/train': 1.8202041387557983} +02/25/2022 11:44:49 - INFO - codeparrot_training - Step 26361: {'lr': 0.000244093700884212, 'samples': 13497344, 'steps': 26361, 'loss/train': 2.2062909603118896} +02/25/2022 11:44:54 - INFO - codeparrot_training - Step 26362: {'lr': 0.00024407734300212715, 'samples': 13497856, 'steps': 26362, 'loss/train': 1.8599963188171387} +02/25/2022 11:44:58 - INFO - codeparrot_training - Step 26363: {'lr': 0.0002440609851454131, 'samples': 13498368, 'steps': 26363, 'loss/train': 1.5320758819580078} +02/25/2022 11:45:04 - INFO - codeparrot_training - Step 26364: {'lr': 0.00024404462731413996, 'samples': 13498880, 'steps': 26364, 'loss/train': 2.612672805786133} +02/25/2022 11:45:07 - INFO - codeparrot_training - Step 26365: {'lr': 0.00024402826950837775, 'samples': 13499392, 'steps': 26365, 'loss/train': 0.6196591854095459} +02/25/2022 11:45:10 - INFO - codeparrot_training - Step 26366: {'lr': 0.00024401191172819647, 'samples': 13499904, 'steps': 26366, 'loss/train': 1.7340437173843384} +02/25/2022 11:45:16 - INFO - codeparrot_training - Step 26367: {'lr': 0.00024399555397366633, 'samples': 13500416, 'steps': 26367, 'loss/train': 1.1996020078659058} +02/25/2022 11:45:20 - INFO - codeparrot_training - Step 26368: {'lr': 0.0002439791962448573, 'samples': 13500928, 'steps': 26368, 'loss/train': 2.166757583618164} +02/25/2022 11:45:27 - INFO - codeparrot_training - Step 26369: {'lr': 0.00024396283854183947, 'samples': 13501440, 'steps': 26369, 'loss/train': 1.2331719398498535} +02/25/2022 11:45:30 - INFO - codeparrot_training - Step 26370: {'lr': 0.0002439464808646829, 'samples': 13501952, 'steps': 26370, 'loss/train': 1.3486862182617188} +02/25/2022 11:45:36 - INFO - codeparrot_training - Step 26371: {'lr': 0.00024393012321345775, 'samples': 13502464, 'steps': 26371, 'loss/train': 2.0946381092071533} +02/25/2022 11:45:39 - INFO - codeparrot_training - Step 26372: {'lr': 0.00024391376558823398, 'samples': 13502976, 'steps': 26372, 'loss/train': 0.9696707725524902} +02/25/2022 11:45:45 - INFO - codeparrot_training - Step 26373: {'lr': 0.00024389740798908173, 'samples': 13503488, 'steps': 26373, 'loss/train': 0.8916708827018738} +02/25/2022 11:45:48 - INFO - codeparrot_training - Step 26374: {'lr': 0.00024388105041607105, 'samples': 13504000, 'steps': 26374, 'loss/train': 0.22761280834674835} +02/25/2022 11:45:54 - INFO - codeparrot_training - Step 26375: {'lr': 0.00024386469286927196, 'samples': 13504512, 'steps': 26375, 'loss/train': 2.7063379287719727} +02/25/2022 11:45:57 - INFO - codeparrot_training - Step 26376: {'lr': 0.00024384833534875458, 'samples': 13505024, 'steps': 26376, 'loss/train': 2.295356035232544} +02/25/2022 11:46:05 - INFO - codeparrot_training - Step 26377: {'lr': 0.00024383197785458899, 'samples': 13505536, 'steps': 26377, 'loss/train': 1.3243085145950317} +02/25/2022 11:46:08 - INFO - codeparrot_training - Step 26378: {'lr': 0.00024381562038684534, 'samples': 13506048, 'steps': 26378, 'loss/train': 1.9024004936218262} +02/25/2022 11:46:14 - INFO - codeparrot_training - Step 26379: {'lr': 0.00024379926294559352, 'samples': 13506560, 'steps': 26379, 'loss/train': 2.1351585388183594} +02/25/2022 11:46:17 - INFO - codeparrot_training - Step 26380: {'lr': 0.00024378290553090366, 'samples': 13507072, 'steps': 26380, 'loss/train': 2.3966636657714844} +02/25/2022 11:46:23 - INFO - codeparrot_training - Step 26381: {'lr': 0.00024376654814284586, 'samples': 13507584, 'steps': 26381, 'loss/train': 2.458189010620117} +02/25/2022 11:46:26 - INFO - codeparrot_training - Step 26382: {'lr': 0.0002437501907814903, 'samples': 13508096, 'steps': 26382, 'loss/train': 2.2841715812683105} +02/25/2022 11:46:32 - INFO - codeparrot_training - Step 26383: {'lr': 0.00024373383344690686, 'samples': 13508608, 'steps': 26383, 'loss/train': 1.6360108852386475} +02/25/2022 11:46:35 - INFO - codeparrot_training - Step 26384: {'lr': 0.00024371747613916565, 'samples': 13509120, 'steps': 26384, 'loss/train': 1.940095067024231} +02/25/2022 11:46:41 - INFO - codeparrot_training - Step 26385: {'lr': 0.00024370111885833678, 'samples': 13509632, 'steps': 26385, 'loss/train': 2.4187495708465576} +02/25/2022 11:46:44 - INFO - codeparrot_training - Step 26386: {'lr': 0.00024368476160449047, 'samples': 13510144, 'steps': 26386, 'loss/train': 1.6374770402908325} +02/25/2022 11:46:50 - INFO - codeparrot_training - Step 26387: {'lr': 0.00024366840437769647, 'samples': 13510656, 'steps': 26387, 'loss/train': 3.7390332221984863} +02/25/2022 11:46:54 - INFO - codeparrot_training - Step 26388: {'lr': 0.00024365204717802507, 'samples': 13511168, 'steps': 26388, 'loss/train': 1.6569944620132446} +02/25/2022 11:46:59 - INFO - codeparrot_training - Step 26389: {'lr': 0.0002436356900055462, 'samples': 13511680, 'steps': 26389, 'loss/train': 1.7931761741638184} +02/25/2022 11:47:03 - INFO - codeparrot_training - Step 26390: {'lr': 0.0002436193328603301, 'samples': 13512192, 'steps': 26390, 'loss/train': 1.3246029615402222} +02/25/2022 11:47:08 - INFO - codeparrot_training - Step 26391: {'lr': 0.00024360297574244682, 'samples': 13512704, 'steps': 26391, 'loss/train': 2.185159683227539} +02/25/2022 11:47:14 - INFO - codeparrot_training - Step 26392: {'lr': 0.00024358661865196628, 'samples': 13513216, 'steps': 26392, 'loss/train': 1.2739105224609375} +02/25/2022 11:47:17 - INFO - codeparrot_training - Step 26393: {'lr': 0.00024357026158895858, 'samples': 13513728, 'steps': 26393, 'loss/train': 3.099457263946533} +02/25/2022 11:47:23 - INFO - codeparrot_training - Step 26394: {'lr': 0.00024355390455349388, 'samples': 13514240, 'steps': 26394, 'loss/train': 0.4554407000541687} +02/25/2022 11:47:26 - INFO - codeparrot_training - Step 26395: {'lr': 0.0002435375475456423, 'samples': 13514752, 'steps': 26395, 'loss/train': 0.24246011674404144} +02/25/2022 11:47:32 - INFO - codeparrot_training - Step 26396: {'lr': 0.0002435211905654737, 'samples': 13515264, 'steps': 26396, 'loss/train': 2.035609006881714} +02/25/2022 11:47:35 - INFO - codeparrot_training - Step 26397: {'lr': 0.00024350483361305827, 'samples': 13515776, 'steps': 26397, 'loss/train': 0.31787604093551636} +02/25/2022 11:47:42 - INFO - codeparrot_training - Step 26398: {'lr': 0.00024348847668846608, 'samples': 13516288, 'steps': 26398, 'loss/train': 1.9995020627975464} +02/25/2022 11:47:45 - INFO - codeparrot_training - Step 26399: {'lr': 0.00024347211979176727, 'samples': 13516800, 'steps': 26399, 'loss/train': 1.246475100517273} +02/25/2022 11:47:49 - INFO - codeparrot_training - Step 26400: {'lr': 0.00024345576292303175, 'samples': 13517312, 'steps': 26400, 'loss/train': 1.504394292831421} +02/25/2022 11:47:54 - INFO - codeparrot_training - Step 26401: {'lr': 0.00024343940608232965, 'samples': 13517824, 'steps': 26401, 'loss/train': 0.5194740295410156} +02/25/2022 11:47:58 - INFO - codeparrot_training - Step 26402: {'lr': 0.00024342304926973105, 'samples': 13518336, 'steps': 26402, 'loss/train': 2.1852293014526367} +02/25/2022 11:48:03 - INFO - codeparrot_training - Step 26403: {'lr': 0.00024340669248530602, 'samples': 13518848, 'steps': 26403, 'loss/train': 1.5815045833587646} +02/25/2022 11:48:07 - INFO - codeparrot_training - Step 26404: {'lr': 0.00024339033572912472, 'samples': 13519360, 'steps': 26404, 'loss/train': 1.9526680707931519} +02/25/2022 11:48:12 - INFO - codeparrot_training - Step 26405: {'lr': 0.000243373979001257, 'samples': 13519872, 'steps': 26405, 'loss/train': 2.006246328353882} +02/25/2022 11:48:19 - INFO - codeparrot_training - Step 26406: {'lr': 0.0002433576223017731, 'samples': 13520384, 'steps': 26406, 'loss/train': 1.5348105430603027} +02/25/2022 11:48:23 - INFO - codeparrot_training - Step 26407: {'lr': 0.00024334126563074307, 'samples': 13520896, 'steps': 26407, 'loss/train': 1.5503588914871216} +02/25/2022 11:48:28 - INFO - codeparrot_training - Step 26408: {'lr': 0.00024332490898823695, 'samples': 13521408, 'steps': 26408, 'loss/train': 1.5304791927337646} +02/25/2022 11:48:32 - INFO - codeparrot_training - Step 26409: {'lr': 0.00024330855237432472, 'samples': 13521920, 'steps': 26409, 'loss/train': 2.740084409713745} +02/25/2022 11:48:37 - INFO - codeparrot_training - Step 26410: {'lr': 0.00024329219578907653, 'samples': 13522432, 'steps': 26410, 'loss/train': 2.324345827102661} +02/25/2022 11:48:41 - INFO - codeparrot_training - Step 26411: {'lr': 0.00024327583923256253, 'samples': 13522944, 'steps': 26411, 'loss/train': 2.3731019496917725} +02/25/2022 11:48:46 - INFO - codeparrot_training - Step 26412: {'lr': 0.00024325948270485263, 'samples': 13523456, 'steps': 26412, 'loss/train': 1.6274141073226929} +02/25/2022 11:48:50 - INFO - codeparrot_training - Step 26413: {'lr': 0.00024324312620601702, 'samples': 13523968, 'steps': 26413, 'loss/train': 2.2079548835754395} +02/25/2022 11:48:56 - INFO - codeparrot_training - Step 26414: {'lr': 0.00024322676973612565, 'samples': 13524480, 'steps': 26414, 'loss/train': 1.9055914878845215} +02/25/2022 11:48:59 - INFO - codeparrot_training - Step 26415: {'lr': 0.00024321041329524872, 'samples': 13524992, 'steps': 26415, 'loss/train': 0.7796139121055603} +02/25/2022 11:49:03 - INFO - codeparrot_training - Step 26416: {'lr': 0.00024319405688345613, 'samples': 13525504, 'steps': 26416, 'loss/train': 2.0578885078430176} +02/25/2022 11:49:08 - INFO - codeparrot_training - Step 26417: {'lr': 0.00024317770050081815, 'samples': 13526016, 'steps': 26417, 'loss/train': 2.581758737564087} +02/25/2022 11:49:11 - INFO - codeparrot_training - Step 26418: {'lr': 0.00024316134414740468, 'samples': 13526528, 'steps': 26418, 'loss/train': 2.315044641494751} +02/25/2022 11:49:17 - INFO - codeparrot_training - Step 26419: {'lr': 0.0002431449878232858, 'samples': 13527040, 'steps': 26419, 'loss/train': 3.263165235519409} +02/25/2022 11:49:20 - INFO - codeparrot_training - Step 26420: {'lr': 0.00024312863152853165, 'samples': 13527552, 'steps': 26420, 'loss/train': 1.6306297779083252} +02/25/2022 11:49:26 - INFO - codeparrot_training - Step 26421: {'lr': 0.0002431122752632123, 'samples': 13528064, 'steps': 26421, 'loss/train': 1.2548456192016602} +02/25/2022 11:49:33 - INFO - codeparrot_training - Step 26422: {'lr': 0.00024309591902739775, 'samples': 13528576, 'steps': 26422, 'loss/train': 1.6222890615463257} +02/25/2022 11:49:36 - INFO - codeparrot_training - Step 26423: {'lr': 0.00024307956282115803, 'samples': 13529088, 'steps': 26423, 'loss/train': 2.542529344558716} +02/25/2022 11:49:42 - INFO - codeparrot_training - Step 26424: {'lr': 0.0002430632066445633, 'samples': 13529600, 'steps': 26424, 'loss/train': 2.220149040222168} +02/25/2022 11:49:45 - INFO - codeparrot_training - Step 26425: {'lr': 0.00024304685049768358, 'samples': 13530112, 'steps': 26425, 'loss/train': 2.2881112098693848} +02/25/2022 11:49:52 - INFO - codeparrot_training - Step 26426: {'lr': 0.00024303049438058905, 'samples': 13530624, 'steps': 26426, 'loss/train': 1.0089293718338013} +02/25/2022 11:49:55 - INFO - codeparrot_training - Step 26427: {'lr': 0.00024301413829334957, 'samples': 13531136, 'steps': 26427, 'loss/train': 2.1103029251098633} +02/25/2022 11:49:59 - INFO - codeparrot_training - Step 26428: {'lr': 0.00024299778223603528, 'samples': 13531648, 'steps': 26428, 'loss/train': 2.3049960136413574} +02/25/2022 11:50:02 - INFO - codeparrot_training - Step 26429: {'lr': 0.00024298142620871627, 'samples': 13532160, 'steps': 26429, 'loss/train': 1.0803604125976562} +02/25/2022 11:50:08 - INFO - codeparrot_training - Step 26430: {'lr': 0.00024296507021146274, 'samples': 13532672, 'steps': 26430, 'loss/train': 2.7948157787323} +02/25/2022 11:50:11 - INFO - codeparrot_training - Step 26431: {'lr': 0.0002429487142443445, 'samples': 13533184, 'steps': 26431, 'loss/train': 1.4330811500549316} +02/25/2022 11:50:17 - INFO - codeparrot_training - Step 26432: {'lr': 0.00024293235830743172, 'samples': 13533696, 'steps': 26432, 'loss/train': 1.6584124565124512} +02/25/2022 11:50:20 - INFO - codeparrot_training - Step 26433: {'lr': 0.00024291600240079444, 'samples': 13534208, 'steps': 26433, 'loss/train': 2.803561210632324} +02/25/2022 11:50:27 - INFO - codeparrot_training - Step 26434: {'lr': 0.0002428996465245029, 'samples': 13534720, 'steps': 26434, 'loss/train': 1.9357272386550903} +02/25/2022 11:50:30 - INFO - codeparrot_training - Step 26435: {'lr': 0.00024288329067862692, 'samples': 13535232, 'steps': 26435, 'loss/train': 1.6152267456054688} +02/25/2022 11:50:36 - INFO - codeparrot_training - Step 26436: {'lr': 0.00024286693486323663, 'samples': 13535744, 'steps': 26436, 'loss/train': 1.9259852170944214} +02/25/2022 11:50:39 - INFO - codeparrot_training - Step 26437: {'lr': 0.00024285057907840217, 'samples': 13536256, 'steps': 26437, 'loss/train': 1.3499150276184082} +02/25/2022 11:50:45 - INFO - codeparrot_training - Step 26438: {'lr': 0.00024283422332419352, 'samples': 13536768, 'steps': 26438, 'loss/train': 2.0157687664031982} +02/25/2022 11:50:48 - INFO - codeparrot_training - Step 26439: {'lr': 0.00024281786760068093, 'samples': 13537280, 'steps': 26439, 'loss/train': 1.5157722234725952} +02/25/2022 11:50:54 - INFO - codeparrot_training - Step 26440: {'lr': 0.00024280151190793415, 'samples': 13537792, 'steps': 26440, 'loss/train': 2.3307814598083496} +02/25/2022 11:50:59 - INFO - codeparrot_training - Step 26441: {'lr': 0.00024278515624602344, 'samples': 13538304, 'steps': 26441, 'loss/train': 2.3740296363830566} +02/25/2022 11:51:02 - INFO - codeparrot_training - Step 26442: {'lr': 0.00024276880061501884, 'samples': 13538816, 'steps': 26442, 'loss/train': 0.3079553246498108} +02/25/2022 11:51:09 - INFO - codeparrot_training - Step 26443: {'lr': 0.00024275244501499048, 'samples': 13539328, 'steps': 26443, 'loss/train': 0.6742960810661316} +02/25/2022 11:51:12 - INFO - codeparrot_training - Step 26444: {'lr': 0.00024273608944600826, 'samples': 13539840, 'steps': 26444, 'loss/train': 1.4264261722564697} +02/25/2022 11:51:18 - INFO - codeparrot_training - Step 26445: {'lr': 0.00024271973390814234, 'samples': 13540352, 'steps': 26445, 'loss/train': 1.2122957706451416} +02/25/2022 11:51:21 - INFO - codeparrot_training - Step 26446: {'lr': 0.00024270337840146274, 'samples': 13540864, 'steps': 26446, 'loss/train': 2.329277992248535} +02/25/2022 11:51:25 - INFO - codeparrot_training - Step 26447: {'lr': 0.00024268702292603968, 'samples': 13541376, 'steps': 26447, 'loss/train': 2.3386948108673096} +02/25/2022 11:51:30 - INFO - codeparrot_training - Step 26448: {'lr': 0.00024267066748194293, 'samples': 13541888, 'steps': 26448, 'loss/train': 2.032050132751465} +02/25/2022 11:51:34 - INFO - codeparrot_training - Step 26449: {'lr': 0.00024265431206924276, 'samples': 13542400, 'steps': 26449, 'loss/train': 1.2951445579528809} +02/25/2022 11:51:39 - INFO - codeparrot_training - Step 26450: {'lr': 0.0002426379566880092, 'samples': 13542912, 'steps': 26450, 'loss/train': 7.37010383605957} +02/25/2022 11:51:43 - INFO - codeparrot_training - Step 26451: {'lr': 0.0002426216013383123, 'samples': 13543424, 'steps': 26451, 'loss/train': 1.44121515750885} +02/25/2022 11:51:48 - INFO - codeparrot_training - Step 26452: {'lr': 0.00024260524602022216, 'samples': 13543936, 'steps': 26452, 'loss/train': 1.873138666152954} +02/25/2022 11:51:52 - INFO - codeparrot_training - Step 26453: {'lr': 0.00024258889073380875, 'samples': 13544448, 'steps': 26453, 'loss/train': 2.3655571937561035} +02/25/2022 11:51:58 - INFO - codeparrot_training - Step 26454: {'lr': 0.00024257253547914213, 'samples': 13544960, 'steps': 26454, 'loss/train': 1.5407308340072632} +02/25/2022 11:52:04 - INFO - codeparrot_training - Step 26455: {'lr': 0.0002425561802562925, 'samples': 13545472, 'steps': 26455, 'loss/train': 3.0276730060577393} +02/25/2022 11:52:08 - INFO - codeparrot_training - Step 26456: {'lr': 0.0002425398250653298, 'samples': 13545984, 'steps': 26456, 'loss/train': 1.8760740756988525} +02/25/2022 11:52:11 - INFO - codeparrot_training - Step 26457: {'lr': 0.0002425234699063241, 'samples': 13546496, 'steps': 26457, 'loss/train': 0.7985270619392395} +02/25/2022 11:52:16 - INFO - codeparrot_training - Step 26458: {'lr': 0.00024250711477934552, 'samples': 13547008, 'steps': 26458, 'loss/train': 2.397965908050537} +02/25/2022 11:52:20 - INFO - codeparrot_training - Step 26459: {'lr': 0.00024249075968446404, 'samples': 13547520, 'steps': 26459, 'loss/train': 0.9945634007453918} +02/25/2022 11:52:26 - INFO - codeparrot_training - Step 26460: {'lr': 0.00024247440462174974, 'samples': 13548032, 'steps': 26460, 'loss/train': 1.7033792734146118} +02/25/2022 11:52:29 - INFO - codeparrot_training - Step 26461: {'lr': 0.00024245804959127277, 'samples': 13548544, 'steps': 26461, 'loss/train': 1.7728068828582764} +02/25/2022 11:52:35 - INFO - codeparrot_training - Step 26462: {'lr': 0.00024244169459310312, 'samples': 13549056, 'steps': 26462, 'loss/train': 1.7886940240859985} +02/25/2022 11:52:38 - INFO - codeparrot_training - Step 26463: {'lr': 0.00024242533962731078, 'samples': 13549568, 'steps': 26463, 'loss/train': 1.7180354595184326} +02/25/2022 11:52:44 - INFO - codeparrot_training - Step 26464: {'lr': 0.0002424089846939659, 'samples': 13550080, 'steps': 26464, 'loss/train': 1.7602565288543701} +02/25/2022 11:52:48 - INFO - codeparrot_training - Step 26465: {'lr': 0.0002423926297931386, 'samples': 13550592, 'steps': 26465, 'loss/train': 0.8669648170471191} +02/25/2022 11:52:53 - INFO - codeparrot_training - Step 26466: {'lr': 0.0002423762749248988, 'samples': 13551104, 'steps': 26466, 'loss/train': 3.1445229053497314} +02/25/2022 11:52:57 - INFO - codeparrot_training - Step 26467: {'lr': 0.00024235992008931657, 'samples': 13551616, 'steps': 26467, 'loss/train': 1.8835163116455078} +02/25/2022 11:53:02 - INFO - codeparrot_training - Step 26468: {'lr': 0.00024234356528646204, 'samples': 13552128, 'steps': 26468, 'loss/train': 2.2562899589538574} +02/25/2022 11:53:06 - INFO - codeparrot_training - Step 26469: {'lr': 0.00024232721051640536, 'samples': 13552640, 'steps': 26469, 'loss/train': 1.772828221321106} +02/25/2022 11:53:11 - INFO - codeparrot_training - Step 26470: {'lr': 0.00024231085577921635, 'samples': 13553152, 'steps': 26470, 'loss/train': 2.3923168182373047} +02/25/2022 11:53:15 - INFO - codeparrot_training - Step 26471: {'lr': 0.00024229450107496518, 'samples': 13553664, 'steps': 26471, 'loss/train': 1.8775622844696045} +02/25/2022 11:53:20 - INFO - codeparrot_training - Step 26472: {'lr': 0.00024227814640372195, 'samples': 13554176, 'steps': 26472, 'loss/train': 1.8366479873657227} +02/25/2022 11:53:24 - INFO - codeparrot_training - Step 26473: {'lr': 0.00024226179176555665, 'samples': 13554688, 'steps': 26473, 'loss/train': 0.8986123204231262} +02/25/2022 11:53:30 - INFO - codeparrot_training - Step 26474: {'lr': 0.00024224543716053952, 'samples': 13555200, 'steps': 26474, 'loss/train': 3.697416067123413} +02/25/2022 11:53:34 - INFO - codeparrot_training - Step 26475: {'lr': 0.00024222908258874035, 'samples': 13555712, 'steps': 26475, 'loss/train': 1.9967089891433716} +02/25/2022 11:53:39 - INFO - codeparrot_training - Step 26476: {'lr': 0.00024221272805022935, 'samples': 13556224, 'steps': 26476, 'loss/train': 2.262087345123291} +02/25/2022 11:53:43 - INFO - codeparrot_training - Step 26477: {'lr': 0.0002421963735450765, 'samples': 13556736, 'steps': 26477, 'loss/train': 2.1178677082061768} +02/25/2022 11:53:48 - INFO - codeparrot_training - Step 26478: {'lr': 0.00024218001907335207, 'samples': 13557248, 'steps': 26478, 'loss/train': 0.641125500202179} +02/25/2022 11:53:52 - INFO - codeparrot_training - Step 26479: {'lr': 0.00024216366463512582, 'samples': 13557760, 'steps': 26479, 'loss/train': 1.3325393199920654} +02/25/2022 11:53:58 - INFO - codeparrot_training - Step 26480: {'lr': 0.00024214731023046793, 'samples': 13558272, 'steps': 26480, 'loss/train': 2.29300594329834} +02/25/2022 11:54:01 - INFO - codeparrot_training - Step 26481: {'lr': 0.0002421309558594485, 'samples': 13558784, 'steps': 26481, 'loss/train': 1.9313616752624512} +02/25/2022 11:54:07 - INFO - codeparrot_training - Step 26482: {'lr': 0.00024211460152213763, 'samples': 13559296, 'steps': 26482, 'loss/train': 1.8698166608810425} +02/25/2022 11:54:10 - INFO - codeparrot_training - Step 26483: {'lr': 0.0002420982472186052, 'samples': 13559808, 'steps': 26483, 'loss/train': 2.829249620437622} +02/25/2022 11:54:16 - INFO - codeparrot_training - Step 26484: {'lr': 0.0002420818929489214, 'samples': 13560320, 'steps': 26484, 'loss/train': 1.379737138748169} +02/25/2022 11:54:19 - INFO - codeparrot_training - Step 26485: {'lr': 0.00024206553871315622, 'samples': 13560832, 'steps': 26485, 'loss/train': 2.0421934127807617} +02/25/2022 11:54:25 - INFO - codeparrot_training - Step 26486: {'lr': 0.0002420491845113798, 'samples': 13561344, 'steps': 26486, 'loss/train': 2.2395713329315186} +02/25/2022 11:54:29 - INFO - codeparrot_training - Step 26487: {'lr': 0.00024203283034366223, 'samples': 13561856, 'steps': 26487, 'loss/train': 2.547132730484009} +02/25/2022 11:54:34 - INFO - codeparrot_training - Step 26488: {'lr': 0.00024201647621007336, 'samples': 13562368, 'steps': 26488, 'loss/train': 1.558421015739441} +02/25/2022 11:54:38 - INFO - codeparrot_training - Step 26489: {'lr': 0.0002420001221106834, 'samples': 13562880, 'steps': 26489, 'loss/train': 0.7302256226539612} +02/25/2022 11:54:44 - INFO - codeparrot_training - Step 26490: {'lr': 0.00024198376804556235, 'samples': 13563392, 'steps': 26490, 'loss/train': 3.0497522354125977} +02/25/2022 11:54:48 - INFO - codeparrot_training - Step 26491: {'lr': 0.00024196741401478044, 'samples': 13563904, 'steps': 26491, 'loss/train': 1.8120146989822388} +02/25/2022 11:54:53 - INFO - codeparrot_training - Step 26492: {'lr': 0.00024195106001840741, 'samples': 13564416, 'steps': 26492, 'loss/train': 1.8885564804077148} +02/25/2022 11:54:57 - INFO - codeparrot_training - Step 26493: {'lr': 0.0002419347060565135, 'samples': 13564928, 'steps': 26493, 'loss/train': 1.7711570262908936} +02/25/2022 11:55:02 - INFO - codeparrot_training - Step 26494: {'lr': 0.0002419183521291688, 'samples': 13565440, 'steps': 26494, 'loss/train': 1.6625829935073853} +02/25/2022 11:55:06 - INFO - codeparrot_training - Step 26495: {'lr': 0.0002419019982364434, 'samples': 13565952, 'steps': 26495, 'loss/train': 2.5402770042419434} +02/25/2022 11:55:11 - INFO - codeparrot_training - Step 26496: {'lr': 0.00024188564437840714, 'samples': 13566464, 'steps': 26496, 'loss/train': 2.300276041030884} +02/25/2022 11:55:15 - INFO - codeparrot_training - Step 26497: {'lr': 0.0002418692905551302, 'samples': 13566976, 'steps': 26497, 'loss/train': 1.101785659790039} +02/25/2022 11:55:20 - INFO - codeparrot_training - Step 26498: {'lr': 0.00024185293676668267, 'samples': 13567488, 'steps': 26498, 'loss/train': 2.2778050899505615} +02/25/2022 11:55:24 - INFO - codeparrot_training - Step 26499: {'lr': 0.0002418365830131346, 'samples': 13568000, 'steps': 26499, 'loss/train': 1.111402988433838} +02/25/2022 11:55:30 - INFO - codeparrot_training - Step 26500: {'lr': 0.00024182022929455598, 'samples': 13568512, 'steps': 26500, 'loss/train': 3.105008363723755} +02/25/2022 11:55:34 - INFO - codeparrot_training - Step 26501: {'lr': 0.00024180387561101692, 'samples': 13569024, 'steps': 26501, 'loss/train': 2.5942699909210205} +02/25/2022 11:55:39 - INFO - codeparrot_training - Step 26502: {'lr': 0.00024178752196258747, 'samples': 13569536, 'steps': 26502, 'loss/train': 1.6470201015472412} +02/25/2022 11:55:43 - INFO - codeparrot_training - Step 26503: {'lr': 0.0002417711683493376, 'samples': 13570048, 'steps': 26503, 'loss/train': 2.0359835624694824} +02/25/2022 11:55:48 - INFO - codeparrot_training - Step 26504: {'lr': 0.0002417548147713375, 'samples': 13570560, 'steps': 26504, 'loss/train': 2.046633243560791} +02/25/2022 11:55:52 - INFO - codeparrot_training - Step 26505: {'lr': 0.00024173846122865718, 'samples': 13571072, 'steps': 26505, 'loss/train': 1.1537288427352905} +02/25/2022 11:55:57 - INFO - codeparrot_training - Step 26506: {'lr': 0.00024172210772136656, 'samples': 13571584, 'steps': 26506, 'loss/train': 0.40817639231681824} +02/25/2022 11:56:01 - INFO - codeparrot_training - Step 26507: {'lr': 0.00024170575424953584, 'samples': 13572096, 'steps': 26507, 'loss/train': 1.8084417581558228} +02/25/2022 11:56:06 - INFO - codeparrot_training - Step 26508: {'lr': 0.0002416894008132351, 'samples': 13572608, 'steps': 26508, 'loss/train': 1.5863398313522339} +02/25/2022 11:56:10 - INFO - codeparrot_training - Step 26509: {'lr': 0.00024167304741253432, 'samples': 13573120, 'steps': 26509, 'loss/train': 1.686205506324768} +02/25/2022 11:56:15 - INFO - codeparrot_training - Step 26510: {'lr': 0.00024165669404750347, 'samples': 13573632, 'steps': 26510, 'loss/train': 1.3457974195480347} +02/25/2022 11:56:19 - INFO - codeparrot_training - Step 26511: {'lr': 0.00024164034071821273, 'samples': 13574144, 'steps': 26511, 'loss/train': 1.401214838027954} +02/25/2022 11:56:25 - INFO - codeparrot_training - Step 26512: {'lr': 0.00024162398742473212, 'samples': 13574656, 'steps': 26512, 'loss/train': 1.4683468341827393} +02/25/2022 11:56:28 - INFO - codeparrot_training - Step 26513: {'lr': 0.00024160763416713178, 'samples': 13575168, 'steps': 26513, 'loss/train': 2.3101515769958496} +02/25/2022 11:56:34 - INFO - codeparrot_training - Step 26514: {'lr': 0.00024159128094548157, 'samples': 13575680, 'steps': 26514, 'loss/train': 1.2724518775939941} +02/25/2022 11:56:38 - INFO - codeparrot_training - Step 26515: {'lr': 0.00024157492775985162, 'samples': 13576192, 'steps': 26515, 'loss/train': 2.1959407329559326} +02/25/2022 11:56:43 - INFO - codeparrot_training - Step 26516: {'lr': 0.00024155857461031203, 'samples': 13576704, 'steps': 26516, 'loss/train': 2.2702581882476807} +02/25/2022 11:56:47 - INFO - codeparrot_training - Step 26517: {'lr': 0.00024154222149693294, 'samples': 13577216, 'steps': 26517, 'loss/train': 2.8432254791259766} +02/25/2022 11:56:52 - INFO - codeparrot_training - Step 26518: {'lr': 0.00024152586841978417, 'samples': 13577728, 'steps': 26518, 'loss/train': 1.9278030395507812} +02/25/2022 11:56:56 - INFO - codeparrot_training - Step 26519: {'lr': 0.00024150951537893587, 'samples': 13578240, 'steps': 26519, 'loss/train': 2.2340476512908936} +02/25/2022 11:57:01 - INFO - codeparrot_training - Step 26520: {'lr': 0.00024149316237445813, 'samples': 13578752, 'steps': 26520, 'loss/train': 1.3008180856704712} +02/25/2022 11:57:04 - INFO - codeparrot_training - Step 26521: {'lr': 0.00024147680940642097, 'samples': 13579264, 'steps': 26521, 'loss/train': 0.985638439655304} +02/25/2022 11:57:10 - INFO - codeparrot_training - Step 26522: {'lr': 0.0002414604564748946, 'samples': 13579776, 'steps': 26522, 'loss/train': 1.7963171005249023} +02/25/2022 11:57:13 - INFO - codeparrot_training - Step 26523: {'lr': 0.00024144410357994876, 'samples': 13580288, 'steps': 26523, 'loss/train': 2.6810050010681152} +02/25/2022 11:57:19 - INFO - codeparrot_training - Step 26524: {'lr': 0.00024142775072165368, 'samples': 13580800, 'steps': 26524, 'loss/train': 1.9601081609725952} +02/25/2022 11:57:22 - INFO - codeparrot_training - Step 26525: {'lr': 0.00024141139790007942, 'samples': 13581312, 'steps': 26525, 'loss/train': 1.6294368505477905} +02/25/2022 11:57:29 - INFO - codeparrot_training - Step 26526: {'lr': 0.0002413950451152961, 'samples': 13581824, 'steps': 26526, 'loss/train': 1.6196589469909668} +02/25/2022 11:57:32 - INFO - codeparrot_training - Step 26527: {'lr': 0.00024137869236737352, 'samples': 13582336, 'steps': 26527, 'loss/train': 2.0140879154205322} +02/25/2022 11:57:38 - INFO - codeparrot_training - Step 26528: {'lr': 0.00024136233965638194, 'samples': 13582848, 'steps': 26528, 'loss/train': 2.0122475624084473} +02/25/2022 11:57:41 - INFO - codeparrot_training - Step 26529: {'lr': 0.00024134598698239134, 'samples': 13583360, 'steps': 26529, 'loss/train': 2.5290005207061768} +02/25/2022 11:57:47 - INFO - codeparrot_training - Step 26530: {'lr': 0.00024132963434547188, 'samples': 13583872, 'steps': 26530, 'loss/train': 1.0112128257751465} +02/25/2022 11:57:50 - INFO - codeparrot_training - Step 26531: {'lr': 0.00024131328174569342, 'samples': 13584384, 'steps': 26531, 'loss/train': 2.014789581298828} +02/25/2022 11:57:56 - INFO - codeparrot_training - Step 26532: {'lr': 0.0002412969291831261, 'samples': 13584896, 'steps': 26532, 'loss/train': 1.5335077047348022} +02/25/2022 11:57:59 - INFO - codeparrot_training - Step 26533: {'lr': 0.00024128057665783996, 'samples': 13585408, 'steps': 26533, 'loss/train': 1.3351154327392578} +02/25/2022 11:58:05 - INFO - codeparrot_training - Step 26534: {'lr': 0.00024126422416990506, 'samples': 13585920, 'steps': 26534, 'loss/train': 2.1637916564941406} +02/25/2022 11:58:08 - INFO - codeparrot_training - Step 26535: {'lr': 0.0002412478717193916, 'samples': 13586432, 'steps': 26535, 'loss/train': 2.0140459537506104} +02/25/2022 11:58:15 - INFO - codeparrot_training - Step 26536: {'lr': 0.0002412315193063693, 'samples': 13586944, 'steps': 26536, 'loss/train': 1.7236000299453735} +02/25/2022 11:58:20 - INFO - codeparrot_training - Step 26537: {'lr': 0.00024121516693090841, 'samples': 13587456, 'steps': 26537, 'loss/train': 1.813808560371399} +02/25/2022 11:58:24 - INFO - codeparrot_training - Step 26538: {'lr': 0.00024119881459307906, 'samples': 13587968, 'steps': 26538, 'loss/train': 0.23139937222003937} +02/25/2022 11:58:27 - INFO - codeparrot_training - Step 26539: {'lr': 0.00024118246229295115, 'samples': 13588480, 'steps': 26539, 'loss/train': 1.9699782133102417} +02/25/2022 11:58:33 - INFO - codeparrot_training - Step 26540: {'lr': 0.0002411661100305947, 'samples': 13588992, 'steps': 26540, 'loss/train': 2.3278486728668213} +02/25/2022 11:58:36 - INFO - codeparrot_training - Step 26541: {'lr': 0.00024114975780607987, 'samples': 13589504, 'steps': 26541, 'loss/train': 2.3825831413269043} +02/25/2022 11:58:42 - INFO - codeparrot_training - Step 26542: {'lr': 0.0002411334056194767, 'samples': 13590016, 'steps': 26542, 'loss/train': 1.9745625257492065} +02/25/2022 11:58:45 - INFO - codeparrot_training - Step 26543: {'lr': 0.00024111705347085521, 'samples': 13590528, 'steps': 26543, 'loss/train': 2.179685592651367} +02/25/2022 11:58:51 - INFO - codeparrot_training - Step 26544: {'lr': 0.0002411007013602854, 'samples': 13591040, 'steps': 26544, 'loss/train': 1.745808482170105} +02/25/2022 11:58:56 - INFO - codeparrot_training - Step 26545: {'lr': 0.0002410843492878374, 'samples': 13591552, 'steps': 26545, 'loss/train': 0.9869518280029297} +02/25/2022 11:59:00 - INFO - codeparrot_training - Step 26546: {'lr': 0.00024106799725358117, 'samples': 13592064, 'steps': 26546, 'loss/train': 1.8209220170974731} +02/25/2022 11:59:06 - INFO - codeparrot_training - Step 26547: {'lr': 0.0002410516452575868, 'samples': 13592576, 'steps': 26547, 'loss/train': 1.198609471321106} +02/25/2022 11:59:09 - INFO - codeparrot_training - Step 26548: {'lr': 0.00024103529329992437, 'samples': 13593088, 'steps': 26548, 'loss/train': 2.8850834369659424} +02/25/2022 11:59:15 - INFO - codeparrot_training - Step 26549: {'lr': 0.00024101894138066395, 'samples': 13593600, 'steps': 26549, 'loss/train': 0.6228379607200623} +02/25/2022 11:59:18 - INFO - codeparrot_training - Step 26550: {'lr': 0.00024100258949987544, 'samples': 13594112, 'steps': 26550, 'loss/train': 1.8354586362838745} +02/25/2022 11:59:24 - INFO - codeparrot_training - Step 26551: {'lr': 0.00024098623765762898, 'samples': 13594624, 'steps': 26551, 'loss/train': 2.662301778793335} +02/25/2022 11:59:27 - INFO - codeparrot_training - Step 26552: {'lr': 0.00024096988585399474, 'samples': 13595136, 'steps': 26552, 'loss/train': 1.998597264289856} +02/25/2022 11:59:33 - INFO - codeparrot_training - Step 26553: {'lr': 0.00024095353408904252, 'samples': 13595648, 'steps': 26553, 'loss/train': 2.0784735679626465} +02/25/2022 11:59:36 - INFO - codeparrot_training - Step 26554: {'lr': 0.00024093718236284248, 'samples': 13596160, 'steps': 26554, 'loss/train': 1.9538110494613647} +02/25/2022 11:59:42 - INFO - codeparrot_training - Step 26555: {'lr': 0.00024092083067546468, 'samples': 13596672, 'steps': 26555, 'loss/train': 1.279066801071167} +02/25/2022 11:59:45 - INFO - codeparrot_training - Step 26556: {'lr': 0.00024090447902697928, 'samples': 13597184, 'steps': 26556, 'loss/train': 0.8897513151168823} +02/25/2022 11:59:52 - INFO - codeparrot_training - Step 26557: {'lr': 0.0002408881274174561, 'samples': 13597696, 'steps': 26557, 'loss/train': 2.133594036102295} +02/25/2022 11:59:55 - INFO - codeparrot_training - Step 26558: {'lr': 0.00024087177584696526, 'samples': 13598208, 'steps': 26558, 'loss/train': 3.3634629249572754} +02/25/2022 12:00:01 - INFO - codeparrot_training - Step 26559: {'lr': 0.00024085542431557687, 'samples': 13598720, 'steps': 26559, 'loss/train': 0.5618734955787659} +02/25/2022 12:00:04 - INFO - codeparrot_training - Step 26560: {'lr': 0.0002408390728233609, 'samples': 13599232, 'steps': 26560, 'loss/train': 1.6674375534057617} +02/25/2022 12:00:07 - INFO - codeparrot_training - Step 26561: {'lr': 0.00024082272137038757, 'samples': 13599744, 'steps': 26561, 'loss/train': 2.2926013469696045} +02/25/2022 12:00:13 - INFO - codeparrot_training - Step 26562: {'lr': 0.00024080636995672667, 'samples': 13600256, 'steps': 26562, 'loss/train': 1.0135136842727661} +02/25/2022 12:00:19 - INFO - codeparrot_training - Step 26563: {'lr': 0.00024079001858244835, 'samples': 13600768, 'steps': 26563, 'loss/train': 2.3266797065734863} +02/25/2022 12:00:22 - INFO - codeparrot_training - Step 26564: {'lr': 0.0002407736672476227, 'samples': 13601280, 'steps': 26564, 'loss/train': 1.8169890642166138} +02/25/2022 12:00:28 - INFO - codeparrot_training - Step 26565: {'lr': 0.0002407573159523198, 'samples': 13601792, 'steps': 26565, 'loss/train': 0.08260375261306763} +02/25/2022 12:00:32 - INFO - codeparrot_training - Step 26566: {'lr': 0.00024074096469660952, 'samples': 13602304, 'steps': 26566, 'loss/train': 1.8742878437042236} +02/25/2022 12:00:35 - INFO - codeparrot_training - Step 26567: {'lr': 0.00024072461348056205, 'samples': 13602816, 'steps': 26567, 'loss/train': 1.9476584196090698} +02/25/2022 12:00:41 - INFO - codeparrot_training - Step 26568: {'lr': 0.00024070826230424732, 'samples': 13603328, 'steps': 26568, 'loss/train': 2.019618272781372} +02/25/2022 12:00:44 - INFO - codeparrot_training - Step 26569: {'lr': 0.00024069191116773552, 'samples': 13603840, 'steps': 26569, 'loss/train': 2.0052618980407715} +02/25/2022 12:00:50 - INFO - codeparrot_training - Step 26570: {'lr': 0.00024067556007109666, 'samples': 13604352, 'steps': 26570, 'loss/train': 1.7393321990966797} +02/25/2022 12:00:53 - INFO - codeparrot_training - Step 26571: {'lr': 0.00024065920901440068, 'samples': 13604864, 'steps': 26571, 'loss/train': 2.554619789123535} +02/25/2022 12:00:59 - INFO - codeparrot_training - Step 26572: {'lr': 0.00024064285799771766, 'samples': 13605376, 'steps': 26572, 'loss/train': 1.22853684425354} +02/25/2022 12:01:03 - INFO - codeparrot_training - Step 26573: {'lr': 0.00024062650702111766, 'samples': 13605888, 'steps': 26573, 'loss/train': 1.5425652265548706} +02/25/2022 12:01:08 - INFO - codeparrot_training - Step 26574: {'lr': 0.00024061015608467084, 'samples': 13606400, 'steps': 26574, 'loss/train': 1.59976327419281} +02/25/2022 12:01:12 - INFO - codeparrot_training - Step 26575: {'lr': 0.00024059380518844702, 'samples': 13606912, 'steps': 26575, 'loss/train': 1.6789342164993286} +02/25/2022 12:01:17 - INFO - codeparrot_training - Step 26576: {'lr': 0.00024057745433251636, 'samples': 13607424, 'steps': 26576, 'loss/train': 1.1474515199661255} +02/25/2022 12:01:21 - INFO - codeparrot_training - Step 26577: {'lr': 0.00024056110351694887, 'samples': 13607936, 'steps': 26577, 'loss/train': 2.047783374786377} +02/25/2022 12:01:26 - INFO - codeparrot_training - Step 26578: {'lr': 0.00024054475274181474, 'samples': 13608448, 'steps': 26578, 'loss/train': 1.6341447830200195} +02/25/2022 12:01:30 - INFO - codeparrot_training - Step 26579: {'lr': 0.0002405284020071838, 'samples': 13608960, 'steps': 26579, 'loss/train': 2.2441558837890625} +02/25/2022 12:01:35 - INFO - codeparrot_training - Step 26580: {'lr': 0.00024051205131312618, 'samples': 13609472, 'steps': 26580, 'loss/train': 0.5366752743721008} +02/25/2022 12:01:39 - INFO - codeparrot_training - Step 26581: {'lr': 0.00024049570065971188, 'samples': 13609984, 'steps': 26581, 'loss/train': 1.8090510368347168} +02/25/2022 12:01:44 - INFO - codeparrot_training - Step 26582: {'lr': 0.00024047935004701106, 'samples': 13610496, 'steps': 26582, 'loss/train': 1.4878827333450317} +02/25/2022 12:01:48 - INFO - codeparrot_training - Step 26583: {'lr': 0.0002404629994750937, 'samples': 13611008, 'steps': 26583, 'loss/train': 1.8696717023849487} +02/25/2022 12:01:54 - INFO - codeparrot_training - Step 26584: {'lr': 0.0002404466489440297, 'samples': 13611520, 'steps': 26584, 'loss/train': 1.2563058137893677} +02/25/2022 12:01:57 - INFO - codeparrot_training - Step 26585: {'lr': 0.00024043029845388934, 'samples': 13612032, 'steps': 26585, 'loss/train': 2.8918843269348145} +02/25/2022 12:02:03 - INFO - codeparrot_training - Step 26586: {'lr': 0.00024041394800474247, 'samples': 13612544, 'steps': 26586, 'loss/train': 1.777243971824646} +02/25/2022 12:02:06 - INFO - codeparrot_training - Step 26587: {'lr': 0.00024039759759665925, 'samples': 13613056, 'steps': 26587, 'loss/train': 1.2043194770812988} +02/25/2022 12:02:12 - INFO - codeparrot_training - Step 26588: {'lr': 0.00024038124722970962, 'samples': 13613568, 'steps': 26588, 'loss/train': 1.223273515701294} +02/25/2022 12:02:15 - INFO - codeparrot_training - Step 26589: {'lr': 0.00024036489690396374, 'samples': 13614080, 'steps': 26589, 'loss/train': 2.6900808811187744} +02/25/2022 12:02:21 - INFO - codeparrot_training - Step 26590: {'lr': 0.00024034854661949152, 'samples': 13614592, 'steps': 26590, 'loss/train': 1.5867611169815063} +02/25/2022 12:02:24 - INFO - codeparrot_training - Step 26591: {'lr': 0.00024033219637636312, 'samples': 13615104, 'steps': 26591, 'loss/train': 1.285199522972107} +02/25/2022 12:02:30 - INFO - codeparrot_training - Step 26592: {'lr': 0.0002403158461746485, 'samples': 13615616, 'steps': 26592, 'loss/train': 0.11368382722139359} +02/25/2022 12:02:33 - INFO - codeparrot_training - Step 26593: {'lr': 0.00024029949601441766, 'samples': 13616128, 'steps': 26593, 'loss/train': 2.3916549682617188} +02/25/2022 12:02:40 - INFO - codeparrot_training - Step 26594: {'lr': 0.00024028314589574074, 'samples': 13616640, 'steps': 26594, 'loss/train': 0.6202870011329651} +02/25/2022 12:02:43 - INFO - codeparrot_training - Step 26595: {'lr': 0.00024026679581868772, 'samples': 13617152, 'steps': 26595, 'loss/train': 2.531552791595459} +02/25/2022 12:02:49 - INFO - codeparrot_training - Step 26596: {'lr': 0.00024025044578332875, 'samples': 13617664, 'steps': 26596, 'loss/train': 2.273005247116089} +02/25/2022 12:02:52 - INFO - codeparrot_training - Step 26597: {'lr': 0.00024023409578973368, 'samples': 13618176, 'steps': 26597, 'loss/train': 2.321377992630005} +02/25/2022 12:02:58 - INFO - codeparrot_training - Step 26598: {'lr': 0.00024021774583797266, 'samples': 13618688, 'steps': 26598, 'loss/train': 0.043440669775009155} +02/25/2022 12:03:01 - INFO - codeparrot_training - Step 26599: {'lr': 0.00024020139592811568, 'samples': 13619200, 'steps': 26599, 'loss/train': 2.193847179412842} +02/25/2022 12:03:06 - INFO - codeparrot_training - Step 26600: {'lr': 0.00024018504606023293, 'samples': 13619712, 'steps': 26600, 'loss/train': 1.485547423362732} +02/25/2022 12:03:10 - INFO - codeparrot_training - Step 26601: {'lr': 0.00024016869623439422, 'samples': 13620224, 'steps': 26601, 'loss/train': 2.1026217937469482} +02/25/2022 12:03:16 - INFO - codeparrot_training - Step 26602: {'lr': 0.00024015234645066968, 'samples': 13620736, 'steps': 26602, 'loss/train': 1.9695422649383545} +02/25/2022 12:03:19 - INFO - codeparrot_training - Step 26603: {'lr': 0.00024013599670912936, 'samples': 13621248, 'steps': 26603, 'loss/train': 1.4434484243392944} +02/25/2022 12:03:25 - INFO - codeparrot_training - Step 26604: {'lr': 0.00024011964700984342, 'samples': 13621760, 'steps': 26604, 'loss/train': 1.5674806833267212} +02/25/2022 12:03:29 - INFO - codeparrot_training - Step 26605: {'lr': 0.00024010329735288166, 'samples': 13622272, 'steps': 26605, 'loss/train': 2.466535806655884} +02/25/2022 12:03:34 - INFO - codeparrot_training - Step 26606: {'lr': 0.00024008694773831424, 'samples': 13622784, 'steps': 26606, 'loss/train': 2.4010603427886963} +02/25/2022 12:03:38 - INFO - codeparrot_training - Step 26607: {'lr': 0.0002400705981662112, 'samples': 13623296, 'steps': 26607, 'loss/train': 2.2689950466156006} +02/25/2022 12:03:43 - INFO - codeparrot_training - Step 26608: {'lr': 0.00024005424863664251, 'samples': 13623808, 'steps': 26608, 'loss/train': 1.5158312320709229} +02/25/2022 12:03:47 - INFO - codeparrot_training - Step 26609: {'lr': 0.00024003789914967842, 'samples': 13624320, 'steps': 26609, 'loss/train': 2.184361219406128} +02/25/2022 12:03:52 - INFO - codeparrot_training - Step 26610: {'lr': 0.0002400215497053887, 'samples': 13624832, 'steps': 26610, 'loss/train': 1.9128063917160034} +02/25/2022 12:03:56 - INFO - codeparrot_training - Step 26611: {'lr': 0.00024000520030384345, 'samples': 13625344, 'steps': 26611, 'loss/train': 3.688497304916382} +02/25/2022 12:04:01 - INFO - codeparrot_training - Step 26612: {'lr': 0.00023998885094511277, 'samples': 13625856, 'steps': 26612, 'loss/train': 0.3894404172897339} +02/25/2022 12:04:05 - INFO - codeparrot_training - Step 26613: {'lr': 0.00023997250162926682, 'samples': 13626368, 'steps': 26613, 'loss/train': 2.4020166397094727} +02/25/2022 12:04:10 - INFO - codeparrot_training - Step 26614: {'lr': 0.00023995615235637534, 'samples': 13626880, 'steps': 26614, 'loss/train': 0.3250059187412262} +02/25/2022 12:04:14 - INFO - codeparrot_training - Step 26615: {'lr': 0.00023993980312650852, 'samples': 13627392, 'steps': 26615, 'loss/train': 1.8597544431686401} +02/25/2022 12:04:19 - INFO - codeparrot_training - Step 26616: {'lr': 0.00023992345393973637, 'samples': 13627904, 'steps': 26616, 'loss/train': 3.1517093181610107} +02/25/2022 12:04:23 - INFO - codeparrot_training - Step 26617: {'lr': 0.00023990710479612897, 'samples': 13628416, 'steps': 26617, 'loss/train': 2.3661489486694336} +02/25/2022 12:04:28 - INFO - codeparrot_training - Step 26618: {'lr': 0.00023989075569575644, 'samples': 13628928, 'steps': 26618, 'loss/train': 2.699223518371582} +02/25/2022 12:04:31 - INFO - codeparrot_training - Step 26619: {'lr': 0.0002398744066386886, 'samples': 13629440, 'steps': 26619, 'loss/train': 1.8562602996826172} +02/25/2022 12:04:38 - INFO - codeparrot_training - Step 26620: {'lr': 0.00023985805762499553, 'samples': 13629952, 'steps': 26620, 'loss/train': 0.43890470266342163} +02/25/2022 12:04:41 - INFO - codeparrot_training - Step 26621: {'lr': 0.00023984170865474737, 'samples': 13630464, 'steps': 26621, 'loss/train': 1.8763319253921509} +02/25/2022 12:04:47 - INFO - codeparrot_training - Step 26622: {'lr': 0.00023982535972801424, 'samples': 13630976, 'steps': 26622, 'loss/train': 1.9783025979995728} +02/25/2022 12:04:50 - INFO - codeparrot_training - Step 26623: {'lr': 0.00023980901084486587, 'samples': 13631488, 'steps': 26623, 'loss/train': 1.3606804609298706} +02/25/2022 12:04:56 - INFO - codeparrot_training - Step 26624: {'lr': 0.00023979266200537251, 'samples': 13632000, 'steps': 26624, 'loss/train': 1.8004802465438843} +02/25/2022 12:04:59 - INFO - codeparrot_training - Step 26625: {'lr': 0.00023977631320960417, 'samples': 13632512, 'steps': 26625, 'loss/train': 1.6889280080795288} +02/25/2022 12:05:05 - INFO - codeparrot_training - Step 26626: {'lr': 0.00023975996445763084, 'samples': 13633024, 'steps': 26626, 'loss/train': 1.457277536392212} +02/25/2022 12:05:08 - INFO - codeparrot_training - Step 26627: {'lr': 0.00023974361574952255, 'samples': 13633536, 'steps': 26627, 'loss/train': 1.3851258754730225} +02/25/2022 12:05:14 - INFO - codeparrot_training - Step 26628: {'lr': 0.00023972726708534932, 'samples': 13634048, 'steps': 26628, 'loss/train': 1.564491868019104} +02/25/2022 12:05:17 - INFO - codeparrot_training - Step 26629: {'lr': 0.00023971091846518132, 'samples': 13634560, 'steps': 26629, 'loss/train': 2.001295804977417} +02/25/2022 12:05:23 - INFO - codeparrot_training - Step 26630: {'lr': 0.00023969456988908838, 'samples': 13635072, 'steps': 26630, 'loss/train': 2.423020362854004} +02/25/2022 12:05:27 - INFO - codeparrot_training - Step 26631: {'lr': 0.0002396782213571407, 'samples': 13635584, 'steps': 26631, 'loss/train': 1.8267040252685547} +02/25/2022 12:05:32 - INFO - codeparrot_training - Step 26632: {'lr': 0.00023966187286940827, 'samples': 13636096, 'steps': 26632, 'loss/train': 1.217692255973816} +02/25/2022 12:05:36 - INFO - codeparrot_training - Step 26633: {'lr': 0.000239645524425961, 'samples': 13636608, 'steps': 26633, 'loss/train': 0.6895974278450012} +02/25/2022 12:05:41 - INFO - codeparrot_training - Step 26634: {'lr': 0.00023962917602686904, 'samples': 13637120, 'steps': 26634, 'loss/train': 1.9834662675857544} +02/25/2022 12:05:45 - INFO - codeparrot_training - Step 26635: {'lr': 0.00023961282767220244, 'samples': 13637632, 'steps': 26635, 'loss/train': 1.8184022903442383} +02/25/2022 12:05:51 - INFO - codeparrot_training - Step 26636: {'lr': 0.00023959647936203118, 'samples': 13638144, 'steps': 26636, 'loss/train': 2.4523892402648926} +02/25/2022 12:05:54 - INFO - codeparrot_training - Step 26637: {'lr': 0.00023958013109642523, 'samples': 13638656, 'steps': 26637, 'loss/train': 1.2354689836502075} +02/25/2022 12:06:00 - INFO - codeparrot_training - Step 26638: {'lr': 0.0002395637828754547, 'samples': 13639168, 'steps': 26638, 'loss/train': 2.018993616104126} +02/25/2022 12:06:04 - INFO - codeparrot_training - Step 26639: {'lr': 0.00023954743469918976, 'samples': 13639680, 'steps': 26639, 'loss/train': 1.320568323135376} +02/25/2022 12:06:10 - INFO - codeparrot_training - Step 26640: {'lr': 0.00023953108656770016, 'samples': 13640192, 'steps': 26640, 'loss/train': 2.031628131866455} +02/25/2022 12:06:13 - INFO - codeparrot_training - Step 26641: {'lr': 0.00023951473848105603, 'samples': 13640704, 'steps': 26641, 'loss/train': 1.6969976425170898} +02/25/2022 12:06:17 - INFO - codeparrot_training - Step 26642: {'lr': 0.00023949839043932748, 'samples': 13641216, 'steps': 26642, 'loss/train': 0.2128089964389801} +02/25/2022 12:06:22 - INFO - codeparrot_training - Step 26643: {'lr': 0.00023948204244258445, 'samples': 13641728, 'steps': 26643, 'loss/train': 1.0688501596450806} +02/25/2022 12:06:26 - INFO - codeparrot_training - Step 26644: {'lr': 0.00023946569449089715, 'samples': 13642240, 'steps': 26644, 'loss/train': 2.347038507461548} +02/25/2022 12:06:32 - INFO - codeparrot_training - Step 26645: {'lr': 0.00023944934658433535, 'samples': 13642752, 'steps': 26645, 'loss/train': 1.2107418775558472} +02/25/2022 12:06:35 - INFO - codeparrot_training - Step 26646: {'lr': 0.00023943299872296916, 'samples': 13643264, 'steps': 26646, 'loss/train': 2.8251256942749023} +02/25/2022 12:06:40 - INFO - codeparrot_training - Step 26647: {'lr': 0.00023941665090686871, 'samples': 13643776, 'steps': 26647, 'loss/train': 2.206286668777466} +02/25/2022 12:06:44 - INFO - codeparrot_training - Step 26648: {'lr': 0.00023940030313610402, 'samples': 13644288, 'steps': 26648, 'loss/train': 1.6813606023788452} +02/25/2022 12:06:49 - INFO - codeparrot_training - Step 26649: {'lr': 0.000239383955410745, 'samples': 13644800, 'steps': 26649, 'loss/train': 1.980342149734497} +02/25/2022 12:06:53 - INFO - codeparrot_training - Step 26650: {'lr': 0.00023936760773086174, 'samples': 13645312, 'steps': 26650, 'loss/train': 2.2128231525421143} +02/25/2022 12:06:58 - INFO - codeparrot_training - Step 26651: {'lr': 0.00023935126009652424, 'samples': 13645824, 'steps': 26651, 'loss/train': 1.8103755712509155} +02/25/2022 12:07:02 - INFO - codeparrot_training - Step 26652: {'lr': 0.0002393349125078027, 'samples': 13646336, 'steps': 26652, 'loss/train': 2.147063970565796} +02/25/2022 12:07:07 - INFO - codeparrot_training - Step 26653: {'lr': 0.0002393185649647669, 'samples': 13646848, 'steps': 26653, 'loss/train': 1.9703105688095093} +02/25/2022 12:07:11 - INFO - codeparrot_training - Step 26654: {'lr': 0.00023930221746748696, 'samples': 13647360, 'steps': 26654, 'loss/train': 2.709378719329834} +02/25/2022 12:07:17 - INFO - codeparrot_training - Step 26655: {'lr': 0.00023928587001603295, 'samples': 13647872, 'steps': 26655, 'loss/train': 1.3376750946044922} +02/25/2022 12:07:21 - INFO - codeparrot_training - Step 26656: {'lr': 0.00023926952261047482, 'samples': 13648384, 'steps': 26656, 'loss/train': 1.840340495109558} +02/25/2022 12:07:26 - INFO - codeparrot_training - Step 26657: {'lr': 0.0002392531752508828, 'samples': 13648896, 'steps': 26657, 'loss/train': 1.5656640529632568} +02/25/2022 12:07:30 - INFO - codeparrot_training - Step 26658: {'lr': 0.00023923682793732665, 'samples': 13649408, 'steps': 26658, 'loss/train': 2.85665225982666} +02/25/2022 12:07:35 - INFO - codeparrot_training - Step 26659: {'lr': 0.00023922048066987652, 'samples': 13649920, 'steps': 26659, 'loss/train': 1.539355993270874} +02/25/2022 12:07:39 - INFO - codeparrot_training - Step 26660: {'lr': 0.0002392041334486024, 'samples': 13650432, 'steps': 26660, 'loss/train': 2.2422823905944824} +02/25/2022 12:07:44 - INFO - codeparrot_training - Step 26661: {'lr': 0.00023918778627357446, 'samples': 13650944, 'steps': 26661, 'loss/train': 1.8062001466751099} +02/25/2022 12:07:48 - INFO - codeparrot_training - Step 26662: {'lr': 0.0002391714391448625, 'samples': 13651456, 'steps': 26662, 'loss/train': 1.9824061393737793} +02/25/2022 12:07:53 - INFO - codeparrot_training - Step 26663: {'lr': 0.00023915509206253667, 'samples': 13651968, 'steps': 26663, 'loss/train': 1.5481623411178589} +02/25/2022 12:07:57 - INFO - codeparrot_training - Step 26664: {'lr': 0.00023913874502666697, 'samples': 13652480, 'steps': 26664, 'loss/train': 0.2746760845184326} +02/25/2022 12:08:02 - INFO - codeparrot_training - Step 26665: {'lr': 0.00023912239803732354, 'samples': 13652992, 'steps': 26665, 'loss/train': 8.848331451416016} +02/25/2022 12:08:06 - INFO - codeparrot_training - Step 26666: {'lr': 0.00023910605109457624, 'samples': 13653504, 'steps': 26666, 'loss/train': 2.227452516555786} +02/25/2022 12:08:12 - INFO - codeparrot_training - Step 26667: {'lr': 0.0002390897041984951, 'samples': 13654016, 'steps': 26667, 'loss/train': 2.1106221675872803} +02/25/2022 12:08:15 - INFO - codeparrot_training - Step 26668: {'lr': 0.00023907335734915024, 'samples': 13654528, 'steps': 26668, 'loss/train': 0.7590692043304443} +02/25/2022 12:08:21 - INFO - codeparrot_training - Step 26669: {'lr': 0.00023905701054661166, 'samples': 13655040, 'steps': 26669, 'loss/train': 1.7530666589736938} +02/25/2022 12:08:24 - INFO - codeparrot_training - Step 26670: {'lr': 0.00023904066379094941, 'samples': 13655552, 'steps': 26670, 'loss/train': 1.820556879043579} +02/25/2022 12:08:30 - INFO - codeparrot_training - Step 26671: {'lr': 0.00023902431708223342, 'samples': 13656064, 'steps': 26671, 'loss/train': 2.796388626098633} +02/25/2022 12:08:33 - INFO - codeparrot_training - Step 26672: {'lr': 0.00023900797042053382, 'samples': 13656576, 'steps': 26672, 'loss/train': 1.490715503692627} +02/25/2022 12:08:39 - INFO - codeparrot_training - Step 26673: {'lr': 0.00023899162380592052, 'samples': 13657088, 'steps': 26673, 'loss/train': 0.7696727514266968} +02/25/2022 12:08:42 - INFO - codeparrot_training - Step 26674: {'lr': 0.00023897527723846365, 'samples': 13657600, 'steps': 26674, 'loss/train': 1.3364040851593018} +02/25/2022 12:08:48 - INFO - codeparrot_training - Step 26675: {'lr': 0.0002389589307182332, 'samples': 13658112, 'steps': 26675, 'loss/train': 1.5182671546936035} +02/25/2022 12:08:51 - INFO - codeparrot_training - Step 26676: {'lr': 0.00023894258424529916, 'samples': 13658624, 'steps': 26676, 'loss/train': 1.4722493886947632} +02/25/2022 12:08:58 - INFO - codeparrot_training - Step 26677: {'lr': 0.00023892623781973158, 'samples': 13659136, 'steps': 26677, 'loss/train': 0.5243614912033081} +02/25/2022 12:09:02 - INFO - codeparrot_training - Step 26678: {'lr': 0.00023890989144160045, 'samples': 13659648, 'steps': 26678, 'loss/train': 2.0051987171173096} +02/25/2022 12:09:07 - INFO - codeparrot_training - Step 26679: {'lr': 0.0002388935451109759, 'samples': 13660160, 'steps': 26679, 'loss/train': 0.909983217716217} +02/25/2022 12:09:13 - INFO - codeparrot_training - Step 26680: {'lr': 0.00023887719882792786, 'samples': 13660672, 'steps': 26680, 'loss/train': 1.8194165229797363} +02/25/2022 12:09:16 - INFO - codeparrot_training - Step 26681: {'lr': 0.00023886085259252628, 'samples': 13661184, 'steps': 26681, 'loss/train': 1.961308240890503} +02/25/2022 12:09:22 - INFO - codeparrot_training - Step 26682: {'lr': 0.00023884450640484134, 'samples': 13661696, 'steps': 26682, 'loss/train': 2.4702696800231934} +02/25/2022 12:09:25 - INFO - codeparrot_training - Step 26683: {'lr': 0.00023882816026494306, 'samples': 13662208, 'steps': 26683, 'loss/train': 1.9763925075531006} +02/25/2022 12:09:31 - INFO - codeparrot_training - Step 26684: {'lr': 0.00023881181417290129, 'samples': 13662720, 'steps': 26684, 'loss/train': 1.8367406129837036} +02/25/2022 12:09:34 - INFO - codeparrot_training - Step 26685: {'lr': 0.00023879546812878618, 'samples': 13663232, 'steps': 26685, 'loss/train': 1.3323067426681519} +02/25/2022 12:09:40 - INFO - codeparrot_training - Step 26686: {'lr': 0.0002387791221326677, 'samples': 13663744, 'steps': 26686, 'loss/train': 2.693667411804199} +02/25/2022 12:09:44 - INFO - codeparrot_training - Step 26687: {'lr': 0.000238762776184616, 'samples': 13664256, 'steps': 26687, 'loss/train': 1.696393370628357} +02/25/2022 12:09:49 - INFO - codeparrot_training - Step 26688: {'lr': 0.0002387464302847009, 'samples': 13664768, 'steps': 26688, 'loss/train': 1.588341474533081} +02/25/2022 12:09:53 - INFO - codeparrot_training - Step 26689: {'lr': 0.00023873008443299254, 'samples': 13665280, 'steps': 26689, 'loss/train': 1.1500643491744995} +02/25/2022 12:09:58 - INFO - codeparrot_training - Step 26690: {'lr': 0.00023871373862956088, 'samples': 13665792, 'steps': 26690, 'loss/train': 2.7455103397369385} +02/25/2022 12:10:02 - INFO - codeparrot_training - Step 26691: {'lr': 0.00023869739287447602, 'samples': 13666304, 'steps': 26691, 'loss/train': 2.0966193675994873} +02/25/2022 12:10:07 - INFO - codeparrot_training - Step 26692: {'lr': 0.00023868104716780802, 'samples': 13666816, 'steps': 26692, 'loss/train': 1.7433397769927979} +02/25/2022 12:10:11 - INFO - codeparrot_training - Step 26693: {'lr': 0.00023866470150962673, 'samples': 13667328, 'steps': 26693, 'loss/train': 1.6447679996490479} +02/25/2022 12:10:16 - INFO - codeparrot_training - Step 26694: {'lr': 0.00023864835590000225, 'samples': 13667840, 'steps': 26694, 'loss/train': 2.9193737506866455} +02/25/2022 12:10:20 - INFO - codeparrot_training - Step 26695: {'lr': 0.00023863201033900462, 'samples': 13668352, 'steps': 26695, 'loss/train': 2.1003835201263428} +02/25/2022 12:10:25 - INFO - codeparrot_training - Step 26696: {'lr': 0.00023861566482670393, 'samples': 13668864, 'steps': 26696, 'loss/train': 0.4295022189617157} +02/25/2022 12:10:29 - INFO - codeparrot_training - Step 26697: {'lr': 0.00023859931936317003, 'samples': 13669376, 'steps': 26697, 'loss/train': 0.7564507722854614} +02/25/2022 12:10:34 - INFO - codeparrot_training - Step 26698: {'lr': 0.00023858297394847302, 'samples': 13669888, 'steps': 26698, 'loss/train': 2.08242130279541} +02/25/2022 12:10:38 - INFO - codeparrot_training - Step 26699: {'lr': 0.00023856662858268293, 'samples': 13670400, 'steps': 26699, 'loss/train': 1.5856281518936157} +02/25/2022 12:10:43 - INFO - codeparrot_training - Step 26700: {'lr': 0.00023855028326586985, 'samples': 13670912, 'steps': 26700, 'loss/train': 0.8329271674156189} +02/25/2022 12:10:47 - INFO - codeparrot_training - Step 26701: {'lr': 0.00023853393799810367, 'samples': 13671424, 'steps': 26701, 'loss/train': 0.9876444339752197} +02/25/2022 12:10:53 - INFO - codeparrot_training - Step 26702: {'lr': 0.00023851759277945442, 'samples': 13671936, 'steps': 26702, 'loss/train': 2.265570640563965} +02/25/2022 12:10:57 - INFO - codeparrot_training - Step 26703: {'lr': 0.00023850124760999217, 'samples': 13672448, 'steps': 26703, 'loss/train': 1.5717158317565918} +02/25/2022 12:11:02 - INFO - codeparrot_training - Step 26704: {'lr': 0.0002384849024897869, 'samples': 13672960, 'steps': 26704, 'loss/train': 1.2222161293029785} +02/25/2022 12:11:06 - INFO - codeparrot_training - Step 26705: {'lr': 0.0002384685574189088, 'samples': 13673472, 'steps': 26705, 'loss/train': 1.661441683769226} +02/25/2022 12:11:11 - INFO - codeparrot_training - Step 26706: {'lr': 0.00023845221239742758, 'samples': 13673984, 'steps': 26706, 'loss/train': 2.177128791809082} +02/25/2022 12:11:15 - INFO - codeparrot_training - Step 26707: {'lr': 0.00023843586742541346, 'samples': 13674496, 'steps': 26707, 'loss/train': 1.0078840255737305} +02/25/2022 12:11:20 - INFO - codeparrot_training - Step 26708: {'lr': 0.0002384195225029364, 'samples': 13675008, 'steps': 26708, 'loss/train': 1.187821388244629} +02/25/2022 12:11:24 - INFO - codeparrot_training - Step 26709: {'lr': 0.0002384031776300665, 'samples': 13675520, 'steps': 26709, 'loss/train': 1.7327485084533691} +02/25/2022 12:11:29 - INFO - codeparrot_training - Step 26710: {'lr': 0.00023838683280687364, 'samples': 13676032, 'steps': 26710, 'loss/train': 2.5005273818969727} +02/25/2022 12:11:33 - INFO - codeparrot_training - Step 26711: {'lr': 0.0002383704880334279, 'samples': 13676544, 'steps': 26711, 'loss/train': 2.4751930236816406} +02/25/2022 12:11:36 - INFO - codeparrot_training - Step 26712: {'lr': 0.00023835414330979928, 'samples': 13677056, 'steps': 26712, 'loss/train': 2.3055968284606934} +02/25/2022 12:11:42 - INFO - codeparrot_training - Step 26713: {'lr': 0.00023833779863605793, 'samples': 13677568, 'steps': 26713, 'loss/train': 1.997613549232483} +02/25/2022 12:11:48 - INFO - codeparrot_training - Step 26714: {'lr': 0.00023832145401227363, 'samples': 13678080, 'steps': 26714, 'loss/train': 2.6117136478424072} +02/25/2022 12:11:51 - INFO - codeparrot_training - Step 26715: {'lr': 0.00023830510943851653, 'samples': 13678592, 'steps': 26715, 'loss/train': 1.553622841835022} +02/25/2022 12:11:57 - INFO - codeparrot_training - Step 26716: {'lr': 0.00023828876491485665, 'samples': 13679104, 'steps': 26716, 'loss/train': 2.3320722579956055} +02/25/2022 12:12:00 - INFO - codeparrot_training - Step 26717: {'lr': 0.00023827242044136395, 'samples': 13679616, 'steps': 26717, 'loss/train': 2.0440430641174316} +02/25/2022 12:12:06 - INFO - codeparrot_training - Step 26718: {'lr': 0.00023825607601810853, 'samples': 13680128, 'steps': 26718, 'loss/train': 1.2034085988998413} +02/25/2022 12:12:09 - INFO - codeparrot_training - Step 26719: {'lr': 0.0002382397316451603, 'samples': 13680640, 'steps': 26719, 'loss/train': 2.004425048828125} +02/25/2022 12:12:15 - INFO - codeparrot_training - Step 26720: {'lr': 0.00023822338732258937, 'samples': 13681152, 'steps': 26720, 'loss/train': 0.8579483032226562} +02/25/2022 12:12:18 - INFO - codeparrot_training - Step 26721: {'lr': 0.00023820704305046564, 'samples': 13681664, 'steps': 26721, 'loss/train': 1.320696234703064} +02/25/2022 12:12:25 - INFO - codeparrot_training - Step 26722: {'lr': 0.00023819069882885928, 'samples': 13682176, 'steps': 26722, 'loss/train': 1.1861369609832764} +02/25/2022 12:12:28 - INFO - codeparrot_training - Step 26723: {'lr': 0.00023817435465784022, 'samples': 13682688, 'steps': 26723, 'loss/train': 2.611652135848999} +02/25/2022 12:12:34 - INFO - codeparrot_training - Step 26724: {'lr': 0.00023815801053747837, 'samples': 13683200, 'steps': 26724, 'loss/train': 1.8311712741851807} +02/25/2022 12:12:37 - INFO - codeparrot_training - Step 26725: {'lr': 0.0002381416664678439, 'samples': 13683712, 'steps': 26725, 'loss/train': 2.5182249546051025} +02/25/2022 12:12:43 - INFO - codeparrot_training - Step 26726: {'lr': 0.00023812532244900673, 'samples': 13684224, 'steps': 26726, 'loss/train': 1.261854887008667} +02/25/2022 12:12:46 - INFO - codeparrot_training - Step 26727: {'lr': 0.00023810897848103703, 'samples': 13684736, 'steps': 26727, 'loss/train': 2.2077341079711914} +02/25/2022 12:12:52 - INFO - codeparrot_training - Step 26728: {'lr': 0.00023809263456400456, 'samples': 13685248, 'steps': 26728, 'loss/train': 2.747763156890869} +02/25/2022 12:12:55 - INFO - codeparrot_training - Step 26729: {'lr': 0.00023807629069797948, 'samples': 13685760, 'steps': 26729, 'loss/train': 1.0291587114334106} +02/25/2022 12:13:01 - INFO - codeparrot_training - Step 26730: {'lr': 0.00023805994688303177, 'samples': 13686272, 'steps': 26730, 'loss/train': 2.311230182647705} +02/25/2022 12:13:05 - INFO - codeparrot_training - Step 26731: {'lr': 0.00023804360311923157, 'samples': 13686784, 'steps': 26731, 'loss/train': 2.2481613159179688} +02/25/2022 12:13:08 - INFO - codeparrot_training - Step 26732: {'lr': 0.00023802725940664867, 'samples': 13687296, 'steps': 26732, 'loss/train': 2.0449531078338623} +02/25/2022 12:13:14 - INFO - codeparrot_training - Step 26733: {'lr': 0.00023801091574535322, 'samples': 13687808, 'steps': 26733, 'loss/train': 1.5576388835906982} +02/25/2022 12:13:18 - INFO - codeparrot_training - Step 26734: {'lr': 0.00023799457213541516, 'samples': 13688320, 'steps': 26734, 'loss/train': 1.0398763418197632} +02/25/2022 12:13:23 - INFO - codeparrot_training - Step 26735: {'lr': 0.00023797822857690465, 'samples': 13688832, 'steps': 26735, 'loss/train': 2.205842971801758} +02/25/2022 12:13:27 - INFO - codeparrot_training - Step 26736: {'lr': 0.00023796188506989153, 'samples': 13689344, 'steps': 26736, 'loss/train': 2.0860581398010254} +02/25/2022 12:13:32 - INFO - codeparrot_training - Step 26737: {'lr': 0.00023794554161444582, 'samples': 13689856, 'steps': 26737, 'loss/train': 0.382570743560791} +02/25/2022 12:13:36 - INFO - codeparrot_training - Step 26738: {'lr': 0.0002379291982106376, 'samples': 13690368, 'steps': 26738, 'loss/train': 0.4131981134414673} +02/25/2022 12:13:41 - INFO - codeparrot_training - Step 26739: {'lr': 0.00023791285485853686, 'samples': 13690880, 'steps': 26739, 'loss/train': 1.860669493675232} +02/25/2022 12:13:45 - INFO - codeparrot_training - Step 26740: {'lr': 0.00023789651155821373, 'samples': 13691392, 'steps': 26740, 'loss/train': 1.9211090803146362} +02/25/2022 12:13:50 - INFO - codeparrot_training - Step 26741: {'lr': 0.00023788016830973797, 'samples': 13691904, 'steps': 26741, 'loss/train': 2.520002841949463} +02/25/2022 12:13:54 - INFO - codeparrot_training - Step 26742: {'lr': 0.00023786382511317976, 'samples': 13692416, 'steps': 26742, 'loss/train': 1.8196336030960083} +02/25/2022 12:14:00 - INFO - codeparrot_training - Step 26743: {'lr': 0.00023784748196860903, 'samples': 13692928, 'steps': 26743, 'loss/train': 2.385216474533081} +02/25/2022 12:14:05 - INFO - codeparrot_training - Step 26744: {'lr': 0.00023783113887609596, 'samples': 13693440, 'steps': 26744, 'loss/train': 1.892880916595459} +02/25/2022 12:14:09 - INFO - codeparrot_training - Step 26745: {'lr': 0.0002378147958357103, 'samples': 13693952, 'steps': 26745, 'loss/train': 1.8789676427841187} +02/25/2022 12:14:14 - INFO - codeparrot_training - Step 26746: {'lr': 0.00023779845284752218, 'samples': 13694464, 'steps': 26746, 'loss/train': 1.783031940460205} +02/25/2022 12:14:18 - INFO - codeparrot_training - Step 26747: {'lr': 0.00023778210991160165, 'samples': 13694976, 'steps': 26747, 'loss/train': 1.2729867696762085} +02/25/2022 12:14:24 - INFO - codeparrot_training - Step 26748: {'lr': 0.0002377657670280188, 'samples': 13695488, 'steps': 26748, 'loss/train': 1.7813818454742432} +02/25/2022 12:14:27 - INFO - codeparrot_training - Step 26749: {'lr': 0.00023774942419684333, 'samples': 13696000, 'steps': 26749, 'loss/train': 1.202014446258545} +02/25/2022 12:14:31 - INFO - codeparrot_training - Step 26750: {'lr': 0.0002377330814181455, 'samples': 13696512, 'steps': 26750, 'loss/train': 1.443450927734375} +02/25/2022 12:14:36 - INFO - codeparrot_training - Step 26751: {'lr': 0.00023771673869199525, 'samples': 13697024, 'steps': 26751, 'loss/train': 0.7878649830818176} +02/25/2022 12:14:40 - INFO - codeparrot_training - Step 26752: {'lr': 0.00023770039601846257, 'samples': 13697536, 'steps': 26752, 'loss/train': 1.4083608388900757} +02/25/2022 12:14:45 - INFO - codeparrot_training - Step 26753: {'lr': 0.00023768405339761762, 'samples': 13698048, 'steps': 26753, 'loss/train': 2.113192081451416} +02/25/2022 12:14:49 - INFO - codeparrot_training - Step 26754: {'lr': 0.00023766771082953014, 'samples': 13698560, 'steps': 26754, 'loss/train': 1.9707279205322266} +02/25/2022 12:14:54 - INFO - codeparrot_training - Step 26755: {'lr': 0.0002376513683142703, 'samples': 13699072, 'steps': 26755, 'loss/train': 0.7499202489852905} +02/25/2022 12:15:00 - INFO - codeparrot_training - Step 26756: {'lr': 0.0002376350258519081, 'samples': 13699584, 'steps': 26756, 'loss/train': 2.2111752033233643} +02/25/2022 12:15:03 - INFO - codeparrot_training - Step 26757: {'lr': 0.00023761868344251356, 'samples': 13700096, 'steps': 26757, 'loss/train': 1.2791670560836792} +02/25/2022 12:15:07 - INFO - codeparrot_training - Step 26758: {'lr': 0.00023760234108615656, 'samples': 13700608, 'steps': 26758, 'loss/train': 0.6070950031280518} +02/25/2022 12:15:13 - INFO - codeparrot_training - Step 26759: {'lr': 0.00023758599878290722, 'samples': 13701120, 'steps': 26759, 'loss/train': 1.8183608055114746} +02/25/2022 12:15:17 - INFO - codeparrot_training - Step 26760: {'lr': 0.00023756965653283556, 'samples': 13701632, 'steps': 26760, 'loss/train': 1.6736971139907837} +02/25/2022 12:15:22 - INFO - codeparrot_training - Step 26761: {'lr': 0.00023755331433601153, 'samples': 13702144, 'steps': 26761, 'loss/train': 1.8272558450698853} +02/25/2022 12:15:26 - INFO - codeparrot_training - Step 26762: {'lr': 0.00023753697219250513, 'samples': 13702656, 'steps': 26762, 'loss/train': 2.012540102005005} +02/25/2022 12:15:31 - INFO - codeparrot_training - Step 26763: {'lr': 0.0002375206301023864, 'samples': 13703168, 'steps': 26763, 'loss/train': 1.3723292350769043} +02/25/2022 12:15:35 - INFO - codeparrot_training - Step 26764: {'lr': 0.0002375042880657253, 'samples': 13703680, 'steps': 26764, 'loss/train': 2.2657365798950195} +02/25/2022 12:15:40 - INFO - codeparrot_training - Step 26765: {'lr': 0.00023748794608259186, 'samples': 13704192, 'steps': 26765, 'loss/train': 1.8205993175506592} +02/25/2022 12:15:44 - INFO - codeparrot_training - Step 26766: {'lr': 0.00023747160415305612, 'samples': 13704704, 'steps': 26766, 'loss/train': 1.750273585319519} +02/25/2022 12:15:49 - INFO - codeparrot_training - Step 26767: {'lr': 0.00023745526227718808, 'samples': 13705216, 'steps': 26767, 'loss/train': 1.5082825422286987} +02/25/2022 12:15:53 - INFO - codeparrot_training - Step 26768: {'lr': 0.00023743892045505763, 'samples': 13705728, 'steps': 26768, 'loss/train': 1.7371412515640259} +02/25/2022 12:15:59 - INFO - codeparrot_training - Step 26769: {'lr': 0.00023742257868673486, 'samples': 13706240, 'steps': 26769, 'loss/train': 1.9277106523513794} +02/25/2022 12:16:03 - INFO - codeparrot_training - Step 26770: {'lr': 0.00023740623697228992, 'samples': 13706752, 'steps': 26770, 'loss/train': 1.839339256286621} +02/25/2022 12:16:08 - INFO - codeparrot_training - Step 26771: {'lr': 0.0002373898953117925, 'samples': 13707264, 'steps': 26771, 'loss/train': 2.537174940109253} +02/25/2022 12:16:12 - INFO - codeparrot_training - Step 26772: {'lr': 0.00023737355370531278, 'samples': 13707776, 'steps': 26772, 'loss/train': 1.7278813123703003} +02/25/2022 12:16:17 - INFO - codeparrot_training - Step 26773: {'lr': 0.00023735721215292074, 'samples': 13708288, 'steps': 26773, 'loss/train': 1.8701725006103516} +02/25/2022 12:16:21 - INFO - codeparrot_training - Step 26774: {'lr': 0.0002373408706546865, 'samples': 13708800, 'steps': 26774, 'loss/train': 1.7911018133163452} +02/25/2022 12:16:26 - INFO - codeparrot_training - Step 26775: {'lr': 0.00023732452921067988, 'samples': 13709312, 'steps': 26775, 'loss/train': 2.0169239044189453} +02/25/2022 12:16:30 - INFO - codeparrot_training - Step 26776: {'lr': 0.00023730818782097092, 'samples': 13709824, 'steps': 26776, 'loss/train': 1.7721707820892334} +02/25/2022 12:16:35 - INFO - codeparrot_training - Step 26777: {'lr': 0.00023729184648562965, 'samples': 13710336, 'steps': 26777, 'loss/train': 1.4347071647644043} +02/25/2022 12:16:39 - INFO - codeparrot_training - Step 26778: {'lr': 0.00023727550520472607, 'samples': 13710848, 'steps': 26778, 'loss/train': 2.0113892555236816} +02/25/2022 12:16:45 - INFO - codeparrot_training - Step 26779: {'lr': 0.00023725916397833032, 'samples': 13711360, 'steps': 26779, 'loss/train': 2.8242876529693604} +02/25/2022 12:16:49 - INFO - codeparrot_training - Step 26780: {'lr': 0.00023724282280651214, 'samples': 13711872, 'steps': 26780, 'loss/train': 1.09941828250885} +02/25/2022 12:16:54 - INFO - codeparrot_training - Step 26781: {'lr': 0.00023722648168934165, 'samples': 13712384, 'steps': 26781, 'loss/train': 4.165886402130127} +02/25/2022 12:16:58 - INFO - codeparrot_training - Step 26782: {'lr': 0.00023721014062688886, 'samples': 13712896, 'steps': 26782, 'loss/train': 1.1417133808135986} +02/25/2022 12:17:03 - INFO - codeparrot_training - Step 26783: {'lr': 0.00023719379961922388, 'samples': 13713408, 'steps': 26783, 'loss/train': 1.5448094606399536} +02/25/2022 12:17:07 - INFO - codeparrot_training - Step 26784: {'lr': 0.0002371774586664165, 'samples': 13713920, 'steps': 26784, 'loss/train': 2.5177090167999268} +02/25/2022 12:17:12 - INFO - codeparrot_training - Step 26785: {'lr': 0.00023716111776853679, 'samples': 13714432, 'steps': 26785, 'loss/train': 1.6660338640213013} +02/25/2022 12:17:16 - INFO - codeparrot_training - Step 26786: {'lr': 0.00023714477692565478, 'samples': 13714944, 'steps': 26786, 'loss/train': 2.424240827560425} +02/25/2022 12:17:21 - INFO - codeparrot_training - Step 26787: {'lr': 0.00023712843613784047, 'samples': 13715456, 'steps': 26787, 'loss/train': 1.3898310661315918} +02/25/2022 12:17:25 - INFO - codeparrot_training - Step 26788: {'lr': 0.00023711209540516396, 'samples': 13715968, 'steps': 26788, 'loss/train': 2.6359150409698486} +02/25/2022 12:17:31 - INFO - codeparrot_training - Step 26789: {'lr': 0.00023709575472769505, 'samples': 13716480, 'steps': 26789, 'loss/train': 2.998382091522217} +02/25/2022 12:17:35 - INFO - codeparrot_training - Step 26790: {'lr': 0.0002370794141055038, 'samples': 13716992, 'steps': 26790, 'loss/train': 2.6061904430389404} +02/25/2022 12:17:40 - INFO - codeparrot_training - Step 26791: {'lr': 0.00023706307353866025, 'samples': 13717504, 'steps': 26791, 'loss/train': 0.48793825507164} +02/25/2022 12:17:44 - INFO - codeparrot_training - Step 26792: {'lr': 0.00023704673302723449, 'samples': 13718016, 'steps': 26792, 'loss/train': 1.3789302110671997} +02/25/2022 12:17:49 - INFO - codeparrot_training - Step 26793: {'lr': 0.0002370303925712963, 'samples': 13718528, 'steps': 26793, 'loss/train': 1.8587357997894287} +02/25/2022 12:17:53 - INFO - codeparrot_training - Step 26794: {'lr': 0.00023701405217091583, 'samples': 13719040, 'steps': 26794, 'loss/train': 1.4766002893447876} +02/25/2022 12:17:58 - INFO - codeparrot_training - Step 26795: {'lr': 0.00023699771182616298, 'samples': 13719552, 'steps': 26795, 'loss/train': 0.9420468807220459} +02/25/2022 12:18:02 - INFO - codeparrot_training - Step 26796: {'lr': 0.00023698137153710795, 'samples': 13720064, 'steps': 26796, 'loss/train': 1.0802677869796753} +02/25/2022 12:18:07 - INFO - codeparrot_training - Step 26797: {'lr': 0.00023696503130382044, 'samples': 13720576, 'steps': 26797, 'loss/train': 1.8587385416030884} +02/25/2022 12:18:11 - INFO - codeparrot_training - Step 26798: {'lr': 0.00023694869112637063, 'samples': 13721088, 'steps': 26798, 'loss/train': 0.8755258321762085} +02/25/2022 12:18:16 - INFO - codeparrot_training - Step 26799: {'lr': 0.0002369323510048285, 'samples': 13721600, 'steps': 26799, 'loss/train': 1.7706990242004395} +02/25/2022 12:18:19 - INFO - codeparrot_training - Step 26800: {'lr': 0.00023691601093926405, 'samples': 13722112, 'steps': 26800, 'loss/train': 1.8594273328781128} +02/25/2022 12:18:25 - INFO - codeparrot_training - Step 26801: {'lr': 0.00023689967092974728, 'samples': 13722624, 'steps': 26801, 'loss/train': 2.4129745960235596} +02/25/2022 12:18:28 - INFO - codeparrot_training - Step 26802: {'lr': 0.00023688333097634807, 'samples': 13723136, 'steps': 26802, 'loss/train': 1.707031488418579} +02/25/2022 12:18:34 - INFO - codeparrot_training - Step 26803: {'lr': 0.00023686699107913658, 'samples': 13723648, 'steps': 26803, 'loss/train': 1.1110788583755493} +02/25/2022 12:18:37 - INFO - codeparrot_training - Step 26804: {'lr': 0.00023685065123818267, 'samples': 13724160, 'steps': 26804, 'loss/train': 2.5814056396484375} +02/25/2022 12:18:44 - INFO - codeparrot_training - Step 26805: {'lr': 0.00023683431145355647, 'samples': 13724672, 'steps': 26805, 'loss/train': 2.2575063705444336} +02/25/2022 12:18:47 - INFO - codeparrot_training - Step 26806: {'lr': 0.0002368179717253278, 'samples': 13725184, 'steps': 26806, 'loss/train': 0.48791399598121643} +02/25/2022 12:18:53 - INFO - codeparrot_training - Step 26807: {'lr': 0.00023680163205356687, 'samples': 13725696, 'steps': 26807, 'loss/train': 1.4099923372268677} +02/25/2022 12:18:56 - INFO - codeparrot_training - Step 26808: {'lr': 0.00023678529243834347, 'samples': 13726208, 'steps': 26808, 'loss/train': 1.836795449256897} +02/25/2022 12:19:02 - INFO - codeparrot_training - Step 26809: {'lr': 0.00023676895287972773, 'samples': 13726720, 'steps': 26809, 'loss/train': 1.2395943403244019} +02/25/2022 12:19:05 - INFO - codeparrot_training - Step 26810: {'lr': 0.00023675261337778957, 'samples': 13727232, 'steps': 26810, 'loss/train': 2.0408270359039307} +02/25/2022 12:19:10 - INFO - codeparrot_training - Step 26811: {'lr': 0.000236736273932599, 'samples': 13727744, 'steps': 26811, 'loss/train': 2.1260390281677246} +02/25/2022 12:19:14 - INFO - codeparrot_training - Step 26812: {'lr': 0.00023671993454422603, 'samples': 13728256, 'steps': 26812, 'loss/train': 1.8632508516311646} +02/25/2022 12:19:20 - INFO - codeparrot_training - Step 26813: {'lr': 0.00023670359521274059, 'samples': 13728768, 'steps': 26813, 'loss/train': 1.5697566270828247} +02/25/2022 12:19:23 - INFO - codeparrot_training - Step 26814: {'lr': 0.0002366872559382129, 'samples': 13729280, 'steps': 26814, 'loss/train': 2.268803834915161} +02/25/2022 12:19:29 - INFO - codeparrot_training - Step 26815: {'lr': 0.00023667091672071263, 'samples': 13729792, 'steps': 26815, 'loss/train': 2.0402042865753174} +02/25/2022 12:19:33 - INFO - codeparrot_training - Step 26816: {'lr': 0.0002366545775603099, 'samples': 13730304, 'steps': 26816, 'loss/train': 1.8463525772094727} +02/25/2022 12:19:38 - INFO - codeparrot_training - Step 26817: {'lr': 0.00023663823845707476, 'samples': 13730816, 'steps': 26817, 'loss/train': 1.9438560009002686} +02/25/2022 12:19:42 - INFO - codeparrot_training - Step 26818: {'lr': 0.00023662189941107726, 'samples': 13731328, 'steps': 26818, 'loss/train': 1.1925404071807861} +02/25/2022 12:19:47 - INFO - codeparrot_training - Step 26819: {'lr': 0.00023660556042238718, 'samples': 13731840, 'steps': 26819, 'loss/train': 1.205910563468933} +02/25/2022 12:19:51 - INFO - codeparrot_training - Step 26820: {'lr': 0.0002365892214910746, 'samples': 13732352, 'steps': 26820, 'loss/train': 2.38867449760437} +02/25/2022 12:19:57 - INFO - codeparrot_training - Step 26821: {'lr': 0.00023657288261720957, 'samples': 13732864, 'steps': 26821, 'loss/train': 1.930776596069336} +02/25/2022 12:20:00 - INFO - codeparrot_training - Step 26822: {'lr': 0.00023655654380086216, 'samples': 13733376, 'steps': 26822, 'loss/train': 2.257798194885254} +02/25/2022 12:20:05 - INFO - codeparrot_training - Step 26823: {'lr': 0.00023654020504210215, 'samples': 13733888, 'steps': 26823, 'loss/train': 1.8421812057495117} +02/25/2022 12:20:09 - INFO - codeparrot_training - Step 26824: {'lr': 0.0002365238663409996, 'samples': 13734400, 'steps': 26824, 'loss/train': 1.8362871408462524} +02/25/2022 12:20:16 - INFO - codeparrot_training - Step 26825: {'lr': 0.0002365075276976245, 'samples': 13734912, 'steps': 26825, 'loss/train': 1.683214783668518} +02/25/2022 12:20:19 - INFO - codeparrot_training - Step 26826: {'lr': 0.00023649118911204693, 'samples': 13735424, 'steps': 26826, 'loss/train': 2.1748716831207275} +02/25/2022 12:20:25 - INFO - codeparrot_training - Step 26827: {'lr': 0.0002364748505843369, 'samples': 13735936, 'steps': 26827, 'loss/train': 1.2752037048339844} +02/25/2022 12:20:28 - INFO - codeparrot_training - Step 26828: {'lr': 0.0002364585121145642, 'samples': 13736448, 'steps': 26828, 'loss/train': 1.4916974306106567} +02/25/2022 12:20:34 - INFO - codeparrot_training - Step 26829: {'lr': 0.00023644217370279893, 'samples': 13736960, 'steps': 26829, 'loss/train': 4.189927101135254} +02/25/2022 12:20:37 - INFO - codeparrot_training - Step 26830: {'lr': 0.0002364258353491111, 'samples': 13737472, 'steps': 26830, 'loss/train': 2.129122018814087} +02/25/2022 12:20:43 - INFO - codeparrot_training - Step 26831: {'lr': 0.00023640949705357078, 'samples': 13737984, 'steps': 26831, 'loss/train': 1.9369949102401733} +02/25/2022 12:20:46 - INFO - codeparrot_training - Step 26832: {'lr': 0.00023639315881624775, 'samples': 13738496, 'steps': 26832, 'loss/train': 0.6894099116325378} +02/25/2022 12:20:52 - INFO - codeparrot_training - Step 26833: {'lr': 0.0002363768206372121, 'samples': 13739008, 'steps': 26833, 'loss/train': 0.18040867149829865} +02/25/2022 12:20:55 - INFO - codeparrot_training - Step 26834: {'lr': 0.00023636048251653386, 'samples': 13739520, 'steps': 26834, 'loss/train': 0.7886021137237549} +02/25/2022 12:21:01 - INFO - codeparrot_training - Step 26835: {'lr': 0.00023634414445428297, 'samples': 13740032, 'steps': 26835, 'loss/train': 2.280454158782959} +02/25/2022 12:21:05 - INFO - codeparrot_training - Step 26836: {'lr': 0.0002363278064505295, 'samples': 13740544, 'steps': 26836, 'loss/train': 2.167886257171631} +02/25/2022 12:21:10 - INFO - codeparrot_training - Step 26837: {'lr': 0.00023631146850534328, 'samples': 13741056, 'steps': 26837, 'loss/train': 2.3823812007904053} +02/25/2022 12:21:14 - INFO - codeparrot_training - Step 26838: {'lr': 0.00023629513061879437, 'samples': 13741568, 'steps': 26838, 'loss/train': 1.8117777109146118} +02/25/2022 12:21:19 - INFO - codeparrot_training - Step 26839: {'lr': 0.0002362787927909528, 'samples': 13742080, 'steps': 26839, 'loss/train': 1.6279933452606201} +02/25/2022 12:21:23 - INFO - codeparrot_training - Step 26840: {'lr': 0.00023626245502188863, 'samples': 13742592, 'steps': 26840, 'loss/train': 1.2928667068481445} +02/25/2022 12:21:29 - INFO - codeparrot_training - Step 26841: {'lr': 0.0002362461173116716, 'samples': 13743104, 'steps': 26841, 'loss/train': 0.9796396493911743} +02/25/2022 12:21:32 - INFO - codeparrot_training - Step 26842: {'lr': 0.00023622977966037184, 'samples': 13743616, 'steps': 26842, 'loss/train': 1.7577086687088013} +02/25/2022 12:21:38 - INFO - codeparrot_training - Step 26843: {'lr': 0.0002362134420680594, 'samples': 13744128, 'steps': 26843, 'loss/train': 1.9885544776916504} +02/25/2022 12:21:41 - INFO - codeparrot_training - Step 26844: {'lr': 0.0002361971045348042, 'samples': 13744640, 'steps': 26844, 'loss/train': 1.3850375413894653} +02/25/2022 12:21:47 - INFO - codeparrot_training - Step 26845: {'lr': 0.00023618076706067616, 'samples': 13745152, 'steps': 26845, 'loss/train': 1.7462857961654663} +02/25/2022 12:21:50 - INFO - codeparrot_training - Step 26846: {'lr': 0.00023616442964574533, 'samples': 13745664, 'steps': 26846, 'loss/train': 1.104557991027832} +02/25/2022 12:21:56 - INFO - codeparrot_training - Step 26847: {'lr': 0.00023614809229008173, 'samples': 13746176, 'steps': 26847, 'loss/train': 1.7320835590362549} +02/25/2022 12:21:59 - INFO - codeparrot_training - Step 26848: {'lr': 0.00023613175499375528, 'samples': 13746688, 'steps': 26848, 'loss/train': 1.4686414003372192} +02/25/2022 12:22:05 - INFO - codeparrot_training - Step 26849: {'lr': 0.000236115417756836, 'samples': 13747200, 'steps': 26849, 'loss/train': 1.4412899017333984} +02/25/2022 12:22:08 - INFO - codeparrot_training - Step 26850: {'lr': 0.00023609908057939388, 'samples': 13747712, 'steps': 26850, 'loss/train': 1.3898965120315552} +02/25/2022 12:22:14 - INFO - codeparrot_training - Step 26851: {'lr': 0.00023608274346149886, 'samples': 13748224, 'steps': 26851, 'loss/train': 2.3681750297546387} +02/25/2022 12:22:18 - INFO - codeparrot_training - Step 26852: {'lr': 0.00023606640640322092, 'samples': 13748736, 'steps': 26852, 'loss/train': 2.6245272159576416} +02/25/2022 12:22:23 - INFO - codeparrot_training - Step 26853: {'lr': 0.00023605006940463012, 'samples': 13749248, 'steps': 26853, 'loss/train': 1.9355206489562988} +02/25/2022 12:22:27 - INFO - codeparrot_training - Step 26854: {'lr': 0.0002360337324657964, 'samples': 13749760, 'steps': 26854, 'loss/train': 1.4596617221832275} +02/25/2022 12:22:32 - INFO - codeparrot_training - Step 26855: {'lr': 0.00023601739558678968, 'samples': 13750272, 'steps': 26855, 'loss/train': 1.1350302696228027} +02/25/2022 12:22:36 - INFO - codeparrot_training - Step 26856: {'lr': 0.00023600105876768, 'samples': 13750784, 'steps': 26856, 'loss/train': 1.9619797468185425} +02/25/2022 12:22:41 - INFO - codeparrot_training - Step 26857: {'lr': 0.00023598472200853747, 'samples': 13751296, 'steps': 26857, 'loss/train': 0.891645610332489} +02/25/2022 12:22:45 - INFO - codeparrot_training - Step 26858: {'lr': 0.0002359683853094318, 'samples': 13751808, 'steps': 26858, 'loss/train': 2.0408740043640137} +02/25/2022 12:22:50 - INFO - codeparrot_training - Step 26859: {'lr': 0.00023595204867043313, 'samples': 13752320, 'steps': 26859, 'loss/train': 1.3322694301605225} +02/25/2022 12:22:57 - INFO - codeparrot_training - Step 26860: {'lr': 0.00023593571209161142, 'samples': 13752832, 'steps': 26860, 'loss/train': 0.8712801337242126} +02/25/2022 12:23:00 - INFO - codeparrot_training - Step 26861: {'lr': 0.00023591937557303664, 'samples': 13753344, 'steps': 26861, 'loss/train': 1.4019553661346436} +02/25/2022 12:23:04 - INFO - codeparrot_training - Step 26862: {'lr': 0.0002359030391147789, 'samples': 13753856, 'steps': 26862, 'loss/train': 0.35387876629829407} +02/25/2022 12:23:09 - INFO - codeparrot_training - Step 26863: {'lr': 0.00023588670271690792, 'samples': 13754368, 'steps': 26863, 'loss/train': 0.9114536046981812} +02/25/2022 12:23:15 - INFO - codeparrot_training - Step 26864: {'lr': 0.00023587036637949389, 'samples': 13754880, 'steps': 26864, 'loss/train': 0.6249039173126221} +02/25/2022 12:23:19 - INFO - codeparrot_training - Step 26865: {'lr': 0.00023585403010260668, 'samples': 13755392, 'steps': 26865, 'loss/train': 1.0310841798782349} +02/25/2022 12:23:22 - INFO - codeparrot_training - Step 26866: {'lr': 0.00023583769388631643, 'samples': 13755904, 'steps': 26866, 'loss/train': 0.3120317757129669} +02/25/2022 12:23:28 - INFO - codeparrot_training - Step 26867: {'lr': 0.0002358213577306929, 'samples': 13756416, 'steps': 26867, 'loss/train': 1.968385100364685} +02/25/2022 12:23:31 - INFO - codeparrot_training - Step 26868: {'lr': 0.00023580502163580617, 'samples': 13756928, 'steps': 26868, 'loss/train': 3.264969825744629} +02/25/2022 12:23:37 - INFO - codeparrot_training - Step 26869: {'lr': 0.00023578868560172623, 'samples': 13757440, 'steps': 26869, 'loss/train': 1.6674898862838745} +02/25/2022 12:23:40 - INFO - codeparrot_training - Step 26870: {'lr': 0.00023577234962852316, 'samples': 13757952, 'steps': 26870, 'loss/train': 1.49990713596344} +02/25/2022 12:23:46 - INFO - codeparrot_training - Step 26871: {'lr': 0.0002357560137162667, 'samples': 13758464, 'steps': 26871, 'loss/train': 2.062405824661255} +02/25/2022 12:23:49 - INFO - codeparrot_training - Step 26872: {'lr': 0.00023573967786502694, 'samples': 13758976, 'steps': 26872, 'loss/train': 0.9157112836837769} +02/25/2022 12:23:55 - INFO - codeparrot_training - Step 26873: {'lr': 0.0002357233420748739, 'samples': 13759488, 'steps': 26873, 'loss/train': 2.401214838027954} +02/25/2022 12:24:01 - INFO - codeparrot_training - Step 26874: {'lr': 0.00023570700634587753, 'samples': 13760000, 'steps': 26874, 'loss/train': 1.9598432779312134} +02/25/2022 12:24:04 - INFO - codeparrot_training - Step 26875: {'lr': 0.0002356906706781079, 'samples': 13760512, 'steps': 26875, 'loss/train': 1.7263938188552856} +02/25/2022 12:24:10 - INFO - codeparrot_training - Step 26876: {'lr': 0.00023567433507163478, 'samples': 13761024, 'steps': 26876, 'loss/train': 1.5991458892822266} +02/25/2022 12:24:13 - INFO - codeparrot_training - Step 26877: {'lr': 0.0002356579995265283, 'samples': 13761536, 'steps': 26877, 'loss/train': 2.3382513523101807} +02/25/2022 12:24:19 - INFO - codeparrot_training - Step 26878: {'lr': 0.00023564166404285833, 'samples': 13762048, 'steps': 26878, 'loss/train': 1.8738468885421753} +02/25/2022 12:24:22 - INFO - codeparrot_training - Step 26879: {'lr': 0.00023562532862069505, 'samples': 13762560, 'steps': 26879, 'loss/train': 0.6636435985565186} +02/25/2022 12:24:29 - INFO - codeparrot_training - Step 26880: {'lr': 0.0002356089932601082, 'samples': 13763072, 'steps': 26880, 'loss/train': 1.8751325607299805} +02/25/2022 12:24:32 - INFO - codeparrot_training - Step 26881: {'lr': 0.00023559265796116785, 'samples': 13763584, 'steps': 26881, 'loss/train': 1.8324499130249023} +02/25/2022 12:24:36 - INFO - codeparrot_training - Step 26882: {'lr': 0.00023557632272394396, 'samples': 13764096, 'steps': 26882, 'loss/train': 1.8953220844268799} +02/25/2022 12:24:41 - INFO - codeparrot_training - Step 26883: {'lr': 0.00023555998754850666, 'samples': 13764608, 'steps': 26883, 'loss/train': 1.5912364721298218} +02/25/2022 12:24:47 - INFO - codeparrot_training - Step 26884: {'lr': 0.00023554365243492567, 'samples': 13765120, 'steps': 26884, 'loss/train': 1.2592664957046509} +02/25/2022 12:24:50 - INFO - codeparrot_training - Step 26885: {'lr': 0.0002355273173832711, 'samples': 13765632, 'steps': 26885, 'loss/train': 2.073204517364502} +02/25/2022 12:24:56 - INFO - codeparrot_training - Step 26886: {'lr': 0.00023551098239361288, 'samples': 13766144, 'steps': 26886, 'loss/train': 1.4266589879989624} +02/25/2022 12:24:59 - INFO - codeparrot_training - Step 26887: {'lr': 0.00023549464746602106, 'samples': 13766656, 'steps': 26887, 'loss/train': 1.1164153814315796} +02/25/2022 12:25:04 - INFO - codeparrot_training - Step 26888: {'lr': 0.00023547831260056556, 'samples': 13767168, 'steps': 26888, 'loss/train': 1.8492226600646973} +02/25/2022 12:25:08 - INFO - codeparrot_training - Step 26889: {'lr': 0.00023546197779731636, 'samples': 13767680, 'steps': 26889, 'loss/train': 2.067951202392578} +02/25/2022 12:25:16 - INFO - codeparrot_training - Step 26890: {'lr': 0.00023544564305634342, 'samples': 13768192, 'steps': 26890, 'loss/train': 2.8574769496917725} +02/25/2022 12:25:19 - INFO - codeparrot_training - Step 26891: {'lr': 0.0002354293083777167, 'samples': 13768704, 'steps': 26891, 'loss/train': 1.9289116859436035} +02/25/2022 12:25:25 - INFO - codeparrot_training - Step 26892: {'lr': 0.00023541297376150625, 'samples': 13769216, 'steps': 26892, 'loss/train': 2.142730951309204} +02/25/2022 12:25:28 - INFO - codeparrot_training - Step 26893: {'lr': 0.00023539663920778191, 'samples': 13769728, 'steps': 26893, 'loss/train': 2.585052013397217} +02/25/2022 12:25:34 - INFO - codeparrot_training - Step 26894: {'lr': 0.00023538030471661383, 'samples': 13770240, 'steps': 26894, 'loss/train': 3.1235265731811523} +02/25/2022 12:25:37 - INFO - codeparrot_training - Step 26895: {'lr': 0.0002353639702880718, 'samples': 13770752, 'steps': 26895, 'loss/train': 1.5745577812194824} +02/25/2022 12:25:43 - INFO - codeparrot_training - Step 26896: {'lr': 0.00023534763592222586, 'samples': 13771264, 'steps': 26896, 'loss/train': 2.261033535003662} +02/25/2022 12:25:46 - INFO - codeparrot_training - Step 26897: {'lr': 0.0002353313016191461, 'samples': 13771776, 'steps': 26897, 'loss/train': 1.7269883155822754} +02/25/2022 12:25:52 - INFO - codeparrot_training - Step 26898: {'lr': 0.00023531496737890233, 'samples': 13772288, 'steps': 26898, 'loss/train': 2.141082763671875} +02/25/2022 12:25:55 - INFO - codeparrot_training - Step 26899: {'lr': 0.00023529863320156453, 'samples': 13772800, 'steps': 26899, 'loss/train': 1.8374658823013306} +02/25/2022 12:26:02 - INFO - codeparrot_training - Step 26900: {'lr': 0.00023528229908720272, 'samples': 13773312, 'steps': 26900, 'loss/train': 3.225510835647583} +02/25/2022 12:26:06 - INFO - codeparrot_training - Step 26901: {'lr': 0.00023526596503588702, 'samples': 13773824, 'steps': 26901, 'loss/train': 1.1997783184051514} +02/25/2022 12:26:11 - INFO - codeparrot_training - Step 26902: {'lr': 0.0002352496310476871, 'samples': 13774336, 'steps': 26902, 'loss/train': 1.8035966157913208} +02/25/2022 12:26:15 - INFO - codeparrot_training - Step 26903: {'lr': 0.00023523329712267307, 'samples': 13774848, 'steps': 26903, 'loss/train': 1.8447291851043701} +02/25/2022 12:26:20 - INFO - codeparrot_training - Step 26904: {'lr': 0.0002352169632609149, 'samples': 13775360, 'steps': 26904, 'loss/train': 2.5429704189300537} +02/25/2022 12:26:24 - INFO - codeparrot_training - Step 26905: {'lr': 0.0002352006294624827, 'samples': 13775872, 'steps': 26905, 'loss/train': 2.946312189102173} +02/25/2022 12:26:29 - INFO - codeparrot_training - Step 26906: {'lr': 0.0002351842957274462, 'samples': 13776384, 'steps': 26906, 'loss/train': 2.117699384689331} +02/25/2022 12:26:33 - INFO - codeparrot_training - Step 26907: {'lr': 0.00023516796205587547, 'samples': 13776896, 'steps': 26907, 'loss/train': 2.334052324295044} +02/25/2022 12:26:38 - INFO - codeparrot_training - Step 26908: {'lr': 0.00023515162844784046, 'samples': 13777408, 'steps': 26908, 'loss/train': 1.7366881370544434} +02/25/2022 12:26:42 - INFO - codeparrot_training - Step 26909: {'lr': 0.00023513529490341116, 'samples': 13777920, 'steps': 26909, 'loss/train': 1.0266656875610352} +02/25/2022 12:26:47 - INFO - codeparrot_training - Step 26910: {'lr': 0.00023511896142265764, 'samples': 13778432, 'steps': 26910, 'loss/train': 2.2760567665100098} +02/25/2022 12:26:51 - INFO - codeparrot_training - Step 26911: {'lr': 0.00023510262800564968, 'samples': 13778944, 'steps': 26911, 'loss/train': 1.6876857280731201} +02/25/2022 12:26:58 - INFO - codeparrot_training - Step 26912: {'lr': 0.00023508629465245735, 'samples': 13779456, 'steps': 26912, 'loss/train': 1.7996442317962646} +02/25/2022 12:27:01 - INFO - codeparrot_training - Step 26913: {'lr': 0.00023506996136315057, 'samples': 13779968, 'steps': 26913, 'loss/train': 1.8048832416534424} +02/25/2022 12:27:07 - INFO - codeparrot_training - Step 26914: {'lr': 0.00023505362813779944, 'samples': 13780480, 'steps': 26914, 'loss/train': 0.961259663105011} +02/25/2022 12:27:11 - INFO - codeparrot_training - Step 26915: {'lr': 0.0002350372949764737, 'samples': 13780992, 'steps': 26915, 'loss/train': 2.397836208343506} +02/25/2022 12:27:16 - INFO - codeparrot_training - Step 26916: {'lr': 0.00023502096187924347, 'samples': 13781504, 'steps': 26916, 'loss/train': 1.7659590244293213} +02/25/2022 12:27:20 - INFO - codeparrot_training - Step 26917: {'lr': 0.00023500462884617866, 'samples': 13782016, 'steps': 26917, 'loss/train': 2.1485817432403564} +02/25/2022 12:27:25 - INFO - codeparrot_training - Step 26918: {'lr': 0.00023498829587734936, 'samples': 13782528, 'steps': 26918, 'loss/train': 2.0062687397003174} +02/25/2022 12:27:29 - INFO - codeparrot_training - Step 26919: {'lr': 0.00023497196297282534, 'samples': 13783040, 'steps': 26919, 'loss/train': 1.2176316976547241} +02/25/2022 12:27:34 - INFO - codeparrot_training - Step 26920: {'lr': 0.00023495563013267666, 'samples': 13783552, 'steps': 26920, 'loss/train': 0.33599233627319336} +02/25/2022 12:27:38 - INFO - codeparrot_training - Step 26921: {'lr': 0.00023493929735697328, 'samples': 13784064, 'steps': 26921, 'loss/train': 1.470510721206665} +02/25/2022 12:27:45 - INFO - codeparrot_training - Step 26922: {'lr': 0.00023492296464578516, 'samples': 13784576, 'steps': 26922, 'loss/train': 2.0926873683929443} +02/25/2022 12:27:48 - INFO - codeparrot_training - Step 26923: {'lr': 0.00023490663199918238, 'samples': 13785088, 'steps': 26923, 'loss/train': 1.7308963537216187} +02/25/2022 12:27:54 - INFO - codeparrot_training - Step 26924: {'lr': 0.00023489029941723468, 'samples': 13785600, 'steps': 26924, 'loss/train': 2.3819479942321777} +02/25/2022 12:27:57 - INFO - codeparrot_training - Step 26925: {'lr': 0.00023487396690001218, 'samples': 13786112, 'steps': 26925, 'loss/train': 1.6707313060760498} +02/25/2022 12:28:03 - INFO - codeparrot_training - Step 26926: {'lr': 0.00023485763444758476, 'samples': 13786624, 'steps': 26926, 'loss/train': 1.0905370712280273} +02/25/2022 12:28:06 - INFO - codeparrot_training - Step 26927: {'lr': 0.00023484130206002257, 'samples': 13787136, 'steps': 26927, 'loss/train': 2.322423219680786} +02/25/2022 12:28:12 - INFO - codeparrot_training - Step 26928: {'lr': 0.00023482496973739527, 'samples': 13787648, 'steps': 26928, 'loss/train': 1.0728545188903809} +02/25/2022 12:28:15 - INFO - codeparrot_training - Step 26929: {'lr': 0.00023480863747977305, 'samples': 13788160, 'steps': 26929, 'loss/train': 1.530009150505066} +02/25/2022 12:28:21 - INFO - codeparrot_training - Step 26930: {'lr': 0.0002347923052872258, 'samples': 13788672, 'steps': 26930, 'loss/train': 1.2424969673156738} +02/25/2022 12:28:24 - INFO - codeparrot_training - Step 26931: {'lr': 0.00023477597315982353, 'samples': 13789184, 'steps': 26931, 'loss/train': 1.1929811239242554} +02/25/2022 12:28:32 - INFO - codeparrot_training - Step 26932: {'lr': 0.00023475964109763607, 'samples': 13789696, 'steps': 26932, 'loss/train': 2.6617064476013184} +02/25/2022 12:28:35 - INFO - codeparrot_training - Step 26933: {'lr': 0.00023474330910073347, 'samples': 13790208, 'steps': 26933, 'loss/train': 2.2720558643341064} +02/25/2022 12:28:41 - INFO - codeparrot_training - Step 26934: {'lr': 0.00023472697716918575, 'samples': 13790720, 'steps': 26934, 'loss/train': 2.345998764038086} +02/25/2022 12:28:44 - INFO - codeparrot_training - Step 26935: {'lr': 0.00023471064530306273, 'samples': 13791232, 'steps': 26935, 'loss/train': 1.6002155542373657} +02/25/2022 12:28:50 - INFO - codeparrot_training - Step 26936: {'lr': 0.00023469431350243457, 'samples': 13791744, 'steps': 26936, 'loss/train': 1.923892617225647} +02/25/2022 12:28:53 - INFO - codeparrot_training - Step 26937: {'lr': 0.00023467798176737104, 'samples': 13792256, 'steps': 26937, 'loss/train': 2.699946880340576} +02/25/2022 12:28:59 - INFO - codeparrot_training - Step 26938: {'lr': 0.00023466165009794216, 'samples': 13792768, 'steps': 26938, 'loss/train': 2.1960859298706055} +02/25/2022 12:29:02 - INFO - codeparrot_training - Step 26939: {'lr': 0.00023464531849421788, 'samples': 13793280, 'steps': 26939, 'loss/train': 2.641878604888916} +02/25/2022 12:29:08 - INFO - codeparrot_training - Step 26940: {'lr': 0.00023462898695626825, 'samples': 13793792, 'steps': 26940, 'loss/train': 2.054353713989258} +02/25/2022 12:29:11 - INFO - codeparrot_training - Step 26941: {'lr': 0.00023461265548416316, 'samples': 13794304, 'steps': 26941, 'loss/train': 2.514080286026001} +02/25/2022 12:29:17 - INFO - codeparrot_training - Step 26942: {'lr': 0.0002345963240779725, 'samples': 13794816, 'steps': 26942, 'loss/train': 0.7865099906921387} +02/25/2022 12:29:20 - INFO - codeparrot_training - Step 26943: {'lr': 0.0002345799927377663, 'samples': 13795328, 'steps': 26943, 'loss/train': 1.6671823263168335} +02/25/2022 12:29:26 - INFO - codeparrot_training - Step 26944: {'lr': 0.0002345636614636145, 'samples': 13795840, 'steps': 26944, 'loss/train': 0.3675834536552429} +02/25/2022 12:29:29 - INFO - codeparrot_training - Step 26945: {'lr': 0.00023454733025558722, 'samples': 13796352, 'steps': 26945, 'loss/train': 2.8286986351013184} +02/25/2022 12:29:37 - INFO - codeparrot_training - Step 26946: {'lr': 0.00023453099911375415, 'samples': 13796864, 'steps': 26946, 'loss/train': 1.433905005455017} +02/25/2022 12:29:40 - INFO - codeparrot_training - Step 26947: {'lr': 0.00023451466803818533, 'samples': 13797376, 'steps': 26947, 'loss/train': 1.6915913820266724} +02/25/2022 12:29:46 - INFO - codeparrot_training - Step 26948: {'lr': 0.00023449833702895079, 'samples': 13797888, 'steps': 26948, 'loss/train': 1.1234729290008545} +02/25/2022 12:29:49 - INFO - codeparrot_training - Step 26949: {'lr': 0.00023448200608612056, 'samples': 13798400, 'steps': 26949, 'loss/train': 1.8688406944274902} +02/25/2022 12:29:55 - INFO - codeparrot_training - Step 26950: {'lr': 0.00023446567520976437, 'samples': 13798912, 'steps': 26950, 'loss/train': 1.9066269397735596} +02/25/2022 12:29:58 - INFO - codeparrot_training - Step 26951: {'lr': 0.00023444934439995231, 'samples': 13799424, 'steps': 26951, 'loss/train': 2.5175890922546387} +02/25/2022 12:30:04 - INFO - codeparrot_training - Step 26952: {'lr': 0.00023443301365675429, 'samples': 13799936, 'steps': 26952, 'loss/train': 1.9502217769622803} +02/25/2022 12:30:07 - INFO - codeparrot_training - Step 26953: {'lr': 0.0002344166829802404, 'samples': 13800448, 'steps': 26953, 'loss/train': 2.8203048706054688} +02/25/2022 12:30:13 - INFO - codeparrot_training - Step 26954: {'lr': 0.00023440035237048043, 'samples': 13800960, 'steps': 26954, 'loss/train': 2.954355001449585} +02/25/2022 12:30:16 - INFO - codeparrot_training - Step 26955: {'lr': 0.00023438402182754438, 'samples': 13801472, 'steps': 26955, 'loss/train': 1.731742262840271} +02/25/2022 12:30:23 - INFO - codeparrot_training - Step 26956: {'lr': 0.0002343676913515022, 'samples': 13801984, 'steps': 26956, 'loss/train': 1.950888752937317} +02/25/2022 12:30:27 - INFO - codeparrot_training - Step 26957: {'lr': 0.00023435136094242386, 'samples': 13802496, 'steps': 26957, 'loss/train': 2.819667339324951} +02/25/2022 12:30:32 - INFO - codeparrot_training - Step 26958: {'lr': 0.00023433503060037947, 'samples': 13803008, 'steps': 26958, 'loss/train': 1.6244242191314697} +02/25/2022 12:30:36 - INFO - codeparrot_training - Step 26959: {'lr': 0.0002343187003254387, 'samples': 13803520, 'steps': 26959, 'loss/train': 1.6507328748703003} +02/25/2022 12:30:41 - INFO - codeparrot_training - Step 26960: {'lr': 0.00023430237011767165, 'samples': 13804032, 'steps': 26960, 'loss/train': 2.173668384552002} +02/25/2022 12:30:45 - INFO - codeparrot_training - Step 26961: {'lr': 0.00023428603997714825, 'samples': 13804544, 'steps': 26961, 'loss/train': 2.7793047428131104} +02/25/2022 12:30:50 - INFO - codeparrot_training - Step 26962: {'lr': 0.0002342697099039386, 'samples': 13805056, 'steps': 26962, 'loss/train': 1.5307743549346924} +02/25/2022 12:30:54 - INFO - codeparrot_training - Step 26963: {'lr': 0.00023425337989811237, 'samples': 13805568, 'steps': 26963, 'loss/train': 2.0633704662323} +02/25/2022 12:30:59 - INFO - codeparrot_training - Step 26964: {'lr': 0.00023423704995973967, 'samples': 13806080, 'steps': 26964, 'loss/train': 1.6551611423492432} +02/25/2022 12:31:03 - INFO - codeparrot_training - Step 26965: {'lr': 0.00023422072008889047, 'samples': 13806592, 'steps': 26965, 'loss/train': 2.9720993041992188} +02/25/2022 12:31:06 - INFO - codeparrot_training - Step 26966: {'lr': 0.0002342043902856348, 'samples': 13807104, 'steps': 26966, 'loss/train': 2.187894105911255} +02/25/2022 12:31:12 - INFO - codeparrot_training - Step 26967: {'lr': 0.00023418806055004238, 'samples': 13807616, 'steps': 26967, 'loss/train': 1.8310469388961792} +02/25/2022 12:31:19 - INFO - codeparrot_training - Step 26968: {'lr': 0.00023417173088218328, 'samples': 13808128, 'steps': 26968, 'loss/train': 1.8833154439926147} +02/25/2022 12:31:23 - INFO - codeparrot_training - Step 26969: {'lr': 0.00023415540128212748, 'samples': 13808640, 'steps': 26969, 'loss/train': 2.8030288219451904} +02/25/2022 12:31:28 - INFO - codeparrot_training - Step 26970: {'lr': 0.00023413907174994495, 'samples': 13809152, 'steps': 26970, 'loss/train': 1.8186744451522827} +02/25/2022 12:31:32 - INFO - codeparrot_training - Step 26971: {'lr': 0.0002341227422857056, 'samples': 13809664, 'steps': 26971, 'loss/train': 2.4994451999664307} +02/25/2022 12:31:35 - INFO - codeparrot_training - Step 26972: {'lr': 0.00023410641288947935, 'samples': 13810176, 'steps': 26972, 'loss/train': 1.3450109958648682} +02/25/2022 12:31:41 - INFO - codeparrot_training - Step 26973: {'lr': 0.00023409008356133616, 'samples': 13810688, 'steps': 26973, 'loss/train': 1.844714641571045} +02/25/2022 12:31:46 - INFO - codeparrot_training - Step 26974: {'lr': 0.00023407375430134603, 'samples': 13811200, 'steps': 26974, 'loss/train': 1.5350931882858276} +02/25/2022 12:31:50 - INFO - codeparrot_training - Step 26975: {'lr': 0.0002340574251095789, 'samples': 13811712, 'steps': 26975, 'loss/train': 2.808093309402466} +02/25/2022 12:31:56 - INFO - codeparrot_training - Step 26976: {'lr': 0.0002340410959861047, 'samples': 13812224, 'steps': 26976, 'loss/train': 2.0313754081726074} +02/25/2022 12:31:59 - INFO - codeparrot_training - Step 26977: {'lr': 0.0002340247669309933, 'samples': 13812736, 'steps': 26977, 'loss/train': 2.2129156589508057} +02/25/2022 12:32:02 - INFO - codeparrot_training - Step 26978: {'lr': 0.0002340084379443148, 'samples': 13813248, 'steps': 26978, 'loss/train': 1.1918896436691284} +02/25/2022 12:32:08 - INFO - codeparrot_training - Step 26979: {'lr': 0.00023399210902613906, 'samples': 13813760, 'steps': 26979, 'loss/train': 1.6910289525985718} +02/25/2022 12:32:11 - INFO - codeparrot_training - Step 26980: {'lr': 0.00023397578017653604, 'samples': 13814272, 'steps': 26980, 'loss/train': 0.5626059174537659} +02/25/2022 12:32:17 - INFO - codeparrot_training - Step 26981: {'lr': 0.0002339594513955757, 'samples': 13814784, 'steps': 26981, 'loss/train': 1.6927043199539185} +02/25/2022 12:32:20 - INFO - codeparrot_training - Step 26982: {'lr': 0.00023394312268332793, 'samples': 13815296, 'steps': 26982, 'loss/train': 0.8931909203529358} +02/25/2022 12:32:28 - INFO - codeparrot_training - Step 26983: {'lr': 0.00023392679403986272, 'samples': 13815808, 'steps': 26983, 'loss/train': 1.8689600229263306} +02/25/2022 12:32:31 - INFO - codeparrot_training - Step 26984: {'lr': 0.0002339104654652501, 'samples': 13816320, 'steps': 26984, 'loss/train': 1.7950832843780518} +02/25/2022 12:32:37 - INFO - codeparrot_training - Step 26985: {'lr': 0.0002338941369595599, 'samples': 13816832, 'steps': 26985, 'loss/train': 1.7799584865570068} +02/25/2022 12:32:40 - INFO - codeparrot_training - Step 26986: {'lr': 0.0002338778085228621, 'samples': 13817344, 'steps': 26986, 'loss/train': 2.5141048431396484} +02/25/2022 12:32:46 - INFO - codeparrot_training - Step 26987: {'lr': 0.0002338614801552266, 'samples': 13817856, 'steps': 26987, 'loss/train': 1.3375355005264282} +02/25/2022 12:32:49 - INFO - codeparrot_training - Step 26988: {'lr': 0.00023384515185672353, 'samples': 13818368, 'steps': 26988, 'loss/train': 2.8453927040100098} +02/25/2022 12:32:55 - INFO - codeparrot_training - Step 26989: {'lr': 0.00023382882362742258, 'samples': 13818880, 'steps': 26989, 'loss/train': 2.148735284805298} +02/25/2022 12:32:58 - INFO - codeparrot_training - Step 26990: {'lr': 0.0002338124954673938, 'samples': 13819392, 'steps': 26990, 'loss/train': 1.4600436687469482} +02/25/2022 12:33:04 - INFO - codeparrot_training - Step 26991: {'lr': 0.00023379616737670718, 'samples': 13819904, 'steps': 26991, 'loss/train': 1.4878910779953003} +02/25/2022 12:33:07 - INFO - codeparrot_training - Step 26992: {'lr': 0.0002337798393554326, 'samples': 13820416, 'steps': 26992, 'loss/train': 2.499819040298462} +02/25/2022 12:33:13 - INFO - codeparrot_training - Step 26993: {'lr': 0.0002337635114036402, 'samples': 13820928, 'steps': 26993, 'loss/train': 2.2658467292785645} +02/25/2022 12:33:16 - INFO - codeparrot_training - Step 26994: {'lr': 0.0002337471835213996, 'samples': 13821440, 'steps': 26994, 'loss/train': 1.5784450769424438} +02/25/2022 12:33:23 - INFO - codeparrot_training - Step 26995: {'lr': 0.0002337308557087809, 'samples': 13821952, 'steps': 26995, 'loss/train': 1.4939954280853271} +02/25/2022 12:33:27 - INFO - codeparrot_training - Step 26996: {'lr': 0.00023371452796585408, 'samples': 13822464, 'steps': 26996, 'loss/train': 2.2780017852783203} +02/25/2022 12:33:32 - INFO - codeparrot_training - Step 26997: {'lr': 0.00023369820029268916, 'samples': 13822976, 'steps': 26997, 'loss/train': 1.8589674234390259} +02/25/2022 12:33:36 - INFO - codeparrot_training - Step 26998: {'lr': 0.00023368187268935588, 'samples': 13823488, 'steps': 26998, 'loss/train': 2.1118319034576416} +02/25/2022 12:33:41 - INFO - codeparrot_training - Step 26999: {'lr': 0.00023366554515592423, 'samples': 13824000, 'steps': 26999, 'loss/train': 1.4386999607086182} +02/25/2022 12:33:42 - INFO - codeparrot_training - Evaluating and saving model checkpoint