diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -24445,3 +24445,1009 @@ Use FP16 precision: False 02/25/2022 08:39:37 - INFO - codeparrot_training - Step 23998: {'lr': 0.00028266399273257193, 'samples': 12287488, 'steps': 23998, 'loss/train': 2.2219560146331787} 02/25/2022 08:39:43 - INFO - codeparrot_training - Step 23999: {'lr': 0.0002826477704637188, 'samples': 12288000, 'steps': 23999, 'loss/train': 1.6088899374008179} 02/25/2022 08:39:43 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 08:40:01 - WARNING - huggingface_hub.repository - Several commits (24) will be pushed upstream. +02/25/2022 08:40:01 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 08:40:34 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 04e6e2d..7a862ae floral-grass-11 -> floral-grass-11 + +02/25/2022 08:40:39 - INFO - codeparrot_training - Step 24000: {'lr': 0.000282631548055013, 'samples': 12288512, 'steps': 24000, 'loss/train': 1.9354079961776733} +02/25/2022 08:40:45 - INFO - codeparrot_training - Step 24001: {'lr': 0.00028261532550652387, 'samples': 12289024, 'steps': 24001, 'loss/train': 0.9326981902122498} +02/25/2022 08:40:48 - INFO - codeparrot_training - Step 24002: {'lr': 0.0002825991028183212, 'samples': 12289536, 'steps': 24002, 'loss/train': 5.711711883544922} +02/25/2022 08:40:54 - INFO - codeparrot_training - Step 24003: {'lr': 0.00028258287999047423, 'samples': 12290048, 'steps': 24003, 'loss/train': 2.30151104927063} +02/25/2022 08:40:57 - INFO - codeparrot_training - Step 24004: {'lr': 0.0002825666570230526, 'samples': 12290560, 'steps': 24004, 'loss/train': 1.609424114227295} +02/25/2022 08:41:03 - INFO - codeparrot_training - Step 24005: {'lr': 0.00028255043391612575, 'samples': 12291072, 'steps': 24005, 'loss/train': 1.54843270778656} +02/25/2022 08:41:06 - INFO - codeparrot_training - Step 24006: {'lr': 0.0002825342106697631, 'samples': 12291584, 'steps': 24006, 'loss/train': 1.9663410186767578} +02/25/2022 08:41:14 - INFO - codeparrot_training - Step 24007: {'lr': 0.00028251798728403434, 'samples': 12292096, 'steps': 24007, 'loss/train': 1.3868088722229004} +02/25/2022 08:41:17 - INFO - codeparrot_training - Step 24008: {'lr': 0.0002825017637590088, 'samples': 12292608, 'steps': 24008, 'loss/train': 2.250570774078369} +02/25/2022 08:41:23 - INFO - codeparrot_training - Step 24009: {'lr': 0.00028248554009475604, 'samples': 12293120, 'steps': 24009, 'loss/train': 1.775123119354248} +02/25/2022 08:41:26 - INFO - codeparrot_training - Step 24010: {'lr': 0.0002824693162913454, 'samples': 12293632, 'steps': 24010, 'loss/train': 1.6687016487121582} +02/25/2022 08:41:32 - INFO - codeparrot_training - Step 24011: {'lr': 0.00028245309234884667, 'samples': 12294144, 'steps': 24011, 'loss/train': 1.0860693454742432} +02/25/2022 08:41:35 - INFO - codeparrot_training - Step 24012: {'lr': 0.0002824368682673292, 'samples': 12294656, 'steps': 24012, 'loss/train': 2.729572057723999} +02/25/2022 08:41:41 - INFO - codeparrot_training - Step 24013: {'lr': 0.00028242064404686247, 'samples': 12295168, 'steps': 24013, 'loss/train': 2.1110994815826416} +02/25/2022 08:41:44 - INFO - codeparrot_training - Step 24014: {'lr': 0.00028240441968751596, 'samples': 12295680, 'steps': 24014, 'loss/train': 1.3538614511489868} +02/25/2022 08:41:50 - INFO - codeparrot_training - Step 24015: {'lr': 0.00028238819518935926, 'samples': 12296192, 'steps': 24015, 'loss/train': 1.8347688913345337} +02/25/2022 08:41:53 - INFO - codeparrot_training - Step 24016: {'lr': 0.0002823719705524617, 'samples': 12296704, 'steps': 24016, 'loss/train': 0.36350807547569275} +02/25/2022 08:42:01 - INFO - codeparrot_training - Step 24017: {'lr': 0.000282355745776893, 'samples': 12297216, 'steps': 24017, 'loss/train': 2.2671284675598145} +02/25/2022 08:42:04 - INFO - codeparrot_training - Step 24018: {'lr': 0.0002823395208627226, 'samples': 12297728, 'steps': 24018, 'loss/train': 1.6415660381317139} +02/25/2022 08:42:10 - INFO - codeparrot_training - Step 24019: {'lr': 0.0002823232958100199, 'samples': 12298240, 'steps': 24019, 'loss/train': 2.3679404258728027} +02/25/2022 08:42:13 - INFO - codeparrot_training - Step 24020: {'lr': 0.0002823070706188544, 'samples': 12298752, 'steps': 24020, 'loss/train': 2.10951828956604} +02/25/2022 08:42:19 - INFO - codeparrot_training - Step 24021: {'lr': 0.0002822908452892958, 'samples': 12299264, 'steps': 24021, 'loss/train': 1.0121402740478516} +02/25/2022 08:42:22 - INFO - codeparrot_training - Step 24022: {'lr': 0.0002822746198214133, 'samples': 12299776, 'steps': 24022, 'loss/train': 0.5913536548614502} +02/25/2022 08:42:28 - INFO - codeparrot_training - Step 24023: {'lr': 0.00028225839421527676, 'samples': 12300288, 'steps': 24023, 'loss/train': 2.159191846847534} +02/25/2022 08:42:31 - INFO - codeparrot_training - Step 24024: {'lr': 0.00028224216847095543, 'samples': 12300800, 'steps': 24024, 'loss/train': 0.8222342133522034} +02/25/2022 08:42:37 - INFO - codeparrot_training - Step 24025: {'lr': 0.00028222594258851884, 'samples': 12301312, 'steps': 24025, 'loss/train': 2.244511365890503} +02/25/2022 08:42:40 - INFO - codeparrot_training - Step 24026: {'lr': 0.0002822097165680367, 'samples': 12301824, 'steps': 24026, 'loss/train': 0.4128205478191376} +02/25/2022 08:42:46 - INFO - codeparrot_training - Step 24027: {'lr': 0.0002821934904095782, 'samples': 12302336, 'steps': 24027, 'loss/train': 1.5412288904190063} +02/25/2022 08:42:49 - INFO - codeparrot_training - Step 24028: {'lr': 0.0002821772641132131, 'samples': 12302848, 'steps': 24028, 'loss/train': 0.3498699963092804} +02/25/2022 08:42:55 - INFO - codeparrot_training - Step 24029: {'lr': 0.00028216103767901075, 'samples': 12303360, 'steps': 24029, 'loss/train': 2.6806092262268066} +02/25/2022 08:42:58 - INFO - codeparrot_training - Step 24030: {'lr': 0.0002821448111070407, 'samples': 12303872, 'steps': 24030, 'loss/train': 2.172480344772339} +02/25/2022 08:43:06 - INFO - codeparrot_training - Step 24031: {'lr': 0.00028212858439737254, 'samples': 12304384, 'steps': 24031, 'loss/train': 1.7248648405075073} +02/25/2022 08:43:09 - INFO - codeparrot_training - Step 24032: {'lr': 0.00028211235755007575, 'samples': 12304896, 'steps': 24032, 'loss/train': 2.882354974746704} +02/25/2022 08:43:15 - INFO - codeparrot_training - Step 24033: {'lr': 0.00028209613056521984, 'samples': 12305408, 'steps': 24033, 'loss/train': 2.4785149097442627} +02/25/2022 08:43:18 - INFO - codeparrot_training - Step 24034: {'lr': 0.00028207990344287414, 'samples': 12305920, 'steps': 24034, 'loss/train': 1.556368112564087} +02/25/2022 08:43:23 - INFO - codeparrot_training - Step 24035: {'lr': 0.0002820636761831085, 'samples': 12306432, 'steps': 24035, 'loss/train': 1.5226795673370361} +02/25/2022 08:43:27 - INFO - codeparrot_training - Step 24036: {'lr': 0.00028204744878599215, 'samples': 12306944, 'steps': 24036, 'loss/train': 0.5659157633781433} +02/25/2022 08:43:33 - INFO - codeparrot_training - Step 24037: {'lr': 0.00028203122125159476, 'samples': 12307456, 'steps': 24037, 'loss/train': 0.859870970249176} +02/25/2022 08:43:36 - INFO - codeparrot_training - Step 24038: {'lr': 0.0002820149935799858, 'samples': 12307968, 'steps': 24038, 'loss/train': 2.4015917778015137} +02/25/2022 08:43:40 - INFO - codeparrot_training - Step 24039: {'lr': 0.0002819987657712347, 'samples': 12308480, 'steps': 24039, 'loss/train': 2.179159641265869} +02/25/2022 08:43:46 - INFO - codeparrot_training - Step 24040: {'lr': 0.0002819825378254111, 'samples': 12308992, 'steps': 24040, 'loss/train': 2.623436450958252} +02/25/2022 08:43:49 - INFO - codeparrot_training - Step 24041: {'lr': 0.0002819663097425844, 'samples': 12309504, 'steps': 24041, 'loss/train': 2.718327045440674} +02/25/2022 08:43:55 - INFO - codeparrot_training - Step 24042: {'lr': 0.0002819500815228243, 'samples': 12310016, 'steps': 24042, 'loss/train': 1.3318506479263306} +02/25/2022 08:43:58 - INFO - codeparrot_training - Step 24043: {'lr': 0.00028193385316620007, 'samples': 12310528, 'steps': 24043, 'loss/train': 2.337672472000122} +02/25/2022 08:44:05 - INFO - codeparrot_training - Step 24044: {'lr': 0.00028191762467278146, 'samples': 12311040, 'steps': 24044, 'loss/train': 1.1867364645004272} +02/25/2022 08:44:09 - INFO - codeparrot_training - Step 24045: {'lr': 0.0002819013960426378, 'samples': 12311552, 'steps': 24045, 'loss/train': 1.3193825483322144} +02/25/2022 08:44:14 - INFO - codeparrot_training - Step 24046: {'lr': 0.00028188516727583874, 'samples': 12312064, 'steps': 24046, 'loss/train': 1.7453577518463135} +02/25/2022 08:44:18 - INFO - codeparrot_training - Step 24047: {'lr': 0.00028186893837245375, 'samples': 12312576, 'steps': 24047, 'loss/train': 2.1129283905029297} +02/25/2022 08:44:23 - INFO - codeparrot_training - Step 24048: {'lr': 0.00028185270933255236, 'samples': 12313088, 'steps': 24048, 'loss/train': 1.895471215248108} +02/25/2022 08:44:27 - INFO - codeparrot_training - Step 24049: {'lr': 0.000281836480156204, 'samples': 12313600, 'steps': 24049, 'loss/train': 0.7452694177627563} +02/25/2022 08:44:32 - INFO - codeparrot_training - Step 24050: {'lr': 0.0002818202508434783, 'samples': 12314112, 'steps': 24050, 'loss/train': 2.0025129318237305} +02/25/2022 08:44:36 - INFO - codeparrot_training - Step 24051: {'lr': 0.0002818040213944448, 'samples': 12314624, 'steps': 24051, 'loss/train': 0.6810865998268127} +02/25/2022 08:44:41 - INFO - codeparrot_training - Step 24052: {'lr': 0.0002817877918091729, 'samples': 12315136, 'steps': 24052, 'loss/train': 2.5121686458587646} +02/25/2022 08:44:45 - INFO - codeparrot_training - Step 24053: {'lr': 0.00028177156208773226, 'samples': 12315648, 'steps': 24053, 'loss/train': 0.9702437520027161} +02/25/2022 08:44:52 - INFO - codeparrot_training - Step 24054: {'lr': 0.0002817553322301922, 'samples': 12316160, 'steps': 24054, 'loss/train': 2.570195198059082} +02/25/2022 08:44:56 - INFO - codeparrot_training - Step 24055: {'lr': 0.0002817391022366226, 'samples': 12316672, 'steps': 24055, 'loss/train': 2.3012843132019043} +02/25/2022 08:45:01 - INFO - codeparrot_training - Step 24056: {'lr': 0.0002817228721070926, 'samples': 12317184, 'steps': 24056, 'loss/train': 0.297405481338501} +02/25/2022 08:45:07 - INFO - codeparrot_training - Step 24057: {'lr': 0.00028170664184167194, 'samples': 12317696, 'steps': 24057, 'loss/train': 1.7904223203659058} +02/25/2022 08:45:10 - INFO - codeparrot_training - Step 24058: {'lr': 0.0002816904114404301, 'samples': 12318208, 'steps': 24058, 'loss/train': 1.4671951532363892} +02/25/2022 08:45:14 - INFO - codeparrot_training - Step 24059: {'lr': 0.0002816741809034366, 'samples': 12318720, 'steps': 24059, 'loss/train': 0.8111671209335327} +02/25/2022 08:45:19 - INFO - codeparrot_training - Step 24060: {'lr': 0.000281657950230761, 'samples': 12319232, 'steps': 24060, 'loss/train': 1.8155766725540161} +02/25/2022 08:45:25 - INFO - codeparrot_training - Step 24061: {'lr': 0.0002816417194224727, 'samples': 12319744, 'steps': 24061, 'loss/train': 1.7982791662216187} +02/25/2022 08:45:28 - INFO - codeparrot_training - Step 24062: {'lr': 0.0002816254884786414, 'samples': 12320256, 'steps': 24062, 'loss/train': 1.531999945640564} +02/25/2022 08:45:32 - INFO - codeparrot_training - Step 24063: {'lr': 0.00028160925739933654, 'samples': 12320768, 'steps': 24063, 'loss/train': 2.4597690105438232} +02/25/2022 08:45:39 - INFO - codeparrot_training - Step 24064: {'lr': 0.0002815930261846276, 'samples': 12321280, 'steps': 24064, 'loss/train': 1.4569636583328247} +02/25/2022 08:45:42 - INFO - codeparrot_training - Step 24065: {'lr': 0.00028157679483458423, 'samples': 12321792, 'steps': 24065, 'loss/train': 2.049741506576538} +02/25/2022 08:45:48 - INFO - codeparrot_training - Step 24066: {'lr': 0.00028156056334927595, 'samples': 12322304, 'steps': 24066, 'loss/train': 1.324506402015686} +02/25/2022 08:45:51 - INFO - codeparrot_training - Step 24067: {'lr': 0.00028154433172877216, 'samples': 12322816, 'steps': 24067, 'loss/train': 2.5777499675750732} +02/25/2022 08:45:57 - INFO - codeparrot_training - Step 24068: {'lr': 0.0002815280999731424, 'samples': 12323328, 'steps': 24068, 'loss/train': 1.1260238885879517} +02/25/2022 08:46:00 - INFO - codeparrot_training - Step 24069: {'lr': 0.00028151186808245636, 'samples': 12323840, 'steps': 24069, 'loss/train': 2.5874171257019043} +02/25/2022 08:46:06 - INFO - codeparrot_training - Step 24070: {'lr': 0.0002814956360567835, 'samples': 12324352, 'steps': 24070, 'loss/train': 1.1112257242202759} +02/25/2022 08:46:10 - INFO - codeparrot_training - Step 24071: {'lr': 0.0002814794038961933, 'samples': 12324864, 'steps': 24071, 'loss/train': 0.7731790542602539} +02/25/2022 08:46:15 - INFO - codeparrot_training - Step 24072: {'lr': 0.00028146317160075533, 'samples': 12325376, 'steps': 24072, 'loss/train': 1.5927790403366089} +02/25/2022 08:46:19 - INFO - codeparrot_training - Step 24073: {'lr': 0.00028144693917053916, 'samples': 12325888, 'steps': 24073, 'loss/train': 2.5427446365356445} +02/25/2022 08:46:24 - INFO - codeparrot_training - Step 24074: {'lr': 0.00028143070660561424, 'samples': 12326400, 'steps': 24074, 'loss/train': 1.7923914194107056} +02/25/2022 08:46:28 - INFO - codeparrot_training - Step 24075: {'lr': 0.00028141447390605016, 'samples': 12326912, 'steps': 24075, 'loss/train': 1.227791428565979} +02/25/2022 08:46:33 - INFO - codeparrot_training - Step 24076: {'lr': 0.0002813982410719165, 'samples': 12327424, 'steps': 24076, 'loss/train': 2.6001620292663574} +02/25/2022 08:46:37 - INFO - codeparrot_training - Step 24077: {'lr': 0.0002813820081032827, 'samples': 12327936, 'steps': 24077, 'loss/train': 4.775791168212891} +02/25/2022 08:46:42 - INFO - codeparrot_training - Step 24078: {'lr': 0.0002813657750002183, 'samples': 12328448, 'steps': 24078, 'loss/train': 1.6923785209655762} +02/25/2022 08:46:46 - INFO - codeparrot_training - Step 24079: {'lr': 0.0002813495417627929, 'samples': 12328960, 'steps': 24079, 'loss/train': 2.515749216079712} +02/25/2022 08:46:53 - INFO - codeparrot_training - Step 24080: {'lr': 0.00028133330839107606, 'samples': 12329472, 'steps': 24080, 'loss/train': 2.0844130516052246} +02/25/2022 08:46:57 - INFO - codeparrot_training - Step 24081: {'lr': 0.0002813170748851373, 'samples': 12329984, 'steps': 24081, 'loss/train': 2.1875863075256348} +02/25/2022 08:47:02 - INFO - codeparrot_training - Step 24082: {'lr': 0.00028130084124504607, 'samples': 12330496, 'steps': 24082, 'loss/train': 1.7935336828231812} +02/25/2022 08:47:06 - INFO - codeparrot_training - Step 24083: {'lr': 0.000281284607470872, 'samples': 12331008, 'steps': 24083, 'loss/train': 2.8879871368408203} +02/25/2022 08:47:11 - INFO - codeparrot_training - Step 24084: {'lr': 0.00028126837356268463, 'samples': 12331520, 'steps': 24084, 'loss/train': 2.0636024475097656} +02/25/2022 08:47:15 - INFO - codeparrot_training - Step 24085: {'lr': 0.00028125213952055343, 'samples': 12332032, 'steps': 24085, 'loss/train': 2.0726478099823} +02/25/2022 08:47:20 - INFO - codeparrot_training - Step 24086: {'lr': 0.00028123590534454804, 'samples': 12332544, 'steps': 24086, 'loss/train': 2.4974093437194824} +02/25/2022 08:47:24 - INFO - codeparrot_training - Step 24087: {'lr': 0.00028121967103473795, 'samples': 12333056, 'steps': 24087, 'loss/train': 2.1765761375427246} +02/25/2022 08:47:29 - INFO - codeparrot_training - Step 24088: {'lr': 0.0002812034365911926, 'samples': 12333568, 'steps': 24088, 'loss/train': 1.0200955867767334} +02/25/2022 08:47:33 - INFO - codeparrot_training - Step 24089: {'lr': 0.00028118720201398173, 'samples': 12334080, 'steps': 24089, 'loss/train': 2.440505266189575} +02/25/2022 08:47:40 - INFO - codeparrot_training - Step 24090: {'lr': 0.00028117096730317475, 'samples': 12334592, 'steps': 24090, 'loss/train': 0.2194325178861618} +02/25/2022 08:47:43 - INFO - codeparrot_training - Step 24091: {'lr': 0.0002811547324588413, 'samples': 12335104, 'steps': 24091, 'loss/train': 1.6860175132751465} +02/25/2022 08:47:49 - INFO - codeparrot_training - Step 24092: {'lr': 0.0002811384974810508, 'samples': 12335616, 'steps': 24092, 'loss/train': 1.37722909450531} +02/25/2022 08:47:52 - INFO - codeparrot_training - Step 24093: {'lr': 0.0002811222623698729, 'samples': 12336128, 'steps': 24093, 'loss/train': 2.72214937210083} +02/25/2022 08:47:58 - INFO - codeparrot_training - Step 24094: {'lr': 0.00028110602712537713, 'samples': 12336640, 'steps': 24094, 'loss/train': 2.743868827819824} +02/25/2022 08:48:01 - INFO - codeparrot_training - Step 24095: {'lr': 0.000281089791747633, 'samples': 12337152, 'steps': 24095, 'loss/train': 2.0026705265045166} +02/25/2022 08:48:07 - INFO - codeparrot_training - Step 24096: {'lr': 0.00028107355623671, 'samples': 12337664, 'steps': 24096, 'loss/train': 2.309119701385498} +02/25/2022 08:48:11 - INFO - codeparrot_training - Step 24097: {'lr': 0.00028105732059267787, 'samples': 12338176, 'steps': 24097, 'loss/train': 1.4898655414581299} +02/25/2022 08:48:16 - INFO - codeparrot_training - Step 24098: {'lr': 0.00028104108481560603, 'samples': 12338688, 'steps': 24098, 'loss/train': 2.7868711948394775} +02/25/2022 08:48:20 - INFO - codeparrot_training - Step 24099: {'lr': 0.00028102484890556395, 'samples': 12339200, 'steps': 24099, 'loss/train': 1.7177258729934692} +02/25/2022 08:48:27 - INFO - codeparrot_training - Step 24100: {'lr': 0.00028100861286262135, 'samples': 12339712, 'steps': 24100, 'loss/train': 1.4562634229660034} +02/25/2022 08:48:30 - INFO - codeparrot_training - Step 24101: {'lr': 0.0002809923766868476, 'samples': 12340224, 'steps': 24101, 'loss/train': 1.6887115240097046} +02/25/2022 08:48:36 - INFO - codeparrot_training - Step 24102: {'lr': 0.00028097614037831244, 'samples': 12340736, 'steps': 24102, 'loss/train': 1.382042646408081} +02/25/2022 08:48:39 - INFO - codeparrot_training - Step 24103: {'lr': 0.00028095990393708534, 'samples': 12341248, 'steps': 24103, 'loss/train': 2.1060309410095215} +02/25/2022 08:48:45 - INFO - codeparrot_training - Step 24104: {'lr': 0.00028094366736323577, 'samples': 12341760, 'steps': 24104, 'loss/train': 1.7109125852584839} +02/25/2022 08:48:49 - INFO - codeparrot_training - Step 24105: {'lr': 0.0002809274306568335, 'samples': 12342272, 'steps': 24105, 'loss/train': 0.7774963974952698} +02/25/2022 08:48:54 - INFO - codeparrot_training - Step 24106: {'lr': 0.00028091119381794776, 'samples': 12342784, 'steps': 24106, 'loss/train': 1.1881859302520752} +02/25/2022 08:48:58 - INFO - codeparrot_training - Step 24107: {'lr': 0.0002808949568466484, 'samples': 12343296, 'steps': 24107, 'loss/train': 1.824115514755249} +02/25/2022 08:49:03 - INFO - codeparrot_training - Step 24108: {'lr': 0.0002808787197430048, 'samples': 12343808, 'steps': 24108, 'loss/train': 1.1005526781082153} +02/25/2022 08:49:07 - INFO - codeparrot_training - Step 24109: {'lr': 0.0002808624825070866, 'samples': 12344320, 'steps': 24109, 'loss/train': 1.4673726558685303} +02/25/2022 08:49:14 - INFO - codeparrot_training - Step 24110: {'lr': 0.0002808462451389633, 'samples': 12344832, 'steps': 24110, 'loss/train': 2.0260980129241943} +02/25/2022 08:49:17 - INFO - codeparrot_training - Step 24111: {'lr': 0.0002808300076387045, 'samples': 12345344, 'steps': 24111, 'loss/train': 2.2493178844451904} +02/25/2022 08:49:23 - INFO - codeparrot_training - Step 24112: {'lr': 0.0002808137700063797, 'samples': 12345856, 'steps': 24112, 'loss/train': 2.1967365741729736} +02/25/2022 08:49:26 - INFO - codeparrot_training - Step 24113: {'lr': 0.0002807975322420585, 'samples': 12346368, 'steps': 24113, 'loss/train': 0.9307685494422913} +02/25/2022 08:49:32 - INFO - codeparrot_training - Step 24114: {'lr': 0.0002807812943458105, 'samples': 12346880, 'steps': 24114, 'loss/train': 1.0833745002746582} +02/25/2022 08:49:35 - INFO - codeparrot_training - Step 24115: {'lr': 0.00028076505631770515, 'samples': 12347392, 'steps': 24115, 'loss/train': 1.9819477796554565} +02/25/2022 08:49:41 - INFO - codeparrot_training - Step 24116: {'lr': 0.0002807488181578121, 'samples': 12347904, 'steps': 24116, 'loss/train': 1.9693245887756348} +02/25/2022 08:49:44 - INFO - codeparrot_training - Step 24117: {'lr': 0.00028073257986620085, 'samples': 12348416, 'steps': 24117, 'loss/train': 1.4338395595550537} +02/25/2022 08:49:50 - INFO - codeparrot_training - Step 24118: {'lr': 0.00028071634144294106, 'samples': 12348928, 'steps': 24118, 'loss/train': 0.9974554181098938} +02/25/2022 08:49:53 - INFO - codeparrot_training - Step 24119: {'lr': 0.0002807001028881021, 'samples': 12349440, 'steps': 24119, 'loss/train': 1.834402322769165} +02/25/2022 08:49:59 - INFO - codeparrot_training - Step 24120: {'lr': 0.0002806838642017537, 'samples': 12349952, 'steps': 24120, 'loss/train': 1.0758941173553467} +02/25/2022 08:50:03 - INFO - codeparrot_training - Step 24121: {'lr': 0.0002806676253839654, 'samples': 12350464, 'steps': 24121, 'loss/train': 1.8781483173370361} +02/25/2022 08:50:08 - INFO - codeparrot_training - Step 24122: {'lr': 0.00028065138643480677, 'samples': 12350976, 'steps': 24122, 'loss/train': 2.4904329776763916} +02/25/2022 08:50:12 - INFO - codeparrot_training - Step 24123: {'lr': 0.00028063514735434726, 'samples': 12351488, 'steps': 24123, 'loss/train': 2.470557928085327} +02/25/2022 08:50:17 - INFO - codeparrot_training - Step 24124: {'lr': 0.0002806189081426565, 'samples': 12352000, 'steps': 24124, 'loss/train': 8.82812213897705} +02/25/2022 08:50:21 - INFO - codeparrot_training - Step 24125: {'lr': 0.0002806026687998041, 'samples': 12352512, 'steps': 24125, 'loss/train': 1.196666955947876} +02/25/2022 08:50:28 - INFO - codeparrot_training - Step 24126: {'lr': 0.0002805864293258595, 'samples': 12353024, 'steps': 24126, 'loss/train': 2.797055721282959} +02/25/2022 08:50:32 - INFO - codeparrot_training - Step 24127: {'lr': 0.00028057018972089243, 'samples': 12353536, 'steps': 24127, 'loss/train': 1.391169786453247} +02/25/2022 08:50:37 - INFO - codeparrot_training - Step 24128: {'lr': 0.00028055394998497237, 'samples': 12354048, 'steps': 24128, 'loss/train': 2.859102725982666} +02/25/2022 08:50:41 - INFO - codeparrot_training - Step 24129: {'lr': 0.00028053771011816894, 'samples': 12354560, 'steps': 24129, 'loss/train': 1.148810863494873} +02/25/2022 08:50:47 - INFO - codeparrot_training - Step 24130: {'lr': 0.00028052147012055153, 'samples': 12355072, 'steps': 24130, 'loss/train': 1.575769305229187} +02/25/2022 08:50:50 - INFO - codeparrot_training - Step 24131: {'lr': 0.0002805052299921899, 'samples': 12355584, 'steps': 24131, 'loss/train': 0.7827603816986084} +02/25/2022 08:50:54 - INFO - codeparrot_training - Step 24132: {'lr': 0.0002804889897331536, 'samples': 12356096, 'steps': 24132, 'loss/train': 2.9821131229400635} +02/25/2022 08:50:59 - INFO - codeparrot_training - Step 24133: {'lr': 0.0002804727493435121, 'samples': 12356608, 'steps': 24133, 'loss/train': 0.4320419430732727} +02/25/2022 08:51:03 - INFO - codeparrot_training - Step 24134: {'lr': 0.000280456508823335, 'samples': 12357120, 'steps': 24134, 'loss/train': 0.1283150017261505} +02/25/2022 08:51:08 - INFO - codeparrot_training - Step 24135: {'lr': 0.0002804402681726919, 'samples': 12357632, 'steps': 24135, 'loss/train': 2.6496336460113525} +02/25/2022 08:51:12 - INFO - codeparrot_training - Step 24136: {'lr': 0.00028042402739165235, 'samples': 12358144, 'steps': 24136, 'loss/train': 1.8936935663223267} +02/25/2022 08:51:20 - INFO - codeparrot_training - Step 24137: {'lr': 0.000280407786480286, 'samples': 12358656, 'steps': 24137, 'loss/train': 1.015517234802246} +02/25/2022 08:51:23 - INFO - codeparrot_training - Step 24138: {'lr': 0.00028039154543866223, 'samples': 12359168, 'steps': 24138, 'loss/train': 1.958033800125122} +02/25/2022 08:51:29 - INFO - codeparrot_training - Step 24139: {'lr': 0.0002803753042668508, 'samples': 12359680, 'steps': 24139, 'loss/train': 0.6777181625366211} +02/25/2022 08:51:32 - INFO - codeparrot_training - Step 24140: {'lr': 0.0002803590629649212, 'samples': 12360192, 'steps': 24140, 'loss/train': 0.3564806878566742} +02/25/2022 08:51:38 - INFO - codeparrot_training - Step 24141: {'lr': 0.000280342821532943, 'samples': 12360704, 'steps': 24141, 'loss/train': 2.056265115737915} +02/25/2022 08:51:41 - INFO - codeparrot_training - Step 24142: {'lr': 0.0002803265799709858, 'samples': 12361216, 'steps': 24142, 'loss/train': 0.9112293720245361} +02/25/2022 08:51:47 - INFO - codeparrot_training - Step 24143: {'lr': 0.0002803103382791191, 'samples': 12361728, 'steps': 24143, 'loss/train': 1.7687865495681763} +02/25/2022 08:51:50 - INFO - codeparrot_training - Step 24144: {'lr': 0.00028029409645741264, 'samples': 12362240, 'steps': 24144, 'loss/train': 1.9292594194412231} +02/25/2022 08:51:56 - INFO - codeparrot_training - Step 24145: {'lr': 0.00028027785450593585, 'samples': 12362752, 'steps': 24145, 'loss/train': 2.1720035076141357} +02/25/2022 08:51:59 - INFO - codeparrot_training - Step 24146: {'lr': 0.00028026161242475833, 'samples': 12363264, 'steps': 24146, 'loss/train': 0.971410870552063} +02/25/2022 08:52:07 - INFO - codeparrot_training - Step 24147: {'lr': 0.0002802453702139497, 'samples': 12363776, 'steps': 24147, 'loss/train': 1.8997070789337158} +02/25/2022 08:52:10 - INFO - codeparrot_training - Step 24148: {'lr': 0.0002802291278735795, 'samples': 12364288, 'steps': 24148, 'loss/train': 1.9926778078079224} +02/25/2022 08:52:16 - INFO - codeparrot_training - Step 24149: {'lr': 0.0002802128854037173, 'samples': 12364800, 'steps': 24149, 'loss/train': 2.148219108581543} +02/25/2022 08:52:19 - INFO - codeparrot_training - Step 24150: {'lr': 0.00028019664280443275, 'samples': 12365312, 'steps': 24150, 'loss/train': 1.7260531187057495} +02/25/2022 08:52:24 - INFO - codeparrot_training - Step 24151: {'lr': 0.00028018040007579524, 'samples': 12365824, 'steps': 24151, 'loss/train': 2.203099250793457} +02/25/2022 08:52:28 - INFO - codeparrot_training - Step 24152: {'lr': 0.00028016415721787463, 'samples': 12366336, 'steps': 24152, 'loss/train': 1.587680697441101} +02/25/2022 08:52:34 - INFO - codeparrot_training - Step 24153: {'lr': 0.0002801479142307403, 'samples': 12366848, 'steps': 24153, 'loss/train': 1.4639865159988403} +02/25/2022 08:52:37 - INFO - codeparrot_training - Step 24154: {'lr': 0.0002801316711144619, 'samples': 12367360, 'steps': 24154, 'loss/train': 0.7426556348800659} +02/25/2022 08:52:43 - INFO - codeparrot_training - Step 24155: {'lr': 0.00028011542786910896, 'samples': 12367872, 'steps': 24155, 'loss/train': 1.4798780679702759} +02/25/2022 08:52:46 - INFO - codeparrot_training - Step 24156: {'lr': 0.00028009918449475104, 'samples': 12368384, 'steps': 24156, 'loss/train': 3.189852476119995} +02/25/2022 08:52:53 - INFO - codeparrot_training - Step 24157: {'lr': 0.0002800829409914578, 'samples': 12368896, 'steps': 24157, 'loss/train': 2.7021100521087646} +02/25/2022 08:52:57 - INFO - codeparrot_training - Step 24158: {'lr': 0.0002800666973592988, 'samples': 12369408, 'steps': 24158, 'loss/train': 1.9429315328598022} +02/25/2022 08:53:02 - INFO - codeparrot_training - Step 24159: {'lr': 0.0002800504535983436, 'samples': 12369920, 'steps': 24159, 'loss/train': 2.165562152862549} +02/25/2022 08:53:06 - INFO - codeparrot_training - Step 24160: {'lr': 0.00028003420970866175, 'samples': 12370432, 'steps': 24160, 'loss/train': 0.9998244047164917} +02/25/2022 08:53:11 - INFO - codeparrot_training - Step 24161: {'lr': 0.00028001796569032304, 'samples': 12370944, 'steps': 24161, 'loss/train': 1.5083441734313965} +02/25/2022 08:53:15 - INFO - codeparrot_training - Step 24162: {'lr': 0.00028000172154339675, 'samples': 12371456, 'steps': 24162, 'loss/train': 1.707032322883606} +02/25/2022 08:53:20 - INFO - codeparrot_training - Step 24163: {'lr': 0.00027998547726795265, 'samples': 12371968, 'steps': 24163, 'loss/train': 1.2037488222122192} +02/25/2022 08:53:24 - INFO - codeparrot_training - Step 24164: {'lr': 0.00027996923286406037, 'samples': 12372480, 'steps': 24164, 'loss/train': 2.5205039978027344} +02/25/2022 08:53:29 - INFO - codeparrot_training - Step 24165: {'lr': 0.00027995298833178927, 'samples': 12372992, 'steps': 24165, 'loss/train': 1.5232199430465698} +02/25/2022 08:53:33 - INFO - codeparrot_training - Step 24166: {'lr': 0.0002799367436712092, 'samples': 12373504, 'steps': 24166, 'loss/train': 1.2361977100372314} +02/25/2022 08:53:38 - INFO - codeparrot_training - Step 24167: {'lr': 0.00027992049888238957, 'samples': 12374016, 'steps': 24167, 'loss/train': 1.5076152086257935} +02/25/2022 08:53:42 - INFO - codeparrot_training - Step 24168: {'lr': 0.00027990425396540007, 'samples': 12374528, 'steps': 24168, 'loss/train': 2.016110897064209} +02/25/2022 08:53:47 - INFO - codeparrot_training - Step 24169: {'lr': 0.0002798880089203102, 'samples': 12375040, 'steps': 24169, 'loss/train': 1.945264458656311} +02/25/2022 08:53:51 - INFO - codeparrot_training - Step 24170: {'lr': 0.0002798717637471896, 'samples': 12375552, 'steps': 24170, 'loss/train': 1.2609217166900635} +02/25/2022 08:53:56 - INFO - codeparrot_training - Step 24171: {'lr': 0.0002798555184461078, 'samples': 12376064, 'steps': 24171, 'loss/train': 1.3531477451324463} +02/25/2022 08:53:59 - INFO - codeparrot_training - Step 24172: {'lr': 0.0002798392730171345, 'samples': 12376576, 'steps': 24172, 'loss/train': 1.294921875} +02/25/2022 08:54:07 - INFO - codeparrot_training - Step 24173: {'lr': 0.00027982302746033924, 'samples': 12377088, 'steps': 24173, 'loss/train': 0.43970680236816406} +02/25/2022 08:54:10 - INFO - codeparrot_training - Step 24174: {'lr': 0.0002798067817757916, 'samples': 12377600, 'steps': 24174, 'loss/train': 1.9545875787734985} +02/25/2022 08:54:16 - INFO - codeparrot_training - Step 24175: {'lr': 0.00027979053596356105, 'samples': 12378112, 'steps': 24175, 'loss/train': 1.5943564176559448} +02/25/2022 08:54:19 - INFO - codeparrot_training - Step 24176: {'lr': 0.00027977429002371744, 'samples': 12378624, 'steps': 24176, 'loss/train': 2.2447307109832764} +02/25/2022 08:54:25 - INFO - codeparrot_training - Step 24177: {'lr': 0.0002797580439563302, 'samples': 12379136, 'steps': 24177, 'loss/train': 2.1768507957458496} +02/25/2022 08:54:28 - INFO - codeparrot_training - Step 24178: {'lr': 0.0002797417977614689, 'samples': 12379648, 'steps': 24178, 'loss/train': 1.9363279342651367} +02/25/2022 08:54:34 - INFO - codeparrot_training - Step 24179: {'lr': 0.00027972555143920326, 'samples': 12380160, 'steps': 24179, 'loss/train': 2.437059164047241} +02/25/2022 08:54:37 - INFO - codeparrot_training - Step 24180: {'lr': 0.00027970930498960275, 'samples': 12380672, 'steps': 24180, 'loss/train': 1.9060323238372803} +02/25/2022 08:54:43 - INFO - codeparrot_training - Step 24181: {'lr': 0.000279693058412737, 'samples': 12381184, 'steps': 24181, 'loss/train': 1.8979460000991821} +02/25/2022 08:54:46 - INFO - codeparrot_training - Step 24182: {'lr': 0.00027967681170867567, 'samples': 12381696, 'steps': 24182, 'loss/train': 2.1599743366241455} +02/25/2022 08:54:54 - INFO - codeparrot_training - Step 24183: {'lr': 0.00027966056487748825, 'samples': 12382208, 'steps': 24183, 'loss/train': 2.8617851734161377} +02/25/2022 08:54:57 - INFO - codeparrot_training - Step 24184: {'lr': 0.0002796443179192444, 'samples': 12382720, 'steps': 24184, 'loss/train': 2.365355968475342} +02/25/2022 08:55:03 - INFO - codeparrot_training - Step 24185: {'lr': 0.0002796280708340137, 'samples': 12383232, 'steps': 24185, 'loss/train': 2.2310194969177246} +02/25/2022 08:55:06 - INFO - codeparrot_training - Step 24186: {'lr': 0.0002796118236218658, 'samples': 12383744, 'steps': 24186, 'loss/train': 1.3441805839538574} +02/25/2022 08:55:12 - INFO - codeparrot_training - Step 24187: {'lr': 0.00027959557628287016, 'samples': 12384256, 'steps': 24187, 'loss/train': 0.7696624398231506} +02/25/2022 08:55:15 - INFO - codeparrot_training - Step 24188: {'lr': 0.0002795793288170965, 'samples': 12384768, 'steps': 24188, 'loss/train': 2.259233236312866} +02/25/2022 08:55:21 - INFO - codeparrot_training - Step 24189: {'lr': 0.00027956308122461436, 'samples': 12385280, 'steps': 24189, 'loss/train': 1.4260845184326172} +02/25/2022 08:55:24 - INFO - codeparrot_training - Step 24190: {'lr': 0.0002795468335054935, 'samples': 12385792, 'steps': 24190, 'loss/train': 1.8014477491378784} +02/25/2022 08:55:30 - INFO - codeparrot_training - Step 24191: {'lr': 0.00027953058565980326, 'samples': 12386304, 'steps': 24191, 'loss/train': 1.4506462812423706} +02/25/2022 08:55:33 - INFO - codeparrot_training - Step 24192: {'lr': 0.0002795143376876134, 'samples': 12386816, 'steps': 24192, 'loss/train': 2.0312018394470215} +02/25/2022 08:55:39 - INFO - codeparrot_training - Step 24193: {'lr': 0.00027949808958899354, 'samples': 12387328, 'steps': 24193, 'loss/train': 2.3549644947052} +02/25/2022 08:55:42 - INFO - codeparrot_training - Step 24194: {'lr': 0.00027948184136401317, 'samples': 12387840, 'steps': 24194, 'loss/train': 1.9346637725830078} +02/25/2022 08:55:50 - INFO - codeparrot_training - Step 24195: {'lr': 0.00027946559301274206, 'samples': 12388352, 'steps': 24195, 'loss/train': 3.3463943004608154} +02/25/2022 08:55:53 - INFO - codeparrot_training - Step 24196: {'lr': 0.0002794493445352496, 'samples': 12388864, 'steps': 24196, 'loss/train': 1.8509702682495117} +02/25/2022 08:55:59 - INFO - codeparrot_training - Step 24197: {'lr': 0.0002794330959316055, 'samples': 12389376, 'steps': 24197, 'loss/train': 1.0540375709533691} +02/25/2022 08:56:02 - INFO - codeparrot_training - Step 24198: {'lr': 0.0002794168472018794, 'samples': 12389888, 'steps': 24198, 'loss/train': 1.5441783666610718} +02/25/2022 08:56:08 - INFO - codeparrot_training - Step 24199: {'lr': 0.0002794005983461408, 'samples': 12390400, 'steps': 24199, 'loss/train': 2.2779369354248047} +02/25/2022 08:56:11 - INFO - codeparrot_training - Step 24200: {'lr': 0.00027938434936445943, 'samples': 12390912, 'steps': 24200, 'loss/train': 0.6469489932060242} +02/25/2022 08:56:17 - INFO - codeparrot_training - Step 24201: {'lr': 0.00027936810025690483, 'samples': 12391424, 'steps': 24201, 'loss/train': 1.9743132591247559} +02/25/2022 08:56:20 - INFO - codeparrot_training - Step 24202: {'lr': 0.0002793518510235466, 'samples': 12391936, 'steps': 24202, 'loss/train': 2.8360402584075928} +02/25/2022 08:56:26 - INFO - codeparrot_training - Step 24203: {'lr': 0.00027933560166445445, 'samples': 12392448, 'steps': 24203, 'loss/train': 2.3457517623901367} +02/25/2022 08:56:29 - INFO - codeparrot_training - Step 24204: {'lr': 0.00027931935217969777, 'samples': 12392960, 'steps': 24204, 'loss/train': 2.4369144439697266} +02/25/2022 08:56:36 - INFO - codeparrot_training - Step 24205: {'lr': 0.00027930310256934636, 'samples': 12393472, 'steps': 24205, 'loss/train': 1.5425649881362915} +02/25/2022 08:56:40 - INFO - codeparrot_training - Step 24206: {'lr': 0.0002792868528334697, 'samples': 12393984, 'steps': 24206, 'loss/train': 2.003851890563965} +02/25/2022 08:56:45 - INFO - codeparrot_training - Step 24207: {'lr': 0.0002792706029721376, 'samples': 12394496, 'steps': 24207, 'loss/train': 1.8645727634429932} +02/25/2022 08:56:49 - INFO - codeparrot_training - Step 24208: {'lr': 0.0002792543529854194, 'samples': 12395008, 'steps': 24208, 'loss/train': 1.888032078742981} +02/25/2022 08:56:55 - INFO - codeparrot_training - Step 24209: {'lr': 0.0002792381028733849, 'samples': 12395520, 'steps': 24209, 'loss/train': 1.743841290473938} +02/25/2022 08:56:58 - INFO - codeparrot_training - Step 24210: {'lr': 0.0002792218526361036, 'samples': 12396032, 'steps': 24210, 'loss/train': 1.3721891641616821} +02/25/2022 08:57:04 - INFO - codeparrot_training - Step 24211: {'lr': 0.00027920560227364516, 'samples': 12396544, 'steps': 24211, 'loss/train': 2.106753349304199} +02/25/2022 08:57:07 - INFO - codeparrot_training - Step 24212: {'lr': 0.00027918935178607927, 'samples': 12397056, 'steps': 24212, 'loss/train': 1.7442359924316406} +02/25/2022 08:57:12 - INFO - codeparrot_training - Step 24213: {'lr': 0.00027917310117347543, 'samples': 12397568, 'steps': 24213, 'loss/train': 2.296968460083008} +02/25/2022 08:57:16 - INFO - codeparrot_training - Step 24214: {'lr': 0.00027915685043590325, 'samples': 12398080, 'steps': 24214, 'loss/train': 2.3627583980560303} +02/25/2022 08:57:22 - INFO - codeparrot_training - Step 24215: {'lr': 0.00027914059957343245, 'samples': 12398592, 'steps': 24215, 'loss/train': 1.9211373329162598} +02/25/2022 08:57:27 - INFO - codeparrot_training - Step 24216: {'lr': 0.00027912434858613257, 'samples': 12399104, 'steps': 24216, 'loss/train': 1.789819598197937} +02/25/2022 08:57:31 - INFO - codeparrot_training - Step 24217: {'lr': 0.00027910809747407316, 'samples': 12399616, 'steps': 24217, 'loss/train': 1.4690529108047485} +02/25/2022 08:57:38 - INFO - codeparrot_training - Step 24218: {'lr': 0.00027909184623732395, 'samples': 12400128, 'steps': 24218, 'loss/train': 3.1276214122772217} +02/25/2022 08:57:42 - INFO - codeparrot_training - Step 24219: {'lr': 0.00027907559487595453, 'samples': 12400640, 'steps': 24219, 'loss/train': 2.853700637817383} +02/25/2022 08:57:47 - INFO - codeparrot_training - Step 24220: {'lr': 0.00027905934339003446, 'samples': 12401152, 'steps': 24220, 'loss/train': 1.4881477355957031} +02/25/2022 08:57:51 - INFO - codeparrot_training - Step 24221: {'lr': 0.0002790430917796334, 'samples': 12401664, 'steps': 24221, 'loss/train': 2.0138003826141357} +02/25/2022 08:57:56 - INFO - codeparrot_training - Step 24222: {'lr': 0.0002790268400448209, 'samples': 12402176, 'steps': 24222, 'loss/train': 2.5834553241729736} +02/25/2022 08:58:00 - INFO - codeparrot_training - Step 24223: {'lr': 0.00027901058818566673, 'samples': 12402688, 'steps': 24223, 'loss/train': 1.3534088134765625} +02/25/2022 08:58:05 - INFO - codeparrot_training - Step 24224: {'lr': 0.00027899433620224033, 'samples': 12403200, 'steps': 24224, 'loss/train': 1.4188522100448608} +02/25/2022 08:58:09 - INFO - codeparrot_training - Step 24225: {'lr': 0.00027897808409461147, 'samples': 12403712, 'steps': 24225, 'loss/train': 0.9269300103187561} +02/25/2022 08:58:14 - INFO - codeparrot_training - Step 24226: {'lr': 0.00027896183186284964, 'samples': 12404224, 'steps': 24226, 'loss/train': 2.1332437992095947} +02/25/2022 08:58:18 - INFO - codeparrot_training - Step 24227: {'lr': 0.00027894557950702457, 'samples': 12404736, 'steps': 24227, 'loss/train': 3.8392045497894287} +02/25/2022 08:58:23 - INFO - codeparrot_training - Step 24228: {'lr': 0.0002789293270272058, 'samples': 12405248, 'steps': 24228, 'loss/train': 1.892857551574707} +02/25/2022 08:58:27 - INFO - codeparrot_training - Step 24229: {'lr': 0.00027891307442346304, 'samples': 12405760, 'steps': 24229, 'loss/train': 1.1620746850967407} +02/25/2022 08:58:34 - INFO - codeparrot_training - Step 24230: {'lr': 0.0002788968216958657, 'samples': 12406272, 'steps': 24230, 'loss/train': 1.9652231931686401} +02/25/2022 08:58:38 - INFO - codeparrot_training - Step 24231: {'lr': 0.0002788805688444837, 'samples': 12406784, 'steps': 24231, 'loss/train': 2.0279951095581055} +02/25/2022 08:58:43 - INFO - codeparrot_training - Step 24232: {'lr': 0.00027886431586938645, 'samples': 12407296, 'steps': 24232, 'loss/train': 1.5853351354599} +02/25/2022 08:58:47 - INFO - codeparrot_training - Step 24233: {'lr': 0.00027884806277064366, 'samples': 12407808, 'steps': 24233, 'loss/train': 1.0278211832046509} +02/25/2022 08:58:50 - INFO - codeparrot_training - Step 24234: {'lr': 0.00027883180954832486, 'samples': 12408320, 'steps': 24234, 'loss/train': 3.804316282272339} +02/25/2022 08:58:56 - INFO - codeparrot_training - Step 24235: {'lr': 0.0002788155562024999, 'samples': 12408832, 'steps': 24235, 'loss/train': 5.114134311676025} +02/25/2022 08:58:59 - INFO - codeparrot_training - Step 24236: {'lr': 0.0002787993027332381, 'samples': 12409344, 'steps': 24236, 'loss/train': 1.403770923614502} +02/25/2022 08:59:05 - INFO - codeparrot_training - Step 24237: {'lr': 0.00027878304914060934, 'samples': 12409856, 'steps': 24237, 'loss/train': 2.5707404613494873} +02/25/2022 08:59:10 - INFO - codeparrot_training - Step 24238: {'lr': 0.00027876679542468305, 'samples': 12410368, 'steps': 24238, 'loss/train': 0.8141780495643616} +02/25/2022 08:59:14 - INFO - codeparrot_training - Step 24239: {'lr': 0.00027875054158552895, 'samples': 12410880, 'steps': 24239, 'loss/train': 2.051060914993286} +02/25/2022 08:59:22 - INFO - codeparrot_training - Step 24240: {'lr': 0.0002787342876232167, 'samples': 12411392, 'steps': 24240, 'loss/train': 1.2634204626083374} +02/25/2022 08:59:25 - INFO - codeparrot_training - Step 24241: {'lr': 0.0002787180335378158, 'samples': 12411904, 'steps': 24241, 'loss/train': 2.1626992225646973} +02/25/2022 08:59:29 - INFO - codeparrot_training - Step 24242: {'lr': 0.00027870177932939606, 'samples': 12412416, 'steps': 24242, 'loss/train': 2.298090934753418} +02/25/2022 08:59:34 - INFO - codeparrot_training - Step 24243: {'lr': 0.000278685524998027, 'samples': 12412928, 'steps': 24243, 'loss/train': 2.382725715637207} +02/25/2022 08:59:38 - INFO - codeparrot_training - Step 24244: {'lr': 0.0002786692705437783, 'samples': 12413440, 'steps': 24244, 'loss/train': 3.057950258255005} +02/25/2022 08:59:44 - INFO - codeparrot_training - Step 24245: {'lr': 0.00027865301596671945, 'samples': 12413952, 'steps': 24245, 'loss/train': 2.18320369720459} +02/25/2022 08:59:47 - INFO - codeparrot_training - Step 24246: {'lr': 0.0002786367612669202, 'samples': 12414464, 'steps': 24246, 'loss/train': 1.99215829372406} +02/25/2022 08:59:53 - INFO - codeparrot_training - Step 24247: {'lr': 0.00027862050644445016, 'samples': 12414976, 'steps': 24247, 'loss/train': 1.9238471984863281} +02/25/2022 08:59:56 - INFO - codeparrot_training - Step 24248: {'lr': 0.00027860425149937894, 'samples': 12415488, 'steps': 24248, 'loss/train': 1.4706083536148071} +02/25/2022 09:00:02 - INFO - codeparrot_training - Step 24249: {'lr': 0.00027858799643177624, 'samples': 12416000, 'steps': 24249, 'loss/train': 1.3288410902023315} +02/25/2022 09:00:05 - INFO - codeparrot_training - Step 24250: {'lr': 0.00027857174124171165, 'samples': 12416512, 'steps': 24250, 'loss/train': 1.8691834211349487} +02/25/2022 09:00:12 - INFO - codeparrot_training - Step 24251: {'lr': 0.0002785554859292548, 'samples': 12417024, 'steps': 24251, 'loss/train': 0.7318936586380005} +02/25/2022 09:00:16 - INFO - codeparrot_training - Step 24252: {'lr': 0.0002785392304944752, 'samples': 12417536, 'steps': 24252, 'loss/train': 1.5632015466690063} +02/25/2022 09:00:21 - INFO - codeparrot_training - Step 24253: {'lr': 0.0002785229749374427, 'samples': 12418048, 'steps': 24253, 'loss/train': 2.637326717376709} +02/25/2022 09:00:25 - INFO - codeparrot_training - Step 24254: {'lr': 0.0002785067192582268, 'samples': 12418560, 'steps': 24254, 'loss/train': 2.509840250015259} +02/25/2022 09:00:30 - INFO - codeparrot_training - Step 24255: {'lr': 0.0002784904634568972, 'samples': 12419072, 'steps': 24255, 'loss/train': 1.9652732610702515} +02/25/2022 09:00:34 - INFO - codeparrot_training - Step 24256: {'lr': 0.0002784742075335235, 'samples': 12419584, 'steps': 24256, 'loss/train': 1.8331021070480347} +02/25/2022 09:00:39 - INFO - codeparrot_training - Step 24257: {'lr': 0.0002784579514881753, 'samples': 12420096, 'steps': 24257, 'loss/train': 1.8726516962051392} +02/25/2022 09:00:43 - INFO - codeparrot_training - Step 24258: {'lr': 0.0002784416953209223, 'samples': 12420608, 'steps': 24258, 'loss/train': 2.4869985580444336} +02/25/2022 09:00:48 - INFO - codeparrot_training - Step 24259: {'lr': 0.00027842543903183406, 'samples': 12421120, 'steps': 24259, 'loss/train': 2.487274646759033} +02/25/2022 09:00:52 - INFO - codeparrot_training - Step 24260: {'lr': 0.0002784091826209803, 'samples': 12421632, 'steps': 24260, 'loss/train': 2.5869758129119873} +02/25/2022 09:00:57 - INFO - codeparrot_training - Step 24261: {'lr': 0.0002783929260884306, 'samples': 12422144, 'steps': 24261, 'loss/train': 1.8004097938537598} +02/25/2022 09:01:01 - INFO - codeparrot_training - Step 24262: {'lr': 0.00027837666943425466, 'samples': 12422656, 'steps': 24262, 'loss/train': 1.555433750152588} +02/25/2022 09:01:06 - INFO - codeparrot_training - Step 24263: {'lr': 0.00027836041265852206, 'samples': 12423168, 'steps': 24263, 'loss/train': 3.006253957748413} +02/25/2022 09:01:10 - INFO - codeparrot_training - Step 24264: {'lr': 0.0002783441557613025, 'samples': 12423680, 'steps': 24264, 'loss/train': 2.2995831966400146} +02/25/2022 09:01:15 - INFO - codeparrot_training - Step 24265: {'lr': 0.0002783278987426655, 'samples': 12424192, 'steps': 24265, 'loss/train': 1.8213459253311157} +02/25/2022 09:01:19 - INFO - codeparrot_training - Step 24266: {'lr': 0.00027831164160268087, 'samples': 12424704, 'steps': 24266, 'loss/train': 1.57319974899292} +02/25/2022 09:01:26 - INFO - codeparrot_training - Step 24267: {'lr': 0.00027829538434141803, 'samples': 12425216, 'steps': 24267, 'loss/train': 4.088140487670898} +02/25/2022 09:01:30 - INFO - codeparrot_training - Step 24268: {'lr': 0.00027827912695894686, 'samples': 12425728, 'steps': 24268, 'loss/train': 0.7101935148239136} +02/25/2022 09:01:35 - INFO - codeparrot_training - Step 24269: {'lr': 0.00027826286945533687, 'samples': 12426240, 'steps': 24269, 'loss/train': 0.826137125492096} +02/25/2022 09:01:39 - INFO - codeparrot_training - Step 24270: {'lr': 0.0002782466118306577, 'samples': 12426752, 'steps': 24270, 'loss/train': 2.115061044692993} +02/25/2022 09:01:44 - INFO - codeparrot_training - Step 24271: {'lr': 0.00027823035408497897, 'samples': 12427264, 'steps': 24271, 'loss/train': 2.5070087909698486} +02/25/2022 09:01:48 - INFO - codeparrot_training - Step 24272: {'lr': 0.0002782140962183704, 'samples': 12427776, 'steps': 24272, 'loss/train': 1.9987293481826782} +02/25/2022 09:01:53 - INFO - codeparrot_training - Step 24273: {'lr': 0.0002781978382309017, 'samples': 12428288, 'steps': 24273, 'loss/train': 1.8195164203643799} +02/25/2022 09:01:57 - INFO - codeparrot_training - Step 24274: {'lr': 0.00027818158012264226, 'samples': 12428800, 'steps': 24274, 'loss/train': 1.359501600265503} +02/25/2022 09:02:02 - INFO - codeparrot_training - Step 24275: {'lr': 0.00027816532189366193, 'samples': 12429312, 'steps': 24275, 'loss/train': 1.328649878501892} +02/25/2022 09:02:06 - INFO - codeparrot_training - Step 24276: {'lr': 0.00027814906354403033, 'samples': 12429824, 'steps': 24276, 'loss/train': 1.9147900342941284} +02/25/2022 09:02:13 - INFO - codeparrot_training - Step 24277: {'lr': 0.00027813280507381713, 'samples': 12430336, 'steps': 24277, 'loss/train': 2.292353630065918} +02/25/2022 09:02:16 - INFO - codeparrot_training - Step 24278: {'lr': 0.0002781165464830918, 'samples': 12430848, 'steps': 24278, 'loss/train': 1.9917237758636475} +02/25/2022 09:02:22 - INFO - codeparrot_training - Step 24279: {'lr': 0.0002781002877719243, 'samples': 12431360, 'steps': 24279, 'loss/train': 0.934857964515686} +02/25/2022 09:02:25 - INFO - codeparrot_training - Step 24280: {'lr': 0.0002780840289403839, 'samples': 12431872, 'steps': 24280, 'loss/train': 1.9658485651016235} +02/25/2022 09:02:31 - INFO - codeparrot_training - Step 24281: {'lr': 0.0002780677699885405, 'samples': 12432384, 'steps': 24281, 'loss/train': 1.8287357091903687} +02/25/2022 09:02:34 - INFO - codeparrot_training - Step 24282: {'lr': 0.0002780515109164637, 'samples': 12432896, 'steps': 24282, 'loss/train': 2.986912250518799} +02/25/2022 09:02:40 - INFO - codeparrot_training - Step 24283: {'lr': 0.00027803525172422316, 'samples': 12433408, 'steps': 24283, 'loss/train': 0.8380170464515686} +02/25/2022 09:02:43 - INFO - codeparrot_training - Step 24284: {'lr': 0.0002780189924118885, 'samples': 12433920, 'steps': 24284, 'loss/train': 1.5723103284835815} +02/25/2022 09:02:51 - INFO - codeparrot_training - Step 24285: {'lr': 0.00027800273297952935, 'samples': 12434432, 'steps': 24285, 'loss/train': 1.9735093116760254} +02/25/2022 09:02:54 - INFO - codeparrot_training - Step 24286: {'lr': 0.0002779864734272154, 'samples': 12434944, 'steps': 24286, 'loss/train': 1.3630189895629883} +02/25/2022 09:03:00 - INFO - codeparrot_training - Step 24287: {'lr': 0.0002779702137550162, 'samples': 12435456, 'steps': 24287, 'loss/train': 1.713210940361023} +02/25/2022 09:03:03 - INFO - codeparrot_training - Step 24288: {'lr': 0.0002779539539630016, 'samples': 12435968, 'steps': 24288, 'loss/train': 1.8743717670440674} +02/25/2022 09:03:09 - INFO - codeparrot_training - Step 24289: {'lr': 0.00027793769405124103, 'samples': 12436480, 'steps': 24289, 'loss/train': 2.632967472076416} +02/25/2022 09:03:12 - INFO - codeparrot_training - Step 24290: {'lr': 0.00027792143401980435, 'samples': 12436992, 'steps': 24290, 'loss/train': 2.1291074752807617} +02/25/2022 09:03:18 - INFO - codeparrot_training - Step 24291: {'lr': 0.0002779051738687611, 'samples': 12437504, 'steps': 24291, 'loss/train': 2.324388027191162} +02/25/2022 09:03:22 - INFO - codeparrot_training - Step 24292: {'lr': 0.0002778889135981809, 'samples': 12438016, 'steps': 24292, 'loss/train': 1.976311206817627} +02/25/2022 09:03:27 - INFO - codeparrot_training - Step 24293: {'lr': 0.00027787265320813344, 'samples': 12438528, 'steps': 24293, 'loss/train': 2.4051411151885986} +02/25/2022 09:03:31 - INFO - codeparrot_training - Step 24294: {'lr': 0.00027785639269868844, 'samples': 12439040, 'steps': 24294, 'loss/train': 2.4954209327697754} +02/25/2022 09:03:38 - INFO - codeparrot_training - Step 24295: {'lr': 0.00027784013206991545, 'samples': 12439552, 'steps': 24295, 'loss/train': 1.896340250968933} +02/25/2022 09:03:41 - INFO - codeparrot_training - Step 24296: {'lr': 0.0002778238713218842, 'samples': 12440064, 'steps': 24296, 'loss/train': 1.7325503826141357} +02/25/2022 09:03:47 - INFO - codeparrot_training - Step 24297: {'lr': 0.0002778076104546643, 'samples': 12440576, 'steps': 24297, 'loss/train': 2.351745843887329} +02/25/2022 09:03:50 - INFO - codeparrot_training - Step 24298: {'lr': 0.0002777913494683255, 'samples': 12441088, 'steps': 24298, 'loss/train': 2.510953664779663} +02/25/2022 09:03:56 - INFO - codeparrot_training - Step 24299: {'lr': 0.0002777750883629373, 'samples': 12441600, 'steps': 24299, 'loss/train': 2.0129010677337646} +02/25/2022 09:03:59 - INFO - codeparrot_training - Step 24300: {'lr': 0.0002777588271385694, 'samples': 12442112, 'steps': 24300, 'loss/train': 2.266029119491577} +02/25/2022 09:04:05 - INFO - codeparrot_training - Step 24301: {'lr': 0.0002777425657952916, 'samples': 12442624, 'steps': 24301, 'loss/train': 2.1747663021087646} +02/25/2022 09:04:08 - INFO - codeparrot_training - Step 24302: {'lr': 0.0002777263043331734, 'samples': 12443136, 'steps': 24302, 'loss/train': 0.8633140921592712} +02/25/2022 09:04:14 - INFO - codeparrot_training - Step 24303: {'lr': 0.00027771004275228465, 'samples': 12443648, 'steps': 24303, 'loss/train': 0.796806275844574} +02/25/2022 09:04:18 - INFO - codeparrot_training - Step 24304: {'lr': 0.00027769378105269466, 'samples': 12444160, 'steps': 24304, 'loss/train': 2.556018829345703} +02/25/2022 09:04:23 - INFO - codeparrot_training - Step 24305: {'lr': 0.0002776775192344735, 'samples': 12444672, 'steps': 24305, 'loss/train': 2.546994209289551} +02/25/2022 09:04:27 - INFO - codeparrot_training - Step 24306: {'lr': 0.0002776612572976905, 'samples': 12445184, 'steps': 24306, 'loss/train': 1.7073020935058594} +02/25/2022 09:04:32 - INFO - codeparrot_training - Step 24307: {'lr': 0.0002776449952424155, 'samples': 12445696, 'steps': 24307, 'loss/train': 1.3263360261917114} +02/25/2022 09:04:36 - INFO - codeparrot_training - Step 24308: {'lr': 0.0002776287330687181, 'samples': 12446208, 'steps': 24308, 'loss/train': 2.3595807552337646} +02/25/2022 09:04:41 - INFO - codeparrot_training - Step 24309: {'lr': 0.000277612470776668, 'samples': 12446720, 'steps': 24309, 'loss/train': 1.9263052940368652} +02/25/2022 09:04:45 - INFO - codeparrot_training - Step 24310: {'lr': 0.0002775962083663349, 'samples': 12447232, 'steps': 24310, 'loss/train': 1.4907540082931519} +02/25/2022 09:04:50 - INFO - codeparrot_training - Step 24311: {'lr': 0.00027757994583778827, 'samples': 12447744, 'steps': 24311, 'loss/train': 2.1533026695251465} +02/25/2022 09:04:54 - INFO - codeparrot_training - Step 24312: {'lr': 0.000277563683191098, 'samples': 12448256, 'steps': 24312, 'loss/train': 2.024930477142334} +02/25/2022 09:05:01 - INFO - codeparrot_training - Step 24313: {'lr': 0.00027754742042633367, 'samples': 12448768, 'steps': 24313, 'loss/train': 2.051766872406006} +02/25/2022 09:05:04 - INFO - codeparrot_training - Step 24314: {'lr': 0.00027753115754356497, 'samples': 12449280, 'steps': 24314, 'loss/train': 2.1391053199768066} +02/25/2022 09:05:10 - INFO - codeparrot_training - Step 24315: {'lr': 0.0002775148945428614, 'samples': 12449792, 'steps': 24315, 'loss/train': 2.4742958545684814} +02/25/2022 09:05:13 - INFO - codeparrot_training - Step 24316: {'lr': 0.00027749863142429294, 'samples': 12450304, 'steps': 24316, 'loss/train': 1.7500590085983276} +02/25/2022 09:05:19 - INFO - codeparrot_training - Step 24317: {'lr': 0.00027748236818792894, 'samples': 12450816, 'steps': 24317, 'loss/train': 1.2185124158859253} +02/25/2022 09:05:22 - INFO - codeparrot_training - Step 24318: {'lr': 0.0002774661048338393, 'samples': 12451328, 'steps': 24318, 'loss/train': 2.03092098236084} +02/25/2022 09:05:30 - INFO - codeparrot_training - Step 24319: {'lr': 0.0002774498413620935, 'samples': 12451840, 'steps': 24319, 'loss/train': 2.5360498428344727} +02/25/2022 09:05:33 - INFO - codeparrot_training - Step 24320: {'lr': 0.0002774335777727613, 'samples': 12452352, 'steps': 24320, 'loss/train': 2.1589839458465576} +02/25/2022 09:05:39 - INFO - codeparrot_training - Step 24321: {'lr': 0.0002774173140659124, 'samples': 12452864, 'steps': 24321, 'loss/train': 1.8409605026245117} +02/25/2022 09:05:42 - INFO - codeparrot_training - Step 24322: {'lr': 0.00027740105024161646, 'samples': 12453376, 'steps': 24322, 'loss/train': 2.1168031692504883} +02/25/2022 09:05:48 - INFO - codeparrot_training - Step 24323: {'lr': 0.00027738478629994306, 'samples': 12453888, 'steps': 24323, 'loss/train': 1.936193585395813} +02/25/2022 09:05:51 - INFO - codeparrot_training - Step 24324: {'lr': 0.00027736852224096196, 'samples': 12454400, 'steps': 24324, 'loss/train': 2.5072245597839355} +02/25/2022 09:05:57 - INFO - codeparrot_training - Step 24325: {'lr': 0.0002773522580647428, 'samples': 12454912, 'steps': 24325, 'loss/train': 3.1440672874450684} +02/25/2022 09:06:00 - INFO - codeparrot_training - Step 24326: {'lr': 0.00027733599377135527, 'samples': 12455424, 'steps': 24326, 'loss/train': 1.4173699617385864} +02/25/2022 09:06:06 - INFO - codeparrot_training - Step 24327: {'lr': 0.00027731972936086895, 'samples': 12455936, 'steps': 24327, 'loss/train': 1.0513856410980225} +02/25/2022 09:06:09 - INFO - codeparrot_training - Step 24328: {'lr': 0.00027730346483335373, 'samples': 12456448, 'steps': 24328, 'loss/train': 1.9477031230926514} +02/25/2022 09:06:17 - INFO - codeparrot_training - Step 24329: {'lr': 0.00027728720018887896, 'samples': 12456960, 'steps': 24329, 'loss/train': 2.603245496749878} +02/25/2022 09:06:20 - INFO - codeparrot_training - Step 24330: {'lr': 0.0002772709354275146, 'samples': 12457472, 'steps': 24330, 'loss/train': 1.859300136566162} +02/25/2022 09:06:26 - INFO - codeparrot_training - Step 24331: {'lr': 0.0002772546705493302, 'samples': 12457984, 'steps': 24331, 'loss/train': 2.2015562057495117} +02/25/2022 09:06:29 - INFO - codeparrot_training - Step 24332: {'lr': 0.0002772384055543954, 'samples': 12458496, 'steps': 24332, 'loss/train': 2.3628835678100586} +02/25/2022 09:06:35 - INFO - codeparrot_training - Step 24333: {'lr': 0.00027722214044278, 'samples': 12459008, 'steps': 24333, 'loss/train': 1.6578948497772217} +02/25/2022 09:06:38 - INFO - codeparrot_training - Step 24334: {'lr': 0.00027720587521455354, 'samples': 12459520, 'steps': 24334, 'loss/train': 1.805680513381958} +02/25/2022 09:06:44 - INFO - codeparrot_training - Step 24335: {'lr': 0.00027718960986978575, 'samples': 12460032, 'steps': 24335, 'loss/train': 2.547279119491577} +02/25/2022 09:06:47 - INFO - codeparrot_training - Step 24336: {'lr': 0.0002771733444085463, 'samples': 12460544, 'steps': 24336, 'loss/train': 1.863051414489746} +02/25/2022 09:06:53 - INFO - codeparrot_training - Step 24337: {'lr': 0.00027715707883090485, 'samples': 12461056, 'steps': 24337, 'loss/train': 1.6383551359176636} +02/25/2022 09:06:57 - INFO - codeparrot_training - Step 24338: {'lr': 0.00027714081313693115, 'samples': 12461568, 'steps': 24338, 'loss/train': 1.4323002099990845} +02/25/2022 09:07:04 - INFO - codeparrot_training - Step 24339: {'lr': 0.0002771245473266948, 'samples': 12462080, 'steps': 24339, 'loss/train': 2.596574068069458} +02/25/2022 09:07:08 - INFO - codeparrot_training - Step 24340: {'lr': 0.00027710828140026553, 'samples': 12462592, 'steps': 24340, 'loss/train': 1.6517757177352905} +02/25/2022 09:07:13 - INFO - codeparrot_training - Step 24341: {'lr': 0.0002770920153577129, 'samples': 12463104, 'steps': 24341, 'loss/train': 1.9912539720535278} +02/25/2022 09:07:17 - INFO - codeparrot_training - Step 24342: {'lr': 0.00027707574919910683, 'samples': 12463616, 'steps': 24342, 'loss/train': 2.359637498855591} +02/25/2022 09:07:22 - INFO - codeparrot_training - Step 24343: {'lr': 0.0002770594829245167, 'samples': 12464128, 'steps': 24343, 'loss/train': 1.962052583694458} +02/25/2022 09:07:26 - INFO - codeparrot_training - Step 24344: {'lr': 0.00027704321653401244, 'samples': 12464640, 'steps': 24344, 'loss/train': 2.239534854888916} +02/25/2022 09:07:31 - INFO - codeparrot_training - Step 24345: {'lr': 0.00027702695002766357, 'samples': 12465152, 'steps': 24345, 'loss/train': 0.7575122714042664} +02/25/2022 09:07:35 - INFO - codeparrot_training - Step 24346: {'lr': 0.0002770106834055398, 'samples': 12465664, 'steps': 24346, 'loss/train': 1.8983911275863647} +02/25/2022 09:07:40 - INFO - codeparrot_training - Step 24347: {'lr': 0.000276994416667711, 'samples': 12466176, 'steps': 24347, 'loss/train': 2.28899884223938} +02/25/2022 09:07:44 - INFO - codeparrot_training - Step 24348: {'lr': 0.0002769781498142465, 'samples': 12466688, 'steps': 24348, 'loss/train': 2.139613151550293} +02/25/2022 09:07:49 - INFO - codeparrot_training - Step 24349: {'lr': 0.0002769618828452163, 'samples': 12467200, 'steps': 24349, 'loss/train': 2.573157787322998} +02/25/2022 09:07:53 - INFO - codeparrot_training - Step 24350: {'lr': 0.00027694561576068985, 'samples': 12467712, 'steps': 24350, 'loss/train': 2.0471861362457275} +02/25/2022 09:08:00 - INFO - codeparrot_training - Step 24351: {'lr': 0.00027692934856073705, 'samples': 12468224, 'steps': 24351, 'loss/train': 2.3244287967681885} +02/25/2022 09:08:04 - INFO - codeparrot_training - Step 24352: {'lr': 0.0002769130812454274, 'samples': 12468736, 'steps': 24352, 'loss/train': 2.1188852787017822} +02/25/2022 09:08:09 - INFO - codeparrot_training - Step 24353: {'lr': 0.0002768968138148307, 'samples': 12469248, 'steps': 24353, 'loss/train': 2.082770586013794} +02/25/2022 09:08:12 - INFO - codeparrot_training - Step 24354: {'lr': 0.0002768805462690165, 'samples': 12469760, 'steps': 24354, 'loss/train': 2.0079801082611084} +02/25/2022 09:08:18 - INFO - codeparrot_training - Step 24355: {'lr': 0.0002768642786080546, 'samples': 12470272, 'steps': 24355, 'loss/train': 1.2906962633132935} +02/25/2022 09:08:21 - INFO - codeparrot_training - Step 24356: {'lr': 0.0002768480108320147, 'samples': 12470784, 'steps': 24356, 'loss/train': 2.0639445781707764} +02/25/2022 09:08:27 - INFO - codeparrot_training - Step 24357: {'lr': 0.0002768317429409664, 'samples': 12471296, 'steps': 24357, 'loss/train': 1.953052043914795} +02/25/2022 09:08:30 - INFO - codeparrot_training - Step 24358: {'lr': 0.00027681547493497955, 'samples': 12471808, 'steps': 24358, 'loss/train': 1.8935571908950806} +02/25/2022 09:08:36 - INFO - codeparrot_training - Step 24359: {'lr': 0.00027679920681412365, 'samples': 12472320, 'steps': 24359, 'loss/train': 1.1786301136016846} +02/25/2022 09:08:40 - INFO - codeparrot_training - Step 24360: {'lr': 0.0002767829385784684, 'samples': 12472832, 'steps': 24360, 'loss/train': 2.4036977291107178} +02/25/2022 09:08:47 - INFO - codeparrot_training - Step 24361: {'lr': 0.0002767666702280836, 'samples': 12473344, 'steps': 24361, 'loss/train': 2.4852397441864014} +02/25/2022 09:08:50 - INFO - codeparrot_training - Step 24362: {'lr': 0.0002767504017630389, 'samples': 12473856, 'steps': 24362, 'loss/train': 1.4464774131774902} +02/25/2022 09:08:56 - INFO - codeparrot_training - Step 24363: {'lr': 0.00027673413318340397, 'samples': 12474368, 'steps': 24363, 'loss/train': 0.43508830666542053} +02/25/2022 09:08:59 - INFO - codeparrot_training - Step 24364: {'lr': 0.0002767178644892485, 'samples': 12474880, 'steps': 24364, 'loss/train': 1.3368829488754272} +02/25/2022 09:09:05 - INFO - codeparrot_training - Step 24365: {'lr': 0.00027670159568064215, 'samples': 12475392, 'steps': 24365, 'loss/train': 1.8249205350875854} +02/25/2022 09:09:08 - INFO - codeparrot_training - Step 24366: {'lr': 0.00027668532675765466, 'samples': 12475904, 'steps': 24366, 'loss/train': 2.646982431411743} +02/25/2022 09:09:14 - INFO - codeparrot_training - Step 24367: {'lr': 0.00027666905772035573, 'samples': 12476416, 'steps': 24367, 'loss/train': 1.5984379053115845} +02/25/2022 09:09:17 - INFO - codeparrot_training - Step 24368: {'lr': 0.00027665278856881496, 'samples': 12476928, 'steps': 24368, 'loss/train': 2.1124837398529053} +02/25/2022 09:09:23 - INFO - codeparrot_training - Step 24369: {'lr': 0.0002766365193031022, 'samples': 12477440, 'steps': 24369, 'loss/train': 1.7307562828063965} +02/25/2022 09:09:26 - INFO - codeparrot_training - Step 24370: {'lr': 0.00027662024992328697, 'samples': 12477952, 'steps': 24370, 'loss/train': 1.9396568536758423} +02/25/2022 09:09:32 - INFO - codeparrot_training - Step 24371: {'lr': 0.00027660398042943907, 'samples': 12478464, 'steps': 24371, 'loss/train': 2.0503547191619873} +02/25/2022 09:09:35 - INFO - codeparrot_training - Step 24372: {'lr': 0.0002765877108216282, 'samples': 12478976, 'steps': 24372, 'loss/train': 0.8696369528770447} +02/25/2022 09:09:41 - INFO - codeparrot_training - Step 24373: {'lr': 0.000276571441099924, 'samples': 12479488, 'steps': 24373, 'loss/train': 1.862486481666565} +02/25/2022 09:09:45 - INFO - codeparrot_training - Step 24374: {'lr': 0.00027655517126439616, 'samples': 12480000, 'steps': 24374, 'loss/train': 3.0517146587371826} +02/25/2022 09:09:48 - INFO - codeparrot_training - Step 24375: {'lr': 0.00027653890131511445, 'samples': 12480512, 'steps': 24375, 'loss/train': 1.9236218929290771} +02/25/2022 09:09:55 - INFO - codeparrot_training - Step 24376: {'lr': 0.00027652263125214845, 'samples': 12481024, 'steps': 24376, 'loss/train': 1.5654569864273071} +02/25/2022 09:09:59 - INFO - codeparrot_training - Step 24377: {'lr': 0.0002765063610755679, 'samples': 12481536, 'steps': 24377, 'loss/train': 1.2738064527511597} +02/25/2022 09:10:04 - INFO - codeparrot_training - Step 24378: {'lr': 0.0002764900907854426, 'samples': 12482048, 'steps': 24378, 'loss/train': 2.406679630279541} +02/25/2022 09:10:08 - INFO - codeparrot_training - Step 24379: {'lr': 0.0002764738203818421, 'samples': 12482560, 'steps': 24379, 'loss/train': 1.2675119638442993} +02/25/2022 09:10:13 - INFO - codeparrot_training - Step 24380: {'lr': 0.0002764575498648362, 'samples': 12483072, 'steps': 24380, 'loss/train': 1.303342342376709} +02/25/2022 09:10:19 - INFO - codeparrot_training - Step 24381: {'lr': 0.00027644127923449446, 'samples': 12483584, 'steps': 24381, 'loss/train': 1.9849861860275269} +02/25/2022 09:10:22 - INFO - codeparrot_training - Step 24382: {'lr': 0.0002764250084908868, 'samples': 12484096, 'steps': 24382, 'loss/train': 1.4672366380691528} +02/25/2022 09:10:28 - INFO - codeparrot_training - Step 24383: {'lr': 0.00027640873763408273, 'samples': 12484608, 'steps': 24383, 'loss/train': 1.2897140979766846} +02/25/2022 09:10:31 - INFO - codeparrot_training - Step 24384: {'lr': 0.00027639246666415207, 'samples': 12485120, 'steps': 24384, 'loss/train': 1.5851019620895386} +02/25/2022 09:10:39 - INFO - codeparrot_training - Step 24385: {'lr': 0.0002763761955811644, 'samples': 12485632, 'steps': 24385, 'loss/train': 2.6295554637908936} +02/25/2022 09:10:42 - INFO - codeparrot_training - Step 24386: {'lr': 0.00027635992438518954, 'samples': 12486144, 'steps': 24386, 'loss/train': 1.5678340196609497} +02/25/2022 09:10:48 - INFO - codeparrot_training - Step 24387: {'lr': 0.00027634365307629705, 'samples': 12486656, 'steps': 24387, 'loss/train': 2.0508193969726562} +02/25/2022 09:10:51 - INFO - codeparrot_training - Step 24388: {'lr': 0.00027632738165455685, 'samples': 12487168, 'steps': 24388, 'loss/train': 1.8973125219345093} +02/25/2022 09:10:57 - INFO - codeparrot_training - Step 24389: {'lr': 0.00027631111012003836, 'samples': 12487680, 'steps': 24389, 'loss/train': 2.5513358116149902} +02/25/2022 09:11:00 - INFO - codeparrot_training - Step 24390: {'lr': 0.0002762948384728115, 'samples': 12488192, 'steps': 24390, 'loss/train': 0.49132177233695984} +02/25/2022 09:11:06 - INFO - codeparrot_training - Step 24391: {'lr': 0.00027627856671294586, 'samples': 12488704, 'steps': 24391, 'loss/train': 1.7337344884872437} +02/25/2022 09:11:09 - INFO - codeparrot_training - Step 24392: {'lr': 0.00027626229484051126, 'samples': 12489216, 'steps': 24392, 'loss/train': 1.959826946258545} +02/25/2022 09:11:15 - INFO - codeparrot_training - Step 24393: {'lr': 0.00027624602285557725, 'samples': 12489728, 'steps': 24393, 'loss/train': 1.6941568851470947} +02/25/2022 09:11:18 - INFO - codeparrot_training - Step 24394: {'lr': 0.00027622975075821364, 'samples': 12490240, 'steps': 24394, 'loss/train': 1.2322165966033936} +02/25/2022 09:11:24 - INFO - codeparrot_training - Step 24395: {'lr': 0.00027621347854849015, 'samples': 12490752, 'steps': 24395, 'loss/train': 2.565326690673828} +02/25/2022 09:11:27 - INFO - codeparrot_training - Step 24396: {'lr': 0.0002761972062264764, 'samples': 12491264, 'steps': 24396, 'loss/train': 0.1975572109222412} +02/25/2022 09:11:35 - INFO - codeparrot_training - Step 24397: {'lr': 0.0002761809337922422, 'samples': 12491776, 'steps': 24397, 'loss/train': 2.5141966342926025} +02/25/2022 09:11:38 - INFO - codeparrot_training - Step 24398: {'lr': 0.0002761646612458571, 'samples': 12492288, 'steps': 24398, 'loss/train': 1.4061397314071655} +02/25/2022 09:11:44 - INFO - codeparrot_training - Step 24399: {'lr': 0.00027614838858739093, 'samples': 12492800, 'steps': 24399, 'loss/train': 1.66769540309906} +02/25/2022 09:11:47 - INFO - codeparrot_training - Step 24400: {'lr': 0.0002761321158169134, 'samples': 12493312, 'steps': 24400, 'loss/train': 2.850393772125244} +02/25/2022 09:11:53 - INFO - codeparrot_training - Step 24401: {'lr': 0.00027611584293449413, 'samples': 12493824, 'steps': 24401, 'loss/train': 1.828554630279541} +02/25/2022 09:11:56 - INFO - codeparrot_training - Step 24402: {'lr': 0.0002760995699402029, 'samples': 12494336, 'steps': 24402, 'loss/train': 2.0508873462677} +02/25/2022 09:12:02 - INFO - codeparrot_training - Step 24403: {'lr': 0.0002760832968341094, 'samples': 12494848, 'steps': 24403, 'loss/train': 1.4734331369400024} +02/25/2022 09:12:05 - INFO - codeparrot_training - Step 24404: {'lr': 0.00027606702361628337, 'samples': 12495360, 'steps': 24404, 'loss/train': 2.2672884464263916} +02/25/2022 09:12:11 - INFO - codeparrot_training - Step 24405: {'lr': 0.00027605075028679446, 'samples': 12495872, 'steps': 24405, 'loss/train': 2.1354756355285645} +02/25/2022 09:12:14 - INFO - codeparrot_training - Step 24406: {'lr': 0.0002760344768457124, 'samples': 12496384, 'steps': 24406, 'loss/train': 2.779613971710205} +02/25/2022 09:12:22 - INFO - codeparrot_training - Step 24407: {'lr': 0.0002760182032931069, 'samples': 12496896, 'steps': 24407, 'loss/train': 1.361061692237854} +02/25/2022 09:12:25 - INFO - codeparrot_training - Step 24408: {'lr': 0.00027600192962904773, 'samples': 12497408, 'steps': 24408, 'loss/train': 2.293539524078369} +02/25/2022 09:12:31 - INFO - codeparrot_training - Step 24409: {'lr': 0.0002759856558536045, 'samples': 12497920, 'steps': 24409, 'loss/train': 2.024662971496582} +02/25/2022 09:12:34 - INFO - codeparrot_training - Step 24410: {'lr': 0.000275969381966847, 'samples': 12498432, 'steps': 24410, 'loss/train': 1.0239968299865723} +02/25/2022 09:12:40 - INFO - codeparrot_training - Step 24411: {'lr': 0.0002759531079688449, 'samples': 12498944, 'steps': 24411, 'loss/train': 1.3101378679275513} +02/25/2022 09:12:43 - INFO - codeparrot_training - Step 24412: {'lr': 0.000275936833859668, 'samples': 12499456, 'steps': 24412, 'loss/train': 1.9156187772750854} +02/25/2022 09:12:49 - INFO - codeparrot_training - Step 24413: {'lr': 0.0002759205596393859, 'samples': 12499968, 'steps': 24413, 'loss/train': 1.6916019916534424} +02/25/2022 09:12:52 - INFO - codeparrot_training - Step 24414: {'lr': 0.0002759042853080683, 'samples': 12500480, 'steps': 24414, 'loss/train': 0.9117424488067627} +02/25/2022 09:12:58 - INFO - codeparrot_training - Step 24415: {'lr': 0.000275888010865785, 'samples': 12500992, 'steps': 24415, 'loss/train': 1.7600107192993164} +02/25/2022 09:13:01 - INFO - codeparrot_training - Step 24416: {'lr': 0.00027587173631260563, 'samples': 12501504, 'steps': 24416, 'loss/train': 2.988499164581299} +02/25/2022 09:13:07 - INFO - codeparrot_training - Step 24417: {'lr': 0.0002758554616486, 'samples': 12502016, 'steps': 24417, 'loss/train': 1.1107361316680908} +02/25/2022 09:13:10 - INFO - codeparrot_training - Step 24418: {'lr': 0.0002758391868738378, 'samples': 12502528, 'steps': 24418, 'loss/train': 1.1253814697265625} +02/25/2022 09:13:16 - INFO - codeparrot_training - Step 24419: {'lr': 0.0002758229119883888, 'samples': 12503040, 'steps': 24419, 'loss/train': 1.8688368797302246} +02/25/2022 09:13:19 - INFO - codeparrot_training - Step 24420: {'lr': 0.0002758066369923225, 'samples': 12503552, 'steps': 24420, 'loss/train': 3.5818772315979004} +02/25/2022 09:13:25 - INFO - codeparrot_training - Step 24421: {'lr': 0.00027579036188570883, 'samples': 12504064, 'steps': 24421, 'loss/train': 1.5507477521896362} +02/25/2022 09:13:28 - INFO - codeparrot_training - Step 24422: {'lr': 0.00027577408666861744, 'samples': 12504576, 'steps': 24422, 'loss/train': 0.26538732647895813} +02/25/2022 09:13:36 - INFO - codeparrot_training - Step 24423: {'lr': 0.00027575781134111805, 'samples': 12505088, 'steps': 24423, 'loss/train': 2.144705295562744} +02/25/2022 09:13:39 - INFO - codeparrot_training - Step 24424: {'lr': 0.00027574153590328033, 'samples': 12505600, 'steps': 24424, 'loss/train': 0.6362213492393494} +02/25/2022 09:13:44 - INFO - codeparrot_training - Step 24425: {'lr': 0.0002757252603551741, 'samples': 12506112, 'steps': 24425, 'loss/train': 1.3682286739349365} +02/25/2022 09:13:48 - INFO - codeparrot_training - Step 24426: {'lr': 0.00027570898469686896, 'samples': 12506624, 'steps': 24426, 'loss/train': 2.466688871383667} +02/25/2022 09:13:54 - INFO - codeparrot_training - Step 24427: {'lr': 0.00027569270892843474, 'samples': 12507136, 'steps': 24427, 'loss/train': 1.8645234107971191} +02/25/2022 09:13:57 - INFO - codeparrot_training - Step 24428: {'lr': 0.0002756764330499411, 'samples': 12507648, 'steps': 24428, 'loss/train': 2.1611015796661377} +02/25/2022 09:14:03 - INFO - codeparrot_training - Step 24429: {'lr': 0.00027566015706145775, 'samples': 12508160, 'steps': 24429, 'loss/train': 1.5633702278137207} +02/25/2022 09:14:06 - INFO - codeparrot_training - Step 24430: {'lr': 0.00027564388096305446, 'samples': 12508672, 'steps': 24430, 'loss/train': 2.6833713054656982} +02/25/2022 09:14:12 - INFO - codeparrot_training - Step 24431: {'lr': 0.00027562760475480095, 'samples': 12509184, 'steps': 24431, 'loss/train': 2.556901693344116} +02/25/2022 09:14:15 - INFO - codeparrot_training - Step 24432: {'lr': 0.0002756113284367669, 'samples': 12509696, 'steps': 24432, 'loss/train': 2.1286306381225586} +02/25/2022 09:14:23 - INFO - codeparrot_training - Step 24433: {'lr': 0.00027559505200902204, 'samples': 12510208, 'steps': 24433, 'loss/train': 2.053403854370117} +02/25/2022 09:14:26 - INFO - codeparrot_training - Step 24434: {'lr': 0.00027557877547163613, 'samples': 12510720, 'steps': 24434, 'loss/train': 2.115370750427246} +02/25/2022 09:14:32 - INFO - codeparrot_training - Step 24435: {'lr': 0.0002755624988246788, 'samples': 12511232, 'steps': 24435, 'loss/train': 2.078890800476074} +02/25/2022 09:14:35 - INFO - codeparrot_training - Step 24436: {'lr': 0.0002755462220682199, 'samples': 12511744, 'steps': 24436, 'loss/train': 1.6449601650238037} +02/25/2022 09:14:41 - INFO - codeparrot_training - Step 24437: {'lr': 0.0002755299452023291, 'samples': 12512256, 'steps': 24437, 'loss/train': 2.1148951053619385} +02/25/2022 09:14:44 - INFO - codeparrot_training - Step 24438: {'lr': 0.0002755136682270761, 'samples': 12512768, 'steps': 24438, 'loss/train': 2.4138970375061035} +02/25/2022 09:14:50 - INFO - codeparrot_training - Step 24439: {'lr': 0.00027549739114253067, 'samples': 12513280, 'steps': 24439, 'loss/train': 2.2177722454071045} +02/25/2022 09:14:53 - INFO - codeparrot_training - Step 24440: {'lr': 0.00027548111394876254, 'samples': 12513792, 'steps': 24440, 'loss/train': 1.8243474960327148} +02/25/2022 09:14:59 - INFO - codeparrot_training - Step 24441: {'lr': 0.00027546483664584137, 'samples': 12514304, 'steps': 24441, 'loss/train': 2.0535154342651367} +02/25/2022 09:15:03 - INFO - codeparrot_training - Step 24442: {'lr': 0.0002754485592338369, 'samples': 12514816, 'steps': 24442, 'loss/train': 1.728975534439087} +02/25/2022 09:15:10 - INFO - codeparrot_training - Step 24443: {'lr': 0.0002754322817128189, 'samples': 12515328, 'steps': 24443, 'loss/train': 0.6173520088195801} +02/25/2022 09:15:13 - INFO - codeparrot_training - Step 24444: {'lr': 0.0002754160040828571, 'samples': 12515840, 'steps': 24444, 'loss/train': 1.6655817031860352} +02/25/2022 09:15:19 - INFO - codeparrot_training - Step 24445: {'lr': 0.00027539972634402124, 'samples': 12516352, 'steps': 24445, 'loss/train': 1.7090250253677368} +02/25/2022 09:15:22 - INFO - codeparrot_training - Step 24446: {'lr': 0.0002753834484963809, 'samples': 12516864, 'steps': 24446, 'loss/train': 0.6559537053108215} +02/25/2022 09:15:28 - INFO - codeparrot_training - Step 24447: {'lr': 0.00027536717054000605, 'samples': 12517376, 'steps': 24447, 'loss/train': 2.229844093322754} +02/25/2022 09:15:31 - INFO - codeparrot_training - Step 24448: {'lr': 0.00027535089247496627, 'samples': 12517888, 'steps': 24448, 'loss/train': 1.687962293624878} +02/25/2022 09:15:37 - INFO - codeparrot_training - Step 24449: {'lr': 0.0002753346143013313, 'samples': 12518400, 'steps': 24449, 'loss/train': 1.6801073551177979} +02/25/2022 09:15:40 - INFO - codeparrot_training - Step 24450: {'lr': 0.00027531833601917086, 'samples': 12518912, 'steps': 24450, 'loss/train': 1.8685766458511353} +02/25/2022 09:15:48 - INFO - codeparrot_training - Step 24451: {'lr': 0.00027530205762855476, 'samples': 12519424, 'steps': 24451, 'loss/train': 1.3363102674484253} +02/25/2022 09:15:51 - INFO - codeparrot_training - Step 24452: {'lr': 0.0002752857791295526, 'samples': 12519936, 'steps': 24452, 'loss/train': 2.0327751636505127} +02/25/2022 09:15:57 - INFO - codeparrot_training - Step 24453: {'lr': 0.0002752695005222343, 'samples': 12520448, 'steps': 24453, 'loss/train': 2.1356115341186523} +02/25/2022 09:16:00 - INFO - codeparrot_training - Step 24454: {'lr': 0.0002752532218066694, 'samples': 12520960, 'steps': 24454, 'loss/train': 0.9559314846992493} +02/25/2022 09:16:06 - INFO - codeparrot_training - Step 24455: {'lr': 0.0002752369429829278, 'samples': 12521472, 'steps': 24455, 'loss/train': 1.6489819288253784} +02/25/2022 09:16:09 - INFO - codeparrot_training - Step 24456: {'lr': 0.00027522066405107906, 'samples': 12521984, 'steps': 24456, 'loss/train': 0.7534356117248535} +02/25/2022 09:16:15 - INFO - codeparrot_training - Step 24457: {'lr': 0.00027520438501119304, 'samples': 12522496, 'steps': 24457, 'loss/train': 2.2548422813415527} +02/25/2022 09:16:18 - INFO - codeparrot_training - Step 24458: {'lr': 0.0002751881058633394, 'samples': 12523008, 'steps': 24458, 'loss/train': 2.4279603958129883} +02/25/2022 09:16:24 - INFO - codeparrot_training - Step 24459: {'lr': 0.000275171826607588, 'samples': 12523520, 'steps': 24459, 'loss/train': 2.2388763427734375} +02/25/2022 09:16:27 - INFO - codeparrot_training - Step 24460: {'lr': 0.0002751555472440084, 'samples': 12524032, 'steps': 24460, 'loss/train': 2.459434986114502} +02/25/2022 09:16:33 - INFO - codeparrot_training - Step 24461: {'lr': 0.00027513926777267045, 'samples': 12524544, 'steps': 24461, 'loss/train': 2.8915839195251465} +02/25/2022 09:16:36 - INFO - codeparrot_training - Step 24462: {'lr': 0.00027512298819364387, 'samples': 12525056, 'steps': 24462, 'loss/train': 2.3681576251983643} +02/25/2022 09:16:42 - INFO - codeparrot_training - Step 24463: {'lr': 0.0002751067085069984, 'samples': 12525568, 'steps': 24463, 'loss/train': 1.0342020988464355} +02/25/2022 09:16:45 - INFO - codeparrot_training - Step 24464: {'lr': 0.0002750904287128037, 'samples': 12526080, 'steps': 24464, 'loss/train': 0.5249140858650208} +02/25/2022 09:16:51 - INFO - codeparrot_training - Step 24465: {'lr': 0.0002750741488111297, 'samples': 12526592, 'steps': 24465, 'loss/train': 1.663329005241394} +02/25/2022 09:16:54 - INFO - codeparrot_training - Step 24466: {'lr': 0.00027505786880204587, 'samples': 12527104, 'steps': 24466, 'loss/train': 1.4527299404144287} +02/25/2022 09:17:02 - INFO - codeparrot_training - Step 24467: {'lr': 0.0002750415886856222, 'samples': 12527616, 'steps': 24467, 'loss/train': 1.3786159753799438} +02/25/2022 09:17:05 - INFO - codeparrot_training - Step 24468: {'lr': 0.0002750253084619282, 'samples': 12528128, 'steps': 24468, 'loss/train': 1.8913116455078125} +02/25/2022 09:17:11 - INFO - codeparrot_training - Step 24469: {'lr': 0.00027500902813103385, 'samples': 12528640, 'steps': 24469, 'loss/train': 1.9512361288070679} +02/25/2022 09:17:14 - INFO - codeparrot_training - Step 24470: {'lr': 0.00027499274769300863, 'samples': 12529152, 'steps': 24470, 'loss/train': 2.614431142807007} +02/25/2022 09:17:19 - INFO - codeparrot_training - Step 24471: {'lr': 0.0002749764671479225, 'samples': 12529664, 'steps': 24471, 'loss/train': 1.3377066850662231} +02/25/2022 09:17:23 - INFO - codeparrot_training - Step 24472: {'lr': 0.00027496018649584506, 'samples': 12530176, 'steps': 24472, 'loss/train': 2.480290412902832} +02/25/2022 09:17:29 - INFO - codeparrot_training - Step 24473: {'lr': 0.00027494390573684624, 'samples': 12530688, 'steps': 24473, 'loss/train': 1.292910099029541} +02/25/2022 09:17:32 - INFO - codeparrot_training - Step 24474: {'lr': 0.0002749276248709955, 'samples': 12531200, 'steps': 24474, 'loss/train': 0.7167650461196899} +02/25/2022 09:17:37 - INFO - codeparrot_training - Step 24475: {'lr': 0.00027491134389836283, 'samples': 12531712, 'steps': 24475, 'loss/train': 1.7323970794677734} +02/25/2022 09:17:41 - INFO - codeparrot_training - Step 24476: {'lr': 0.00027489506281901777, 'samples': 12532224, 'steps': 24476, 'loss/train': 2.0276267528533936} +02/25/2022 09:17:46 - INFO - codeparrot_training - Step 24477: {'lr': 0.0002748787816330302, 'samples': 12532736, 'steps': 24477, 'loss/train': 1.820704698562622} +02/25/2022 09:17:50 - INFO - codeparrot_training - Step 24478: {'lr': 0.0002748625003404699, 'samples': 12533248, 'steps': 24478, 'loss/train': 1.592031717300415} +02/25/2022 09:17:57 - INFO - codeparrot_training - Step 24479: {'lr': 0.0002748462189414065, 'samples': 12533760, 'steps': 24479, 'loss/train': 2.2885358333587646} +02/25/2022 09:18:01 - INFO - codeparrot_training - Step 24480: {'lr': 0.0002748299374359098, 'samples': 12534272, 'steps': 24480, 'loss/train': 1.6717679500579834} +02/25/2022 09:18:06 - INFO - codeparrot_training - Step 24481: {'lr': 0.00027481365582404947, 'samples': 12534784, 'steps': 24481, 'loss/train': 1.6308966875076294} +02/25/2022 09:18:10 - INFO - codeparrot_training - Step 24482: {'lr': 0.00027479737410589537, 'samples': 12535296, 'steps': 24482, 'loss/train': 2.250183582305908} +02/25/2022 09:18:15 - INFO - codeparrot_training - Step 24483: {'lr': 0.0002747810922815172, 'samples': 12535808, 'steps': 24483, 'loss/train': 1.4832526445388794} +02/25/2022 09:18:18 - INFO - codeparrot_training - Step 24484: {'lr': 0.0002747648103509847, 'samples': 12536320, 'steps': 24484, 'loss/train': 1.9258743524551392} +02/25/2022 09:18:24 - INFO - codeparrot_training - Step 24485: {'lr': 0.0002747485283143676, 'samples': 12536832, 'steps': 24485, 'loss/train': 1.9167473316192627} +02/25/2022 09:18:27 - INFO - codeparrot_training - Step 24486: {'lr': 0.00027473224617173564, 'samples': 12537344, 'steps': 24486, 'loss/train': 1.9225873947143555} +02/25/2022 09:18:33 - INFO - codeparrot_training - Step 24487: {'lr': 0.0002747159639231586, 'samples': 12537856, 'steps': 24487, 'loss/train': 1.394760012626648} +02/25/2022 09:18:36 - INFO - codeparrot_training - Step 24488: {'lr': 0.00027469968156870625, 'samples': 12538368, 'steps': 24488, 'loss/train': 1.8857632875442505} +02/25/2022 09:18:43 - INFO - codeparrot_training - Step 24489: {'lr': 0.0002746833991084483, 'samples': 12538880, 'steps': 24489, 'loss/train': 0.7643253803253174} +02/25/2022 09:18:46 - INFO - codeparrot_training - Step 24490: {'lr': 0.0002746671165424545, 'samples': 12539392, 'steps': 24490, 'loss/train': 2.0565500259399414} +02/25/2022 09:18:52 - INFO - codeparrot_training - Step 24491: {'lr': 0.0002746508338707946, 'samples': 12539904, 'steps': 24491, 'loss/train': 2.6000890731811523} +02/25/2022 09:18:55 - INFO - codeparrot_training - Step 24492: {'lr': 0.00027463455109353837, 'samples': 12540416, 'steps': 24492, 'loss/train': 1.6585088968276978} +02/25/2022 09:19:01 - INFO - codeparrot_training - Step 24493: {'lr': 0.00027461826821075554, 'samples': 12540928, 'steps': 24493, 'loss/train': 1.6845992803573608} +02/25/2022 09:19:04 - INFO - codeparrot_training - Step 24494: {'lr': 0.00027460198522251584, 'samples': 12541440, 'steps': 24494, 'loss/train': 1.7877154350280762} +02/25/2022 09:19:10 - INFO - codeparrot_training - Step 24495: {'lr': 0.0002745857021288891, 'samples': 12541952, 'steps': 24495, 'loss/train': 2.11248517036438} +02/25/2022 09:19:13 - INFO - codeparrot_training - Step 24496: {'lr': 0.000274569418929945, 'samples': 12542464, 'steps': 24496, 'loss/train': 1.868991732597351} +02/25/2022 09:19:19 - INFO - codeparrot_training - Step 24497: {'lr': 0.0002745531356257533, 'samples': 12542976, 'steps': 24497, 'loss/train': 1.831846833229065} +02/25/2022 09:19:22 - INFO - codeparrot_training - Step 24498: {'lr': 0.0002745368522163837, 'samples': 12543488, 'steps': 24498, 'loss/train': 2.2027790546417236} +02/25/2022 09:19:28 - INFO - codeparrot_training - Step 24499: {'lr': 0.0002745205687019061, 'samples': 12544000, 'steps': 24499, 'loss/train': 3.2684645652770996} +02/25/2022 09:19:32 - INFO - codeparrot_training - Step 24500: {'lr': 0.0002745042850823902, 'samples': 12544512, 'steps': 24500, 'loss/train': 1.1635981798171997} +02/25/2022 09:19:37 - INFO - codeparrot_training - Step 24501: {'lr': 0.00027448800135790563, 'samples': 12545024, 'steps': 24501, 'loss/train': 1.4591107368469238} +02/25/2022 09:19:41 - INFO - codeparrot_training - Step 24502: {'lr': 0.00027447171752852237, 'samples': 12545536, 'steps': 24502, 'loss/train': 1.7812879085540771} +02/25/2022 09:19:46 - INFO - codeparrot_training - Step 24503: {'lr': 0.00027445543359430987, 'samples': 12546048, 'steps': 24503, 'loss/train': 1.4782919883728027} +02/25/2022 09:19:50 - INFO - codeparrot_training - Step 24504: {'lr': 0.0002744391495553382, 'samples': 12546560, 'steps': 24504, 'loss/train': 1.7535756826400757} +02/25/2022 09:19:55 - INFO - codeparrot_training - Step 24505: {'lr': 0.00027442286541167686, 'samples': 12547072, 'steps': 24505, 'loss/train': 2.996457576751709} +02/25/2022 09:19:59 - INFO - codeparrot_training - Step 24506: {'lr': 0.00027440658116339577, 'samples': 12547584, 'steps': 24506, 'loss/train': 2.0868303775787354} +02/25/2022 09:20:04 - INFO - codeparrot_training - Step 24507: {'lr': 0.00027439029681056463, 'samples': 12548096, 'steps': 24507, 'loss/train': 1.03550386428833} +02/25/2022 09:20:08 - INFO - codeparrot_training - Step 24508: {'lr': 0.00027437401235325327, 'samples': 12548608, 'steps': 24508, 'loss/train': 2.2904536724090576} +02/25/2022 09:20:14 - INFO - codeparrot_training - Step 24509: {'lr': 0.0002743577277915313, 'samples': 12549120, 'steps': 24509, 'loss/train': 2.1216845512390137} +02/25/2022 09:20:17 - INFO - codeparrot_training - Step 24510: {'lr': 0.0002743414431254686, 'samples': 12549632, 'steps': 24510, 'loss/train': 2.9121487140655518} +02/25/2022 09:20:23 - INFO - codeparrot_training - Step 24511: {'lr': 0.00027432515835513485, 'samples': 12550144, 'steps': 24511, 'loss/train': 2.58669114112854} +02/25/2022 09:20:26 - INFO - codeparrot_training - Step 24512: {'lr': 0.00027430887348059993, 'samples': 12550656, 'steps': 24512, 'loss/train': 2.023613691329956} +02/25/2022 09:20:32 - INFO - codeparrot_training - Step 24513: {'lr': 0.0002742925885019334, 'samples': 12551168, 'steps': 24513, 'loss/train': 1.340361475944519} +02/25/2022 09:20:36 - INFO - codeparrot_training - Step 24514: {'lr': 0.0002742763034192051, 'samples': 12551680, 'steps': 24514, 'loss/train': 0.6868749856948853} +02/25/2022 09:20:42 - INFO - codeparrot_training - Step 24515: {'lr': 0.000274260018232485, 'samples': 12552192, 'steps': 24515, 'loss/train': 2.7125587463378906} +02/25/2022 09:20:45 - INFO - codeparrot_training - Step 24516: {'lr': 0.00027424373294184255, 'samples': 12552704, 'steps': 24516, 'loss/train': 2.369844436645508} +02/25/2022 09:20:51 - INFO - codeparrot_training - Step 24517: {'lr': 0.00027422744754734775, 'samples': 12553216, 'steps': 24517, 'loss/train': 2.318622350692749} +02/25/2022 09:20:54 - INFO - codeparrot_training - Step 24518: {'lr': 0.00027421116204907013, 'samples': 12553728, 'steps': 24518, 'loss/train': 1.1846846342086792} +02/25/2022 09:21:00 - INFO - codeparrot_training - Step 24519: {'lr': 0.0002741948764470797, 'samples': 12554240, 'steps': 24519, 'loss/train': 1.3930130004882812} +02/25/2022 09:21:03 - INFO - codeparrot_training - Step 24520: {'lr': 0.000274178590741446, 'samples': 12554752, 'steps': 24520, 'loss/train': 2.1973109245300293} +02/25/2022 09:21:09 - INFO - codeparrot_training - Step 24521: {'lr': 0.00027416230493223896, 'samples': 12555264, 'steps': 24521, 'loss/train': 2.070000171661377} +02/25/2022 09:21:13 - INFO - codeparrot_training - Step 24522: {'lr': 0.0002741460190195283, 'samples': 12555776, 'steps': 24522, 'loss/train': 2.4560000896453857} +02/25/2022 09:21:18 - INFO - codeparrot_training - Step 24523: {'lr': 0.00027412973300338376, 'samples': 12556288, 'steps': 24523, 'loss/train': 1.6704992055892944} +02/25/2022 09:21:22 - INFO - codeparrot_training - Step 24524: {'lr': 0.000274113446883875, 'samples': 12556800, 'steps': 24524, 'loss/train': 1.6079808473587036} +02/25/2022 09:21:28 - INFO - codeparrot_training - Step 24525: {'lr': 0.0002740971606610719, 'samples': 12557312, 'steps': 24525, 'loss/train': 1.5220167636871338} +02/25/2022 09:21:31 - INFO - codeparrot_training - Step 24526: {'lr': 0.00027408087433504437, 'samples': 12557824, 'steps': 24526, 'loss/train': 2.134115219116211} +02/25/2022 09:21:35 - INFO - codeparrot_training - Step 24527: {'lr': 0.0002740645879058619, 'samples': 12558336, 'steps': 24527, 'loss/train': 2.4043474197387695} +02/25/2022 09:21:40 - INFO - codeparrot_training - Step 24528: {'lr': 0.0002740483013735944, 'samples': 12558848, 'steps': 24528, 'loss/train': 1.037956714630127} +02/25/2022 09:21:44 - INFO - codeparrot_training - Step 24529: {'lr': 0.00027403201473831165, 'samples': 12559360, 'steps': 24529, 'loss/train': 2.3583619594573975} +02/25/2022 09:21:49 - INFO - codeparrot_training - Step 24530: {'lr': 0.00027401572800008335, 'samples': 12559872, 'steps': 24530, 'loss/train': 1.2241411209106445} +02/25/2022 09:21:53 - INFO - codeparrot_training - Step 24531: {'lr': 0.0002739994411589792, 'samples': 12560384, 'steps': 24531, 'loss/train': 2.054564952850342} +02/25/2022 09:21:59 - INFO - codeparrot_training - Step 24532: {'lr': 0.00027398315421506926, 'samples': 12560896, 'steps': 24532, 'loss/train': 2.1500895023345947} +02/25/2022 09:22:02 - INFO - codeparrot_training - Step 24533: {'lr': 0.00027396686716842295, 'samples': 12561408, 'steps': 24533, 'loss/train': 1.61677086353302} +02/25/2022 09:22:07 - INFO - codeparrot_training - Step 24534: {'lr': 0.0002739505800191103, 'samples': 12561920, 'steps': 24534, 'loss/train': 1.5186175107955933} +02/25/2022 09:22:11 - INFO - codeparrot_training - Step 24535: {'lr': 0.0002739342927672009, 'samples': 12562432, 'steps': 24535, 'loss/train': 1.5359241962432861} +02/25/2022 09:22:17 - INFO - codeparrot_training - Step 24536: {'lr': 0.00027391800541276464, 'samples': 12562944, 'steps': 24536, 'loss/train': 1.7430449724197388} +02/25/2022 09:22:21 - INFO - codeparrot_training - Step 24537: {'lr': 0.00027390171795587114, 'samples': 12563456, 'steps': 24537, 'loss/train': 1.097501516342163} +02/25/2022 09:22:26 - INFO - codeparrot_training - Step 24538: {'lr': 0.0002738854303965903, 'samples': 12563968, 'steps': 24538, 'loss/train': 1.8211660385131836} +02/25/2022 09:22:32 - INFO - codeparrot_training - Step 24539: {'lr': 0.0002738691427349919, 'samples': 12564480, 'steps': 24539, 'loss/train': 1.9708224534988403} +02/25/2022 09:22:36 - INFO - codeparrot_training - Step 24540: {'lr': 0.00027385285497114563, 'samples': 12564992, 'steps': 24540, 'loss/train': 1.9764999151229858} +02/25/2022 09:22:41 - INFO - codeparrot_training - Step 24541: {'lr': 0.0002738365671051214, 'samples': 12565504, 'steps': 24541, 'loss/train': 1.355342984199524} +02/25/2022 09:22:45 - INFO - codeparrot_training - Step 24542: {'lr': 0.0002738202791369889, 'samples': 12566016, 'steps': 24542, 'loss/train': 2.0183141231536865} +02/25/2022 09:22:50 - INFO - codeparrot_training - Step 24543: {'lr': 0.00027380399106681773, 'samples': 12566528, 'steps': 24543, 'loss/train': 1.5322482585906982} +02/25/2022 09:22:54 - INFO - codeparrot_training - Step 24544: {'lr': 0.00027378770289467795, 'samples': 12567040, 'steps': 24544, 'loss/train': 2.3565030097961426} +02/25/2022 09:23:00 - INFO - codeparrot_training - Step 24545: {'lr': 0.00027377141462063916, 'samples': 12567552, 'steps': 24545, 'loss/train': 1.427379846572876} +02/25/2022 09:23:03 - INFO - codeparrot_training - Step 24546: {'lr': 0.0002737551262447712, 'samples': 12568064, 'steps': 24546, 'loss/train': 1.826188325881958} +02/25/2022 09:23:09 - INFO - codeparrot_training - Step 24547: {'lr': 0.00027373883776714373, 'samples': 12568576, 'steps': 24547, 'loss/train': 1.5599071979522705} +02/25/2022 09:23:12 - INFO - codeparrot_training - Step 24548: {'lr': 0.00027372254918782673, 'samples': 12569088, 'steps': 24548, 'loss/train': 1.6204804182052612} +02/25/2022 09:23:18 - INFO - codeparrot_training - Step 24549: {'lr': 0.0002737062605068898, 'samples': 12569600, 'steps': 24549, 'loss/train': 1.1979007720947266} +02/25/2022 09:23:21 - INFO - codeparrot_training - Step 24550: {'lr': 0.0002736899717244028, 'samples': 12570112, 'steps': 24550, 'loss/train': 0.6492655873298645} +02/25/2022 09:23:27 - INFO - codeparrot_training - Step 24551: {'lr': 0.0002736736828404355, 'samples': 12570624, 'steps': 24551, 'loss/train': 2.2705295085906982} +02/25/2022 09:23:31 - INFO - codeparrot_training - Step 24552: {'lr': 0.0002736573938550577, 'samples': 12571136, 'steps': 24552, 'loss/train': 3.0706064701080322} +02/25/2022 09:23:36 - INFO - codeparrot_training - Step 24553: {'lr': 0.000273641104768339, 'samples': 12571648, 'steps': 24553, 'loss/train': 1.422694444656372} +02/25/2022 09:23:40 - INFO - codeparrot_training - Step 24554: {'lr': 0.0002736248155803494, 'samples': 12572160, 'steps': 24554, 'loss/train': 3.0465056896209717} +02/25/2022 09:23:45 - INFO - codeparrot_training - Step 24555: {'lr': 0.00027360852629115855, 'samples': 12572672, 'steps': 24555, 'loss/train': 5.385129928588867} +02/25/2022 09:23:49 - INFO - codeparrot_training - Step 24556: {'lr': 0.00027359223690083637, 'samples': 12573184, 'steps': 24556, 'loss/train': 1.9099751710891724} +02/25/2022 09:23:54 - INFO - codeparrot_training - Step 24557: {'lr': 0.0002735759474094524, 'samples': 12573696, 'steps': 24557, 'loss/train': 1.010520577430725} +02/25/2022 09:23:58 - INFO - codeparrot_training - Step 24558: {'lr': 0.0002735596578170767, 'samples': 12574208, 'steps': 24558, 'loss/train': 2.2503061294555664} +02/25/2022 09:24:03 - INFO - codeparrot_training - Step 24559: {'lr': 0.00027354336812377875, 'samples': 12574720, 'steps': 24559, 'loss/train': 1.7974720001220703} +02/25/2022 09:24:07 - INFO - codeparrot_training - Step 24560: {'lr': 0.0002735270783296286, 'samples': 12575232, 'steps': 24560, 'loss/train': 1.950779914855957} +02/25/2022 09:24:13 - INFO - codeparrot_training - Step 24561: {'lr': 0.0002735107884346959, 'samples': 12575744, 'steps': 24561, 'loss/train': 1.9560264348983765} +02/25/2022 09:24:17 - INFO - codeparrot_training - Step 24562: {'lr': 0.0002734944984390504, 'samples': 12576256, 'steps': 24562, 'loss/train': 2.540931224822998} +02/25/2022 09:24:22 - INFO - codeparrot_training - Step 24563: {'lr': 0.000273478208342762, 'samples': 12576768, 'steps': 24563, 'loss/train': 0.7010085582733154} +02/25/2022 09:24:26 - INFO - codeparrot_training - Step 24564: {'lr': 0.0002734619181459003, 'samples': 12577280, 'steps': 24564, 'loss/train': 1.374031901359558} +02/25/2022 09:24:32 - INFO - codeparrot_training - Step 24565: {'lr': 0.00027344562784853535, 'samples': 12577792, 'steps': 24565, 'loss/train': 0.8947920203208923} +02/25/2022 09:24:35 - INFO - codeparrot_training - Step 24566: {'lr': 0.0002734293374507367, 'samples': 12578304, 'steps': 24566, 'loss/train': 2.3546180725097656} +02/25/2022 09:24:41 - INFO - codeparrot_training - Step 24567: {'lr': 0.00027341304695257417, 'samples': 12578816, 'steps': 24567, 'loss/train': 1.5401376485824585} +02/25/2022 09:24:44 - INFO - codeparrot_training - Step 24568: {'lr': 0.0002733967563541176, 'samples': 12579328, 'steps': 24568, 'loss/train': 1.9574304819107056} +02/25/2022 09:24:48 - INFO - codeparrot_training - Step 24569: {'lr': 0.0002733804656554368, 'samples': 12579840, 'steps': 24569, 'loss/train': 1.685356855392456} +02/25/2022 09:24:53 - INFO - codeparrot_training - Step 24570: {'lr': 0.0002733641748566015, 'samples': 12580352, 'steps': 24570, 'loss/train': 0.5905944108963013} +02/25/2022 09:25:00 - INFO - codeparrot_training - Step 24571: {'lr': 0.0002733478839576815, 'samples': 12580864, 'steps': 24571, 'loss/train': 2.1740591526031494} +02/25/2022 09:25:03 - INFO - codeparrot_training - Step 24572: {'lr': 0.0002733315929587465, 'samples': 12581376, 'steps': 24572, 'loss/train': 1.9099316596984863} +02/25/2022 09:25:09 - INFO - codeparrot_training - Step 24573: {'lr': 0.00027331530185986636, 'samples': 12581888, 'steps': 24573, 'loss/train': 2.815541982650757} +02/25/2022 09:25:12 - INFO - codeparrot_training - Step 24574: {'lr': 0.00027329901066111095, 'samples': 12582400, 'steps': 24574, 'loss/train': 2.305406332015991} +02/25/2022 09:25:16 - INFO - codeparrot_training - Step 24575: {'lr': 0.00027328271936254996, 'samples': 12582912, 'steps': 24575, 'loss/train': 1.4654350280761719} +02/25/2022 09:25:21 - INFO - codeparrot_training - Step 24576: {'lr': 0.00027326642796425316, 'samples': 12583424, 'steps': 24576, 'loss/train': 3.1276206970214844} +02/25/2022 09:25:25 - INFO - codeparrot_training - Step 24577: {'lr': 0.00027325013646629036, 'samples': 12583936, 'steps': 24577, 'loss/train': 2.556285858154297} +02/25/2022 09:25:30 - INFO - codeparrot_training - Step 24578: {'lr': 0.00027323384486873146, 'samples': 12584448, 'steps': 24578, 'loss/train': 1.7402993440628052} +02/25/2022 09:25:34 - INFO - codeparrot_training - Step 24579: {'lr': 0.00027321755317164605, 'samples': 12584960, 'steps': 24579, 'loss/train': 2.208979606628418} +02/25/2022 09:25:40 - INFO - codeparrot_training - Step 24580: {'lr': 0.0002732012613751041, 'samples': 12585472, 'steps': 24580, 'loss/train': 1.5136798620224} +02/25/2022 09:25:45 - INFO - codeparrot_training - Step 24581: {'lr': 0.0002731849694791752, 'samples': 12585984, 'steps': 24581, 'loss/train': 0.6692734956741333} +02/25/2022 09:25:48 - INFO - codeparrot_training - Step 24582: {'lr': 0.0002731686774839294, 'samples': 12586496, 'steps': 24582, 'loss/train': 2.0074896812438965} +02/25/2022 09:25:55 - INFO - codeparrot_training - Step 24583: {'lr': 0.00027315238538943616, 'samples': 12587008, 'steps': 24583, 'loss/train': 1.5673069953918457} +02/25/2022 09:25:58 - INFO - codeparrot_training - Step 24584: {'lr': 0.0002731360931957656, 'samples': 12587520, 'steps': 24584, 'loss/train': 0.8167309165000916} +02/25/2022 09:26:04 - INFO - codeparrot_training - Step 24585: {'lr': 0.00027311980090298727, 'samples': 12588032, 'steps': 24585, 'loss/train': 1.6703777313232422} +02/25/2022 09:26:07 - INFO - codeparrot_training - Step 24586: {'lr': 0.0002731035085111711, 'samples': 12588544, 'steps': 24586, 'loss/train': 1.4617022275924683} +02/25/2022 09:26:13 - INFO - codeparrot_training - Step 24587: {'lr': 0.00027308721602038684, 'samples': 12589056, 'steps': 24587, 'loss/train': 2.047581434249878} +02/25/2022 09:26:16 - INFO - codeparrot_training - Step 24588: {'lr': 0.0002730709234307043, 'samples': 12589568, 'steps': 24588, 'loss/train': 1.6226900815963745} +02/25/2022 09:26:22 - INFO - codeparrot_training - Step 24589: {'lr': 0.00027305463074219323, 'samples': 12590080, 'steps': 24589, 'loss/train': 2.5973119735717773} +02/25/2022 09:26:25 - INFO - codeparrot_training - Step 24590: {'lr': 0.0002730383379549234, 'samples': 12590592, 'steps': 24590, 'loss/train': 10.280702590942383} +02/25/2022 09:26:31 - INFO - codeparrot_training - Step 24591: {'lr': 0.0002730220450689647, 'samples': 12591104, 'steps': 24591, 'loss/train': 1.4964972734451294} +02/25/2022 09:26:34 - INFO - codeparrot_training - Step 24592: {'lr': 0.00027300575208438684, 'samples': 12591616, 'steps': 24592, 'loss/train': 2.003296375274658} +02/25/2022 09:26:40 - INFO - codeparrot_training - Step 24593: {'lr': 0.00027298945900125965, 'samples': 12592128, 'steps': 24593, 'loss/train': 1.6805553436279297} +02/25/2022 09:26:43 - INFO - codeparrot_training - Step 24594: {'lr': 0.00027297316581965285, 'samples': 12592640, 'steps': 24594, 'loss/train': 2.3026275634765625} +02/25/2022 09:26:49 - INFO - codeparrot_training - Step 24595: {'lr': 0.0002729568725396364, 'samples': 12593152, 'steps': 24595, 'loss/train': 1.7576102018356323} +02/25/2022 09:26:52 - INFO - codeparrot_training - Step 24596: {'lr': 0.00027294057916127997, 'samples': 12593664, 'steps': 24596, 'loss/train': 2.323209762573242} +02/25/2022 09:26:58 - INFO - codeparrot_training - Step 24597: {'lr': 0.0002729242856846533, 'samples': 12594176, 'steps': 24597, 'loss/train': 2.3953471183776855} +02/25/2022 09:27:02 - INFO - codeparrot_training - Step 24598: {'lr': 0.00027290799210982644, 'samples': 12594688, 'steps': 24598, 'loss/train': 1.495805025100708} +02/25/2022 09:27:07 - INFO - codeparrot_training - Step 24599: {'lr': 0.0002728916984368689, 'samples': 12595200, 'steps': 24599, 'loss/train': 0.1257612407207489} +02/25/2022 09:27:11 - INFO - codeparrot_training - Step 24600: {'lr': 0.00027287540466585064, 'samples': 12595712, 'steps': 24600, 'loss/train': 2.241560220718384} +02/25/2022 09:27:16 - INFO - codeparrot_training - Step 24601: {'lr': 0.00027285911079684134, 'samples': 12596224, 'steps': 24601, 'loss/train': 2.553394079208374} +02/25/2022 09:27:20 - INFO - codeparrot_training - Step 24602: {'lr': 0.000272842816829911, 'samples': 12596736, 'steps': 24602, 'loss/train': 1.7803586721420288} +02/25/2022 09:27:25 - INFO - codeparrot_training - Step 24603: {'lr': 0.00027282652276512914, 'samples': 12597248, 'steps': 24603, 'loss/train': 1.9855388402938843} +02/25/2022 09:27:29 - INFO - codeparrot_training - Step 24604: {'lr': 0.00027281022860256576, 'samples': 12597760, 'steps': 24604, 'loss/train': 2.154768228530884} +02/25/2022 09:27:34 - INFO - codeparrot_training - Step 24605: {'lr': 0.0002727939343422906, 'samples': 12598272, 'steps': 24605, 'loss/train': 2.0404345989227295} +02/25/2022 09:27:38 - INFO - codeparrot_training - Step 24606: {'lr': 0.0002727776399843735, 'samples': 12598784, 'steps': 24606, 'loss/train': 2.2131264209747314} +02/25/2022 09:27:44 - INFO - codeparrot_training - Step 24607: {'lr': 0.00027276134552888415, 'samples': 12599296, 'steps': 24607, 'loss/train': 2.552511692047119} +02/25/2022 09:27:47 - INFO - codeparrot_training - Step 24608: {'lr': 0.0002727450509758925, 'samples': 12599808, 'steps': 24608, 'loss/train': 2.0152928829193115} +02/25/2022 09:27:53 - INFO - codeparrot_training - Step 24609: {'lr': 0.0002727287563254682, 'samples': 12600320, 'steps': 24609, 'loss/train': 1.5814917087554932} +02/25/2022 09:27:56 - INFO - codeparrot_training - Step 24610: {'lr': 0.00027271246157768123, 'samples': 12600832, 'steps': 24610, 'loss/train': 2.7921433448791504} +02/25/2022 09:28:02 - INFO - codeparrot_training - Step 24611: {'lr': 0.0002726961667326012, 'samples': 12601344, 'steps': 24611, 'loss/train': 1.7712713479995728} +02/25/2022 09:28:05 - INFO - codeparrot_training - Step 24612: {'lr': 0.000272679871790298, 'samples': 12601856, 'steps': 24612, 'loss/train': 1.2757468223571777} +02/25/2022 09:28:11 - INFO - codeparrot_training - Step 24613: {'lr': 0.00027266357675084145, 'samples': 12602368, 'steps': 24613, 'loss/train': 2.2437453269958496} +02/25/2022 09:28:14 - INFO - codeparrot_training - Step 24614: {'lr': 0.00027264728161430137, 'samples': 12602880, 'steps': 24614, 'loss/train': 0.7529964447021484} +02/25/2022 09:28:20 - INFO - codeparrot_training - Step 24615: {'lr': 0.00027263098638074753, 'samples': 12603392, 'steps': 24615, 'loss/train': 1.377153754234314} +02/25/2022 09:28:23 - INFO - codeparrot_training - Step 24616: {'lr': 0.0002726146910502496, 'samples': 12603904, 'steps': 24616, 'loss/train': 2.3080670833587646} +02/25/2022 09:28:30 - INFO - codeparrot_training - Step 24617: {'lr': 0.0002725983956228777, 'samples': 12604416, 'steps': 24617, 'loss/train': 0.46623751521110535} +02/25/2022 09:28:33 - INFO - codeparrot_training - Step 24618: {'lr': 0.0002725821000987013, 'samples': 12604928, 'steps': 24618, 'loss/train': 1.3109983205795288} +02/25/2022 09:28:39 - INFO - codeparrot_training - Step 24619: {'lr': 0.00027256580447779043, 'samples': 12605440, 'steps': 24619, 'loss/train': 2.421919107437134} +02/25/2022 09:28:42 - INFO - codeparrot_training - Step 24620: {'lr': 0.0002725495087602148, 'samples': 12605952, 'steps': 24620, 'loss/train': 1.560776948928833} +02/25/2022 09:28:48 - INFO - codeparrot_training - Step 24621: {'lr': 0.0002725332129460442, 'samples': 12606464, 'steps': 24621, 'loss/train': 1.8630948066711426} +02/25/2022 09:28:51 - INFO - codeparrot_training - Step 24622: {'lr': 0.00027251691703534853, 'samples': 12606976, 'steps': 24622, 'loss/train': 2.2826967239379883} +02/25/2022 09:28:57 - INFO - codeparrot_training - Step 24623: {'lr': 0.0002725006210281975, 'samples': 12607488, 'steps': 24623, 'loss/train': 1.3786349296569824} +02/25/2022 09:29:00 - INFO - codeparrot_training - Step 24624: {'lr': 0.000272484324924661, 'samples': 12608000, 'steps': 24624, 'loss/train': 2.12591552734375} +02/25/2022 09:29:06 - INFO - codeparrot_training - Step 24625: {'lr': 0.00027246802872480877, 'samples': 12608512, 'steps': 24625, 'loss/train': 1.7557646036148071} +02/25/2022 09:29:09 - INFO - codeparrot_training - Step 24626: {'lr': 0.0002724517324287106, 'samples': 12609024, 'steps': 24626, 'loss/train': 1.381507396697998} +02/25/2022 09:29:15 - INFO - codeparrot_training - Step 24627: {'lr': 0.00027243543603643636, 'samples': 12609536, 'steps': 24627, 'loss/train': 1.5252448320388794} +02/25/2022 09:29:19 - INFO - codeparrot_training - Step 24628: {'lr': 0.00027241913954805587, 'samples': 12610048, 'steps': 24628, 'loss/train': 2.1617088317871094} +02/25/2022 09:29:24 - INFO - codeparrot_training - Step 24629: {'lr': 0.00027240284296363887, 'samples': 12610560, 'steps': 24629, 'loss/train': 2.0953404903411865} +02/25/2022 09:29:28 - INFO - codeparrot_training - Step 24630: {'lr': 0.00027238654628325524, 'samples': 12611072, 'steps': 24630, 'loss/train': 1.753469467163086} +02/25/2022 09:29:33 - INFO - codeparrot_training - Step 24631: {'lr': 0.00027237024950697473, 'samples': 12611584, 'steps': 24631, 'loss/train': 2.017042398452759} +02/25/2022 09:29:37 - INFO - codeparrot_training - Step 24632: {'lr': 0.0002723539526348671, 'samples': 12612096, 'steps': 24632, 'loss/train': 2.876164674758911} +02/25/2022 09:29:42 - INFO - codeparrot_training - Step 24633: {'lr': 0.00027233765566700235, 'samples': 12612608, 'steps': 24633, 'loss/train': 2.098737955093384} +02/25/2022 09:29:46 - INFO - codeparrot_training - Step 24634: {'lr': 0.00027232135860345017, 'samples': 12613120, 'steps': 24634, 'loss/train': 2.8700051307678223} +02/25/2022 09:29:51 - INFO - codeparrot_training - Step 24635: {'lr': 0.00027230506144428036, 'samples': 12613632, 'steps': 24635, 'loss/train': 1.7338645458221436} +02/25/2022 09:29:55 - INFO - codeparrot_training - Step 24636: {'lr': 0.0002722887641895627, 'samples': 12614144, 'steps': 24636, 'loss/train': 0.9063979983329773} +02/25/2022 09:30:00 - INFO - codeparrot_training - Step 24637: {'lr': 0.0002722724668393671, 'samples': 12614656, 'steps': 24637, 'loss/train': 2.547739267349243} +02/25/2022 09:30:04 - INFO - codeparrot_training - Step 24638: {'lr': 0.00027225616939376325, 'samples': 12615168, 'steps': 24638, 'loss/train': 2.4339704513549805} +02/25/2022 09:30:09 - INFO - codeparrot_training - Step 24639: {'lr': 0.00027223987185282113, 'samples': 12615680, 'steps': 24639, 'loss/train': 2.8214831352233887} +02/25/2022 09:30:13 - INFO - codeparrot_training - Step 24640: {'lr': 0.00027222357421661044, 'samples': 12616192, 'steps': 24640, 'loss/train': 2.649697780609131} +02/25/2022 09:30:18 - INFO - codeparrot_training - Step 24641: {'lr': 0.000272207276485201, 'samples': 12616704, 'steps': 24641, 'loss/train': 1.6627308130264282} +02/25/2022 09:30:22 - INFO - codeparrot_training - Step 24642: {'lr': 0.0002721909786586626, 'samples': 12617216, 'steps': 24642, 'loss/train': 1.9770973920822144} +02/25/2022 09:30:28 - INFO - codeparrot_training - Step 24643: {'lr': 0.00027217468073706516, 'samples': 12617728, 'steps': 24643, 'loss/train': 3.204784393310547} +02/25/2022 09:30:32 - INFO - codeparrot_training - Step 24644: {'lr': 0.0002721583827204784, 'samples': 12618240, 'steps': 24644, 'loss/train': 1.5256271362304688} +02/25/2022 09:30:37 - INFO - codeparrot_training - Step 24645: {'lr': 0.0002721420846089722, 'samples': 12618752, 'steps': 24645, 'loss/train': 0.4611816704273224} +02/25/2022 09:30:41 - INFO - codeparrot_training - Step 24646: {'lr': 0.00027212578640261627, 'samples': 12619264, 'steps': 24646, 'loss/train': 2.130063772201538} +02/25/2022 09:30:46 - INFO - codeparrot_training - Step 24647: {'lr': 0.0002721094881014805, 'samples': 12619776, 'steps': 24647, 'loss/train': 1.3082990646362305} +02/25/2022 09:30:50 - INFO - codeparrot_training - Step 24648: {'lr': 0.0002720931897056347, 'samples': 12620288, 'steps': 24648, 'loss/train': 1.4419804811477661} +02/25/2022 09:30:55 - INFO - codeparrot_training - Step 24649: {'lr': 0.0002720768912151487, 'samples': 12620800, 'steps': 24649, 'loss/train': 2.1319429874420166} +02/25/2022 09:30:59 - INFO - codeparrot_training - Step 24650: {'lr': 0.00027206059263009243, 'samples': 12621312, 'steps': 24650, 'loss/train': 2.5869035720825195} +02/25/2022 09:31:04 - INFO - codeparrot_training - Step 24651: {'lr': 0.00027204429395053545, 'samples': 12621824, 'steps': 24651, 'loss/train': 1.911998987197876} +02/25/2022 09:31:08 - INFO - codeparrot_training - Step 24652: {'lr': 0.0002720279951765478, 'samples': 12622336, 'steps': 24652, 'loss/train': 1.693935751914978} +02/25/2022 09:31:13 - INFO - codeparrot_training - Step 24653: {'lr': 0.0002720116963081991, 'samples': 12622848, 'steps': 24653, 'loss/train': 1.788081407546997} +02/25/2022 09:31:16 - INFO - codeparrot_training - Step 24654: {'lr': 0.00027199539734555937, 'samples': 12623360, 'steps': 24654, 'loss/train': 0.9297451376914978} +02/25/2022 09:31:23 - INFO - codeparrot_training - Step 24655: {'lr': 0.0002719790982886983, 'samples': 12623872, 'steps': 24655, 'loss/train': 1.7606264352798462} +02/25/2022 09:31:26 - INFO - codeparrot_training - Step 24656: {'lr': 0.00027196279913768587, 'samples': 12624384, 'steps': 24656, 'loss/train': 1.6372466087341309} +02/25/2022 09:31:32 - INFO - codeparrot_training - Step 24657: {'lr': 0.00027194649989259164, 'samples': 12624896, 'steps': 24657, 'loss/train': 1.7308844327926636} +02/25/2022 09:31:35 - INFO - codeparrot_training - Step 24658: {'lr': 0.0002719302005534856, 'samples': 12625408, 'steps': 24658, 'loss/train': 1.886879324913025} +02/25/2022 09:31:41 - INFO - codeparrot_training - Step 24659: {'lr': 0.0002719139011204376, 'samples': 12625920, 'steps': 24659, 'loss/train': 2.9859209060668945} +02/25/2022 09:31:45 - INFO - codeparrot_training - Step 24660: {'lr': 0.00027189760159351735, 'samples': 12626432, 'steps': 24660, 'loss/train': 1.5303181409835815} +02/25/2022 09:31:50 - INFO - codeparrot_training - Step 24661: {'lr': 0.00027188130197279477, 'samples': 12626944, 'steps': 24661, 'loss/train': 1.9257639646530151} +02/25/2022 09:31:54 - INFO - codeparrot_training - Step 24662: {'lr': 0.00027186500225833955, 'samples': 12627456, 'steps': 24662, 'loss/train': 2.1705539226531982} +02/25/2022 09:31:59 - INFO - codeparrot_training - Step 24663: {'lr': 0.00027184870245022173, 'samples': 12627968, 'steps': 24663, 'loss/train': 1.9565694332122803} +02/25/2022 09:32:03 - INFO - codeparrot_training - Step 24664: {'lr': 0.0002718324025485109, 'samples': 12628480, 'steps': 24664, 'loss/train': 1.956503987312317} +02/25/2022 09:32:09 - INFO - codeparrot_training - Step 24665: {'lr': 0.0002718161025532771, 'samples': 12628992, 'steps': 24665, 'loss/train': 1.2346445322036743} +02/25/2022 09:32:13 - INFO - codeparrot_training - Step 24666: {'lr': 0.00027179980246459, 'samples': 12629504, 'steps': 24666, 'loss/train': 1.7928624153137207} +02/25/2022 09:32:18 - INFO - codeparrot_training - Step 24667: {'lr': 0.0002717835022825194, 'samples': 12630016, 'steps': 24667, 'loss/train': 1.4517059326171875} +02/25/2022 09:32:22 - INFO - codeparrot_training - Step 24668: {'lr': 0.0002717672020071352, 'samples': 12630528, 'steps': 24668, 'loss/train': 1.3011809587478638} +02/25/2022 09:32:27 - INFO - codeparrot_training - Step 24669: {'lr': 0.00027175090163850736, 'samples': 12631040, 'steps': 24669, 'loss/train': 1.4450370073318481} +02/25/2022 09:32:31 - INFO - codeparrot_training - Step 24670: {'lr': 0.0002717346011767054, 'samples': 12631552, 'steps': 24670, 'loss/train': 1.423353672027588} +02/25/2022 09:32:36 - INFO - codeparrot_training - Step 24671: {'lr': 0.00027171830062179943, 'samples': 12632064, 'steps': 24671, 'loss/train': 1.6652315855026245} +02/25/2022 09:32:40 - INFO - codeparrot_training - Step 24672: {'lr': 0.0002717019999738591, 'samples': 12632576, 'steps': 24672, 'loss/train': 0.451820969581604} +02/25/2022 09:32:45 - INFO - codeparrot_training - Step 24673: {'lr': 0.0002716856992329543, 'samples': 12633088, 'steps': 24673, 'loss/train': 1.8710403442382812} +02/25/2022 09:32:49 - INFO - codeparrot_training - Step 24674: {'lr': 0.00027166939839915486, 'samples': 12633600, 'steps': 24674, 'loss/train': 1.1517184972763062} +02/25/2022 09:32:55 - INFO - codeparrot_training - Step 24675: {'lr': 0.0002716530974725306, 'samples': 12634112, 'steps': 24675, 'loss/train': 1.9069888591766357} +02/25/2022 09:32:59 - INFO - codeparrot_training - Step 24676: {'lr': 0.0002716367964531513, 'samples': 12634624, 'steps': 24676, 'loss/train': 0.27837708592414856} +02/25/2022 09:33:04 - INFO - codeparrot_training - Step 24677: {'lr': 0.0002716204953410869, 'samples': 12635136, 'steps': 24677, 'loss/train': 1.7817667722702026} +02/25/2022 09:33:08 - INFO - codeparrot_training - Step 24678: {'lr': 0.00027160419413640714, 'samples': 12635648, 'steps': 24678, 'loss/train': 2.4504878520965576} +02/25/2022 09:33:13 - INFO - codeparrot_training - Step 24679: {'lr': 0.0002715878928391818, 'samples': 12636160, 'steps': 24679, 'loss/train': 1.368591547012329} +02/25/2022 09:33:17 - INFO - codeparrot_training - Step 24680: {'lr': 0.0002715715914494809, 'samples': 12636672, 'steps': 24680, 'loss/train': 2.594334602355957} +02/25/2022 09:33:22 - INFO - codeparrot_training - Step 24681: {'lr': 0.00027155528996737404, 'samples': 12637184, 'steps': 24681, 'loss/train': 1.9636738300323486} +02/25/2022 09:33:25 - INFO - codeparrot_training - Step 24682: {'lr': 0.00027153898839293124, 'samples': 12637696, 'steps': 24682, 'loss/train': 1.0482842922210693} +02/25/2022 09:33:31 - INFO - codeparrot_training - Step 24683: {'lr': 0.0002715226867262223, 'samples': 12638208, 'steps': 24683, 'loss/train': 1.1728090047836304} +02/25/2022 09:33:34 - INFO - codeparrot_training - Step 24684: {'lr': 0.00027150638496731684, 'samples': 12638720, 'steps': 24684, 'loss/train': 2.0531039237976074} +02/25/2022 09:33:41 - INFO - codeparrot_training - Step 24685: {'lr': 0.000271490083116285, 'samples': 12639232, 'steps': 24685, 'loss/train': 2.5731382369995117} +02/25/2022 09:33:44 - INFO - codeparrot_training - Step 24686: {'lr': 0.0002714737811731964, 'samples': 12639744, 'steps': 24686, 'loss/train': 2.7383646965026855} +02/25/2022 09:33:50 - INFO - codeparrot_training - Step 24687: {'lr': 0.00027145747913812096, 'samples': 12640256, 'steps': 24687, 'loss/train': 2.3733439445495605} +02/25/2022 09:33:53 - INFO - codeparrot_training - Step 24688: {'lr': 0.00027144117701112844, 'samples': 12640768, 'steps': 24688, 'loss/train': 1.9890474081039429} +02/25/2022 09:33:59 - INFO - codeparrot_training - Step 24689: {'lr': 0.00027142487479228883, 'samples': 12641280, 'steps': 24689, 'loss/train': 1.2939120531082153} +02/25/2022 09:34:02 - INFO - codeparrot_training - Step 24690: {'lr': 0.0002714085724816718, 'samples': 12641792, 'steps': 24690, 'loss/train': 2.0192711353302} +02/25/2022 09:34:08 - INFO - codeparrot_training - Step 24691: {'lr': 0.0002713922700793473, 'samples': 12642304, 'steps': 24691, 'loss/train': 2.2185118198394775} +02/25/2022 09:34:11 - INFO - codeparrot_training - Step 24692: {'lr': 0.000271375967585385, 'samples': 12642816, 'steps': 24692, 'loss/train': 2.1645827293395996} +02/25/2022 09:34:17 - INFO - codeparrot_training - Step 24693: {'lr': 0.000271359664999855, 'samples': 12643328, 'steps': 24693, 'loss/train': 2.1087687015533447} +02/25/2022 09:34:20 - INFO - codeparrot_training - Step 24694: {'lr': 0.0002713433623228268, 'samples': 12643840, 'steps': 24694, 'loss/train': 3.318579912185669} +02/25/2022 09:34:26 - INFO - codeparrot_training - Step 24695: {'lr': 0.00027132705955437047, 'samples': 12644352, 'steps': 24695, 'loss/train': 1.543757677078247} +02/25/2022 09:34:29 - INFO - codeparrot_training - Step 24696: {'lr': 0.00027131075669455584, 'samples': 12644864, 'steps': 24696, 'loss/train': 2.4322471618652344} +02/25/2022 09:34:35 - INFO - codeparrot_training - Step 24697: {'lr': 0.00027129445374345264, 'samples': 12645376, 'steps': 24697, 'loss/train': 1.7436842918395996} +02/25/2022 09:34:38 - INFO - codeparrot_training - Step 24698: {'lr': 0.00027127815070113084, 'samples': 12645888, 'steps': 24698, 'loss/train': 1.5629328489303589} +02/25/2022 09:34:44 - INFO - codeparrot_training - Step 24699: {'lr': 0.0002712618475676601, 'samples': 12646400, 'steps': 24699, 'loss/train': 1.822376012802124} +02/25/2022 09:34:47 - INFO - codeparrot_training - Step 24700: {'lr': 0.00027124554434311045, 'samples': 12646912, 'steps': 24700, 'loss/train': 2.0030741691589355} +02/25/2022 09:34:54 - INFO - codeparrot_training - Step 24701: {'lr': 0.00027122924102755154, 'samples': 12647424, 'steps': 24701, 'loss/train': 2.359635829925537} +02/25/2022 09:34:57 - INFO - codeparrot_training - Step 24702: {'lr': 0.0002712129376210534, 'samples': 12647936, 'steps': 24702, 'loss/train': 1.3259538412094116} +02/25/2022 09:35:03 - INFO - codeparrot_training - Step 24703: {'lr': 0.00027119663412368566, 'samples': 12648448, 'steps': 24703, 'loss/train': 2.0318992137908936} +02/25/2022 09:35:06 - INFO - codeparrot_training - Step 24704: {'lr': 0.0002711803305355184, 'samples': 12648960, 'steps': 24704, 'loss/train': 1.5608978271484375} +02/25/2022 09:35:12 - INFO - codeparrot_training - Step 24705: {'lr': 0.0002711640268566212, 'samples': 12649472, 'steps': 24705, 'loss/train': 2.191253185272217} +02/25/2022 09:35:15 - INFO - codeparrot_training - Step 24706: {'lr': 0.0002711477230870641, 'samples': 12649984, 'steps': 24706, 'loss/train': 1.4878144264221191} +02/25/2022 09:35:21 - INFO - codeparrot_training - Step 24707: {'lr': 0.0002711314192269169, 'samples': 12650496, 'steps': 24707, 'loss/train': 1.6701326370239258} +02/25/2022 09:35:24 - INFO - codeparrot_training - Step 24708: {'lr': 0.0002711151152762493, 'samples': 12651008, 'steps': 24708, 'loss/train': 1.852627158164978} +02/25/2022 09:35:30 - INFO - codeparrot_training - Step 24709: {'lr': 0.00027109881123513137, 'samples': 12651520, 'steps': 24709, 'loss/train': 1.2789192199707031} +02/25/2022 09:35:33 - INFO - codeparrot_training - Step 24710: {'lr': 0.00027108250710363276, 'samples': 12652032, 'steps': 24710, 'loss/train': 2.080981969833374} +02/25/2022 09:35:39 - INFO - codeparrot_training - Step 24711: {'lr': 0.0002710662028818234, 'samples': 12652544, 'steps': 24711, 'loss/train': 2.297793388366699} +02/25/2022 09:35:43 - INFO - codeparrot_training - Step 24712: {'lr': 0.00027104989856977315, 'samples': 12653056, 'steps': 24712, 'loss/train': 2.144005537033081} +02/25/2022 09:35:48 - INFO - codeparrot_training - Step 24713: {'lr': 0.0002710335941675518, 'samples': 12653568, 'steps': 24713, 'loss/train': 2.7341160774230957} +02/25/2022 09:35:52 - INFO - codeparrot_training - Step 24714: {'lr': 0.0002710172896752292, 'samples': 12654080, 'steps': 24714, 'loss/train': 1.7736080884933472} +02/25/2022 09:35:57 - INFO - codeparrot_training - Step 24715: {'lr': 0.00027100098509287525, 'samples': 12654592, 'steps': 24715, 'loss/train': 2.310661792755127} +02/25/2022 09:36:01 - INFO - codeparrot_training - Step 24716: {'lr': 0.0002709846804205597, 'samples': 12655104, 'steps': 24716, 'loss/train': 2.078089475631714} +02/25/2022 09:36:06 - INFO - codeparrot_training - Step 24717: {'lr': 0.0002709683756583524, 'samples': 12655616, 'steps': 24717, 'loss/train': 1.4521042108535767} +02/25/2022 09:36:10 - INFO - codeparrot_training - Step 24718: {'lr': 0.00027095207080632335, 'samples': 12656128, 'steps': 24718, 'loss/train': 1.8167824745178223} +02/25/2022 09:36:15 - INFO - codeparrot_training - Step 24719: {'lr': 0.00027093576586454223, 'samples': 12656640, 'steps': 24719, 'loss/train': 1.5202302932739258} +02/25/2022 09:36:19 - INFO - codeparrot_training - Step 24720: {'lr': 0.00027091946083307894, 'samples': 12657152, 'steps': 24720, 'loss/train': 1.656925082206726} +02/25/2022 09:36:25 - INFO - codeparrot_training - Step 24721: {'lr': 0.00027090315571200326, 'samples': 12657664, 'steps': 24721, 'loss/train': 1.1279889345169067} +02/25/2022 09:36:28 - INFO - codeparrot_training - Step 24722: {'lr': 0.00027088685050138516, 'samples': 12658176, 'steps': 24722, 'loss/train': 1.7200313806533813} +02/25/2022 09:36:34 - INFO - codeparrot_training - Step 24723: {'lr': 0.0002708705452012944, 'samples': 12658688, 'steps': 24723, 'loss/train': 1.6260935068130493} +02/25/2022 09:36:37 - INFO - codeparrot_training - Step 24724: {'lr': 0.0002708542398118009, 'samples': 12659200, 'steps': 24724, 'loss/train': 1.7535221576690674} +02/25/2022 09:36:43 - INFO - codeparrot_training - Step 24725: {'lr': 0.0002708379343329744, 'samples': 12659712, 'steps': 24725, 'loss/train': 1.6087149381637573} +02/25/2022 09:36:47 - INFO - codeparrot_training - Step 24726: {'lr': 0.00027082162876488486, 'samples': 12660224, 'steps': 24726, 'loss/train': 1.9861663579940796} +02/25/2022 09:36:52 - INFO - codeparrot_training - Step 24727: {'lr': 0.000270805323107602, 'samples': 12660736, 'steps': 24727, 'loss/train': 2.083125114440918} +02/25/2022 09:36:56 - INFO - codeparrot_training - Step 24728: {'lr': 0.0002707890173611958, 'samples': 12661248, 'steps': 24728, 'loss/train': 1.7620930671691895} +02/25/2022 09:37:01 - INFO - codeparrot_training - Step 24729: {'lr': 0.000270772711525736, 'samples': 12661760, 'steps': 24729, 'loss/train': 1.5488576889038086} +02/25/2022 09:37:05 - INFO - codeparrot_training - Step 24730: {'lr': 0.00027075640560129255, 'samples': 12662272, 'steps': 24730, 'loss/train': 1.8740463256835938} +02/25/2022 09:37:11 - INFO - codeparrot_training - Step 24731: {'lr': 0.00027074009958793523, 'samples': 12662784, 'steps': 24731, 'loss/train': 2.655433416366577} +02/25/2022 09:37:14 - INFO - codeparrot_training - Step 24732: {'lr': 0.0002707237934857339, 'samples': 12663296, 'steps': 24732, 'loss/train': 2.241673469543457} +02/25/2022 09:37:20 - INFO - codeparrot_training - Step 24733: {'lr': 0.0002707074872947585, 'samples': 12663808, 'steps': 24733, 'loss/train': 2.175262928009033} +02/25/2022 09:37:23 - INFO - codeparrot_training - Step 24734: {'lr': 0.0002706911810150787, 'samples': 12664320, 'steps': 24734, 'loss/train': 2.5209100246429443} +02/25/2022 09:37:29 - INFO - codeparrot_training - Step 24735: {'lr': 0.0002706748746467645, 'samples': 12664832, 'steps': 24735, 'loss/train': 1.610283374786377} +02/25/2022 09:37:32 - INFO - codeparrot_training - Step 24736: {'lr': 0.00027065856818988567, 'samples': 12665344, 'steps': 24736, 'loss/train': 2.2202906608581543} +02/25/2022 09:37:38 - INFO - codeparrot_training - Step 24737: {'lr': 0.00027064226164451213, 'samples': 12665856, 'steps': 24737, 'loss/train': 1.3246862888336182} +02/25/2022 09:37:41 - INFO - codeparrot_training - Step 24738: {'lr': 0.0002706259550107136, 'samples': 12666368, 'steps': 24738, 'loss/train': 2.9630515575408936} +02/25/2022 09:37:47 - INFO - codeparrot_training - Step 24739: {'lr': 0.00027060964828856015, 'samples': 12666880, 'steps': 24739, 'loss/train': 0.3877182602882385} +02/25/2022 09:37:50 - INFO - codeparrot_training - Step 24740: {'lr': 0.0002705933414781214, 'samples': 12667392, 'steps': 24740, 'loss/train': 1.803843379020691} +02/25/2022 09:37:56 - INFO - codeparrot_training - Step 24741: {'lr': 0.00027057703457946747, 'samples': 12667904, 'steps': 24741, 'loss/train': 1.636519193649292} +02/25/2022 09:37:59 - INFO - codeparrot_training - Step 24742: {'lr': 0.0002705607275926679, 'samples': 12668416, 'steps': 24742, 'loss/train': 1.6374881267547607} +02/25/2022 09:38:05 - INFO - codeparrot_training - Step 24743: {'lr': 0.0002705444205177928, 'samples': 12668928, 'steps': 24743, 'loss/train': 1.6519156694412231} +02/25/2022 09:38:09 - INFO - codeparrot_training - Step 24744: {'lr': 0.0002705281133549119, 'samples': 12669440, 'steps': 24744, 'loss/train': 1.5621296167373657} +02/25/2022 09:38:14 - INFO - codeparrot_training - Step 24745: {'lr': 0.000270511806104095, 'samples': 12669952, 'steps': 24745, 'loss/train': 0.5297259092330933} +02/25/2022 09:38:17 - INFO - codeparrot_training - Step 24746: {'lr': 0.0002704954987654122, 'samples': 12670464, 'steps': 24746, 'loss/train': 1.9659370183944702} +02/25/2022 09:38:24 - INFO - codeparrot_training - Step 24747: {'lr': 0.00027047919133893304, 'samples': 12670976, 'steps': 24747, 'loss/train': 2.1376090049743652} +02/25/2022 09:38:27 - INFO - codeparrot_training - Step 24748: {'lr': 0.0002704628838247276, 'samples': 12671488, 'steps': 24748, 'loss/train': 1.825350046157837} +02/25/2022 09:38:33 - INFO - codeparrot_training - Step 24749: {'lr': 0.0002704465762228656, 'samples': 12672000, 'steps': 24749, 'loss/train': 1.8036125898361206} +02/25/2022 09:38:36 - INFO - codeparrot_training - Step 24750: {'lr': 0.00027043026853341707, 'samples': 12672512, 'steps': 24750, 'loss/train': 0.12093210220336914} +02/25/2022 09:38:42 - INFO - codeparrot_training - Step 24751: {'lr': 0.0002704139607564517, 'samples': 12673024, 'steps': 24751, 'loss/train': 1.0983920097351074} +02/25/2022 09:38:47 - INFO - codeparrot_training - Step 24752: {'lr': 0.00027039765289203944, 'samples': 12673536, 'steps': 24752, 'loss/train': 1.9977049827575684} +02/25/2022 09:38:51 - INFO - codeparrot_training - Step 24753: {'lr': 0.00027038134494025, 'samples': 12674048, 'steps': 24753, 'loss/train': 2.3789122104644775} +02/25/2022 09:38:56 - INFO - codeparrot_training - Step 24754: {'lr': 0.00027036503690115347, 'samples': 12674560, 'steps': 24754, 'loss/train': 3.5160694122314453} +02/25/2022 09:39:00 - INFO - codeparrot_training - Step 24755: {'lr': 0.0002703487287748195, 'samples': 12675072, 'steps': 24755, 'loss/train': 2.3558614253997803} +02/25/2022 09:39:06 - INFO - codeparrot_training - Step 24756: {'lr': 0.00027033242056131806, 'samples': 12675584, 'steps': 24756, 'loss/train': 2.561534881591797} +02/25/2022 09:39:10 - INFO - codeparrot_training - Step 24757: {'lr': 0.00027031611226071905, 'samples': 12676096, 'steps': 24757, 'loss/train': 1.4274990558624268} +02/25/2022 09:39:15 - INFO - codeparrot_training - Step 24758: {'lr': 0.0002702998038730923, 'samples': 12676608, 'steps': 24758, 'loss/train': 1.7446138858795166} +02/25/2022 09:39:19 - INFO - codeparrot_training - Step 24759: {'lr': 0.0002702834953985075, 'samples': 12677120, 'steps': 24759, 'loss/train': 1.9842373132705688} +02/25/2022 09:39:24 - INFO - codeparrot_training - Step 24760: {'lr': 0.0002702671868370347, 'samples': 12677632, 'steps': 24760, 'loss/train': 2.2683663368225098} +02/25/2022 09:39:28 - INFO - codeparrot_training - Step 24761: {'lr': 0.0002702508781887437, 'samples': 12678144, 'steps': 24761, 'loss/train': 2.706632614135742} +02/25/2022 09:39:33 - INFO - codeparrot_training - Step 24762: {'lr': 0.00027023456945370446, 'samples': 12678656, 'steps': 24762, 'loss/train': 1.7192577123641968} +02/25/2022 09:39:37 - INFO - codeparrot_training - Step 24763: {'lr': 0.0002702182606319866, 'samples': 12679168, 'steps': 24763, 'loss/train': 2.067255735397339} +02/25/2022 09:39:42 - INFO - codeparrot_training - Step 24764: {'lr': 0.00027020195172366025, 'samples': 12679680, 'steps': 24764, 'loss/train': 2.072300910949707} +02/25/2022 09:39:46 - INFO - codeparrot_training - Step 24765: {'lr': 0.0002701856427287951, 'samples': 12680192, 'steps': 24765, 'loss/train': 1.1736483573913574} +02/25/2022 09:39:52 - INFO - codeparrot_training - Step 24766: {'lr': 0.0002701693336474611, 'samples': 12680704, 'steps': 24766, 'loss/train': 1.6502591371536255} +02/25/2022 09:39:55 - INFO - codeparrot_training - Step 24767: {'lr': 0.000270153024479728, 'samples': 12681216, 'steps': 24767, 'loss/train': 2.540256977081299} +02/25/2022 09:40:01 - INFO - codeparrot_training - Step 24768: {'lr': 0.0002701367152256658, 'samples': 12681728, 'steps': 24768, 'loss/train': 1.3141244649887085} +02/25/2022 09:40:04 - INFO - codeparrot_training - Step 24769: {'lr': 0.0002701204058853443, 'samples': 12682240, 'steps': 24769, 'loss/train': 2.6552889347076416} +02/25/2022 09:40:10 - INFO - codeparrot_training - Step 24770: {'lr': 0.0002701040964588334, 'samples': 12682752, 'steps': 24770, 'loss/train': 0.9351255297660828} +02/25/2022 09:40:14 - INFO - codeparrot_training - Step 24771: {'lr': 0.0002700877869462029, 'samples': 12683264, 'steps': 24771, 'loss/train': 3.129750967025757} +02/25/2022 09:40:17 - INFO - codeparrot_training - Step 24772: {'lr': 0.0002700714773475227, 'samples': 12683776, 'steps': 24772, 'loss/train': 2.1949305534362793} +02/25/2022 09:40:23 - INFO - codeparrot_training - Step 24773: {'lr': 0.00027005516766286265, 'samples': 12684288, 'steps': 24773, 'loss/train': 1.2875661849975586} +02/25/2022 09:40:26 - INFO - codeparrot_training - Step 24774: {'lr': 0.00027003885789229264, 'samples': 12684800, 'steps': 24774, 'loss/train': 2.469822406768799} +02/25/2022 09:40:32 - INFO - codeparrot_training - Step 24775: {'lr': 0.00027002254803588254, 'samples': 12685312, 'steps': 24775, 'loss/train': 1.4646978378295898} +02/25/2022 09:40:35 - INFO - codeparrot_training - Step 24776: {'lr': 0.00027000623809370224, 'samples': 12685824, 'steps': 24776, 'loss/train': 0.8952632546424866} +02/25/2022 09:40:41 - INFO - codeparrot_training - Step 24777: {'lr': 0.0002699899280658215, 'samples': 12686336, 'steps': 24777, 'loss/train': 1.0153728723526} +02/25/2022 09:40:45 - INFO - codeparrot_training - Step 24778: {'lr': 0.00026997361795231027, 'samples': 12686848, 'steps': 24778, 'loss/train': 2.2385873794555664} +02/25/2022 09:40:50 - INFO - codeparrot_training - Step 24779: {'lr': 0.0002699573077532384, 'samples': 12687360, 'steps': 24779, 'loss/train': 1.4095805883407593} +02/25/2022 09:40:54 - INFO - codeparrot_training - Step 24780: {'lr': 0.0002699409974686758, 'samples': 12687872, 'steps': 24780, 'loss/train': 0.6144838929176331} +02/25/2022 09:40:59 - INFO - codeparrot_training - Step 24781: {'lr': 0.0002699246870986923, 'samples': 12688384, 'steps': 24781, 'loss/train': 1.347374677658081} +02/25/2022 09:41:03 - INFO - codeparrot_training - Step 24782: {'lr': 0.00026990837664335763, 'samples': 12688896, 'steps': 24782, 'loss/train': 1.4676218032836914} +02/25/2022 09:41:08 - INFO - codeparrot_training - Step 24783: {'lr': 0.00026989206610274197, 'samples': 12689408, 'steps': 24783, 'loss/train': 2.816321611404419} +02/25/2022 09:41:12 - INFO - codeparrot_training - Step 24784: {'lr': 0.00026987575547691495, 'samples': 12689920, 'steps': 24784, 'loss/train': 1.5207104682922363} +02/25/2022 09:41:17 - INFO - codeparrot_training - Step 24785: {'lr': 0.0002698594447659465, 'samples': 12690432, 'steps': 24785, 'loss/train': 1.9138569831848145} +02/25/2022 09:41:23 - INFO - codeparrot_training - Step 24786: {'lr': 0.0002698431339699065, 'samples': 12690944, 'steps': 24786, 'loss/train': 2.3683454990386963} +02/25/2022 09:41:26 - INFO - codeparrot_training - Step 24787: {'lr': 0.00026982682308886483, 'samples': 12691456, 'steps': 24787, 'loss/train': 1.6553800106048584} +02/25/2022 09:41:32 - INFO - codeparrot_training - Step 24788: {'lr': 0.00026981051212289134, 'samples': 12691968, 'steps': 24788, 'loss/train': 1.0435470342636108} +02/25/2022 09:41:35 - INFO - codeparrot_training - Step 24789: {'lr': 0.00026979420107205594, 'samples': 12692480, 'steps': 24789, 'loss/train': 1.8520112037658691} +02/25/2022 09:41:41 - INFO - codeparrot_training - Step 24790: {'lr': 0.0002697778899364284, 'samples': 12692992, 'steps': 24790, 'loss/train': 1.8776912689208984} +02/25/2022 09:41:44 - INFO - codeparrot_training - Step 24791: {'lr': 0.0002697615787160787, 'samples': 12693504, 'steps': 24791, 'loss/train': 1.3093762397766113} +02/25/2022 09:41:51 - INFO - codeparrot_training - Step 24792: {'lr': 0.0002697452674110766, 'samples': 12694016, 'steps': 24792, 'loss/train': 1.1372944116592407} +02/25/2022 09:41:54 - INFO - codeparrot_training - Step 24793: {'lr': 0.0002697289560214921, 'samples': 12694528, 'steps': 24793, 'loss/train': 2.164684534072876} +02/25/2022 09:42:00 - INFO - codeparrot_training - Step 24794: {'lr': 0.000269712644547395, 'samples': 12695040, 'steps': 24794, 'loss/train': 2.130544424057007} +02/25/2022 09:42:03 - INFO - codeparrot_training - Step 24795: {'lr': 0.0002696963329888552, 'samples': 12695552, 'steps': 24795, 'loss/train': 1.784342646598816} +02/25/2022 09:42:09 - INFO - codeparrot_training - Step 24796: {'lr': 0.00026968002134594265, 'samples': 12696064, 'steps': 24796, 'loss/train': 2.583003044128418} +02/25/2022 09:42:12 - INFO - codeparrot_training - Step 24797: {'lr': 0.000269663709618727, 'samples': 12696576, 'steps': 24797, 'loss/train': 1.6890408992767334} +02/25/2022 09:42:18 - INFO - codeparrot_training - Step 24798: {'lr': 0.00026964739780727836, 'samples': 12697088, 'steps': 24798, 'loss/train': 2.2527852058410645} +02/25/2022 09:42:21 - INFO - codeparrot_training - Step 24799: {'lr': 0.00026963108591166645, 'samples': 12697600, 'steps': 24799, 'loss/train': 1.5724927186965942} +02/25/2022 09:42:27 - INFO - codeparrot_training - Step 24800: {'lr': 0.00026961477393196127, 'samples': 12698112, 'steps': 24800, 'loss/train': 2.0026538372039795} +02/25/2022 09:42:30 - INFO - codeparrot_training - Step 24801: {'lr': 0.00026959846186823253, 'samples': 12698624, 'steps': 24801, 'loss/train': 0.0752396211028099} +02/25/2022 09:42:36 - INFO - codeparrot_training - Step 24802: {'lr': 0.0002695821497205503, 'samples': 12699136, 'steps': 24802, 'loss/train': 2.7291741371154785} +02/25/2022 09:42:40 - INFO - codeparrot_training - Step 24803: {'lr': 0.0002695658374889843, 'samples': 12699648, 'steps': 24803, 'loss/train': 2.9530017375946045} +02/25/2022 09:42:45 - INFO - codeparrot_training - Step 24804: {'lr': 0.0002695495251736045, 'samples': 12700160, 'steps': 24804, 'loss/train': 1.4212478399276733} +02/25/2022 09:42:49 - INFO - codeparrot_training - Step 24805: {'lr': 0.0002695332127744807, 'samples': 12700672, 'steps': 24805, 'loss/train': 1.149299144744873} +02/25/2022 09:42:54 - INFO - codeparrot_training - Step 24806: {'lr': 0.00026951690029168286, 'samples': 12701184, 'steps': 24806, 'loss/train': 1.0933799743652344} +02/25/2022 09:42:58 - INFO - codeparrot_training - Step 24807: {'lr': 0.0002695005877252808, 'samples': 12701696, 'steps': 24807, 'loss/train': 1.3683966398239136} +02/25/2022 09:43:03 - INFO - codeparrot_training - Step 24808: {'lr': 0.00026948427507534435, 'samples': 12702208, 'steps': 24808, 'loss/train': 2.027744770050049} +02/25/2022 09:43:07 - INFO - codeparrot_training - Step 24809: {'lr': 0.00026946796234194356, 'samples': 12702720, 'steps': 24809, 'loss/train': 2.7946319580078125} +02/25/2022 09:43:12 - INFO - codeparrot_training - Step 24810: {'lr': 0.0002694516495251481, 'samples': 12703232, 'steps': 24810, 'loss/train': 2.4352428913116455} +02/25/2022 09:43:16 - INFO - codeparrot_training - Step 24811: {'lr': 0.00026943533662502803, 'samples': 12703744, 'steps': 24811, 'loss/train': 1.74042809009552} +02/25/2022 09:43:22 - INFO - codeparrot_training - Step 24812: {'lr': 0.0002694190236416531, 'samples': 12704256, 'steps': 24812, 'loss/train': 1.9915233850479126} +02/25/2022 09:43:25 - INFO - codeparrot_training - Step 24813: {'lr': 0.0002694027105750933, 'samples': 12704768, 'steps': 24813, 'loss/train': 2.4247989654541016} +02/25/2022 09:43:31 - INFO - codeparrot_training - Step 24814: {'lr': 0.00026938639742541835, 'samples': 12705280, 'steps': 24814, 'loss/train': 1.9830337762832642} +02/25/2022 09:43:34 - INFO - codeparrot_training - Step 24815: {'lr': 0.0002693700841926983, 'samples': 12705792, 'steps': 24815, 'loss/train': 1.444778561592102} +02/25/2022 09:43:40 - INFO - codeparrot_training - Step 24816: {'lr': 0.00026935377087700297, 'samples': 12706304, 'steps': 24816, 'loss/train': 2.545281171798706} +02/25/2022 09:43:43 - INFO - codeparrot_training - Step 24817: {'lr': 0.00026933745747840214, 'samples': 12706816, 'steps': 24817, 'loss/train': 2.891022205352783} +02/25/2022 09:43:49 - INFO - codeparrot_training - Step 24818: {'lr': 0.0002693211439969659, 'samples': 12707328, 'steps': 24818, 'loss/train': 1.9779574871063232} +02/25/2022 09:43:52 - INFO - codeparrot_training - Step 24819: {'lr': 0.00026930483043276394, 'samples': 12707840, 'steps': 24819, 'loss/train': 1.5759530067443848} +02/25/2022 09:43:58 - INFO - codeparrot_training - Step 24820: {'lr': 0.0002692885167858663, 'samples': 12708352, 'steps': 24820, 'loss/train': 2.7322146892547607} +02/25/2022 09:44:01 - INFO - codeparrot_training - Step 24821: {'lr': 0.0002692722030563427, 'samples': 12708864, 'steps': 24821, 'loss/train': 2.908230781555176} +02/25/2022 09:44:07 - INFO - codeparrot_training - Step 24822: {'lr': 0.00026925588924426317, 'samples': 12709376, 'steps': 24822, 'loss/train': 1.0032862424850464} +02/25/2022 09:44:10 - INFO - codeparrot_training - Step 24823: {'lr': 0.0002692395753496974, 'samples': 12709888, 'steps': 24823, 'loss/train': 1.842634916305542} +02/25/2022 09:44:16 - INFO - codeparrot_training - Step 24824: {'lr': 0.00026922326137271554, 'samples': 12710400, 'steps': 24824, 'loss/train': 2.001751184463501} +02/25/2022 09:44:20 - INFO - codeparrot_training - Step 24825: {'lr': 0.0002692069473133872, 'samples': 12710912, 'steps': 24825, 'loss/train': 1.853408932685852} +02/25/2022 09:44:26 - INFO - codeparrot_training - Step 24826: {'lr': 0.0002691906331717825, 'samples': 12711424, 'steps': 24826, 'loss/train': 0.14718309044837952} +02/25/2022 09:44:29 - INFO - codeparrot_training - Step 24827: {'lr': 0.0002691743189479712, 'samples': 12711936, 'steps': 24827, 'loss/train': 2.2776758670806885} +02/25/2022 09:44:35 - INFO - codeparrot_training - Step 24828: {'lr': 0.0002691580046420231, 'samples': 12712448, 'steps': 24828, 'loss/train': 1.8902251720428467} +02/25/2022 09:44:38 - INFO - codeparrot_training - Step 24829: {'lr': 0.00026914169025400833, 'samples': 12712960, 'steps': 24829, 'loss/train': 1.1861977577209473} +02/25/2022 09:44:46 - INFO - codeparrot_training - Step 24830: {'lr': 0.0002691253757839965, 'samples': 12713472, 'steps': 24830, 'loss/train': 1.8247123956680298} +02/25/2022 09:44:49 - INFO - codeparrot_training - Step 24831: {'lr': 0.0002691090612320578, 'samples': 12713984, 'steps': 24831, 'loss/train': 1.8515534400939941} +02/25/2022 09:44:55 - INFO - codeparrot_training - Step 24832: {'lr': 0.0002690927465982619, 'samples': 12714496, 'steps': 24832, 'loss/train': 2.1409778594970703} +02/25/2022 09:44:58 - INFO - codeparrot_training - Step 24833: {'lr': 0.0002690764318826787, 'samples': 12715008, 'steps': 24833, 'loss/train': 0.8388583064079285} +02/25/2022 09:45:04 - INFO - codeparrot_training - Step 24834: {'lr': 0.00026906011708537807, 'samples': 12715520, 'steps': 24834, 'loss/train': 1.1354522705078125} +02/25/2022 09:45:08 - INFO - codeparrot_training - Step 24835: {'lr': 0.0002690438022064301, 'samples': 12716032, 'steps': 24835, 'loss/train': 1.9713672399520874} +02/25/2022 09:45:13 - INFO - codeparrot_training - Step 24836: {'lr': 0.00026902748724590435, 'samples': 12716544, 'steps': 24836, 'loss/train': 2.2519001960754395} +02/25/2022 09:45:17 - INFO - codeparrot_training - Step 24837: {'lr': 0.00026901117220387105, 'samples': 12717056, 'steps': 24837, 'loss/train': 1.6257095336914062} +02/25/2022 09:45:22 - INFO - codeparrot_training - Step 24838: {'lr': 0.0002689948570803998, 'samples': 12717568, 'steps': 24838, 'loss/train': 1.3859540224075317} +02/25/2022 09:45:26 - INFO - codeparrot_training - Step 24839: {'lr': 0.00026897854187556066, 'samples': 12718080, 'steps': 24839, 'loss/train': 1.5837857723236084} +02/25/2022 09:45:31 - INFO - codeparrot_training - Step 24840: {'lr': 0.00026896222658942347, 'samples': 12718592, 'steps': 24840, 'loss/train': 2.2093896865844727} +02/25/2022 09:45:35 - INFO - codeparrot_training - Step 24841: {'lr': 0.00026894591122205813, 'samples': 12719104, 'steps': 24841, 'loss/train': 1.0886955261230469} +02/25/2022 09:45:42 - INFO - codeparrot_training - Step 24842: {'lr': 0.0002689295957735346, 'samples': 12719616, 'steps': 24842, 'loss/train': 1.7096246480941772} +02/25/2022 09:45:46 - INFO - codeparrot_training - Step 24843: {'lr': 0.0002689132802439226, 'samples': 12720128, 'steps': 24843, 'loss/train': 1.952592134475708} +02/25/2022 09:45:51 - INFO - codeparrot_training - Step 24844: {'lr': 0.0002688969646332921, 'samples': 12720640, 'steps': 24844, 'loss/train': 0.9055593609809875} +02/25/2022 09:45:55 - INFO - codeparrot_training - Step 24845: {'lr': 0.00026888064894171307, 'samples': 12721152, 'steps': 24845, 'loss/train': 1.931763768196106} +02/25/2022 09:46:00 - INFO - codeparrot_training - Step 24846: {'lr': 0.0002688643331692553, 'samples': 12721664, 'steps': 24846, 'loss/train': 2.015608549118042} +02/25/2022 09:46:04 - INFO - codeparrot_training - Step 24847: {'lr': 0.00026884801731598873, 'samples': 12722176, 'steps': 24847, 'loss/train': 2.2528727054595947} +02/25/2022 09:46:09 - INFO - codeparrot_training - Step 24848: {'lr': 0.00026883170138198323, 'samples': 12722688, 'steps': 24848, 'loss/train': 2.5241613388061523} +02/25/2022 09:46:13 - INFO - codeparrot_training - Step 24849: {'lr': 0.0002688153853673087, 'samples': 12723200, 'steps': 24849, 'loss/train': 1.9175479412078857} +02/25/2022 09:46:18 - INFO - codeparrot_training - Step 24850: {'lr': 0.000268799069272035, 'samples': 12723712, 'steps': 24850, 'loss/train': 1.4548085927963257} +02/25/2022 09:46:22 - INFO - codeparrot_training - Step 24851: {'lr': 0.00026878275309623215, 'samples': 12724224, 'steps': 24851, 'loss/train': 2.0615615844726562} +02/25/2022 09:46:29 - INFO - codeparrot_training - Step 24852: {'lr': 0.00026876643683996983, 'samples': 12724736, 'steps': 24852, 'loss/train': 1.844257116317749} +02/25/2022 09:46:32 - INFO - codeparrot_training - Step 24853: {'lr': 0.0002687501205033181, 'samples': 12725248, 'steps': 24853, 'loss/train': 1.035587191581726} +02/25/2022 09:46:38 - INFO - codeparrot_training - Step 24854: {'lr': 0.00026873380408634677, 'samples': 12725760, 'steps': 24854, 'loss/train': 1.9487367868423462} +02/25/2022 09:46:41 - INFO - codeparrot_training - Step 24855: {'lr': 0.0002687174875891259, 'samples': 12726272, 'steps': 24855, 'loss/train': 1.5183360576629639} +02/25/2022 09:46:47 - INFO - codeparrot_training - Step 24856: {'lr': 0.00026870117101172517, 'samples': 12726784, 'steps': 24856, 'loss/train': 2.2734506130218506} +02/25/2022 09:46:51 - INFO - codeparrot_training - Step 24857: {'lr': 0.0002686848543542146, 'samples': 12727296, 'steps': 24857, 'loss/train': 1.6893467903137207} +02/25/2022 09:46:56 - INFO - codeparrot_training - Step 24858: {'lr': 0.0002686685376166639, 'samples': 12727808, 'steps': 24858, 'loss/train': 1.6245406866073608} +02/25/2022 09:47:00 - INFO - codeparrot_training - Step 24859: {'lr': 0.0002686522207991433, 'samples': 12728320, 'steps': 24859, 'loss/train': 1.5985651016235352} +02/25/2022 09:47:05 - INFO - codeparrot_training - Step 24860: {'lr': 0.00026863590390172244, 'samples': 12728832, 'steps': 24860, 'loss/train': 1.3258453607559204} +02/25/2022 09:47:08 - INFO - codeparrot_training - Step 24861: {'lr': 0.0002686195869244713, 'samples': 12729344, 'steps': 24861, 'loss/train': 1.8844633102416992} +02/25/2022 09:47:16 - INFO - codeparrot_training - Step 24862: {'lr': 0.0002686032698674597, 'samples': 12729856, 'steps': 24862, 'loss/train': 2.275313138961792} +02/25/2022 09:47:19 - INFO - codeparrot_training - Step 24863: {'lr': 0.00026858695273075764, 'samples': 12730368, 'steps': 24863, 'loss/train': 2.365968942642212} +02/25/2022 09:47:25 - INFO - codeparrot_training - Step 24864: {'lr': 0.000268570635514435, 'samples': 12730880, 'steps': 24864, 'loss/train': 2.141315221786499} +02/25/2022 09:47:28 - INFO - codeparrot_training - Step 24865: {'lr': 0.0002685543182185616, 'samples': 12731392, 'steps': 24865, 'loss/train': 1.274315595626831} +02/25/2022 09:47:34 - INFO - codeparrot_training - Step 24866: {'lr': 0.00026853800084320747, 'samples': 12731904, 'steps': 24866, 'loss/train': 1.9560257196426392} +02/25/2022 09:47:37 - INFO - codeparrot_training - Step 24867: {'lr': 0.0002685216833884423, 'samples': 12732416, 'steps': 24867, 'loss/train': 1.280389666557312} +02/25/2022 09:47:43 - INFO - codeparrot_training - Step 24868: {'lr': 0.0002685053658543363, 'samples': 12732928, 'steps': 24868, 'loss/train': 1.3529318571090698} +02/25/2022 09:47:47 - INFO - codeparrot_training - Step 24869: {'lr': 0.00026848904824095904, 'samples': 12733440, 'steps': 24869, 'loss/train': 2.7981669902801514} +02/25/2022 09:47:52 - INFO - codeparrot_training - Step 24870: {'lr': 0.00026847273054838065, 'samples': 12733952, 'steps': 24870, 'loss/train': 2.1037886142730713} +02/25/2022 09:47:56 - INFO - codeparrot_training - Step 24871: {'lr': 0.0002684564127766709, 'samples': 12734464, 'steps': 24871, 'loss/train': 1.8249258995056152} +02/25/2022 09:48:01 - INFO - codeparrot_training - Step 24872: {'lr': 0.00026844009492589977, 'samples': 12734976, 'steps': 24872, 'loss/train': 1.8887410163879395} +02/25/2022 09:48:05 - INFO - codeparrot_training - Step 24873: {'lr': 0.00026842377699613714, 'samples': 12735488, 'steps': 24873, 'loss/train': 2.372438430786133} +02/25/2022 09:48:08 - INFO - codeparrot_training - Step 24874: {'lr': 0.0002684074589874529, 'samples': 12736000, 'steps': 24874, 'loss/train': 1.2539584636688232} +02/25/2022 09:48:16 - INFO - codeparrot_training - Step 24875: {'lr': 0.0002683911408999169, 'samples': 12736512, 'steps': 24875, 'loss/train': 2.336827278137207} +02/25/2022 09:48:19 - INFO - codeparrot_training - Step 24876: {'lr': 0.00026837482273359907, 'samples': 12737024, 'steps': 24876, 'loss/train': 2.133115530014038} +02/25/2022 09:48:25 - INFO - codeparrot_training - Step 24877: {'lr': 0.0002683585044885694, 'samples': 12737536, 'steps': 24877, 'loss/train': 2.400137186050415} +02/25/2022 09:48:28 - INFO - codeparrot_training - Step 24878: {'lr': 0.0002683421861648977, 'samples': 12738048, 'steps': 24878, 'loss/train': 1.3767703771591187} +02/25/2022 09:48:34 - INFO - codeparrot_training - Step 24879: {'lr': 0.0002683258677626539, 'samples': 12738560, 'steps': 24879, 'loss/train': 2.032876491546631} +02/25/2022 09:48:37 - INFO - codeparrot_training - Step 24880: {'lr': 0.00026830954928190793, 'samples': 12739072, 'steps': 24880, 'loss/train': 1.6309301853179932} +02/25/2022 09:48:43 - INFO - codeparrot_training - Step 24881: {'lr': 0.0002682932307227297, 'samples': 12739584, 'steps': 24881, 'loss/train': 0.4427894651889801} +02/25/2022 09:48:48 - INFO - codeparrot_training - Step 24882: {'lr': 0.00026827691208518897, 'samples': 12740096, 'steps': 24882, 'loss/train': 1.8768963813781738} +02/25/2022 09:48:52 - INFO - codeparrot_training - Step 24883: {'lr': 0.0002682605933693558, 'samples': 12740608, 'steps': 24883, 'loss/train': 1.7223317623138428} +02/25/2022 09:48:55 - INFO - codeparrot_training - Step 24884: {'lr': 0.00026824427457530005, 'samples': 12741120, 'steps': 24884, 'loss/train': 1.645879864692688} +02/25/2022 09:49:01 - INFO - codeparrot_training - Step 24885: {'lr': 0.00026822795570309165, 'samples': 12741632, 'steps': 24885, 'loss/train': 1.984671711921692} +02/25/2022 09:49:06 - INFO - codeparrot_training - Step 24886: {'lr': 0.0002682116367528004, 'samples': 12742144, 'steps': 24886, 'loss/train': 1.413815975189209} +02/25/2022 09:49:10 - INFO - codeparrot_training - Step 24887: {'lr': 0.0002681953177244964, 'samples': 12742656, 'steps': 24887, 'loss/train': 0.42039358615875244} +02/25/2022 09:49:13 - INFO - codeparrot_training - Step 24888: {'lr': 0.00026817899861824934, 'samples': 12743168, 'steps': 24888, 'loss/train': 1.6924446821212769} +02/25/2022 09:49:20 - INFO - codeparrot_training - Step 24889: {'lr': 0.00026816267943412925, 'samples': 12743680, 'steps': 24889, 'loss/train': 1.9290411472320557} +02/25/2022 09:49:26 - INFO - codeparrot_training - Step 24890: {'lr': 0.000268146360172206, 'samples': 12744192, 'steps': 24890, 'loss/train': 2.25826096534729} +02/25/2022 09:49:29 - INFO - codeparrot_training - Step 24891: {'lr': 0.0002681300408325495, 'samples': 12744704, 'steps': 24891, 'loss/train': 1.389148473739624} +02/25/2022 09:49:35 - INFO - codeparrot_training - Step 24892: {'lr': 0.00026811372141522964, 'samples': 12745216, 'steps': 24892, 'loss/train': 1.5085041522979736} +02/25/2022 09:49:38 - INFO - codeparrot_training - Step 24893: {'lr': 0.00026809740192031644, 'samples': 12745728, 'steps': 24893, 'loss/train': 0.5152866840362549} +02/25/2022 09:49:42 - INFO - codeparrot_training - Step 24894: {'lr': 0.0002680810823478797, 'samples': 12746240, 'steps': 24894, 'loss/train': 1.534449815750122} +02/25/2022 09:49:47 - INFO - codeparrot_training - Step 24895: {'lr': 0.0002680647626979893, 'samples': 12746752, 'steps': 24895, 'loss/train': 2.183363199234009} +02/25/2022 09:49:53 - INFO - codeparrot_training - Step 24896: {'lr': 0.00026804844297071524, 'samples': 12747264, 'steps': 24896, 'loss/train': 2.2756173610687256} +02/25/2022 09:49:56 - INFO - codeparrot_training - Step 24897: {'lr': 0.0002680321231661273, 'samples': 12747776, 'steps': 24897, 'loss/train': 1.3891857862472534} +02/25/2022 09:50:04 - INFO - codeparrot_training - Step 24898: {'lr': 0.00026801580328429555, 'samples': 12748288, 'steps': 24898, 'loss/train': 0.12074144184589386} +02/25/2022 09:50:07 - INFO - codeparrot_training - Step 24899: {'lr': 0.0002679994833252897, 'samples': 12748800, 'steps': 24899, 'loss/train': 1.473228096961975} +02/25/2022 09:50:13 - INFO - codeparrot_training - Step 24900: {'lr': 0.0002679831632891799, 'samples': 12749312, 'steps': 24900, 'loss/train': 2.710495948791504} +02/25/2022 09:50:16 - INFO - codeparrot_training - Step 24901: {'lr': 0.00026796684317603584, 'samples': 12749824, 'steps': 24901, 'loss/train': 1.7328565120697021} +02/25/2022 09:50:22 - INFO - codeparrot_training - Step 24902: {'lr': 0.0002679505229859276, 'samples': 12750336, 'steps': 24902, 'loss/train': 1.5025874376296997} +02/25/2022 09:50:25 - INFO - codeparrot_training - Step 24903: {'lr': 0.00026793420271892503, 'samples': 12750848, 'steps': 24903, 'loss/train': 1.9491593837738037} +02/25/2022 09:50:31 - INFO - codeparrot_training - Step 24904: {'lr': 0.000267917882375098, 'samples': 12751360, 'steps': 24904, 'loss/train': 3.3263776302337646} +02/25/2022 09:50:34 - INFO - codeparrot_training - Step 24905: {'lr': 0.00026790156195451647, 'samples': 12751872, 'steps': 24905, 'loss/train': 2.0799412727355957} +02/25/2022 09:50:40 - INFO - codeparrot_training - Step 24906: {'lr': 0.0002678852414572503, 'samples': 12752384, 'steps': 24906, 'loss/train': 0.7658665776252747} +02/25/2022 09:50:43 - INFO - codeparrot_training - Step 24907: {'lr': 0.0002678689208833695, 'samples': 12752896, 'steps': 24907, 'loss/train': 3.0476977825164795} +02/25/2022 09:50:50 - INFO - codeparrot_training - Step 24908: {'lr': 0.0002678526002329438, 'samples': 12753408, 'steps': 24908, 'loss/train': 1.5017503499984741} +02/25/2022 09:50:54 - INFO - codeparrot_training - Step 24909: {'lr': 0.00026783627950604334, 'samples': 12753920, 'steps': 24909, 'loss/train': 2.349515199661255} +02/25/2022 09:50:59 - INFO - codeparrot_training - Step 24910: {'lr': 0.0002678199587027379, 'samples': 12754432, 'steps': 24910, 'loss/train': 1.9906333684921265} +02/25/2022 09:51:03 - INFO - codeparrot_training - Step 24911: {'lr': 0.0002678036378230974, 'samples': 12754944, 'steps': 24911, 'loss/train': 0.7913855910301208} +02/25/2022 09:51:08 - INFO - codeparrot_training - Step 24912: {'lr': 0.0002677873168671918, 'samples': 12755456, 'steps': 24912, 'loss/train': 1.7299515008926392} +02/25/2022 09:51:12 - INFO - codeparrot_training - Step 24913: {'lr': 0.00026777099583509084, 'samples': 12755968, 'steps': 24913, 'loss/train': 3.56026554107666} +02/25/2022 09:51:17 - INFO - codeparrot_training - Step 24914: {'lr': 0.00026775467472686475, 'samples': 12756480, 'steps': 24914, 'loss/train': 1.643774151802063} +02/25/2022 09:51:20 - INFO - codeparrot_training - Step 24915: {'lr': 0.0002677383535425832, 'samples': 12756992, 'steps': 24915, 'loss/train': 1.1217113733291626} +02/25/2022 09:51:26 - INFO - codeparrot_training - Step 24916: {'lr': 0.00026772203228231617, 'samples': 12757504, 'steps': 24916, 'loss/train': 1.094772458076477} +02/25/2022 09:51:29 - INFO - codeparrot_training - Step 24917: {'lr': 0.0002677057109461336, 'samples': 12758016, 'steps': 24917, 'loss/train': 2.081655740737915} +02/25/2022 09:51:37 - INFO - codeparrot_training - Step 24918: {'lr': 0.0002676893895341054, 'samples': 12758528, 'steps': 24918, 'loss/train': 2.005033254623413} +02/25/2022 09:51:40 - INFO - codeparrot_training - Step 24919: {'lr': 0.0002676730680463014, 'samples': 12759040, 'steps': 24919, 'loss/train': 1.8220857381820679} +02/25/2022 09:51:46 - INFO - codeparrot_training - Step 24920: {'lr': 0.0002676567464827917, 'samples': 12759552, 'steps': 24920, 'loss/train': 1.720070719718933} +02/25/2022 09:51:49 - INFO - codeparrot_training - Step 24921: {'lr': 0.00026764042484364603, 'samples': 12760064, 'steps': 24921, 'loss/train': 2.696366310119629} +02/25/2022 09:51:55 - INFO - codeparrot_training - Step 24922: {'lr': 0.0002676241031289344, 'samples': 12760576, 'steps': 24922, 'loss/train': 1.7974830865859985} +02/25/2022 09:51:58 - INFO - codeparrot_training - Step 24923: {'lr': 0.0002676077813387267, 'samples': 12761088, 'steps': 24923, 'loss/train': 0.584373950958252} +02/25/2022 09:52:04 - INFO - codeparrot_training - Step 24924: {'lr': 0.00026759145947309284, 'samples': 12761600, 'steps': 24924, 'loss/train': 1.8534595966339111} +02/25/2022 09:52:07 - INFO - codeparrot_training - Step 24925: {'lr': 0.0002675751375321028, 'samples': 12762112, 'steps': 24925, 'loss/train': 2.036557674407959} +02/25/2022 09:52:13 - INFO - codeparrot_training - Step 24926: {'lr': 0.0002675588155158264, 'samples': 12762624, 'steps': 24926, 'loss/train': 2.2146878242492676} +02/25/2022 09:52:16 - INFO - codeparrot_training - Step 24927: {'lr': 0.0002675424934243337, 'samples': 12763136, 'steps': 24927, 'loss/train': 2.439091444015503} +02/25/2022 09:52:22 - INFO - codeparrot_training - Step 24928: {'lr': 0.0002675261712576944, 'samples': 12763648, 'steps': 24928, 'loss/train': 1.9425225257873535} +02/25/2022 09:52:25 - INFO - codeparrot_training - Step 24929: {'lr': 0.00026750984901597865, 'samples': 12764160, 'steps': 24929, 'loss/train': 1.7834248542785645} +02/25/2022 09:52:31 - INFO - codeparrot_training - Step 24930: {'lr': 0.0002674935266992562, 'samples': 12764672, 'steps': 24930, 'loss/train': 1.4715917110443115} +02/25/2022 09:52:34 - INFO - codeparrot_training - Step 24931: {'lr': 0.0002674772043075971, 'samples': 12765184, 'steps': 24931, 'loss/train': 2.0394773483276367} +02/25/2022 09:52:40 - INFO - codeparrot_training - Step 24932: {'lr': 0.00026746088184107116, 'samples': 12765696, 'steps': 24932, 'loss/train': 1.8661472797393799} +02/25/2022 09:52:43 - INFO - codeparrot_training - Step 24933: {'lr': 0.00026744455929974837, 'samples': 12766208, 'steps': 24933, 'loss/train': 1.2441176176071167} +02/25/2022 09:52:51 - INFO - codeparrot_training - Step 24934: {'lr': 0.0002674282366836986, 'samples': 12766720, 'steps': 24934, 'loss/train': 1.8973724842071533} +02/25/2022 09:52:54 - INFO - codeparrot_training - Step 24935: {'lr': 0.00026741191399299186, 'samples': 12767232, 'steps': 24935, 'loss/train': 2.035752773284912} +02/25/2022 09:53:00 - INFO - codeparrot_training - Step 24936: {'lr': 0.00026739559122769795, 'samples': 12767744, 'steps': 24936, 'loss/train': 2.289137601852417} +02/25/2022 09:53:03 - INFO - codeparrot_training - Step 24937: {'lr': 0.0002673792683878869, 'samples': 12768256, 'steps': 24937, 'loss/train': 1.8673814535140991} +02/25/2022 09:53:09 - INFO - codeparrot_training - Step 24938: {'lr': 0.0002673629454736285, 'samples': 12768768, 'steps': 24938, 'loss/train': 1.6944831609725952} +02/25/2022 09:53:12 - INFO - codeparrot_training - Step 24939: {'lr': 0.0002673466224849928, 'samples': 12769280, 'steps': 24939, 'loss/train': 1.2597445249557495} +02/25/2022 09:53:18 - INFO - codeparrot_training - Step 24940: {'lr': 0.00026733029942204974, 'samples': 12769792, 'steps': 24940, 'loss/train': 1.0115681886672974} +02/25/2022 09:53:21 - INFO - codeparrot_training - Step 24941: {'lr': 0.00026731397628486906, 'samples': 12770304, 'steps': 24941, 'loss/train': 1.9106547832489014} +02/25/2022 09:53:27 - INFO - codeparrot_training - Step 24942: {'lr': 0.00026729765307352093, 'samples': 12770816, 'steps': 24942, 'loss/train': 1.6540048122406006} +02/25/2022 09:53:30 - INFO - codeparrot_training - Step 24943: {'lr': 0.00026728132978807507, 'samples': 12771328, 'steps': 24943, 'loss/train': 2.1333460807800293} +02/25/2022 09:53:38 - INFO - codeparrot_training - Step 24944: {'lr': 0.0002672650064286015, 'samples': 12771840, 'steps': 24944, 'loss/train': 1.900303602218628} +02/25/2022 09:53:41 - INFO - codeparrot_training - Step 24945: {'lr': 0.00026724868299517006, 'samples': 12772352, 'steps': 24945, 'loss/train': 0.08900638669729233} +02/25/2022 09:53:47 - INFO - codeparrot_training - Step 24946: {'lr': 0.00026723235948785084, 'samples': 12772864, 'steps': 24946, 'loss/train': 1.2578349113464355} +02/25/2022 09:53:50 - INFO - codeparrot_training - Step 24947: {'lr': 0.0002672160359067136, 'samples': 12773376, 'steps': 24947, 'loss/train': 1.4240481853485107} +02/25/2022 09:53:56 - INFO - codeparrot_training - Step 24948: {'lr': 0.00026719971225182835, 'samples': 12773888, 'steps': 24948, 'loss/train': 1.9180265665054321} +02/25/2022 09:53:59 - INFO - codeparrot_training - Step 24949: {'lr': 0.00026718338852326504, 'samples': 12774400, 'steps': 24949, 'loss/train': 1.72811758518219} +02/25/2022 09:54:05 - INFO - codeparrot_training - Step 24950: {'lr': 0.0002671670647210934, 'samples': 12774912, 'steps': 24950, 'loss/train': 2.247767210006714} +02/25/2022 09:54:08 - INFO - codeparrot_training - Step 24951: {'lr': 0.0002671507408453837, 'samples': 12775424, 'steps': 24951, 'loss/train': 2.412048578262329} +02/25/2022 09:54:14 - INFO - codeparrot_training - Step 24952: {'lr': 0.0002671344168962055, 'samples': 12775936, 'steps': 24952, 'loss/train': 1.3504868745803833} +02/25/2022 09:54:17 - INFO - codeparrot_training - Step 24953: {'lr': 0.00026711809287362903, 'samples': 12776448, 'steps': 24953, 'loss/train': 1.8935706615447998} +02/25/2022 09:54:25 - INFO - codeparrot_training - Step 24954: {'lr': 0.000267101768777724, 'samples': 12776960, 'steps': 24954, 'loss/train': 1.974847674369812} +02/25/2022 09:54:28 - INFO - codeparrot_training - Step 24955: {'lr': 0.0002670854446085605, 'samples': 12777472, 'steps': 24955, 'loss/train': 3.0134706497192383} +02/25/2022 09:54:34 - INFO - codeparrot_training - Step 24956: {'lr': 0.00026706912036620836, 'samples': 12777984, 'steps': 24956, 'loss/train': 1.155659556388855} +02/25/2022 09:54:37 - INFO - codeparrot_training - Step 24957: {'lr': 0.0002670527960507375, 'samples': 12778496, 'steps': 24957, 'loss/train': 1.9154446125030518} +02/25/2022 09:54:43 - INFO - codeparrot_training - Step 24958: {'lr': 0.00026703647166221786, 'samples': 12779008, 'steps': 24958, 'loss/train': 1.2526443004608154} +02/25/2022 09:54:46 - INFO - codeparrot_training - Step 24959: {'lr': 0.0002670201472007194, 'samples': 12779520, 'steps': 24959, 'loss/train': 2.276221513748169} +02/25/2022 09:54:52 - INFO - codeparrot_training - Step 24960: {'lr': 0.00026700382266631206, 'samples': 12780032, 'steps': 24960, 'loss/train': 2.6500132083892822} +02/25/2022 09:54:55 - INFO - codeparrot_training - Step 24961: {'lr': 0.00026698749805906567, 'samples': 12780544, 'steps': 24961, 'loss/train': 2.2446229457855225} +02/25/2022 09:55:01 - INFO - codeparrot_training - Step 24962: {'lr': 0.00026697117337905034, 'samples': 12781056, 'steps': 24962, 'loss/train': 2.034681558609009} +02/25/2022 09:55:04 - INFO - codeparrot_training - Step 24963: {'lr': 0.00026695484862633583, 'samples': 12781568, 'steps': 24963, 'loss/train': 1.2532463073730469} +02/25/2022 09:55:11 - INFO - codeparrot_training - Step 24964: {'lr': 0.00026693852380099215, 'samples': 12782080, 'steps': 24964, 'loss/train': 1.6912041902542114} +02/25/2022 09:55:15 - INFO - codeparrot_training - Step 24965: {'lr': 0.0002669221989030892, 'samples': 12782592, 'steps': 24965, 'loss/train': 1.4864130020141602} +02/25/2022 09:55:20 - INFO - codeparrot_training - Step 24966: {'lr': 0.00026690587393269694, 'samples': 12783104, 'steps': 24966, 'loss/train': 2.3117659091949463} +02/25/2022 09:55:24 - INFO - codeparrot_training - Step 24967: {'lr': 0.0002668895488898853, 'samples': 12783616, 'steps': 24967, 'loss/train': 1.5027096271514893} +02/25/2022 09:55:29 - INFO - codeparrot_training - Step 24968: {'lr': 0.00026687322377472416, 'samples': 12784128, 'steps': 24968, 'loss/train': 1.3746724128723145} +02/25/2022 09:55:33 - INFO - codeparrot_training - Step 24969: {'lr': 0.00026685689858728346, 'samples': 12784640, 'steps': 24969, 'loss/train': 2.1076157093048096} +02/25/2022 09:55:38 - INFO - codeparrot_training - Step 24970: {'lr': 0.0002668405733276332, 'samples': 12785152, 'steps': 24970, 'loss/train': 2.6847290992736816} +02/25/2022 09:55:42 - INFO - codeparrot_training - Step 24971: {'lr': 0.00026682424799584324, 'samples': 12785664, 'steps': 24971, 'loss/train': 1.9541301727294922} +02/25/2022 09:55:48 - INFO - codeparrot_training - Step 24972: {'lr': 0.00026680792259198353, 'samples': 12786176, 'steps': 24972, 'loss/train': 8.260965347290039} +02/25/2022 09:55:51 - INFO - codeparrot_training - Step 24973: {'lr': 0.0002667915971161241, 'samples': 12786688, 'steps': 24973, 'loss/train': 0.8277385830879211} +02/25/2022 09:55:57 - INFO - codeparrot_training - Step 24974: {'lr': 0.00026677527156833473, 'samples': 12787200, 'steps': 24974, 'loss/train': 2.2886950969696045} +02/25/2022 09:56:00 - INFO - codeparrot_training - Step 24975: {'lr': 0.0002667589459486855, 'samples': 12787712, 'steps': 24975, 'loss/train': 2.0280275344848633} +02/25/2022 09:56:06 - INFO - codeparrot_training - Step 24976: {'lr': 0.00026674262025724627, 'samples': 12788224, 'steps': 24976, 'loss/train': 2.3432979583740234} +02/25/2022 09:56:09 - INFO - codeparrot_training - Step 24977: {'lr': 0.00026672629449408684, 'samples': 12788736, 'steps': 24977, 'loss/train': 2.0472216606140137} +02/25/2022 09:56:15 - INFO - codeparrot_training - Step 24978: {'lr': 0.0002667099686592774, 'samples': 12789248, 'steps': 24978, 'loss/train': 2.7924671173095703} +02/25/2022 09:56:18 - INFO - codeparrot_training - Step 24979: {'lr': 0.00026669364275288773, 'samples': 12789760, 'steps': 24979, 'loss/train': 0.6600639224052429} +02/25/2022 09:56:25 - INFO - codeparrot_training - Step 24980: {'lr': 0.0002666773167749878, 'samples': 12790272, 'steps': 24980, 'loss/train': 1.716858148574829} +02/25/2022 09:56:29 - INFO - codeparrot_training - Step 24981: {'lr': 0.00026666099072564746, 'samples': 12790784, 'steps': 24981, 'loss/train': 2.035151958465576} +02/25/2022 09:56:34 - INFO - codeparrot_training - Step 24982: {'lr': 0.00026664466460493686, 'samples': 12791296, 'steps': 24982, 'loss/train': 1.325531005859375} +02/25/2022 09:56:38 - INFO - codeparrot_training - Step 24983: {'lr': 0.0002666283384129257, 'samples': 12791808, 'steps': 24983, 'loss/train': 1.8949499130249023} +02/25/2022 09:56:43 - INFO - codeparrot_training - Step 24984: {'lr': 0.00026661201214968404, 'samples': 12792320, 'steps': 24984, 'loss/train': 1.9902961254119873} +02/25/2022 09:56:47 - INFO - codeparrot_training - Step 24985: {'lr': 0.0002665956858152818, 'samples': 12792832, 'steps': 24985, 'loss/train': 0.4217319190502167} +02/25/2022 09:56:52 - INFO - codeparrot_training - Step 24986: {'lr': 0.00026657935940978896, 'samples': 12793344, 'steps': 24986, 'loss/train': 3.0633857250213623} +02/25/2022 09:56:56 - INFO - codeparrot_training - Step 24987: {'lr': 0.00026656303293327534, 'samples': 12793856, 'steps': 24987, 'loss/train': 3.4580883979797363} +02/25/2022 09:57:01 - INFO - codeparrot_training - Step 24988: {'lr': 0.00026654670638581095, 'samples': 12794368, 'steps': 24988, 'loss/train': 1.8375736474990845} +02/25/2022 09:57:05 - INFO - codeparrot_training - Step 24989: {'lr': 0.00026653037976746575, 'samples': 12794880, 'steps': 24989, 'loss/train': 2.4298338890075684} +02/25/2022 09:57:12 - INFO - codeparrot_training - Step 24990: {'lr': 0.0002665140530783097, 'samples': 12795392, 'steps': 24990, 'loss/train': 1.2055655717849731} +02/25/2022 09:57:15 - INFO - codeparrot_training - Step 24991: {'lr': 0.00026649772631841257, 'samples': 12795904, 'steps': 24991, 'loss/train': 2.2322161197662354} +02/25/2022 09:57:21 - INFO - codeparrot_training - Step 24992: {'lr': 0.0002664813994878445, 'samples': 12796416, 'steps': 24992, 'loss/train': 1.461451768875122} +02/25/2022 09:57:25 - INFO - codeparrot_training - Step 24993: {'lr': 0.0002664650725866753, 'samples': 12796928, 'steps': 24993, 'loss/train': 1.0411195755004883} +02/25/2022 09:57:30 - INFO - codeparrot_training - Step 24994: {'lr': 0.00026644874561497506, 'samples': 12797440, 'steps': 24994, 'loss/train': 2.5259485244750977} +02/25/2022 09:57:34 - INFO - codeparrot_training - Step 24995: {'lr': 0.0002664324185728135, 'samples': 12797952, 'steps': 24995, 'loss/train': 2.2317144870758057} +02/25/2022 09:57:39 - INFO - codeparrot_training - Step 24996: {'lr': 0.0002664160914602607, 'samples': 12798464, 'steps': 24996, 'loss/train': 1.6283648014068604} +02/25/2022 09:57:43 - INFO - codeparrot_training - Step 24997: {'lr': 0.0002663997642773866, 'samples': 12798976, 'steps': 24997, 'loss/train': 1.6553277969360352} +02/25/2022 09:57:48 - INFO - codeparrot_training - Step 24998: {'lr': 0.0002663834370242611, 'samples': 12799488, 'steps': 24998, 'loss/train': 2.7944555282592773} +02/25/2022 09:57:52 - INFO - codeparrot_training - Step 24999: {'lr': 0.00026636710970095426, 'samples': 12800000, 'steps': 24999, 'loss/train': 0.8392263650894165} +02/25/2022 09:57:52 - INFO - codeparrot_training - Evaluating and saving model checkpoint