diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -32494,3 +32494,1009 @@ Use FP16 precision: False 02/25/2022 19:00:59 - INFO - codeparrot_training - Step 31998: {'lr': 0.0001543593766152899, 'samples': 16383488, 'steps': 31998, 'loss/train': 1.7603875398635864} 02/25/2022 19:01:03 - INFO - codeparrot_training - Step 31999: {'lr': 0.00015434425905712934, 'samples': 16384000, 'steps': 31999, 'loss/train': 1.2924124002456665} 02/25/2022 19:01:03 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/25/2022 19:01:20 - WARNING - huggingface_hub.repository - Several commits (32) will be pushed upstream. +02/25/2022 19:01:20 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/25/2022 19:01:55 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 59d2b90..06c1d4e floral-grass-11 -> floral-grass-11 + +02/25/2022 19:02:02 - INFO - codeparrot_training - Step 32000: {'lr': 0.00015432914190872756, 'samples': 16384512, 'steps': 32000, 'loss/train': 0.3606870770454407} +02/25/2022 19:02:05 - INFO - codeparrot_training - Step 32001: {'lr': 0.00015431402517014943, 'samples': 16385024, 'steps': 32001, 'loss/train': 1.5795643329620361} +02/25/2022 19:02:11 - INFO - codeparrot_training - Step 32002: {'lr': 0.0001542989088414596, 'samples': 16385536, 'steps': 32002, 'loss/train': 1.7262461185455322} +02/25/2022 19:02:14 - INFO - codeparrot_training - Step 32003: {'lr': 0.00015428379292272294, 'samples': 16386048, 'steps': 32003, 'loss/train': 0.9702414870262146} +02/25/2022 19:02:21 - INFO - codeparrot_training - Step 32004: {'lr': 0.000154268677414004, 'samples': 16386560, 'steps': 32004, 'loss/train': 3.348331928253174} +02/25/2022 19:02:24 - INFO - codeparrot_training - Step 32005: {'lr': 0.0001542535623153677, 'samples': 16387072, 'steps': 32005, 'loss/train': 0.5520191192626953} +02/25/2022 19:02:30 - INFO - codeparrot_training - Step 32006: {'lr': 0.0001542384476268787, 'samples': 16387584, 'steps': 32006, 'loss/train': 1.9785206317901611} +02/25/2022 19:02:33 - INFO - codeparrot_training - Step 32007: {'lr': 0.00015422333334860189, 'samples': 16388096, 'steps': 32007, 'loss/train': 1.1689119338989258} +02/25/2022 19:02:39 - INFO - codeparrot_training - Step 32008: {'lr': 0.00015420821948060182, 'samples': 16388608, 'steps': 32008, 'loss/train': 0.2037752866744995} +02/25/2022 19:02:42 - INFO - codeparrot_training - Step 32009: {'lr': 0.00015419310602294328, 'samples': 16389120, 'steps': 32009, 'loss/train': 1.7252893447875977} +02/25/2022 19:02:48 - INFO - codeparrot_training - Step 32010: {'lr': 0.0001541779929756911, 'samples': 16389632, 'steps': 32010, 'loss/train': 1.2176986932754517} +02/25/2022 19:02:51 - INFO - codeparrot_training - Step 32011: {'lr': 0.00015416288033891007, 'samples': 16390144, 'steps': 32011, 'loss/train': 1.7501472234725952} +02/25/2022 19:02:57 - INFO - codeparrot_training - Step 32012: {'lr': 0.00015414776811266471, 'samples': 16390656, 'steps': 32012, 'loss/train': 1.0003036260604858} +02/25/2022 19:03:00 - INFO - codeparrot_training - Step 32013: {'lr': 0.0001541326562970199, 'samples': 16391168, 'steps': 32013, 'loss/train': 2.307699680328369} +02/25/2022 19:03:07 - INFO - codeparrot_training - Step 32014: {'lr': 0.0001541175448920403, 'samples': 16391680, 'steps': 32014, 'loss/train': 1.3219987154006958} +02/25/2022 19:03:10 - INFO - codeparrot_training - Step 32015: {'lr': 0.00015410243389779078, 'samples': 16392192, 'steps': 32015, 'loss/train': 0.480421781539917} +02/25/2022 19:03:16 - INFO - codeparrot_training - Step 32016: {'lr': 0.00015408732331433596, 'samples': 16392704, 'steps': 32016, 'loss/train': 2.009598731994629} +02/25/2022 19:03:19 - INFO - codeparrot_training - Step 32017: {'lr': 0.00015407221314174056, 'samples': 16393216, 'steps': 32017, 'loss/train': 2.344987392425537} +02/25/2022 19:03:25 - INFO - codeparrot_training - Step 32018: {'lr': 0.00015405710338006935, 'samples': 16393728, 'steps': 32018, 'loss/train': 1.5353116989135742} +02/25/2022 19:03:28 - INFO - codeparrot_training - Step 32019: {'lr': 0.00015404199402938707, 'samples': 16394240, 'steps': 32019, 'loss/train': 1.5851848125457764} +02/25/2022 19:03:34 - INFO - codeparrot_training - Step 32020: {'lr': 0.0001540268850897584, 'samples': 16394752, 'steps': 32020, 'loss/train': 2.090320348739624} +02/25/2022 19:03:37 - INFO - codeparrot_training - Step 32021: {'lr': 0.00015401177656124804, 'samples': 16395264, 'steps': 32021, 'loss/train': 1.3740935325622559} +02/25/2022 19:03:43 - INFO - codeparrot_training - Step 32022: {'lr': 0.00015399666844392081, 'samples': 16395776, 'steps': 32022, 'loss/train': 2.2387239933013916} +02/25/2022 19:03:46 - INFO - codeparrot_training - Step 32023: {'lr': 0.00015398156073784133, 'samples': 16396288, 'steps': 32023, 'loss/train': 1.4275959730148315} +02/25/2022 19:03:53 - INFO - codeparrot_training - Step 32024: {'lr': 0.00015396645344307438, 'samples': 16396800, 'steps': 32024, 'loss/train': 1.3657249212265015} +02/25/2022 19:03:56 - INFO - codeparrot_training - Step 32025: {'lr': 0.0001539513465596846, 'samples': 16397312, 'steps': 32025, 'loss/train': 1.8009713888168335} +02/25/2022 19:04:01 - INFO - codeparrot_training - Step 32026: {'lr': 0.00015393624008773685, 'samples': 16397824, 'steps': 32026, 'loss/train': 1.7081345319747925} +02/25/2022 19:04:05 - INFO - codeparrot_training - Step 32027: {'lr': 0.00015392113402729567, 'samples': 16398336, 'steps': 32027, 'loss/train': 1.5810519456863403} +02/25/2022 19:04:11 - INFO - codeparrot_training - Step 32028: {'lr': 0.00015390602837842582, 'samples': 16398848, 'steps': 32028, 'loss/train': 1.6448781490325928} +02/25/2022 19:04:14 - INFO - codeparrot_training - Step 32029: {'lr': 0.00015389092314119213, 'samples': 16399360, 'steps': 32029, 'loss/train': 1.7197331190109253} +02/25/2022 19:04:20 - INFO - codeparrot_training - Step 32030: {'lr': 0.00015387581831565912, 'samples': 16399872, 'steps': 32030, 'loss/train': 1.612053632736206} +02/25/2022 19:04:23 - INFO - codeparrot_training - Step 32031: {'lr': 0.0001538607139018916, 'samples': 16400384, 'steps': 32031, 'loss/train': 1.8617560863494873} +02/25/2022 19:04:29 - INFO - codeparrot_training - Step 32032: {'lr': 0.00015384560989995422, 'samples': 16400896, 'steps': 32032, 'loss/train': 0.7518028616905212} +02/25/2022 19:04:32 - INFO - codeparrot_training - Step 32033: {'lr': 0.00015383050630991187, 'samples': 16401408, 'steps': 32033, 'loss/train': 0.9034668207168579} +02/25/2022 19:04:38 - INFO - codeparrot_training - Step 32034: {'lr': 0.00015381540313182893, 'samples': 16401920, 'steps': 32034, 'loss/train': 2.0993776321411133} +02/25/2022 19:04:42 - INFO - codeparrot_training - Step 32035: {'lr': 0.00015380030036577029, 'samples': 16402432, 'steps': 32035, 'loss/train': 2.0694406032562256} +02/25/2022 19:04:47 - INFO - codeparrot_training - Step 32036: {'lr': 0.0001537851980118006, 'samples': 16402944, 'steps': 32036, 'loss/train': 0.4352928400039673} +02/25/2022 19:04:53 - INFO - codeparrot_training - Step 32037: {'lr': 0.0001537700960699846, 'samples': 16403456, 'steps': 32037, 'loss/train': 1.75059974193573} +02/25/2022 19:04:56 - INFO - codeparrot_training - Step 32038: {'lr': 0.00015375499454038703, 'samples': 16403968, 'steps': 32038, 'loss/train': 1.6791043281555176} +02/25/2022 19:05:02 - INFO - codeparrot_training - Step 32039: {'lr': 0.00015373989342307238, 'samples': 16404480, 'steps': 32039, 'loss/train': 0.8867302536964417} +02/25/2022 19:05:05 - INFO - codeparrot_training - Step 32040: {'lr': 0.0001537247927181055, 'samples': 16404992, 'steps': 32040, 'loss/train': 1.4566854238510132} +02/25/2022 19:05:11 - INFO - codeparrot_training - Step 32041: {'lr': 0.00015370969242555103, 'samples': 16405504, 'steps': 32041, 'loss/train': 1.7527397871017456} +02/25/2022 19:05:14 - INFO - codeparrot_training - Step 32042: {'lr': 0.00015369459254547374, 'samples': 16406016, 'steps': 32042, 'loss/train': 1.806497573852539} +02/25/2022 19:05:19 - INFO - codeparrot_training - Step 32043: {'lr': 0.00015367949307793817, 'samples': 16406528, 'steps': 32043, 'loss/train': 2.0428524017333984} +02/25/2022 19:05:23 - INFO - codeparrot_training - Step 32044: {'lr': 0.00015366439402300902, 'samples': 16407040, 'steps': 32044, 'loss/train': 1.1290767192840576} +02/25/2022 19:05:29 - INFO - codeparrot_training - Step 32045: {'lr': 0.00015364929538075106, 'samples': 16407552, 'steps': 32045, 'loss/train': 0.6243045926094055} +02/25/2022 19:05:32 - INFO - codeparrot_training - Step 32046: {'lr': 0.000153634197151229, 'samples': 16408064, 'steps': 32046, 'loss/train': 2.4429209232330322} +02/25/2022 19:05:38 - INFO - codeparrot_training - Step 32047: {'lr': 0.00015361909933450736, 'samples': 16408576, 'steps': 32047, 'loss/train': 1.0027731657028198} +02/25/2022 19:05:41 - INFO - codeparrot_training - Step 32048: {'lr': 0.00015360400193065087, 'samples': 16409088, 'steps': 32048, 'loss/train': 1.831935167312622} +02/25/2022 19:05:47 - INFO - codeparrot_training - Step 32049: {'lr': 0.00015358890493972425, 'samples': 16409600, 'steps': 32049, 'loss/train': 1.6197808980941772} +02/25/2022 19:05:50 - INFO - codeparrot_training - Step 32050: {'lr': 0.00015357380836179214, 'samples': 16410112, 'steps': 32050, 'loss/train': 1.7030571699142456} +02/25/2022 19:05:56 - INFO - codeparrot_training - Step 32051: {'lr': 0.00015355871219691931, 'samples': 16410624, 'steps': 32051, 'loss/train': 1.4633952379226685} +02/25/2022 19:06:00 - INFO - codeparrot_training - Step 32052: {'lr': 0.00015354361644517024, 'samples': 16411136, 'steps': 32052, 'loss/train': 1.4353697299957275} +02/25/2022 19:06:05 - INFO - codeparrot_training - Step 32053: {'lr': 0.00015352852110660966, 'samples': 16411648, 'steps': 32053, 'loss/train': 2.0888679027557373} +02/25/2022 19:06:09 - INFO - codeparrot_training - Step 32054: {'lr': 0.00015351342618130228, 'samples': 16412160, 'steps': 32054, 'loss/train': 2.393428087234497} +02/25/2022 19:06:14 - INFO - codeparrot_training - Step 32055: {'lr': 0.00015349833166931288, 'samples': 16412672, 'steps': 32055, 'loss/train': 2.9036004543304443} +02/25/2022 19:06:18 - INFO - codeparrot_training - Step 32056: {'lr': 0.00015348323757070583, 'samples': 16413184, 'steps': 32056, 'loss/train': 2.3367366790771484} +02/25/2022 19:06:23 - INFO - codeparrot_training - Step 32057: {'lr': 0.00015346814388554598, 'samples': 16413696, 'steps': 32057, 'loss/train': 1.2342102527618408} +02/25/2022 19:06:27 - INFO - codeparrot_training - Step 32058: {'lr': 0.0001534530506138979, 'samples': 16414208, 'steps': 32058, 'loss/train': 0.8562390804290771} +02/25/2022 19:06:32 - INFO - codeparrot_training - Step 32059: {'lr': 0.00015343795775582647, 'samples': 16414720, 'steps': 32059, 'loss/train': 1.606260895729065} +02/25/2022 19:06:36 - INFO - codeparrot_training - Step 32060: {'lr': 0.00015342286531139603, 'samples': 16415232, 'steps': 32060, 'loss/train': 1.2808316946029663} +02/25/2022 19:06:42 - INFO - codeparrot_training - Step 32061: {'lr': 0.00015340777328067134, 'samples': 16415744, 'steps': 32061, 'loss/train': 2.294900417327881} +02/25/2022 19:06:46 - INFO - codeparrot_training - Step 32062: {'lr': 0.00015339268166371717, 'samples': 16416256, 'steps': 32062, 'loss/train': 0.21338191628456116} +02/25/2022 19:06:51 - INFO - codeparrot_training - Step 32063: {'lr': 0.000153377590460598, 'samples': 16416768, 'steps': 32063, 'loss/train': 1.3271257877349854} +02/25/2022 19:06:54 - INFO - codeparrot_training - Step 32064: {'lr': 0.00015336249967137861, 'samples': 16417280, 'steps': 32064, 'loss/train': 1.2165783643722534} +02/25/2022 19:07:00 - INFO - codeparrot_training - Step 32065: {'lr': 0.00015334740929612357, 'samples': 16417792, 'steps': 32065, 'loss/train': 2.2226016521453857} +02/25/2022 19:07:04 - INFO - codeparrot_training - Step 32066: {'lr': 0.00015333231933489756, 'samples': 16418304, 'steps': 32066, 'loss/train': 0.8278035521507263} +02/25/2022 19:07:09 - INFO - codeparrot_training - Step 32067: {'lr': 0.00015331722978776513, 'samples': 16418816, 'steps': 32067, 'loss/train': 1.4431467056274414} +02/25/2022 19:07:12 - INFO - codeparrot_training - Step 32068: {'lr': 0.00015330214065479103, 'samples': 16419328, 'steps': 32068, 'loss/train': 2.0595474243164062} +02/25/2022 19:07:18 - INFO - codeparrot_training - Step 32069: {'lr': 0.00015328705193603986, 'samples': 16419840, 'steps': 32069, 'loss/train': 1.831860065460205} +02/25/2022 19:07:22 - INFO - codeparrot_training - Step 32070: {'lr': 0.00015327196363157624, 'samples': 16420352, 'steps': 32070, 'loss/train': 1.6886903047561646} +02/25/2022 19:07:28 - INFO - codeparrot_training - Step 32071: {'lr': 0.0001532568757414648, 'samples': 16420864, 'steps': 32071, 'loss/train': 2.956587791442871} +02/25/2022 19:07:31 - INFO - codeparrot_training - Step 32072: {'lr': 0.0001532417882657702, 'samples': 16421376, 'steps': 32072, 'loss/train': 1.4410006999969482} +02/25/2022 19:07:37 - INFO - codeparrot_training - Step 32073: {'lr': 0.00015322670120455705, 'samples': 16421888, 'steps': 32073, 'loss/train': 1.8332265615463257} +02/25/2022 19:07:40 - INFO - codeparrot_training - Step 32074: {'lr': 0.00015321161455789, 'samples': 16422400, 'steps': 32074, 'loss/train': 1.5969840288162231} +02/25/2022 19:07:46 - INFO - codeparrot_training - Step 32075: {'lr': 0.0001531965283258336, 'samples': 16422912, 'steps': 32075, 'loss/train': 1.5386989116668701} +02/25/2022 19:07:50 - INFO - codeparrot_training - Step 32076: {'lr': 0.00015318144250845255, 'samples': 16423424, 'steps': 32076, 'loss/train': 0.8107405304908752} +02/25/2022 19:07:55 - INFO - codeparrot_training - Step 32077: {'lr': 0.00015316635710581157, 'samples': 16423936, 'steps': 32077, 'loss/train': 2.2198052406311035} +02/25/2022 19:07:58 - INFO - codeparrot_training - Step 32078: {'lr': 0.00015315127211797507, 'samples': 16424448, 'steps': 32078, 'loss/train': 1.8126347064971924} +02/25/2022 19:08:04 - INFO - codeparrot_training - Step 32079: {'lr': 0.00015313618754500774, 'samples': 16424960, 'steps': 32079, 'loss/train': 2.4541759490966797} +02/25/2022 19:08:07 - INFO - codeparrot_training - Step 32080: {'lr': 0.00015312110338697426, 'samples': 16425472, 'steps': 32080, 'loss/train': 2.4049551486968994} +02/25/2022 19:08:13 - INFO - codeparrot_training - Step 32081: {'lr': 0.00015310601964393927, 'samples': 16425984, 'steps': 32081, 'loss/train': 1.7723838090896606} +02/25/2022 19:08:17 - INFO - codeparrot_training - Step 32082: {'lr': 0.00015309093631596726, 'samples': 16426496, 'steps': 32082, 'loss/train': 1.9514926671981812} +02/25/2022 19:08:22 - INFO - codeparrot_training - Step 32083: {'lr': 0.0001530758534031229, 'samples': 16427008, 'steps': 32083, 'loss/train': 0.7206658124923706} +02/25/2022 19:08:26 - INFO - codeparrot_training - Step 32084: {'lr': 0.00015306077090547078, 'samples': 16427520, 'steps': 32084, 'loss/train': 1.3232718706130981} +02/25/2022 19:08:32 - INFO - codeparrot_training - Step 32085: {'lr': 0.00015304568882307557, 'samples': 16428032, 'steps': 32085, 'loss/train': 2.8251843452453613} +02/25/2022 19:08:35 - INFO - codeparrot_training - Step 32086: {'lr': 0.00015303060715600192, 'samples': 16428544, 'steps': 32086, 'loss/train': 1.1703813076019287} +02/25/2022 19:08:41 - INFO - codeparrot_training - Step 32087: {'lr': 0.0001530155259043143, 'samples': 16429056, 'steps': 32087, 'loss/train': 2.422834873199463} +02/25/2022 19:08:44 - INFO - codeparrot_training - Step 32088: {'lr': 0.00015300044506807731, 'samples': 16429568, 'steps': 32088, 'loss/train': 0.5209026336669922} +02/25/2022 19:08:50 - INFO - codeparrot_training - Step 32089: {'lr': 0.00015298536464735562, 'samples': 16430080, 'steps': 32089, 'loss/train': 1.880300760269165} +02/25/2022 19:08:53 - INFO - codeparrot_training - Step 32090: {'lr': 0.00015297028464221395, 'samples': 16430592, 'steps': 32090, 'loss/train': 1.9999984502792358} +02/25/2022 19:08:59 - INFO - codeparrot_training - Step 32091: {'lr': 0.00015295520505271664, 'samples': 16431104, 'steps': 32091, 'loss/train': 1.5997023582458496} +02/25/2022 19:09:02 - INFO - codeparrot_training - Step 32092: {'lr': 0.00015294012587892847, 'samples': 16431616, 'steps': 32092, 'loss/train': 2.377359390258789} +02/25/2022 19:09:08 - INFO - codeparrot_training - Step 32093: {'lr': 0.00015292504712091393, 'samples': 16432128, 'steps': 32093, 'loss/train': 2.70540714263916} +02/25/2022 19:09:11 - INFO - codeparrot_training - Step 32094: {'lr': 0.0001529099687787378, 'samples': 16432640, 'steps': 32094, 'loss/train': 2.134967803955078} +02/25/2022 19:09:17 - INFO - codeparrot_training - Step 32095: {'lr': 0.0001528948908524644, 'samples': 16433152, 'steps': 32095, 'loss/train': 3.335530996322632} +02/25/2022 19:09:21 - INFO - codeparrot_training - Step 32096: {'lr': 0.00015287981334215851, 'samples': 16433664, 'steps': 32096, 'loss/train': 1.8269751071929932} +02/25/2022 19:09:26 - INFO - codeparrot_training - Step 32097: {'lr': 0.0001528647362478846, 'samples': 16434176, 'steps': 32097, 'loss/train': 1.2229468822479248} +02/25/2022 19:09:30 - INFO - codeparrot_training - Step 32098: {'lr': 0.00015284965956970732, 'samples': 16434688, 'steps': 32098, 'loss/train': 0.839073121547699} +02/25/2022 19:09:35 - INFO - codeparrot_training - Step 32099: {'lr': 0.00015283458330769137, 'samples': 16435200, 'steps': 32099, 'loss/train': 1.888695240020752} +02/25/2022 19:09:39 - INFO - codeparrot_training - Step 32100: {'lr': 0.0001528195074619011, 'samples': 16435712, 'steps': 32100, 'loss/train': 2.032506227493286} +02/25/2022 19:09:44 - INFO - codeparrot_training - Step 32101: {'lr': 0.00015280443203240124, 'samples': 16436224, 'steps': 32101, 'loss/train': 1.1756312847137451} +02/25/2022 19:09:48 - INFO - codeparrot_training - Step 32102: {'lr': 0.00015278935701925633, 'samples': 16436736, 'steps': 32102, 'loss/train': 2.283989667892456} +02/25/2022 19:09:53 - INFO - codeparrot_training - Step 32103: {'lr': 0.00015277428242253097, 'samples': 16437248, 'steps': 32103, 'loss/train': 1.487247347831726} +02/25/2022 19:09:57 - INFO - codeparrot_training - Step 32104: {'lr': 0.00015275920824228968, 'samples': 16437760, 'steps': 32104, 'loss/train': 0.944449782371521} +02/25/2022 19:10:02 - INFO - codeparrot_training - Step 32105: {'lr': 0.00015274413447859703, 'samples': 16438272, 'steps': 32105, 'loss/train': 1.5960766077041626} +02/25/2022 19:10:06 - INFO - codeparrot_training - Step 32106: {'lr': 0.00015272906113151768, 'samples': 16438784, 'steps': 32106, 'loss/train': 1.8329920768737793} +02/25/2022 19:10:12 - INFO - codeparrot_training - Step 32107: {'lr': 0.00015271398820111614, 'samples': 16439296, 'steps': 32107, 'loss/train': 1.9043269157409668} +02/25/2022 19:10:16 - INFO - codeparrot_training - Step 32108: {'lr': 0.00015269891568745698, 'samples': 16439808, 'steps': 32108, 'loss/train': 1.1749900579452515} +02/25/2022 19:10:21 - INFO - codeparrot_training - Step 32109: {'lr': 0.0001526838435906048, 'samples': 16440320, 'steps': 32109, 'loss/train': 1.8395562171936035} +02/25/2022 19:10:24 - INFO - codeparrot_training - Step 32110: {'lr': 0.000152668771910624, 'samples': 16440832, 'steps': 32110, 'loss/train': 1.3838906288146973} +02/25/2022 19:10:30 - INFO - codeparrot_training - Step 32111: {'lr': 0.0001526537006475794, 'samples': 16441344, 'steps': 32111, 'loss/train': 2.6590213775634766} +02/25/2022 19:10:33 - INFO - codeparrot_training - Step 32112: {'lr': 0.00015263862980153538, 'samples': 16441856, 'steps': 32112, 'loss/train': 0.9427378177642822} +02/25/2022 19:10:39 - INFO - codeparrot_training - Step 32113: {'lr': 0.0001526235593725566, 'samples': 16442368, 'steps': 32113, 'loss/train': 1.7967900037765503} +02/25/2022 19:10:45 - INFO - codeparrot_training - Step 32114: {'lr': 0.00015260848936070754, 'samples': 16442880, 'steps': 32114, 'loss/train': 1.6786161661148071} +02/25/2022 19:10:48 - INFO - codeparrot_training - Step 32115: {'lr': 0.00015259341976605275, 'samples': 16443392, 'steps': 32115, 'loss/train': 2.338987112045288} +02/25/2022 19:10:52 - INFO - codeparrot_training - Step 32116: {'lr': 0.0001525783505886569, 'samples': 16443904, 'steps': 32116, 'loss/train': 1.472335934638977} +02/25/2022 19:10:59 - INFO - codeparrot_training - Step 32117: {'lr': 0.00015256328182858446, 'samples': 16444416, 'steps': 32117, 'loss/train': 1.4607925415039062} +02/25/2022 19:11:02 - INFO - codeparrot_training - Step 32118: {'lr': 0.00015254821348589993, 'samples': 16444928, 'steps': 32118, 'loss/train': 1.111583948135376} +02/25/2022 19:11:08 - INFO - codeparrot_training - Step 32119: {'lr': 0.00015253314556066787, 'samples': 16445440, 'steps': 32119, 'loss/train': 1.794385552406311} +02/25/2022 19:11:11 - INFO - codeparrot_training - Step 32120: {'lr': 0.000152518078052953, 'samples': 16445952, 'steps': 32120, 'loss/train': 1.9882535934448242} +02/25/2022 19:11:17 - INFO - codeparrot_training - Step 32121: {'lr': 0.00015250301096281967, 'samples': 16446464, 'steps': 32121, 'loss/train': 8.773414611816406} +02/25/2022 19:11:20 - INFO - codeparrot_training - Step 32122: {'lr': 0.0001524879442903324, 'samples': 16446976, 'steps': 32122, 'loss/train': 2.1362810134887695} +02/25/2022 19:11:24 - INFO - codeparrot_training - Step 32123: {'lr': 0.00015247287803555584, 'samples': 16447488, 'steps': 32123, 'loss/train': 0.4721432328224182} +02/25/2022 19:11:29 - INFO - codeparrot_training - Step 32124: {'lr': 0.00015245781219855453, 'samples': 16448000, 'steps': 32124, 'loss/train': 0.6360771059989929} +02/25/2022 19:11:35 - INFO - codeparrot_training - Step 32125: {'lr': 0.00015244274677939307, 'samples': 16448512, 'steps': 32125, 'loss/train': 1.5727765560150146} +02/25/2022 19:11:38 - INFO - codeparrot_training - Step 32126: {'lr': 0.00015242768177813577, 'samples': 16449024, 'steps': 32126, 'loss/train': 2.066434621810913} +02/25/2022 19:11:45 - INFO - codeparrot_training - Step 32127: {'lr': 0.00015241261719484733, 'samples': 16449536, 'steps': 32127, 'loss/train': 1.9476970434188843} +02/25/2022 19:11:48 - INFO - codeparrot_training - Step 32128: {'lr': 0.00015239755302959225, 'samples': 16450048, 'steps': 32128, 'loss/train': 1.1157804727554321} +02/25/2022 19:11:53 - INFO - codeparrot_training - Step 32129: {'lr': 0.00015238248928243516, 'samples': 16450560, 'steps': 32129, 'loss/train': 1.0420268774032593} +02/25/2022 19:11:57 - INFO - codeparrot_training - Step 32130: {'lr': 0.0001523674259534404, 'samples': 16451072, 'steps': 32130, 'loss/train': 1.4700485467910767} +02/25/2022 19:12:02 - INFO - codeparrot_training - Step 32131: {'lr': 0.00015235236304267255, 'samples': 16451584, 'steps': 32131, 'loss/train': 1.53701651096344} +02/25/2022 19:12:06 - INFO - codeparrot_training - Step 32132: {'lr': 0.00015233730055019617, 'samples': 16452096, 'steps': 32132, 'loss/train': 1.4403636455535889} +02/25/2022 19:12:12 - INFO - codeparrot_training - Step 32133: {'lr': 0.00015232223847607592, 'samples': 16452608, 'steps': 32133, 'loss/train': 2.6908533573150635} +02/25/2022 19:12:15 - INFO - codeparrot_training - Step 32134: {'lr': 0.0001523071768203761, 'samples': 16453120, 'steps': 32134, 'loss/train': 1.4578200578689575} +02/25/2022 19:12:21 - INFO - codeparrot_training - Step 32135: {'lr': 0.00015229211558316126, 'samples': 16453632, 'steps': 32135, 'loss/train': 1.8918706178665161} +02/25/2022 19:12:24 - INFO - codeparrot_training - Step 32136: {'lr': 0.000152277054764496, 'samples': 16454144, 'steps': 32136, 'loss/train': 1.830186128616333} +02/25/2022 19:12:30 - INFO - codeparrot_training - Step 32137: {'lr': 0.00015226199436444476, 'samples': 16454656, 'steps': 32137, 'loss/train': 1.0561946630477905} +02/25/2022 19:12:33 - INFO - codeparrot_training - Step 32138: {'lr': 0.00015224693438307228, 'samples': 16455168, 'steps': 32138, 'loss/train': 0.8843085765838623} +02/25/2022 19:12:39 - INFO - codeparrot_training - Step 32139: {'lr': 0.00015223187482044274, 'samples': 16455680, 'steps': 32139, 'loss/train': 3.4875128269195557} +02/25/2022 19:12:42 - INFO - codeparrot_training - Step 32140: {'lr': 0.00015221681567662083, 'samples': 16456192, 'steps': 32140, 'loss/train': 0.8410459756851196} +02/25/2022 19:12:48 - INFO - codeparrot_training - Step 32141: {'lr': 0.000152201756951671, 'samples': 16456704, 'steps': 32141, 'loss/train': 1.2555756568908691} +02/25/2022 19:12:51 - INFO - codeparrot_training - Step 32142: {'lr': 0.00015218669864565794, 'samples': 16457216, 'steps': 32142, 'loss/train': 1.162388801574707} +02/25/2022 19:12:58 - INFO - codeparrot_training - Step 32143: {'lr': 0.00015217164075864584, 'samples': 16457728, 'steps': 32143, 'loss/train': 1.9992378950119019} +02/25/2022 19:13:01 - INFO - codeparrot_training - Step 32144: {'lr': 0.0001521565832906994, 'samples': 16458240, 'steps': 32144, 'loss/train': 0.14400219917297363} +02/25/2022 19:13:07 - INFO - codeparrot_training - Step 32145: {'lr': 0.0001521415262418831, 'samples': 16458752, 'steps': 32145, 'loss/train': 1.5225850343704224} +02/25/2022 19:13:10 - INFO - codeparrot_training - Step 32146: {'lr': 0.00015212646961226144, 'samples': 16459264, 'steps': 32146, 'loss/train': 1.5470852851867676} +02/25/2022 19:13:16 - INFO - codeparrot_training - Step 32147: {'lr': 0.00015211141340189894, 'samples': 16459776, 'steps': 32147, 'loss/train': 1.4106649160385132} +02/25/2022 19:13:19 - INFO - codeparrot_training - Step 32148: {'lr': 0.00015209635761085998, 'samples': 16460288, 'steps': 32148, 'loss/train': 2.2167809009552} +02/25/2022 19:13:24 - INFO - codeparrot_training - Step 32149: {'lr': 0.00015208130223920923, 'samples': 16460800, 'steps': 32149, 'loss/train': 1.9636964797973633} +02/25/2022 19:13:28 - INFO - codeparrot_training - Step 32150: {'lr': 0.000152066247287011, 'samples': 16461312, 'steps': 32150, 'loss/train': 1.0256893634796143} +02/25/2022 19:13:33 - INFO - codeparrot_training - Step 32151: {'lr': 0.00015205119275432994, 'samples': 16461824, 'steps': 32151, 'loss/train': 4.954133033752441} +02/25/2022 19:13:37 - INFO - codeparrot_training - Step 32152: {'lr': 0.00015203613864123042, 'samples': 16462336, 'steps': 32152, 'loss/train': 1.060043215751648} +02/25/2022 19:13:43 - INFO - codeparrot_training - Step 32153: {'lr': 0.00015202108494777702, 'samples': 16462848, 'steps': 32153, 'loss/train': 0.5198764801025391} +02/25/2022 19:13:47 - INFO - codeparrot_training - Step 32154: {'lr': 0.00015200603167403416, 'samples': 16463360, 'steps': 32154, 'loss/train': 2.2902448177337646} +02/25/2022 19:13:52 - INFO - codeparrot_training - Step 32155: {'lr': 0.00015199097882006635, 'samples': 16463872, 'steps': 32155, 'loss/train': 2.0287246704101562} +02/25/2022 19:13:56 - INFO - codeparrot_training - Step 32156: {'lr': 0.000151975926385938, 'samples': 16464384, 'steps': 32156, 'loss/train': 2.140232563018799} +02/25/2022 19:14:01 - INFO - codeparrot_training - Step 32157: {'lr': 0.00015196087437171373, 'samples': 16464896, 'steps': 32157, 'loss/train': 2.3248045444488525} +02/25/2022 19:14:05 - INFO - codeparrot_training - Step 32158: {'lr': 0.0001519458227774579, 'samples': 16465408, 'steps': 32158, 'loss/train': 0.9733873605728149} +02/25/2022 19:14:10 - INFO - codeparrot_training - Step 32159: {'lr': 0.00015193077160323505, 'samples': 16465920, 'steps': 32159, 'loss/train': 2.111279249191284} +02/25/2022 19:14:14 - INFO - codeparrot_training - Step 32160: {'lr': 0.0001519157208491097, 'samples': 16466432, 'steps': 32160, 'loss/train': 0.050616730004549026} +02/25/2022 19:14:19 - INFO - codeparrot_training - Step 32161: {'lr': 0.00015190067051514618, 'samples': 16466944, 'steps': 32161, 'loss/train': 2.2860357761383057} +02/25/2022 19:14:23 - INFO - codeparrot_training - Step 32162: {'lr': 0.000151885620601409, 'samples': 16467456, 'steps': 32162, 'loss/train': 0.4523526132106781} +02/25/2022 19:14:29 - INFO - codeparrot_training - Step 32163: {'lr': 0.0001518705711079627, 'samples': 16467968, 'steps': 32163, 'loss/train': 3.2783427238464355} +02/25/2022 19:14:32 - INFO - codeparrot_training - Step 32164: {'lr': 0.0001518555220348718, 'samples': 16468480, 'steps': 32164, 'loss/train': 2.176258087158203} +02/25/2022 19:14:38 - INFO - codeparrot_training - Step 32165: {'lr': 0.00015184047338220058, 'samples': 16468992, 'steps': 32165, 'loss/train': 0.49233490228652954} +02/25/2022 19:14:41 - INFO - codeparrot_training - Step 32166: {'lr': 0.00015182542515001363, 'samples': 16469504, 'steps': 32166, 'loss/train': 0.6116815209388733} +02/25/2022 19:14:47 - INFO - codeparrot_training - Step 32167: {'lr': 0.00015181037733837533, 'samples': 16470016, 'steps': 32167, 'loss/train': 1.5458869934082031} +02/25/2022 19:14:50 - INFO - codeparrot_training - Step 32168: {'lr': 0.00015179532994735034, 'samples': 16470528, 'steps': 32168, 'loss/train': 1.0361545085906982} +02/25/2022 19:14:56 - INFO - codeparrot_training - Step 32169: {'lr': 0.00015178028297700285, 'samples': 16471040, 'steps': 32169, 'loss/train': 2.213805675506592} +02/25/2022 19:14:59 - INFO - codeparrot_training - Step 32170: {'lr': 0.00015176523642739746, 'samples': 16471552, 'steps': 32170, 'loss/train': 0.6268891096115112} +02/25/2022 19:15:05 - INFO - codeparrot_training - Step 32171: {'lr': 0.0001517501902985986, 'samples': 16472064, 'steps': 32171, 'loss/train': 1.7239582538604736} +02/25/2022 19:15:08 - INFO - codeparrot_training - Step 32172: {'lr': 0.00015173514459067072, 'samples': 16472576, 'steps': 32172, 'loss/train': 1.4961861371994019} +02/25/2022 19:15:14 - INFO - codeparrot_training - Step 32173: {'lr': 0.00015172009930367836, 'samples': 16473088, 'steps': 32173, 'loss/train': 2.1971781253814697} +02/25/2022 19:15:17 - INFO - codeparrot_training - Step 32174: {'lr': 0.0001517050544376858, 'samples': 16473600, 'steps': 32174, 'loss/train': 1.9610031843185425} +02/25/2022 19:15:23 - INFO - codeparrot_training - Step 32175: {'lr': 0.00015169000999275758, 'samples': 16474112, 'steps': 32175, 'loss/train': 0.16922281682491302} +02/25/2022 19:15:26 - INFO - codeparrot_training - Step 32176: {'lr': 0.00015167496596895814, 'samples': 16474624, 'steps': 32176, 'loss/train': 1.3845646381378174} +02/25/2022 19:15:32 - INFO - codeparrot_training - Step 32177: {'lr': 0.00015165992236635203, 'samples': 16475136, 'steps': 32177, 'loss/train': 1.2828192710876465} +02/25/2022 19:15:35 - INFO - codeparrot_training - Step 32178: {'lr': 0.00015164487918500346, 'samples': 16475648, 'steps': 32178, 'loss/train': 1.4181867837905884} +02/25/2022 19:15:42 - INFO - codeparrot_training - Step 32179: {'lr': 0.00015162983642497702, 'samples': 16476160, 'steps': 32179, 'loss/train': 2.120661497116089} +02/25/2022 19:15:45 - INFO - codeparrot_training - Step 32180: {'lr': 0.00015161479408633713, 'samples': 16476672, 'steps': 32180, 'loss/train': 2.622908353805542} +02/25/2022 19:15:50 - INFO - codeparrot_training - Step 32181: {'lr': 0.00015159975216914832, 'samples': 16477184, 'steps': 32181, 'loss/train': 2.021345376968384} +02/25/2022 19:15:54 - INFO - codeparrot_training - Step 32182: {'lr': 0.0001515847106734748, 'samples': 16477696, 'steps': 32182, 'loss/train': 2.836031675338745} +02/25/2022 19:15:59 - INFO - codeparrot_training - Step 32183: {'lr': 0.00015156966959938118, 'samples': 16478208, 'steps': 32183, 'loss/train': 1.6484124660491943} +02/25/2022 19:16:03 - INFO - codeparrot_training - Step 32184: {'lr': 0.0001515546289469318, 'samples': 16478720, 'steps': 32184, 'loss/train': 1.2992713451385498} +02/25/2022 19:16:08 - INFO - codeparrot_training - Step 32185: {'lr': 0.0001515395887161911, 'samples': 16479232, 'steps': 32185, 'loss/train': 1.5771814584732056} +02/25/2022 19:16:14 - INFO - codeparrot_training - Step 32186: {'lr': 0.0001515245489072237, 'samples': 16479744, 'steps': 32186, 'loss/train': 1.7997910976409912} +02/25/2022 19:16:17 - INFO - codeparrot_training - Step 32187: {'lr': 0.00015150950952009374, 'samples': 16480256, 'steps': 32187, 'loss/train': 2.053393602371216} +02/25/2022 19:16:24 - INFO - codeparrot_training - Step 32188: {'lr': 0.00015149447055486575, 'samples': 16480768, 'steps': 32188, 'loss/train': 2.0865182876586914} +02/25/2022 19:16:27 - INFO - codeparrot_training - Step 32189: {'lr': 0.0001514794320116042, 'samples': 16481280, 'steps': 32189, 'loss/train': 0.32376283407211304} +02/25/2022 19:16:33 - INFO - codeparrot_training - Step 32190: {'lr': 0.00015146439389037355, 'samples': 16481792, 'steps': 32190, 'loss/train': 1.196940302848816} +02/25/2022 19:16:36 - INFO - codeparrot_training - Step 32191: {'lr': 0.00015144935619123805, 'samples': 16482304, 'steps': 32191, 'loss/train': 1.2933422327041626} +02/25/2022 19:16:42 - INFO - codeparrot_training - Step 32192: {'lr': 0.00015143431891426223, 'samples': 16482816, 'steps': 32192, 'loss/train': 2.626477003097534} +02/25/2022 19:16:45 - INFO - codeparrot_training - Step 32193: {'lr': 0.00015141928205951056, 'samples': 16483328, 'steps': 32193, 'loss/train': 3.6751291751861572} +02/25/2022 19:16:51 - INFO - codeparrot_training - Step 32194: {'lr': 0.0001514042456270473, 'samples': 16483840, 'steps': 32194, 'loss/train': 2.1206066608428955} +02/25/2022 19:16:54 - INFO - codeparrot_training - Step 32195: {'lr': 0.000151389209616937, 'samples': 16484352, 'steps': 32195, 'loss/train': 1.5654696226119995} +02/25/2022 19:17:00 - INFO - codeparrot_training - Step 32196: {'lr': 0.00015137417402924397, 'samples': 16484864, 'steps': 32196, 'loss/train': 2.389921188354492} +02/25/2022 19:17:03 - INFO - codeparrot_training - Step 32197: {'lr': 0.00015135913886403268, 'samples': 16485376, 'steps': 32197, 'loss/train': 2.205422878265381} +02/25/2022 19:17:09 - INFO - codeparrot_training - Step 32198: {'lr': 0.0001513441041213675, 'samples': 16485888, 'steps': 32198, 'loss/train': 1.6165269613265991} +02/25/2022 19:17:13 - INFO - codeparrot_training - Step 32199: {'lr': 0.0001513290698013129, 'samples': 16486400, 'steps': 32199, 'loss/train': 1.6551381349563599} +02/25/2022 19:17:18 - INFO - codeparrot_training - Step 32200: {'lr': 0.00015131403590393322, 'samples': 16486912, 'steps': 32200, 'loss/train': 2.567089557647705} +02/25/2022 19:17:22 - INFO - codeparrot_training - Step 32201: {'lr': 0.00015129900242929285, 'samples': 16487424, 'steps': 32201, 'loss/train': 2.299525499343872} +02/25/2022 19:17:27 - INFO - codeparrot_training - Step 32202: {'lr': 0.0001512839693774562, 'samples': 16487936, 'steps': 32202, 'loss/train': 1.3168189525604248} +02/25/2022 19:17:31 - INFO - codeparrot_training - Step 32203: {'lr': 0.00015126893674848773, 'samples': 16488448, 'steps': 32203, 'loss/train': 1.5388538837432861} +02/25/2022 19:17:36 - INFO - codeparrot_training - Step 32204: {'lr': 0.00015125390454245177, 'samples': 16488960, 'steps': 32204, 'loss/train': 0.9252519011497498} +02/25/2022 19:17:40 - INFO - codeparrot_training - Step 32205: {'lr': 0.00015123887275941266, 'samples': 16489472, 'steps': 32205, 'loss/train': 1.110875129699707} +02/25/2022 19:17:45 - INFO - codeparrot_training - Step 32206: {'lr': 0.0001512238413994349, 'samples': 16489984, 'steps': 32206, 'loss/train': 1.1377098560333252} +02/25/2022 19:17:49 - INFO - codeparrot_training - Step 32207: {'lr': 0.00015120881046258285, 'samples': 16490496, 'steps': 32207, 'loss/train': 2.8585071563720703} +02/25/2022 19:17:55 - INFO - codeparrot_training - Step 32208: {'lr': 0.00015119377994892095, 'samples': 16491008, 'steps': 32208, 'loss/train': 1.7140177488327026} +02/25/2022 19:17:59 - INFO - codeparrot_training - Step 32209: {'lr': 0.00015117874985851344, 'samples': 16491520, 'steps': 32209, 'loss/train': 1.2388315200805664} +02/25/2022 19:18:04 - INFO - codeparrot_training - Step 32210: {'lr': 0.00015116372019142478, 'samples': 16492032, 'steps': 32210, 'loss/train': 1.3309440612792969} +02/25/2022 19:18:08 - INFO - codeparrot_training - Step 32211: {'lr': 0.00015114869094771932, 'samples': 16492544, 'steps': 32211, 'loss/train': 1.8389467000961304} +02/25/2022 19:18:13 - INFO - codeparrot_training - Step 32212: {'lr': 0.00015113366212746166, 'samples': 16493056, 'steps': 32212, 'loss/train': 0.8844029307365417} +02/25/2022 19:18:17 - INFO - codeparrot_training - Step 32213: {'lr': 0.00015111863373071583, 'samples': 16493568, 'steps': 32213, 'loss/train': 0.40649211406707764} +02/25/2022 19:18:23 - INFO - codeparrot_training - Step 32214: {'lr': 0.0001511036057575464, 'samples': 16494080, 'steps': 32214, 'loss/train': 2.34197735786438} +02/25/2022 19:18:26 - INFO - codeparrot_training - Step 32215: {'lr': 0.00015108857820801772, 'samples': 16494592, 'steps': 32215, 'loss/train': 2.0781137943267822} +02/25/2022 19:18:32 - INFO - codeparrot_training - Step 32216: {'lr': 0.00015107355108219425, 'samples': 16495104, 'steps': 32216, 'loss/train': 1.3165700435638428} +02/25/2022 19:18:35 - INFO - codeparrot_training - Step 32217: {'lr': 0.0001510585243801402, 'samples': 16495616, 'steps': 32217, 'loss/train': 1.8521404266357422} +02/25/2022 19:18:41 - INFO - codeparrot_training - Step 32218: {'lr': 0.00015104349810192, 'samples': 16496128, 'steps': 32218, 'loss/train': 1.7540336847305298} +02/25/2022 19:18:44 - INFO - codeparrot_training - Step 32219: {'lr': 0.00015102847224759803, 'samples': 16496640, 'steps': 32219, 'loss/train': 1.5529921054840088} +02/25/2022 19:18:50 - INFO - codeparrot_training - Step 32220: {'lr': 0.00015101344681723867, 'samples': 16497152, 'steps': 32220, 'loss/train': 1.6578809022903442} +02/25/2022 19:18:53 - INFO - codeparrot_training - Step 32221: {'lr': 0.00015099842181090633, 'samples': 16497664, 'steps': 32221, 'loss/train': 1.8058582544326782} +02/25/2022 19:18:59 - INFO - codeparrot_training - Step 32222: {'lr': 0.00015098339722866529, 'samples': 16498176, 'steps': 32222, 'loss/train': 1.709394097328186} +02/25/2022 19:19:02 - INFO - codeparrot_training - Step 32223: {'lr': 0.00015096837307057988, 'samples': 16498688, 'steps': 32223, 'loss/train': 1.873930811882019} +02/25/2022 19:19:09 - INFO - codeparrot_training - Step 32224: {'lr': 0.0001509533493367145, 'samples': 16499200, 'steps': 32224, 'loss/train': 0.7976589798927307} +02/25/2022 19:19:12 - INFO - codeparrot_training - Step 32225: {'lr': 0.00015093832602713363, 'samples': 16499712, 'steps': 32225, 'loss/train': 2.253554582595825} +02/25/2022 19:19:18 - INFO - codeparrot_training - Step 32226: {'lr': 0.00015092330314190144, 'samples': 16500224, 'steps': 32226, 'loss/train': 2.6590070724487305} +02/25/2022 19:19:21 - INFO - codeparrot_training - Step 32227: {'lr': 0.00015090828068108238, 'samples': 16500736, 'steps': 32227, 'loss/train': 1.500365972518921} +02/25/2022 19:19:27 - INFO - codeparrot_training - Step 32228: {'lr': 0.00015089325864474075, 'samples': 16501248, 'steps': 32228, 'loss/train': 2.505763053894043} +02/25/2022 19:19:30 - INFO - codeparrot_training - Step 32229: {'lr': 0.000150878237032941, 'samples': 16501760, 'steps': 32229, 'loss/train': 2.8648841381073} +02/25/2022 19:19:36 - INFO - codeparrot_training - Step 32230: {'lr': 0.00015086321584574736, 'samples': 16502272, 'steps': 32230, 'loss/train': 2.0625882148742676} +02/25/2022 19:19:39 - INFO - codeparrot_training - Step 32231: {'lr': 0.0001508481950832242, 'samples': 16502784, 'steps': 32231, 'loss/train': 1.1973849534988403} +02/25/2022 19:19:45 - INFO - codeparrot_training - Step 32232: {'lr': 0.0001508331747454359, 'samples': 16503296, 'steps': 32232, 'loss/train': 1.9053893089294434} +02/25/2022 19:19:48 - INFO - codeparrot_training - Step 32233: {'lr': 0.00015081815483244682, 'samples': 16503808, 'steps': 32233, 'loss/train': 2.1886916160583496} +02/25/2022 19:19:54 - INFO - codeparrot_training - Step 32234: {'lr': 0.00015080313534432128, 'samples': 16504320, 'steps': 32234, 'loss/train': 1.807803750038147} +02/25/2022 19:19:58 - INFO - codeparrot_training - Step 32235: {'lr': 0.00015078811628112354, 'samples': 16504832, 'steps': 32235, 'loss/train': 1.881060004234314} +02/25/2022 19:20:03 - INFO - codeparrot_training - Step 32236: {'lr': 0.00015077309764291804, 'samples': 16505344, 'steps': 32236, 'loss/train': 2.407815933227539} +02/25/2022 19:20:06 - INFO - codeparrot_training - Step 32237: {'lr': 0.00015075807942976913, 'samples': 16505856, 'steps': 32237, 'loss/train': 1.120932936668396} +02/25/2022 19:20:12 - INFO - codeparrot_training - Step 32238: {'lr': 0.00015074306164174106, 'samples': 16506368, 'steps': 32238, 'loss/train': 2.388240337371826} +02/25/2022 19:20:16 - INFO - codeparrot_training - Step 32239: {'lr': 0.00015072804427889819, 'samples': 16506880, 'steps': 32239, 'loss/train': 2.1095659732818604} +02/25/2022 19:20:23 - INFO - codeparrot_training - Step 32240: {'lr': 0.00015071302734130488, 'samples': 16507392, 'steps': 32240, 'loss/train': 1.3532465696334839} +02/25/2022 19:20:26 - INFO - codeparrot_training - Step 32241: {'lr': 0.0001506980108290254, 'samples': 16507904, 'steps': 32241, 'loss/train': 0.46470528841018677} +02/25/2022 19:20:32 - INFO - codeparrot_training - Step 32242: {'lr': 0.0001506829947421241, 'samples': 16508416, 'steps': 32242, 'loss/train': 1.6249240636825562} +02/25/2022 19:20:35 - INFO - codeparrot_training - Step 32243: {'lr': 0.00015066797908066536, 'samples': 16508928, 'steps': 32243, 'loss/train': 1.2109366655349731} +02/25/2022 19:20:41 - INFO - codeparrot_training - Step 32244: {'lr': 0.00015065296384471344, 'samples': 16509440, 'steps': 32244, 'loss/train': 1.9693979024887085} +02/25/2022 19:20:44 - INFO - codeparrot_training - Step 32245: {'lr': 0.00015063794903433266, 'samples': 16509952, 'steps': 32245, 'loss/train': 1.5340776443481445} +02/25/2022 19:20:50 - INFO - codeparrot_training - Step 32246: {'lr': 0.00015062293464958735, 'samples': 16510464, 'steps': 32246, 'loss/train': 1.1609259843826294} +02/25/2022 19:20:53 - INFO - codeparrot_training - Step 32247: {'lr': 0.0001506079206905419, 'samples': 16510976, 'steps': 32247, 'loss/train': 1.5968750715255737} +02/25/2022 19:20:59 - INFO - codeparrot_training - Step 32248: {'lr': 0.00015059290715726048, 'samples': 16511488, 'steps': 32248, 'loss/train': 1.9594534635543823} +02/25/2022 19:21:02 - INFO - codeparrot_training - Step 32249: {'lr': 0.00015057789404980749, 'samples': 16512000, 'steps': 32249, 'loss/train': 0.9509637355804443} +02/25/2022 19:21:09 - INFO - codeparrot_training - Step 32250: {'lr': 0.00015056288136824724, 'samples': 16512512, 'steps': 32250, 'loss/train': 1.2636862993240356} +02/25/2022 19:21:13 - INFO - codeparrot_training - Step 32251: {'lr': 0.0001505478691126441, 'samples': 16513024, 'steps': 32251, 'loss/train': 0.9818252921104431} +02/25/2022 19:21:18 - INFO - codeparrot_training - Step 32252: {'lr': 0.00015053285728306224, 'samples': 16513536, 'steps': 32252, 'loss/train': 1.9109362363815308} +02/25/2022 19:21:22 - INFO - codeparrot_training - Step 32253: {'lr': 0.000150517845879566, 'samples': 16514048, 'steps': 32253, 'loss/train': 1.8378657102584839} +02/25/2022 19:21:27 - INFO - codeparrot_training - Step 32254: {'lr': 0.00015050283490221974, 'samples': 16514560, 'steps': 32254, 'loss/train': 1.2148056030273438} +02/25/2022 19:21:31 - INFO - codeparrot_training - Step 32255: {'lr': 0.00015048782435108776, 'samples': 16515072, 'steps': 32255, 'loss/train': 2.220336437225342} +02/25/2022 19:21:36 - INFO - codeparrot_training - Step 32256: {'lr': 0.00015047281422623441, 'samples': 16515584, 'steps': 32256, 'loss/train': 1.813839316368103} +02/25/2022 19:21:40 - INFO - codeparrot_training - Step 32257: {'lr': 0.00015045780452772385, 'samples': 16516096, 'steps': 32257, 'loss/train': 1.558762788772583} +02/25/2022 19:21:45 - INFO - codeparrot_training - Step 32258: {'lr': 0.0001504427952556204, 'samples': 16516608, 'steps': 32258, 'loss/train': 1.2582591772079468} +02/25/2022 19:21:49 - INFO - codeparrot_training - Step 32259: {'lr': 0.00015042778640998844, 'samples': 16517120, 'steps': 32259, 'loss/train': 2.369511365890503} +02/25/2022 19:21:55 - INFO - codeparrot_training - Step 32260: {'lr': 0.00015041277799089233, 'samples': 16517632, 'steps': 32260, 'loss/train': 1.5628235340118408} +02/25/2022 19:21:58 - INFO - codeparrot_training - Step 32261: {'lr': 0.00015039776999839613, 'samples': 16518144, 'steps': 32261, 'loss/train': 3.29669189453125} +02/25/2022 19:22:04 - INFO - codeparrot_training - Step 32262: {'lr': 0.00015038276243256428, 'samples': 16518656, 'steps': 32262, 'loss/train': 1.3060795068740845} +02/25/2022 19:22:07 - INFO - codeparrot_training - Step 32263: {'lr': 0.00015036775529346104, 'samples': 16519168, 'steps': 32263, 'loss/train': 1.205307126045227} +02/25/2022 19:22:13 - INFO - codeparrot_training - Step 32264: {'lr': 0.0001503527485811508, 'samples': 16519680, 'steps': 32264, 'loss/train': 2.0570287704467773} +02/25/2022 19:22:16 - INFO - codeparrot_training - Step 32265: {'lr': 0.0001503377422956976, 'samples': 16520192, 'steps': 32265, 'loss/train': 0.6859361529350281} +02/25/2022 19:22:22 - INFO - codeparrot_training - Step 32266: {'lr': 0.00015032273643716593, 'samples': 16520704, 'steps': 32266, 'loss/train': 1.232322096824646} +02/25/2022 19:22:25 - INFO - codeparrot_training - Step 32267: {'lr': 0.00015030773100561996, 'samples': 16521216, 'steps': 32267, 'loss/train': 2.1694436073303223} +02/25/2022 19:22:31 - INFO - codeparrot_training - Step 32268: {'lr': 0.00015029272600112402, 'samples': 16521728, 'steps': 32268, 'loss/train': 1.8544132709503174} +02/25/2022 19:22:34 - INFO - codeparrot_training - Step 32269: {'lr': 0.0001502777214237425, 'samples': 16522240, 'steps': 32269, 'loss/train': 1.9719080924987793} +02/25/2022 19:22:40 - INFO - codeparrot_training - Step 32270: {'lr': 0.00015026271727353947, 'samples': 16522752, 'steps': 32270, 'loss/train': 2.713430643081665} +02/25/2022 19:22:44 - INFO - codeparrot_training - Step 32271: {'lr': 0.00015024771355057925, 'samples': 16523264, 'steps': 32271, 'loss/train': 2.079793930053711} +02/25/2022 19:22:49 - INFO - codeparrot_training - Step 32272: {'lr': 0.0001502327102549262, 'samples': 16523776, 'steps': 32272, 'loss/train': 2.1581075191497803} +02/25/2022 19:22:53 - INFO - codeparrot_training - Step 32273: {'lr': 0.00015021770738664458, 'samples': 16524288, 'steps': 32273, 'loss/train': 1.2612062692642212} +02/25/2022 19:22:58 - INFO - codeparrot_training - Step 32274: {'lr': 0.00015020270494579854, 'samples': 16524800, 'steps': 32274, 'loss/train': 1.3858479261398315} +02/25/2022 19:23:02 - INFO - codeparrot_training - Step 32275: {'lr': 0.00015018770293245243, 'samples': 16525312, 'steps': 32275, 'loss/train': 0.9788023829460144} +02/25/2022 19:23:07 - INFO - codeparrot_training - Step 32276: {'lr': 0.0001501727013466705, 'samples': 16525824, 'steps': 32276, 'loss/train': 2.2804222106933594} +02/25/2022 19:23:11 - INFO - codeparrot_training - Step 32277: {'lr': 0.00015015770018851713, 'samples': 16526336, 'steps': 32277, 'loss/train': 1.738651990890503} +02/25/2022 19:23:16 - INFO - codeparrot_training - Step 32278: {'lr': 0.00015014269945805636, 'samples': 16526848, 'steps': 32278, 'loss/train': 0.4127237796783447} +02/25/2022 19:23:20 - INFO - codeparrot_training - Step 32279: {'lr': 0.00015012769915535257, 'samples': 16527360, 'steps': 32279, 'loss/train': 1.448751449584961} +02/25/2022 19:23:26 - INFO - codeparrot_training - Step 32280: {'lr': 0.00015011269928047002, 'samples': 16527872, 'steps': 32280, 'loss/train': 2.204979419708252} +02/25/2022 19:23:29 - INFO - codeparrot_training - Step 32281: {'lr': 0.00015009769983347293, 'samples': 16528384, 'steps': 32281, 'loss/train': 0.8856463432312012} +02/25/2022 19:23:35 - INFO - codeparrot_training - Step 32282: {'lr': 0.0001500827008144256, 'samples': 16528896, 'steps': 32282, 'loss/train': 2.540271043777466} +02/25/2022 19:23:38 - INFO - codeparrot_training - Step 32283: {'lr': 0.0001500677022233922, 'samples': 16529408, 'steps': 32283, 'loss/train': 2.3061041831970215} +02/25/2022 19:23:44 - INFO - codeparrot_training - Step 32284: {'lr': 0.00015005270406043714, 'samples': 16529920, 'steps': 32284, 'loss/train': 2.043139934539795} +02/25/2022 19:23:47 - INFO - codeparrot_training - Step 32285: {'lr': 0.00015003770632562448, 'samples': 16530432, 'steps': 32285, 'loss/train': 1.3319199085235596} +02/25/2022 19:23:53 - INFO - codeparrot_training - Step 32286: {'lr': 0.00015002270901901855, 'samples': 16530944, 'steps': 32286, 'loss/train': 0.8908756971359253} +02/25/2022 19:23:58 - INFO - codeparrot_training - Step 32287: {'lr': 0.00015000771214068362, 'samples': 16531456, 'steps': 32287, 'loss/train': 2.5564520359039307} +02/25/2022 19:24:02 - INFO - codeparrot_training - Step 32288: {'lr': 0.00014999271569068385, 'samples': 16531968, 'steps': 32288, 'loss/train': 2.058134078979492} +02/25/2022 19:24:08 - INFO - codeparrot_training - Step 32289: {'lr': 0.00014997771966908353, 'samples': 16532480, 'steps': 32289, 'loss/train': 1.7867285013198853} +02/25/2022 19:24:11 - INFO - codeparrot_training - Step 32290: {'lr': 0.00014996272407594694, 'samples': 16532992, 'steps': 32290, 'loss/train': 1.731552243232727} +02/25/2022 19:24:17 - INFO - codeparrot_training - Step 32291: {'lr': 0.0001499477289113383, 'samples': 16533504, 'steps': 32291, 'loss/train': 0.7351446151733398} +02/25/2022 19:24:20 - INFO - codeparrot_training - Step 32292: {'lr': 0.00014993273417532172, 'samples': 16534016, 'steps': 32292, 'loss/train': 2.1299233436584473} +02/25/2022 19:24:26 - INFO - codeparrot_training - Step 32293: {'lr': 0.00014991773986796158, 'samples': 16534528, 'steps': 32293, 'loss/train': 1.0895709991455078} +02/25/2022 19:24:29 - INFO - codeparrot_training - Step 32294: {'lr': 0.000149902745989322, 'samples': 16535040, 'steps': 32294, 'loss/train': 2.260838747024536} +02/25/2022 19:24:36 - INFO - codeparrot_training - Step 32295: {'lr': 0.00014988775253946745, 'samples': 16535552, 'steps': 32295, 'loss/train': 1.7025607824325562} +02/25/2022 19:24:39 - INFO - codeparrot_training - Step 32296: {'lr': 0.00014987275951846185, 'samples': 16536064, 'steps': 32296, 'loss/train': 2.390096426010132} +02/25/2022 19:24:45 - INFO - codeparrot_training - Step 32297: {'lr': 0.00014985776692636954, 'samples': 16536576, 'steps': 32297, 'loss/train': 2.3033804893493652} +02/25/2022 19:24:48 - INFO - codeparrot_training - Step 32298: {'lr': 0.0001498427747632548, 'samples': 16537088, 'steps': 32298, 'loss/train': 2.121675968170166} +02/25/2022 19:24:54 - INFO - codeparrot_training - Step 32299: {'lr': 0.0001498277830291819, 'samples': 16537600, 'steps': 32299, 'loss/train': 1.3470861911773682} +02/25/2022 19:24:57 - INFO - codeparrot_training - Step 32300: {'lr': 0.00014981279172421482, 'samples': 16538112, 'steps': 32300, 'loss/train': 1.252536416053772} +02/25/2022 19:25:03 - INFO - codeparrot_training - Step 32301: {'lr': 0.00014979780084841792, 'samples': 16538624, 'steps': 32301, 'loss/train': 0.7695764899253845} +02/25/2022 19:25:06 - INFO - codeparrot_training - Step 32302: {'lr': 0.00014978281040185548, 'samples': 16539136, 'steps': 32302, 'loss/train': 1.116349220275879} +02/25/2022 19:25:10 - INFO - codeparrot_training - Step 32303: {'lr': 0.00014976782038459164, 'samples': 16539648, 'steps': 32303, 'loss/train': 1.879724383354187} +02/25/2022 19:25:15 - INFO - codeparrot_training - Step 32304: {'lr': 0.00014975283079669073, 'samples': 16540160, 'steps': 32304, 'loss/train': 2.2061822414398193} +02/25/2022 19:25:19 - INFO - codeparrot_training - Step 32305: {'lr': 0.00014973784163821674, 'samples': 16540672, 'steps': 32305, 'loss/train': 2.3807365894317627} +02/25/2022 19:25:25 - INFO - codeparrot_training - Step 32306: {'lr': 0.00014972285290923402, 'samples': 16541184, 'steps': 32306, 'loss/train': 1.610237717628479} +02/25/2022 19:25:31 - INFO - codeparrot_training - Step 32307: {'lr': 0.00014970786460980672, 'samples': 16541696, 'steps': 32307, 'loss/train': 1.5615085363388062} +02/25/2022 19:25:34 - INFO - codeparrot_training - Step 32308: {'lr': 0.00014969287673999927, 'samples': 16542208, 'steps': 32308, 'loss/train': 2.7214581966400146} +02/25/2022 19:25:40 - INFO - codeparrot_training - Step 32309: {'lr': 0.0001496778892998755, 'samples': 16542720, 'steps': 32309, 'loss/train': 1.3820781707763672} +02/25/2022 19:25:43 - INFO - codeparrot_training - Step 32310: {'lr': 0.00014966290228949982, 'samples': 16543232, 'steps': 32310, 'loss/train': 2.3556127548217773} +02/25/2022 19:25:49 - INFO - codeparrot_training - Step 32311: {'lr': 0.00014964791570893642, 'samples': 16543744, 'steps': 32311, 'loss/train': 0.9765664339065552} +02/25/2022 19:25:52 - INFO - codeparrot_training - Step 32312: {'lr': 0.0001496329295582496, 'samples': 16544256, 'steps': 32312, 'loss/train': 1.417349100112915} +02/25/2022 19:25:57 - INFO - codeparrot_training - Step 32313: {'lr': 0.00014961794383750327, 'samples': 16544768, 'steps': 32313, 'loss/train': 1.7288947105407715} +02/25/2022 19:26:01 - INFO - codeparrot_training - Step 32314: {'lr': 0.00014960295854676186, 'samples': 16545280, 'steps': 32314, 'loss/train': 2.018951177597046} +02/25/2022 19:26:07 - INFO - codeparrot_training - Step 32315: {'lr': 0.00014958797368608945, 'samples': 16545792, 'steps': 32315, 'loss/train': 2.0905697345733643} +02/25/2022 19:26:11 - INFO - codeparrot_training - Step 32316: {'lr': 0.0001495729892555503, 'samples': 16546304, 'steps': 32316, 'loss/train': 2.4227921962738037} +02/25/2022 19:26:16 - INFO - codeparrot_training - Step 32317: {'lr': 0.0001495580052552087, 'samples': 16546816, 'steps': 32317, 'loss/train': 1.0265288352966309} +02/25/2022 19:26:20 - INFO - codeparrot_training - Step 32318: {'lr': 0.00014954302168512857, 'samples': 16547328, 'steps': 32318, 'loss/train': 2.15868878364563} +02/25/2022 19:26:26 - INFO - codeparrot_training - Step 32319: {'lr': 0.00014952803854537429, 'samples': 16547840, 'steps': 32319, 'loss/train': 1.7687561511993408} +02/25/2022 19:26:29 - INFO - codeparrot_training - Step 32320: {'lr': 0.00014951305583600999, 'samples': 16548352, 'steps': 32320, 'loss/train': 2.0340535640716553} +02/25/2022 19:26:35 - INFO - codeparrot_training - Step 32321: {'lr': 0.00014949807355709986, 'samples': 16548864, 'steps': 32321, 'loss/train': 2.162201166152954} +02/25/2022 19:26:38 - INFO - codeparrot_training - Step 32322: {'lr': 0.000149483091708708, 'samples': 16549376, 'steps': 32322, 'loss/train': 1.4235886335372925} +02/25/2022 19:26:44 - INFO - codeparrot_training - Step 32323: {'lr': 0.0001494681102908987, 'samples': 16549888, 'steps': 32323, 'loss/train': 0.40066954493522644} +02/25/2022 19:26:47 - INFO - codeparrot_training - Step 32324: {'lr': 0.00014945312930373611, 'samples': 16550400, 'steps': 32324, 'loss/train': 2.9656054973602295} +02/25/2022 19:26:53 - INFO - codeparrot_training - Step 32325: {'lr': 0.0001494381487472844, 'samples': 16550912, 'steps': 32325, 'loss/train': 2.722982883453369} +02/25/2022 19:26:56 - INFO - codeparrot_training - Step 32326: {'lr': 0.00014942316862160768, 'samples': 16551424, 'steps': 32326, 'loss/train': 2.050794839859009} +02/25/2022 19:27:02 - INFO - codeparrot_training - Step 32327: {'lr': 0.00014940818892677021, 'samples': 16551936, 'steps': 32327, 'loss/train': 1.385240912437439} +02/25/2022 19:27:06 - INFO - codeparrot_training - Step 32328: {'lr': 0.00014939320966283608, 'samples': 16552448, 'steps': 32328, 'loss/train': 2.0653727054595947} +02/25/2022 19:27:11 - INFO - codeparrot_training - Step 32329: {'lr': 0.00014937823082986952, 'samples': 16552960, 'steps': 32329, 'loss/train': 2.039252519607544} +02/25/2022 19:27:15 - INFO - codeparrot_training - Step 32330: {'lr': 0.00014936325242793466, 'samples': 16553472, 'steps': 32330, 'loss/train': 1.0067226886749268} +02/25/2022 19:27:20 - INFO - codeparrot_training - Step 32331: {'lr': 0.0001493482744570957, 'samples': 16553984, 'steps': 32331, 'loss/train': 1.7187745571136475} +02/25/2022 19:27:24 - INFO - codeparrot_training - Step 32332: {'lr': 0.0001493332969174167, 'samples': 16554496, 'steps': 32332, 'loss/train': 1.872113823890686} +02/25/2022 19:27:29 - INFO - codeparrot_training - Step 32333: {'lr': 0.00014931831980896193, 'samples': 16555008, 'steps': 32333, 'loss/train': 2.166632652282715} +02/25/2022 19:27:33 - INFO - codeparrot_training - Step 32334: {'lr': 0.0001493033431317956, 'samples': 16555520, 'steps': 32334, 'loss/train': 2.0489113330841064} +02/25/2022 19:27:38 - INFO - codeparrot_training - Step 32335: {'lr': 0.00014928836688598164, 'samples': 16556032, 'steps': 32335, 'loss/train': 1.5888949632644653} +02/25/2022 19:27:42 - INFO - codeparrot_training - Step 32336: {'lr': 0.00014927339107158436, 'samples': 16556544, 'steps': 32336, 'loss/train': 1.2976455688476562} +02/25/2022 19:27:47 - INFO - codeparrot_training - Step 32337: {'lr': 0.00014925841568866788, 'samples': 16557056, 'steps': 32337, 'loss/train': 1.5581536293029785} +02/25/2022 19:27:51 - INFO - codeparrot_training - Step 32338: {'lr': 0.00014924344073729648, 'samples': 16557568, 'steps': 32338, 'loss/train': 2.0754854679107666} +02/25/2022 19:27:56 - INFO - codeparrot_training - Step 32339: {'lr': 0.00014922846621753406, 'samples': 16558080, 'steps': 32339, 'loss/train': 2.1283252239227295} +02/25/2022 19:28:00 - INFO - codeparrot_training - Step 32340: {'lr': 0.0001492134921294449, 'samples': 16558592, 'steps': 32340, 'loss/train': 1.738769769668579} +02/25/2022 19:28:06 - INFO - codeparrot_training - Step 32341: {'lr': 0.00014919851847309312, 'samples': 16559104, 'steps': 32341, 'loss/train': 1.997680902481079} +02/25/2022 19:28:10 - INFO - codeparrot_training - Step 32342: {'lr': 0.00014918354524854293, 'samples': 16559616, 'steps': 32342, 'loss/train': 1.2354999780654907} +02/25/2022 19:28:15 - INFO - codeparrot_training - Step 32343: {'lr': 0.00014916857245585847, 'samples': 16560128, 'steps': 32343, 'loss/train': 0.13435079157352448} +02/25/2022 19:28:18 - INFO - codeparrot_training - Step 32344: {'lr': 0.00014915360009510375, 'samples': 16560640, 'steps': 32344, 'loss/train': 2.8112382888793945} +02/25/2022 19:28:24 - INFO - codeparrot_training - Step 32345: {'lr': 0.000149138628166343, 'samples': 16561152, 'steps': 32345, 'loss/train': 0.7388100028038025} +02/25/2022 19:28:27 - INFO - codeparrot_training - Step 32346: {'lr': 0.00014912365666964032, 'samples': 16561664, 'steps': 32346, 'loss/train': 2.7711784839630127} +02/25/2022 19:28:33 - INFO - codeparrot_training - Step 32347: {'lr': 0.00014910868560505996, 'samples': 16562176, 'steps': 32347, 'loss/train': 0.7430292963981628} +02/25/2022 19:28:37 - INFO - codeparrot_training - Step 32348: {'lr': 0.00014909371497266583, 'samples': 16562688, 'steps': 32348, 'loss/train': 1.8972395658493042} +02/25/2022 19:28:42 - INFO - codeparrot_training - Step 32349: {'lr': 0.00014907874477252222, 'samples': 16563200, 'steps': 32349, 'loss/train': 0.7243121862411499} +02/25/2022 19:28:46 - INFO - codeparrot_training - Step 32350: {'lr': 0.0001490637750046932, 'samples': 16563712, 'steps': 32350, 'loss/train': 1.9064499139785767} +02/25/2022 19:28:52 - INFO - codeparrot_training - Step 32351: {'lr': 0.00014904880566924295, 'samples': 16564224, 'steps': 32351, 'loss/train': 1.4657062292099} +02/25/2022 19:28:55 - INFO - codeparrot_training - Step 32352: {'lr': 0.00014903383676623564, 'samples': 16564736, 'steps': 32352, 'loss/train': 2.2063562870025635} +02/25/2022 19:29:01 - INFO - codeparrot_training - Step 32353: {'lr': 0.00014901886829573523, 'samples': 16565248, 'steps': 32353, 'loss/train': 1.4842668771743774} +02/25/2022 19:29:04 - INFO - codeparrot_training - Step 32354: {'lr': 0.0001490039002578059, 'samples': 16565760, 'steps': 32354, 'loss/train': 1.9563828706741333} +02/25/2022 19:29:10 - INFO - codeparrot_training - Step 32355: {'lr': 0.0001489889326525118, 'samples': 16566272, 'steps': 32355, 'loss/train': 2.100186586380005} +02/25/2022 19:29:13 - INFO - codeparrot_training - Step 32356: {'lr': 0.00014897396547991712, 'samples': 16566784, 'steps': 32356, 'loss/train': 2.0756497383117676} +02/25/2022 19:29:19 - INFO - codeparrot_training - Step 32357: {'lr': 0.0001489589987400858, 'samples': 16567296, 'steps': 32357, 'loss/train': 1.6552541255950928} +02/25/2022 19:29:22 - INFO - codeparrot_training - Step 32358: {'lr': 0.00014894403243308207, 'samples': 16567808, 'steps': 32358, 'loss/train': 1.8560599088668823} +02/25/2022 19:29:28 - INFO - codeparrot_training - Step 32359: {'lr': 0.00014892906655897, 'samples': 16568320, 'steps': 32359, 'loss/train': 0.39049747586250305} +02/25/2022 19:29:34 - INFO - codeparrot_training - Step 32360: {'lr': 0.0001489141011178138, 'samples': 16568832, 'steps': 32360, 'loss/train': 2.875079870223999} +02/25/2022 19:29:38 - INFO - codeparrot_training - Step 32361: {'lr': 0.00014889913610967743, 'samples': 16569344, 'steps': 32361, 'loss/train': 1.2644240856170654} +02/25/2022 19:29:41 - INFO - codeparrot_training - Step 32362: {'lr': 0.00014888417153462503, 'samples': 16569856, 'steps': 32362, 'loss/train': 1.3506760597229004} +02/25/2022 19:29:47 - INFO - codeparrot_training - Step 32363: {'lr': 0.00014886920739272071, 'samples': 16570368, 'steps': 32363, 'loss/train': 1.1997145414352417} +02/25/2022 19:29:51 - INFO - codeparrot_training - Step 32364: {'lr': 0.00014885424368402868, 'samples': 16570880, 'steps': 32364, 'loss/train': 3.3429555892944336} +02/25/2022 19:29:56 - INFO - codeparrot_training - Step 32365: {'lr': 0.00014883928040861294, 'samples': 16571392, 'steps': 32365, 'loss/train': 1.9886585474014282} +02/25/2022 19:30:00 - INFO - codeparrot_training - Step 32366: {'lr': 0.00014882431756653756, 'samples': 16571904, 'steps': 32366, 'loss/train': 2.280878782272339} +02/25/2022 19:30:05 - INFO - codeparrot_training - Step 32367: {'lr': 0.0001488093551578667, 'samples': 16572416, 'steps': 32367, 'loss/train': 2.1996753215789795} +02/25/2022 19:30:09 - INFO - codeparrot_training - Step 32368: {'lr': 0.0001487943931826644, 'samples': 16572928, 'steps': 32368, 'loss/train': 0.8394864201545715} +02/25/2022 19:30:14 - INFO - codeparrot_training - Step 32369: {'lr': 0.00014877943164099484, 'samples': 16573440, 'steps': 32369, 'loss/train': 0.8564398288726807} +02/25/2022 19:30:18 - INFO - codeparrot_training - Step 32370: {'lr': 0.000148764470532922, 'samples': 16573952, 'steps': 32370, 'loss/train': 1.2653359174728394} +02/25/2022 19:30:24 - INFO - codeparrot_training - Step 32371: {'lr': 0.00014874950985851003, 'samples': 16574464, 'steps': 32371, 'loss/train': 1.6284235715866089} +02/25/2022 19:30:27 - INFO - codeparrot_training - Step 32372: {'lr': 0.00014873454961782304, 'samples': 16574976, 'steps': 32372, 'loss/train': 2.074125289916992} +02/25/2022 19:30:33 - INFO - codeparrot_training - Step 32373: {'lr': 0.0001487195898109251, 'samples': 16575488, 'steps': 32373, 'loss/train': 2.0571651458740234} +02/25/2022 19:30:37 - INFO - codeparrot_training - Step 32374: {'lr': 0.00014870463043788025, 'samples': 16576000, 'steps': 32374, 'loss/train': 1.0151761770248413} +02/25/2022 19:30:42 - INFO - codeparrot_training - Step 32375: {'lr': 0.00014868967149875257, 'samples': 16576512, 'steps': 32375, 'loss/train': 0.9607380628585815} +02/25/2022 19:30:46 - INFO - codeparrot_training - Step 32376: {'lr': 0.0001486747129936062, 'samples': 16577024, 'steps': 32376, 'loss/train': 3.0268239974975586} +02/25/2022 19:30:51 - INFO - codeparrot_training - Step 32377: {'lr': 0.0001486597549225051, 'samples': 16577536, 'steps': 32377, 'loss/train': 1.6138197183609009} +02/25/2022 19:30:55 - INFO - codeparrot_training - Step 32378: {'lr': 0.00014864479728551362, 'samples': 16578048, 'steps': 32378, 'loss/train': 1.1942315101623535} +02/25/2022 19:31:00 - INFO - codeparrot_training - Step 32379: {'lr': 0.00014862984008269547, 'samples': 16578560, 'steps': 32379, 'loss/train': 0.864029586315155} +02/25/2022 19:31:04 - INFO - codeparrot_training - Step 32380: {'lr': 0.00014861488331411492, 'samples': 16579072, 'steps': 32380, 'loss/train': 2.648854970932007} +02/25/2022 19:31:09 - INFO - codeparrot_training - Step 32381: {'lr': 0.00014859992697983604, 'samples': 16579584, 'steps': 32381, 'loss/train': 0.8249788880348206} +02/25/2022 19:31:13 - INFO - codeparrot_training - Step 32382: {'lr': 0.00014858497107992296, 'samples': 16580096, 'steps': 32382, 'loss/train': 0.701622724533081} +02/25/2022 19:31:18 - INFO - codeparrot_training - Step 32383: {'lr': 0.0001485700156144396, 'samples': 16580608, 'steps': 32383, 'loss/train': 1.7681553363800049} +02/25/2022 19:31:22 - INFO - codeparrot_training - Step 32384: {'lr': 0.00014855506058345002, 'samples': 16581120, 'steps': 32384, 'loss/train': 1.8353049755096436} +02/25/2022 19:31:27 - INFO - codeparrot_training - Step 32385: {'lr': 0.00014854010598701838, 'samples': 16581632, 'steps': 32385, 'loss/train': 1.6558737754821777} +02/25/2022 19:31:31 - INFO - codeparrot_training - Step 32386: {'lr': 0.0001485251518252088, 'samples': 16582144, 'steps': 32386, 'loss/train': 1.4918975830078125} +02/25/2022 19:31:36 - INFO - codeparrot_training - Step 32387: {'lr': 0.00014851019809808516, 'samples': 16582656, 'steps': 32387, 'loss/train': 2.743694305419922} +02/25/2022 19:31:40 - INFO - codeparrot_training - Step 32388: {'lr': 0.0001484952448057116, 'samples': 16583168, 'steps': 32388, 'loss/train': 1.87287437915802} +02/25/2022 19:31:46 - INFO - codeparrot_training - Step 32389: {'lr': 0.0001484802919481522, 'samples': 16583680, 'steps': 32389, 'loss/train': 2.2721259593963623} +02/25/2022 19:31:50 - INFO - codeparrot_training - Step 32390: {'lr': 0.00014846533952547094, 'samples': 16584192, 'steps': 32390, 'loss/train': 2.5826575756073} +02/25/2022 19:31:55 - INFO - codeparrot_training - Step 32391: {'lr': 0.00014845038753773208, 'samples': 16584704, 'steps': 32391, 'loss/train': 1.1010130643844604} +02/25/2022 19:31:59 - INFO - codeparrot_training - Step 32392: {'lr': 0.00014843543598499936, 'samples': 16585216, 'steps': 32392, 'loss/train': 1.7832626104354858} +02/25/2022 19:32:04 - INFO - codeparrot_training - Step 32393: {'lr': 0.00014842048486733703, 'samples': 16585728, 'steps': 32393, 'loss/train': 1.3486442565917969} +02/25/2022 19:32:08 - INFO - codeparrot_training - Step 32394: {'lr': 0.00014840553418480907, 'samples': 16586240, 'steps': 32394, 'loss/train': 1.0889757871627808} +02/25/2022 19:32:13 - INFO - codeparrot_training - Step 32395: {'lr': 0.00014839058393747965, 'samples': 16586752, 'steps': 32395, 'loss/train': 1.8366035223007202} +02/25/2022 19:32:16 - INFO - codeparrot_training - Step 32396: {'lr': 0.0001483756341254126, 'samples': 16587264, 'steps': 32396, 'loss/train': 3.02665638923645} +02/25/2022 19:32:22 - INFO - codeparrot_training - Step 32397: {'lr': 0.00014836068474867204, 'samples': 16587776, 'steps': 32397, 'loss/train': 2.0333051681518555} +02/25/2022 19:32:25 - INFO - codeparrot_training - Step 32398: {'lr': 0.00014834573580732209, 'samples': 16588288, 'steps': 32398, 'loss/train': 2.0648722648620605} +02/25/2022 19:32:32 - INFO - codeparrot_training - Step 32399: {'lr': 0.00014833078730142675, 'samples': 16588800, 'steps': 32399, 'loss/train': 2.195464611053467} +02/25/2022 19:32:35 - INFO - codeparrot_training - Step 32400: {'lr': 0.00014831583923105, 'samples': 16589312, 'steps': 32400, 'loss/train': 1.9659101963043213} +02/25/2022 19:32:41 - INFO - codeparrot_training - Step 32401: {'lr': 0.00014830089159625586, 'samples': 16589824, 'steps': 32401, 'loss/train': 0.2595829665660858} +02/25/2022 19:32:44 - INFO - codeparrot_training - Step 32402: {'lr': 0.00014828594439710843, 'samples': 16590336, 'steps': 32402, 'loss/train': 2.886333465576172} +02/25/2022 19:32:50 - INFO - codeparrot_training - Step 32403: {'lr': 0.0001482709976336717, 'samples': 16590848, 'steps': 32403, 'loss/train': 0.9266557693481445} +02/25/2022 19:32:53 - INFO - codeparrot_training - Step 32404: {'lr': 0.00014825605130600983, 'samples': 16591360, 'steps': 32404, 'loss/train': 1.7129405736923218} +02/25/2022 19:32:59 - INFO - codeparrot_training - Step 32405: {'lr': 0.00014824110541418665, 'samples': 16591872, 'steps': 32405, 'loss/train': 0.6770980358123779} +02/25/2022 19:33:02 - INFO - codeparrot_training - Step 32406: {'lr': 0.00014822615995826622, 'samples': 16592384, 'steps': 32406, 'loss/train': 2.1913487911224365} +02/25/2022 19:33:08 - INFO - codeparrot_training - Step 32407: {'lr': 0.00014821121493831268, 'samples': 16592896, 'steps': 32407, 'loss/train': 0.9003936052322388} +02/25/2022 19:33:11 - INFO - codeparrot_training - Step 32408: {'lr': 0.00014819627035439, 'samples': 16593408, 'steps': 32408, 'loss/train': 1.6318864822387695} +02/25/2022 19:33:17 - INFO - codeparrot_training - Step 32409: {'lr': 0.00014818132620656206, 'samples': 16593920, 'steps': 32409, 'loss/train': 1.6203229427337646} +02/25/2022 19:33:21 - INFO - codeparrot_training - Step 32410: {'lr': 0.000148166382494893, 'samples': 16594432, 'steps': 32410, 'loss/train': 1.3255537748336792} +02/25/2022 19:33:26 - INFO - codeparrot_training - Step 32411: {'lr': 0.0001481514392194469, 'samples': 16594944, 'steps': 32411, 'loss/train': 2.2070844173431396} +02/25/2022 19:33:30 - INFO - codeparrot_training - Step 32412: {'lr': 0.00014813649638028764, 'samples': 16595456, 'steps': 32412, 'loss/train': 2.1397461891174316} +02/25/2022 19:33:35 - INFO - codeparrot_training - Step 32413: {'lr': 0.00014812155397747931, 'samples': 16595968, 'steps': 32413, 'loss/train': 1.8519595861434937} +02/25/2022 19:33:39 - INFO - codeparrot_training - Step 32414: {'lr': 0.0001481066120110859, 'samples': 16596480, 'steps': 32414, 'loss/train': 0.7094731330871582} +02/25/2022 19:33:44 - INFO - codeparrot_training - Step 32415: {'lr': 0.00014809167048117139, 'samples': 16596992, 'steps': 32415, 'loss/train': 1.8885993957519531} +02/25/2022 19:33:48 - INFO - codeparrot_training - Step 32416: {'lr': 0.00014807672938779975, 'samples': 16597504, 'steps': 32416, 'loss/train': 1.122735857963562} +02/25/2022 19:33:53 - INFO - codeparrot_training - Step 32417: {'lr': 0.00014806178873103516, 'samples': 16598016, 'steps': 32417, 'loss/train': 1.1100540161132812} +02/25/2022 19:33:57 - INFO - codeparrot_training - Step 32418: {'lr': 0.00014804684851094145, 'samples': 16598528, 'steps': 32418, 'loss/train': 1.671758770942688} +02/25/2022 19:34:03 - INFO - codeparrot_training - Step 32419: {'lr': 0.0001480319087275826, 'samples': 16599040, 'steps': 32419, 'loss/train': 1.9955915212631226} +02/25/2022 19:34:07 - INFO - codeparrot_training - Step 32420: {'lr': 0.00014801696938102272, 'samples': 16599552, 'steps': 32420, 'loss/train': 1.784430742263794} +02/25/2022 19:34:12 - INFO - codeparrot_training - Step 32421: {'lr': 0.00014800203047132576, 'samples': 16600064, 'steps': 32421, 'loss/train': 1.9255657196044922} +02/25/2022 19:34:16 - INFO - codeparrot_training - Step 32422: {'lr': 0.00014798709199855575, 'samples': 16600576, 'steps': 32422, 'loss/train': 2.7534384727478027} +02/25/2022 19:34:21 - INFO - codeparrot_training - Step 32423: {'lr': 0.00014797215396277657, 'samples': 16601088, 'steps': 32423, 'loss/train': 1.1202454566955566} +02/25/2022 19:34:24 - INFO - codeparrot_training - Step 32424: {'lr': 0.00014795721636405232, 'samples': 16601600, 'steps': 32424, 'loss/train': 1.6647528409957886} +02/25/2022 19:34:30 - INFO - codeparrot_training - Step 32425: {'lr': 0.00014794227920244697, 'samples': 16602112, 'steps': 32425, 'loss/train': 1.5797253847122192} +02/25/2022 19:34:34 - INFO - codeparrot_training - Step 32426: {'lr': 0.00014792734247802452, 'samples': 16602624, 'steps': 32426, 'loss/train': 1.0034444332122803} +02/25/2022 19:34:39 - INFO - codeparrot_training - Step 32427: {'lr': 0.00014791240619084888, 'samples': 16603136, 'steps': 32427, 'loss/train': 1.3269704580307007} +02/25/2022 19:34:43 - INFO - codeparrot_training - Step 32428: {'lr': 0.00014789747034098407, 'samples': 16603648, 'steps': 32428, 'loss/train': 2.880244493484497} +02/25/2022 19:34:49 - INFO - codeparrot_training - Step 32429: {'lr': 0.00014788253492849403, 'samples': 16604160, 'steps': 32429, 'loss/train': 2.087341785430908} +02/25/2022 19:34:52 - INFO - codeparrot_training - Step 32430: {'lr': 0.0001478675999534429, 'samples': 16604672, 'steps': 32430, 'loss/train': 2.6799347400665283} +02/25/2022 19:34:58 - INFO - codeparrot_training - Step 32431: {'lr': 0.00014785266541589444, 'samples': 16605184, 'steps': 32431, 'loss/train': 2.979008197784424} +02/25/2022 19:35:01 - INFO - codeparrot_training - Step 32432: {'lr': 0.00014783773131591278, 'samples': 16605696, 'steps': 32432, 'loss/train': 1.423139214515686} +02/25/2022 19:35:07 - INFO - codeparrot_training - Step 32433: {'lr': 0.00014782279765356178, 'samples': 16606208, 'steps': 32433, 'loss/train': 0.9647095203399658} +02/25/2022 19:35:10 - INFO - codeparrot_training - Step 32434: {'lr': 0.0001478078644289056, 'samples': 16606720, 'steps': 32434, 'loss/train': 0.5865122675895691} +02/25/2022 19:35:16 - INFO - codeparrot_training - Step 32435: {'lr': 0.00014779293164200798, 'samples': 16607232, 'steps': 32435, 'loss/train': 1.6733436584472656} +02/25/2022 19:35:19 - INFO - codeparrot_training - Step 32436: {'lr': 0.00014777799929293294, 'samples': 16607744, 'steps': 32436, 'loss/train': 1.204079508781433} +02/25/2022 19:35:25 - INFO - codeparrot_training - Step 32437: {'lr': 0.00014776306738174453, 'samples': 16608256, 'steps': 32437, 'loss/train': 2.0726795196533203} +02/25/2022 19:35:30 - INFO - codeparrot_training - Step 32438: {'lr': 0.00014774813590850665, 'samples': 16608768, 'steps': 32438, 'loss/train': 2.0674962997436523} +02/25/2022 19:35:34 - INFO - codeparrot_training - Step 32439: {'lr': 0.00014773320487328342, 'samples': 16609280, 'steps': 32439, 'loss/train': 1.2218797206878662} +02/25/2022 19:35:37 - INFO - codeparrot_training - Step 32440: {'lr': 0.00014771827427613856, 'samples': 16609792, 'steps': 32440, 'loss/train': 2.6540238857269287} +02/25/2022 19:35:43 - INFO - codeparrot_training - Step 32441: {'lr': 0.00014770334411713613, 'samples': 16610304, 'steps': 32441, 'loss/train': 1.991631269454956} +02/25/2022 19:35:48 - INFO - codeparrot_training - Step 32442: {'lr': 0.0001476884143963401, 'samples': 16610816, 'steps': 32442, 'loss/train': 1.7428946495056152} +02/25/2022 19:35:52 - INFO - codeparrot_training - Step 32443: {'lr': 0.0001476734851138145, 'samples': 16611328, 'steps': 32443, 'loss/train': 1.5309555530548096} +02/25/2022 19:35:58 - INFO - codeparrot_training - Step 32444: {'lr': 0.0001476585562696231, 'samples': 16611840, 'steps': 32444, 'loss/train': 1.5594590902328491} +02/25/2022 19:36:01 - INFO - codeparrot_training - Step 32445: {'lr': 0.00014764362786382995, 'samples': 16612352, 'steps': 32445, 'loss/train': 1.4648518562316895} +02/25/2022 19:36:07 - INFO - codeparrot_training - Step 32446: {'lr': 0.00014762869989649898, 'samples': 16612864, 'steps': 32446, 'loss/train': 0.8124240636825562} +02/25/2022 19:36:10 - INFO - codeparrot_training - Step 32447: {'lr': 0.0001476137723676943, 'samples': 16613376, 'steps': 32447, 'loss/train': 1.1763348579406738} +02/25/2022 19:36:16 - INFO - codeparrot_training - Step 32448: {'lr': 0.00014759884527747958, 'samples': 16613888, 'steps': 32448, 'loss/train': 1.5468896627426147} +02/25/2022 19:36:19 - INFO - codeparrot_training - Step 32449: {'lr': 0.0001475839186259189, 'samples': 16614400, 'steps': 32449, 'loss/train': 0.6370042562484741} +02/25/2022 19:36:25 - INFO - codeparrot_training - Step 32450: {'lr': 0.00014756899241307614, 'samples': 16614912, 'steps': 32450, 'loss/train': 1.643971562385559} +02/25/2022 19:36:28 - INFO - codeparrot_training - Step 32451: {'lr': 0.0001475540666390154, 'samples': 16615424, 'steps': 32451, 'loss/train': 2.157505750656128} +02/25/2022 19:36:32 - INFO - codeparrot_training - Step 32452: {'lr': 0.00014753914130380047, 'samples': 16615936, 'steps': 32452, 'loss/train': 3.6336867809295654} +02/25/2022 19:36:37 - INFO - codeparrot_training - Step 32453: {'lr': 0.0001475242164074953, 'samples': 16616448, 'steps': 32453, 'loss/train': 3.3105990886688232} +02/25/2022 19:36:41 - INFO - codeparrot_training - Step 32454: {'lr': 0.00014750929195016385, 'samples': 16616960, 'steps': 32454, 'loss/train': 0.29292500019073486} +02/25/2022 19:36:47 - INFO - codeparrot_training - Step 32455: {'lr': 0.0001474943679318701, 'samples': 16617472, 'steps': 32455, 'loss/train': 0.9915158152580261} +02/25/2022 19:36:51 - INFO - codeparrot_training - Step 32456: {'lr': 0.0001474794443526779, 'samples': 16617984, 'steps': 32456, 'loss/train': 2.117870807647705} +02/25/2022 19:36:56 - INFO - codeparrot_training - Step 32457: {'lr': 0.0001474645212126512, 'samples': 16618496, 'steps': 32457, 'loss/train': 1.7741111516952515} +02/25/2022 19:37:02 - INFO - codeparrot_training - Step 32458: {'lr': 0.00014744959851185397, 'samples': 16619008, 'steps': 32458, 'loss/train': 0.20407609641551971} +02/25/2022 19:37:05 - INFO - codeparrot_training - Step 32459: {'lr': 0.00014743467625035001, 'samples': 16619520, 'steps': 32459, 'loss/train': 0.8214003443717957} +02/25/2022 19:37:11 - INFO - codeparrot_training - Step 32460: {'lr': 0.00014741975442820335, 'samples': 16620032, 'steps': 32460, 'loss/train': 2.6957285404205322} +02/25/2022 19:37:14 - INFO - codeparrot_training - Step 32461: {'lr': 0.00014740483304547794, 'samples': 16620544, 'steps': 32461, 'loss/train': 2.6454899311065674} +02/25/2022 19:37:20 - INFO - codeparrot_training - Step 32462: {'lr': 0.00014738991210223767, 'samples': 16621056, 'steps': 32462, 'loss/train': 2.3153529167175293} +02/25/2022 19:37:23 - INFO - codeparrot_training - Step 32463: {'lr': 0.00014737499159854633, 'samples': 16621568, 'steps': 32463, 'loss/train': 1.7571243047714233} +02/25/2022 19:37:29 - INFO - codeparrot_training - Step 32464: {'lr': 0.00014736007153446802, 'samples': 16622080, 'steps': 32464, 'loss/train': 1.3341004848480225} +02/25/2022 19:37:32 - INFO - codeparrot_training - Step 32465: {'lr': 0.0001473451519100666, 'samples': 16622592, 'steps': 32465, 'loss/train': 1.1712263822555542} +02/25/2022 19:37:39 - INFO - codeparrot_training - Step 32466: {'lr': 0.00014733023272540586, 'samples': 16623104, 'steps': 32466, 'loss/train': 0.3323075473308563} +02/25/2022 19:37:42 - INFO - codeparrot_training - Step 32467: {'lr': 0.0001473153139805498, 'samples': 16623616, 'steps': 32467, 'loss/train': 2.4661805629730225} +02/25/2022 19:37:48 - INFO - codeparrot_training - Step 32468: {'lr': 0.00014730039567556239, 'samples': 16624128, 'steps': 32468, 'loss/train': 1.6095824241638184} +02/25/2022 19:37:51 - INFO - codeparrot_training - Step 32469: {'lr': 0.00014728547781050753, 'samples': 16624640, 'steps': 32469, 'loss/train': 1.788891315460205} +02/25/2022 19:37:57 - INFO - codeparrot_training - Step 32470: {'lr': 0.00014727056038544895, 'samples': 16625152, 'steps': 32470, 'loss/train': 1.8447391986846924} +02/25/2022 19:38:00 - INFO - codeparrot_training - Step 32471: {'lr': 0.0001472556434004507, 'samples': 16625664, 'steps': 32471, 'loss/train': 1.5776461362838745} +02/25/2022 19:38:06 - INFO - codeparrot_training - Step 32472: {'lr': 0.00014724072685557666, 'samples': 16626176, 'steps': 32472, 'loss/train': 2.286524772644043} +02/25/2022 19:38:09 - INFO - codeparrot_training - Step 32473: {'lr': 0.00014722581075089067, 'samples': 16626688, 'steps': 32473, 'loss/train': 2.2687315940856934} +02/25/2022 19:38:15 - INFO - codeparrot_training - Step 32474: {'lr': 0.0001472108950864568, 'samples': 16627200, 'steps': 32474, 'loss/train': 1.6679648160934448} +02/25/2022 19:38:19 - INFO - codeparrot_training - Step 32475: {'lr': 0.0001471959798623387, 'samples': 16627712, 'steps': 32475, 'loss/train': 2.1277120113372803} +02/25/2022 19:38:25 - INFO - codeparrot_training - Step 32476: {'lr': 0.0001471810650786004, 'samples': 16628224, 'steps': 32476, 'loss/train': 2.6028218269348145} +02/25/2022 19:38:28 - INFO - codeparrot_training - Step 32477: {'lr': 0.00014716615073530575, 'samples': 16628736, 'steps': 32477, 'loss/train': 1.2286666631698608} +02/25/2022 19:38:32 - INFO - codeparrot_training - Step 32478: {'lr': 0.00014715123683251878, 'samples': 16629248, 'steps': 32478, 'loss/train': 0.06970261037349701} +02/25/2022 19:38:37 - INFO - codeparrot_training - Step 32479: {'lr': 0.00014713632337030313, 'samples': 16629760, 'steps': 32479, 'loss/train': 1.4311376810073853} +02/25/2022 19:38:41 - INFO - codeparrot_training - Step 32480: {'lr': 0.0001471214103487228, 'samples': 16630272, 'steps': 32480, 'loss/train': 1.5347286462783813} +02/25/2022 19:38:46 - INFO - codeparrot_training - Step 32481: {'lr': 0.0001471064977678417, 'samples': 16630784, 'steps': 32481, 'loss/train': 1.024117112159729} +02/25/2022 19:38:50 - INFO - codeparrot_training - Step 32482: {'lr': 0.0001470915856277238, 'samples': 16631296, 'steps': 32482, 'loss/train': 0.7031924724578857} +02/25/2022 19:38:55 - INFO - codeparrot_training - Step 32483: {'lr': 0.00014707667392843278, 'samples': 16631808, 'steps': 32483, 'loss/train': 1.365310549736023} +02/25/2022 19:38:59 - INFO - codeparrot_training - Step 32484: {'lr': 0.00014706176267003258, 'samples': 16632320, 'steps': 32484, 'loss/train': 2.31606125831604} +02/25/2022 19:39:04 - INFO - codeparrot_training - Step 32485: {'lr': 0.0001470468518525871, 'samples': 16632832, 'steps': 32485, 'loss/train': 1.7600618600845337} +02/25/2022 19:39:08 - INFO - codeparrot_training - Step 32486: {'lr': 0.0001470319414761602, 'samples': 16633344, 'steps': 32486, 'loss/train': 0.2055341750383377} +02/25/2022 19:39:13 - INFO - codeparrot_training - Step 32487: {'lr': 0.0001470170315408159, 'samples': 16633856, 'steps': 32487, 'loss/train': 1.2472333908081055} +02/25/2022 19:39:17 - INFO - codeparrot_training - Step 32488: {'lr': 0.00014700212204661785, 'samples': 16634368, 'steps': 32488, 'loss/train': 1.270909070968628} +02/25/2022 19:39:22 - INFO - codeparrot_training - Step 32489: {'lr': 0.00014698721299362996, 'samples': 16634880, 'steps': 32489, 'loss/train': 1.7824101448059082} +02/25/2022 19:39:26 - INFO - codeparrot_training - Step 32490: {'lr': 0.00014697230438191617, 'samples': 16635392, 'steps': 32490, 'loss/train': 1.016020655632019} +02/25/2022 19:39:33 - INFO - codeparrot_training - Step 32491: {'lr': 0.00014695739621154038, 'samples': 16635904, 'steps': 32491, 'loss/train': 1.9061532020568848} +02/25/2022 19:39:36 - INFO - codeparrot_training - Step 32492: {'lr': 0.0001469424884825663, 'samples': 16636416, 'steps': 32492, 'loss/train': 1.249210000038147} +02/25/2022 19:39:42 - INFO - codeparrot_training - Step 32493: {'lr': 0.00014692758119505789, 'samples': 16636928, 'steps': 32493, 'loss/train': 1.5328309535980225} +02/25/2022 19:39:45 - INFO - codeparrot_training - Step 32494: {'lr': 0.000146912674349079, 'samples': 16637440, 'steps': 32494, 'loss/train': 2.420698881149292} +02/25/2022 19:39:51 - INFO - codeparrot_training - Step 32495: {'lr': 0.00014689776794469357, 'samples': 16637952, 'steps': 32495, 'loss/train': 1.5119743347167969} +02/25/2022 19:39:56 - INFO - codeparrot_training - Step 32496: {'lr': 0.00014688286198196525, 'samples': 16638464, 'steps': 32496, 'loss/train': 1.5790690183639526} +02/25/2022 19:40:00 - INFO - codeparrot_training - Step 32497: {'lr': 0.00014686795646095803, 'samples': 16638976, 'steps': 32497, 'loss/train': 1.986587405204773} +02/25/2022 19:40:05 - INFO - codeparrot_training - Step 32498: {'lr': 0.00014685305138173574, 'samples': 16639488, 'steps': 32498, 'loss/train': 1.8447834253311157} +02/25/2022 19:40:09 - INFO - codeparrot_training - Step 32499: {'lr': 0.00014683814674436218, 'samples': 16640000, 'steps': 32499, 'loss/train': 0.35245779156684875} +02/25/2022 19:40:14 - INFO - codeparrot_training - Step 32500: {'lr': 0.00014682324254890135, 'samples': 16640512, 'steps': 32500, 'loss/train': 1.0489423274993896} +02/25/2022 19:40:18 - INFO - codeparrot_training - Step 32501: {'lr': 0.00014680833879541689, 'samples': 16641024, 'steps': 32501, 'loss/train': 1.6987041234970093} +02/25/2022 19:40:24 - INFO - codeparrot_training - Step 32502: {'lr': 0.00014679343548397282, 'samples': 16641536, 'steps': 32502, 'loss/train': 0.5247048735618591} +02/25/2022 19:40:28 - INFO - codeparrot_training - Step 32503: {'lr': 0.00014677853261463281, 'samples': 16642048, 'steps': 32503, 'loss/train': 1.193146824836731} +02/25/2022 19:40:33 - INFO - codeparrot_training - Step 32504: {'lr': 0.00014676363018746087, 'samples': 16642560, 'steps': 32504, 'loss/train': 1.7393361330032349} +02/25/2022 19:40:37 - INFO - codeparrot_training - Step 32505: {'lr': 0.00014674872820252076, 'samples': 16643072, 'steps': 32505, 'loss/train': 0.5128758549690247} +02/25/2022 19:40:42 - INFO - codeparrot_training - Step 32506: {'lr': 0.00014673382665987626, 'samples': 16643584, 'steps': 32506, 'loss/train': 1.4535635709762573} +02/25/2022 19:40:46 - INFO - codeparrot_training - Step 32507: {'lr': 0.00014671892555959124, 'samples': 16644096, 'steps': 32507, 'loss/train': 2.1673965454101562} +02/25/2022 19:40:51 - INFO - codeparrot_training - Step 32508: {'lr': 0.0001467040249017296, 'samples': 16644608, 'steps': 32508, 'loss/train': 2.990795850753784} +02/25/2022 19:40:55 - INFO - codeparrot_training - Step 32509: {'lr': 0.00014668912468635513, 'samples': 16645120, 'steps': 32509, 'loss/train': 1.3850171566009521} +02/25/2022 19:41:00 - INFO - codeparrot_training - Step 32510: {'lr': 0.00014667422491353158, 'samples': 16645632, 'steps': 32510, 'loss/train': 1.5463197231292725} +02/25/2022 19:41:04 - INFO - codeparrot_training - Step 32511: {'lr': 0.00014665932558332285, 'samples': 16646144, 'steps': 32511, 'loss/train': 1.9332659244537354} +02/25/2022 19:41:10 - INFO - codeparrot_training - Step 32512: {'lr': 0.00014664442669579277, 'samples': 16646656, 'steps': 32512, 'loss/train': 2.481074571609497} +02/25/2022 19:41:14 - INFO - codeparrot_training - Step 32513: {'lr': 0.00014662952825100523, 'samples': 16647168, 'steps': 32513, 'loss/train': 1.76753568649292} +02/25/2022 19:41:19 - INFO - codeparrot_training - Step 32514: {'lr': 0.00014661463024902386, 'samples': 16647680, 'steps': 32514, 'loss/train': 1.5623332262039185} +02/25/2022 19:41:23 - INFO - codeparrot_training - Step 32515: {'lr': 0.0001465997326899126, 'samples': 16648192, 'steps': 32515, 'loss/train': 2.2548162937164307} +02/25/2022 19:41:28 - INFO - codeparrot_training - Step 32516: {'lr': 0.00014658483557373523, 'samples': 16648704, 'steps': 32516, 'loss/train': 1.596663475036621} +02/25/2022 19:41:31 - INFO - codeparrot_training - Step 32517: {'lr': 0.0001465699389005557, 'samples': 16649216, 'steps': 32517, 'loss/train': 1.362250804901123} +02/25/2022 19:41:37 - INFO - codeparrot_training - Step 32518: {'lr': 0.0001465550426704376, 'samples': 16649728, 'steps': 32518, 'loss/train': 1.934340476989746} +02/25/2022 19:41:40 - INFO - codeparrot_training - Step 32519: {'lr': 0.00014654014688344485, 'samples': 16650240, 'steps': 32519, 'loss/train': 2.4947545528411865} +02/25/2022 19:41:46 - INFO - codeparrot_training - Step 32520: {'lr': 0.0001465252515396413, 'samples': 16650752, 'steps': 32520, 'loss/train': 1.3492956161499023} +02/25/2022 19:41:49 - INFO - codeparrot_training - Step 32521: {'lr': 0.0001465103566390907, 'samples': 16651264, 'steps': 32521, 'loss/train': 1.0873219966888428} +02/25/2022 19:41:55 - INFO - codeparrot_training - Step 32522: {'lr': 0.00014649546218185695, 'samples': 16651776, 'steps': 32522, 'loss/train': 0.9952855706214905} +02/25/2022 19:41:58 - INFO - codeparrot_training - Step 32523: {'lr': 0.00014648056816800366, 'samples': 16652288, 'steps': 32523, 'loss/train': 1.4225622415542603} +02/25/2022 19:42:04 - INFO - codeparrot_training - Step 32524: {'lr': 0.00014646567459759475, 'samples': 16652800, 'steps': 32524, 'loss/train': 1.1771849393844604} +02/25/2022 19:42:07 - INFO - codeparrot_training - Step 32525: {'lr': 0.00014645078147069402, 'samples': 16653312, 'steps': 32525, 'loss/train': 1.9480422735214233} +02/25/2022 19:42:14 - INFO - codeparrot_training - Step 32526: {'lr': 0.0001464358887873654, 'samples': 16653824, 'steps': 32526, 'loss/train': 1.8456637859344482} +02/25/2022 19:42:17 - INFO - codeparrot_training - Step 32527: {'lr': 0.00014642099654767237, 'samples': 16654336, 'steps': 32527, 'loss/train': 1.543684482574463} +02/25/2022 19:42:23 - INFO - codeparrot_training - Step 32528: {'lr': 0.00014640610475167898, 'samples': 16654848, 'steps': 32528, 'loss/train': 1.0154361724853516} +02/25/2022 19:42:26 - INFO - codeparrot_training - Step 32529: {'lr': 0.00014639121339944888, 'samples': 16655360, 'steps': 32529, 'loss/train': 2.256618022918701} +02/25/2022 19:42:32 - INFO - codeparrot_training - Step 32530: {'lr': 0.00014637632249104608, 'samples': 16655872, 'steps': 32530, 'loss/train': 8.19294548034668} +02/25/2022 19:42:35 - INFO - codeparrot_training - Step 32531: {'lr': 0.00014636143202653406, 'samples': 16656384, 'steps': 32531, 'loss/train': 1.152690052986145} +02/25/2022 19:42:41 - INFO - codeparrot_training - Step 32532: {'lr': 0.0001463465420059768, 'samples': 16656896, 'steps': 32532, 'loss/train': 1.9793477058410645} +02/25/2022 19:42:44 - INFO - codeparrot_training - Step 32533: {'lr': 0.00014633165242943804, 'samples': 16657408, 'steps': 32533, 'loss/train': 1.5656373500823975} +02/25/2022 19:42:50 - INFO - codeparrot_training - Step 32534: {'lr': 0.00014631676329698152, 'samples': 16657920, 'steps': 32534, 'loss/train': 1.5136991739273071} +02/25/2022 19:42:53 - INFO - codeparrot_training - Step 32535: {'lr': 0.00014630187460867118, 'samples': 16658432, 'steps': 32535, 'loss/train': 2.1637156009674072} +02/25/2022 19:42:59 - INFO - codeparrot_training - Step 32536: {'lr': 0.0001462869863645706, 'samples': 16658944, 'steps': 32536, 'loss/train': 2.0958502292633057} +02/25/2022 19:43:02 - INFO - codeparrot_training - Step 32537: {'lr': 0.0001462720985647436, 'samples': 16659456, 'steps': 32537, 'loss/train': 2.6352405548095703} +02/25/2022 19:43:08 - INFO - codeparrot_training - Step 32538: {'lr': 0.00014625721120925406, 'samples': 16659968, 'steps': 32538, 'loss/train': 2.3502702713012695} +02/25/2022 19:43:12 - INFO - codeparrot_training - Step 32539: {'lr': 0.0001462423242981657, 'samples': 16660480, 'steps': 32539, 'loss/train': 1.1097592115402222} +02/25/2022 19:43:17 - INFO - codeparrot_training - Step 32540: {'lr': 0.0001462274378315422, 'samples': 16660992, 'steps': 32540, 'loss/train': 1.4944705963134766} +02/25/2022 19:43:21 - INFO - codeparrot_training - Step 32541: {'lr': 0.00014621255180944742, 'samples': 16661504, 'steps': 32541, 'loss/train': 0.9297956228256226} +02/25/2022 19:43:26 - INFO - codeparrot_training - Step 32542: {'lr': 0.0001461976662319452, 'samples': 16662016, 'steps': 32542, 'loss/train': 1.0841641426086426} +02/25/2022 19:43:30 - INFO - codeparrot_training - Step 32543: {'lr': 0.00014618278109909916, 'samples': 16662528, 'steps': 32543, 'loss/train': 2.223261833190918} +02/25/2022 19:43:35 - INFO - codeparrot_training - Step 32544: {'lr': 0.0001461678964109731, 'samples': 16663040, 'steps': 32544, 'loss/train': 2.282331943511963} +02/25/2022 19:43:39 - INFO - codeparrot_training - Step 32545: {'lr': 0.00014615301216763083, 'samples': 16663552, 'steps': 32545, 'loss/train': 1.8736226558685303} +02/25/2022 19:43:44 - INFO - codeparrot_training - Step 32546: {'lr': 0.00014613812836913604, 'samples': 16664064, 'steps': 32546, 'loss/train': 1.63163423538208} +02/25/2022 19:43:48 - INFO - codeparrot_training - Step 32547: {'lr': 0.0001461232450155525, 'samples': 16664576, 'steps': 32547, 'loss/train': 2.2463481426239014} +02/25/2022 19:43:54 - INFO - codeparrot_training - Step 32548: {'lr': 0.0001461083621069441, 'samples': 16665088, 'steps': 32548, 'loss/train': 2.52325177192688} +02/25/2022 19:43:57 - INFO - codeparrot_training - Step 32549: {'lr': 0.00014609347964337447, 'samples': 16665600, 'steps': 32549, 'loss/train': 1.5620390176773071} +02/25/2022 19:44:03 - INFO - codeparrot_training - Step 32550: {'lr': 0.00014607859762490733, 'samples': 16666112, 'steps': 32550, 'loss/train': 2.4591023921966553} +02/25/2022 19:44:06 - INFO - codeparrot_training - Step 32551: {'lr': 0.00014606371605160645, 'samples': 16666624, 'steps': 32551, 'loss/train': 1.6290680170059204} +02/25/2022 19:44:12 - INFO - codeparrot_training - Step 32552: {'lr': 0.0001460488349235357, 'samples': 16667136, 'steps': 32552, 'loss/train': 0.312151163816452} +02/25/2022 19:44:15 - INFO - codeparrot_training - Step 32553: {'lr': 0.00014603395424075868, 'samples': 16667648, 'steps': 32553, 'loss/train': 2.084946632385254} +02/25/2022 19:44:21 - INFO - codeparrot_training - Step 32554: {'lr': 0.00014601907400333918, 'samples': 16668160, 'steps': 32554, 'loss/train': 1.6850183010101318} +02/25/2022 19:44:24 - INFO - codeparrot_training - Step 32555: {'lr': 0.00014600419421134092, 'samples': 16668672, 'steps': 32555, 'loss/train': 1.7700989246368408} +02/25/2022 19:44:30 - INFO - codeparrot_training - Step 32556: {'lr': 0.0001459893148648278, 'samples': 16669184, 'steps': 32556, 'loss/train': 1.63833749294281} +02/25/2022 19:44:33 - INFO - codeparrot_training - Step 32557: {'lr': 0.0001459744359638633, 'samples': 16669696, 'steps': 32557, 'loss/train': 3.292646884918213} +02/25/2022 19:44:40 - INFO - codeparrot_training - Step 32558: {'lr': 0.00014595955750851126, 'samples': 16670208, 'steps': 32558, 'loss/train': 2.00081205368042} +02/25/2022 19:44:45 - INFO - codeparrot_training - Step 32559: {'lr': 0.00014594467949883552, 'samples': 16670720, 'steps': 32559, 'loss/train': 0.06072288751602173} +02/25/2022 19:44:49 - INFO - codeparrot_training - Step 32560: {'lr': 0.00014592980193489974, 'samples': 16671232, 'steps': 32560, 'loss/train': 2.46502947807312} +02/25/2022 19:44:52 - INFO - codeparrot_training - Step 32561: {'lr': 0.00014591492481676765, 'samples': 16671744, 'steps': 32561, 'loss/train': 3.4827399253845215} +02/25/2022 19:44:58 - INFO - codeparrot_training - Step 32562: {'lr': 0.00014590004814450287, 'samples': 16672256, 'steps': 32562, 'loss/train': 2.4513795375823975} +02/25/2022 19:45:03 - INFO - codeparrot_training - Step 32563: {'lr': 0.0001458851719181693, 'samples': 16672768, 'steps': 32563, 'loss/train': 2.3921098709106445} +02/25/2022 19:45:07 - INFO - codeparrot_training - Step 32564: {'lr': 0.00014587029613783063, 'samples': 16673280, 'steps': 32564, 'loss/train': 2.1346254348754883} +02/25/2022 19:45:12 - INFO - codeparrot_training - Step 32565: {'lr': 0.00014585542080355053, 'samples': 16673792, 'steps': 32565, 'loss/train': 4.798409461975098} +02/25/2022 19:45:16 - INFO - codeparrot_training - Step 32566: {'lr': 0.00014584054591539264, 'samples': 16674304, 'steps': 32566, 'loss/train': 1.7423176765441895} +02/25/2022 19:45:19 - INFO - codeparrot_training - Step 32567: {'lr': 0.00014582567147342085, 'samples': 16674816, 'steps': 32567, 'loss/train': 2.152841329574585} +02/25/2022 19:45:25 - INFO - codeparrot_training - Step 32568: {'lr': 0.00014581079747769886, 'samples': 16675328, 'steps': 32568, 'loss/train': 0.5792051553726196} +02/25/2022 19:45:29 - INFO - codeparrot_training - Step 32569: {'lr': 0.00014579592392829015, 'samples': 16675840, 'steps': 32569, 'loss/train': 1.7976208925247192} +02/25/2022 19:45:34 - INFO - codeparrot_training - Step 32570: {'lr': 0.0001457810508252588, 'samples': 16676352, 'steps': 32570, 'loss/train': 2.345736503601074} +02/25/2022 19:45:40 - INFO - codeparrot_training - Step 32571: {'lr': 0.00014576617816866827, 'samples': 16676864, 'steps': 32571, 'loss/train': 2.0118987560272217} +02/25/2022 19:45:43 - INFO - codeparrot_training - Step 32572: {'lr': 0.00014575130595858237, 'samples': 16677376, 'steps': 32572, 'loss/train': 1.2332576513290405} +02/25/2022 19:45:49 - INFO - codeparrot_training - Step 32573: {'lr': 0.00014573643419506466, 'samples': 16677888, 'steps': 32573, 'loss/train': 1.5530755519866943} +02/25/2022 19:45:52 - INFO - codeparrot_training - Step 32574: {'lr': 0.00014572156287817918, 'samples': 16678400, 'steps': 32574, 'loss/train': 1.4895217418670654} +02/25/2022 19:45:58 - INFO - codeparrot_training - Step 32575: {'lr': 0.0001457066920079892, 'samples': 16678912, 'steps': 32575, 'loss/train': 0.6072016954421997} +02/25/2022 19:46:01 - INFO - codeparrot_training - Step 32576: {'lr': 0.00014569182158455873, 'samples': 16679424, 'steps': 32576, 'loss/train': 1.2757741212844849} +02/25/2022 19:46:07 - INFO - codeparrot_training - Step 32577: {'lr': 0.00014567695160795128, 'samples': 16679936, 'steps': 32577, 'loss/train': 1.829365611076355} +02/25/2022 19:46:10 - INFO - codeparrot_training - Step 32578: {'lr': 0.00014566208207823084, 'samples': 16680448, 'steps': 32578, 'loss/train': 1.710878849029541} +02/25/2022 19:46:16 - INFO - codeparrot_training - Step 32579: {'lr': 0.00014564721299546068, 'samples': 16680960, 'steps': 32579, 'loss/train': 0.3618599474430084} +02/25/2022 19:46:19 - INFO - codeparrot_training - Step 32580: {'lr': 0.00014563234435970487, 'samples': 16681472, 'steps': 32580, 'loss/train': 2.0613863468170166} +02/25/2022 19:46:25 - INFO - codeparrot_training - Step 32581: {'lr': 0.00014561747617102683, 'samples': 16681984, 'steps': 32581, 'loss/train': 2.2673733234405518} +02/25/2022 19:46:29 - INFO - codeparrot_training - Step 32582: {'lr': 0.00014560260842949048, 'samples': 16682496, 'steps': 32582, 'loss/train': 2.2760581970214844} +02/25/2022 19:46:32 - INFO - codeparrot_training - Step 32583: {'lr': 0.00014558774113515943, 'samples': 16683008, 'steps': 32583, 'loss/train': 1.891520619392395} +02/25/2022 19:46:39 - INFO - codeparrot_training - Step 32584: {'lr': 0.00014557287428809733, 'samples': 16683520, 'steps': 32584, 'loss/train': 1.7041987180709839} +02/25/2022 19:46:42 - INFO - codeparrot_training - Step 32585: {'lr': 0.00014555800788836775, 'samples': 16684032, 'steps': 32585, 'loss/train': 2.355768918991089} +02/25/2022 19:46:48 - INFO - codeparrot_training - Step 32586: {'lr': 0.00014554314193603463, 'samples': 16684544, 'steps': 32586, 'loss/train': 1.9019343852996826} +02/25/2022 19:46:51 - INFO - codeparrot_training - Step 32587: {'lr': 0.0001455282764311615, 'samples': 16685056, 'steps': 32587, 'loss/train': 1.5980372428894043} +02/25/2022 19:46:56 - INFO - codeparrot_training - Step 32588: {'lr': 0.00014551341137381208, 'samples': 16685568, 'steps': 32588, 'loss/train': 2.3350040912628174} +02/25/2022 19:47:00 - INFO - codeparrot_training - Step 32589: {'lr': 0.00014549854676404992, 'samples': 16686080, 'steps': 32589, 'loss/train': 2.4348292350769043} +02/25/2022 19:47:06 - INFO - codeparrot_training - Step 32590: {'lr': 0.00014548368260193888, 'samples': 16686592, 'steps': 32590, 'loss/train': 0.8817393183708191} +02/25/2022 19:47:09 - INFO - codeparrot_training - Step 32591: {'lr': 0.0001454688188875426, 'samples': 16687104, 'steps': 32591, 'loss/train': 1.4493517875671387} +02/25/2022 19:47:14 - INFO - codeparrot_training - Step 32592: {'lr': 0.00014545395562092468, 'samples': 16687616, 'steps': 32592, 'loss/train': 1.8948256969451904} +02/25/2022 19:47:21 - INFO - codeparrot_training - Step 32593: {'lr': 0.00014543909280214873, 'samples': 16688128, 'steps': 32593, 'loss/train': 0.7754912376403809} +02/25/2022 19:47:24 - INFO - codeparrot_training - Step 32594: {'lr': 0.0001454242304312786, 'samples': 16688640, 'steps': 32594, 'loss/train': 1.6801598072052002} +02/25/2022 19:47:29 - INFO - codeparrot_training - Step 32595: {'lr': 0.00014540936850837775, 'samples': 16689152, 'steps': 32595, 'loss/train': 1.9028607606887817} +02/25/2022 19:47:33 - INFO - codeparrot_training - Step 32596: {'lr': 0.00014539450703351015, 'samples': 16689664, 'steps': 32596, 'loss/train': 0.9734052419662476} +02/25/2022 19:47:38 - INFO - codeparrot_training - Step 32597: {'lr': 0.00014537964600673907, 'samples': 16690176, 'steps': 32597, 'loss/train': 2.524299144744873} +02/25/2022 19:47:42 - INFO - codeparrot_training - Step 32598: {'lr': 0.00014536478542812847, 'samples': 16690688, 'steps': 32598, 'loss/train': 1.862568974494934} +02/25/2022 19:47:48 - INFO - codeparrot_training - Step 32599: {'lr': 0.00014534992529774182, 'samples': 16691200, 'steps': 32599, 'loss/train': 0.26445573568344116} +02/25/2022 19:47:51 - INFO - codeparrot_training - Step 32600: {'lr': 0.00014533506561564306, 'samples': 16691712, 'steps': 32600, 'loss/train': 1.1494972705841064} +02/25/2022 19:47:56 - INFO - codeparrot_training - Step 32601: {'lr': 0.0001453202063818954, 'samples': 16692224, 'steps': 32601, 'loss/train': 1.9411702156066895} +02/25/2022 19:48:00 - INFO - codeparrot_training - Step 32602: {'lr': 0.0001453053475965629, 'samples': 16692736, 'steps': 32602, 'loss/train': 1.9713162183761597} +02/25/2022 19:48:06 - INFO - codeparrot_training - Step 32603: {'lr': 0.00014529048925970888, 'samples': 16693248, 'steps': 32603, 'loss/train': 2.7197704315185547} +02/25/2022 19:48:10 - INFO - codeparrot_training - Step 32604: {'lr': 0.0001452756313713974, 'samples': 16693760, 'steps': 32604, 'loss/train': 1.9436630010604858} +02/25/2022 19:48:15 - INFO - codeparrot_training - Step 32605: {'lr': 0.00014526077393169166, 'samples': 16694272, 'steps': 32605, 'loss/train': 2.1888067722320557} +02/25/2022 19:48:19 - INFO - codeparrot_training - Step 32606: {'lr': 0.00014524591694065558, 'samples': 16694784, 'steps': 32606, 'loss/train': 1.585457444190979} +02/25/2022 19:48:24 - INFO - codeparrot_training - Step 32607: {'lr': 0.00014523106039835277, 'samples': 16695296, 'steps': 32607, 'loss/train': 1.813966989517212} +02/25/2022 19:48:28 - INFO - codeparrot_training - Step 32608: {'lr': 0.00014521620430484668, 'samples': 16695808, 'steps': 32608, 'loss/train': 2.113527774810791} +02/25/2022 19:48:33 - INFO - codeparrot_training - Step 32609: {'lr': 0.00014520134866020124, 'samples': 16696320, 'steps': 32609, 'loss/train': 1.8030260801315308} +02/25/2022 19:48:37 - INFO - codeparrot_training - Step 32610: {'lr': 0.00014518649346447992, 'samples': 16696832, 'steps': 32610, 'loss/train': 1.4430670738220215} +02/25/2022 19:48:42 - INFO - codeparrot_training - Step 32611: {'lr': 0.00014517163871774637, 'samples': 16697344, 'steps': 32611, 'loss/train': 1.356073021888733} +02/25/2022 19:48:46 - INFO - codeparrot_training - Step 32612: {'lr': 0.00014515678442006416, 'samples': 16697856, 'steps': 32612, 'loss/train': 2.759971857070923} +02/25/2022 19:48:51 - INFO - codeparrot_training - Step 32613: {'lr': 0.0001451419305714971, 'samples': 16698368, 'steps': 32613, 'loss/train': 1.8395285606384277} +02/25/2022 19:48:55 - INFO - codeparrot_training - Step 32614: {'lr': 0.00014512707717210868, 'samples': 16698880, 'steps': 32614, 'loss/train': 1.7062286138534546} +02/25/2022 19:49:01 - INFO - codeparrot_training - Step 32615: {'lr': 0.0001451122242219626, 'samples': 16699392, 'steps': 32615, 'loss/train': 2.3146731853485107} +02/25/2022 19:49:04 - INFO - codeparrot_training - Step 32616: {'lr': 0.00014509737172112232, 'samples': 16699904, 'steps': 32616, 'loss/train': 1.404025912284851} +02/25/2022 19:49:10 - INFO - codeparrot_training - Step 32617: {'lr': 0.0001450825196696517, 'samples': 16700416, 'steps': 32617, 'loss/train': 1.3700178861618042} +02/25/2022 19:49:13 - INFO - codeparrot_training - Step 32618: {'lr': 0.00014506766806761422, 'samples': 16700928, 'steps': 32618, 'loss/train': 2.280946969985962} +02/25/2022 19:49:19 - INFO - codeparrot_training - Step 32619: {'lr': 0.00014505281691507354, 'samples': 16701440, 'steps': 32619, 'loss/train': 1.6797267198562622} +02/25/2022 19:49:22 - INFO - codeparrot_training - Step 32620: {'lr': 0.0001450379662120932, 'samples': 16701952, 'steps': 32620, 'loss/train': 1.0425136089324951} +02/25/2022 19:49:28 - INFO - codeparrot_training - Step 32621: {'lr': 0.00014502311595873695, 'samples': 16702464, 'steps': 32621, 'loss/train': 0.9134673476219177} +02/25/2022 19:49:31 - INFO - codeparrot_training - Step 32622: {'lr': 0.00014500826615506838, 'samples': 16702976, 'steps': 32622, 'loss/train': 1.7135289907455444} +02/25/2022 19:49:37 - INFO - codeparrot_training - Step 32623: {'lr': 0.00014499341680115102, 'samples': 16703488, 'steps': 32623, 'loss/train': 1.8786330223083496} +02/25/2022 19:49:40 - INFO - codeparrot_training - Step 32624: {'lr': 0.00014497856789704843, 'samples': 16704000, 'steps': 32624, 'loss/train': 1.5957815647125244} +02/25/2022 19:49:46 - INFO - codeparrot_training - Step 32625: {'lr': 0.00014496371944282442, 'samples': 16704512, 'steps': 32625, 'loss/train': 1.5695611238479614} +02/25/2022 19:49:49 - INFO - codeparrot_training - Step 32626: {'lr': 0.00014494887143854247, 'samples': 16705024, 'steps': 32626, 'loss/train': 1.8321589231491089} +02/25/2022 19:49:55 - INFO - codeparrot_training - Step 32627: {'lr': 0.0001449340238842662, 'samples': 16705536, 'steps': 32627, 'loss/train': 2.1155900955200195} +02/25/2022 19:49:58 - INFO - codeparrot_training - Step 32628: {'lr': 0.0001449191767800591, 'samples': 16706048, 'steps': 32628, 'loss/train': 2.2852180004119873} +02/25/2022 19:50:04 - INFO - codeparrot_training - Step 32629: {'lr': 0.000144904330125985, 'samples': 16706560, 'steps': 32629, 'loss/train': 1.7796056270599365} +02/25/2022 19:50:08 - INFO - codeparrot_training - Step 32630: {'lr': 0.00014488948392210724, 'samples': 16707072, 'steps': 32630, 'loss/train': 2.3164689540863037} +02/25/2022 19:50:13 - INFO - codeparrot_training - Step 32631: {'lr': 0.00014487463816848978, 'samples': 16707584, 'steps': 32631, 'loss/train': 2.30336594581604} +02/25/2022 19:50:17 - INFO - codeparrot_training - Step 32632: {'lr': 0.00014485979286519578, 'samples': 16708096, 'steps': 32632, 'loss/train': 0.7707721590995789} +02/25/2022 19:50:22 - INFO - codeparrot_training - Step 32633: {'lr': 0.00014484494801228915, 'samples': 16708608, 'steps': 32633, 'loss/train': 2.6576788425445557} +02/25/2022 19:50:26 - INFO - codeparrot_training - Step 32634: {'lr': 0.00014483010360983329, 'samples': 16709120, 'steps': 32634, 'loss/train': 1.0598995685577393} +02/25/2022 19:50:31 - INFO - codeparrot_training - Step 32635: {'lr': 0.00014481525965789205, 'samples': 16709632, 'steps': 32635, 'loss/train': 1.0487781763076782} +02/25/2022 19:50:35 - INFO - codeparrot_training - Step 32636: {'lr': 0.00014480041615652864, 'samples': 16710144, 'steps': 32636, 'loss/train': 1.2969876527786255} +02/25/2022 19:50:40 - INFO - codeparrot_training - Step 32637: {'lr': 0.00014478557310580696, 'samples': 16710656, 'steps': 32637, 'loss/train': 0.21595679223537445} +02/25/2022 19:50:44 - INFO - codeparrot_training - Step 32638: {'lr': 0.00014477073050579034, 'samples': 16711168, 'steps': 32638, 'loss/train': 2.559903383255005} +02/25/2022 19:50:50 - INFO - codeparrot_training - Step 32639: {'lr': 0.00014475588835654275, 'samples': 16711680, 'steps': 32639, 'loss/train': 1.2127933502197266} +02/25/2022 19:50:54 - INFO - codeparrot_training - Step 32640: {'lr': 0.00014474104665812727, 'samples': 16712192, 'steps': 32640, 'loss/train': 2.8421521186828613} +02/25/2022 19:50:59 - INFO - codeparrot_training - Step 32641: {'lr': 0.00014472620541060782, 'samples': 16712704, 'steps': 32641, 'loss/train': 1.2105140686035156} +02/25/2022 19:51:03 - INFO - codeparrot_training - Step 32642: {'lr': 0.00014471136461404778, 'samples': 16713216, 'steps': 32642, 'loss/train': 2.8805480003356934} +02/25/2022 19:51:08 - INFO - codeparrot_training - Step 32643: {'lr': 0.00014469652426851094, 'samples': 16713728, 'steps': 32643, 'loss/train': 1.6034473180770874} +02/25/2022 19:51:12 - INFO - codeparrot_training - Step 32644: {'lr': 0.00014468168437406075, 'samples': 16714240, 'steps': 32644, 'loss/train': 0.9617477655410767} +02/25/2022 19:51:17 - INFO - codeparrot_training - Step 32645: {'lr': 0.00014466684493076077, 'samples': 16714752, 'steps': 32645, 'loss/train': 1.3194661140441895} +02/25/2022 19:51:21 - INFO - codeparrot_training - Step 32646: {'lr': 0.0001446520059386745, 'samples': 16715264, 'steps': 32646, 'loss/train': 1.4790725708007812} +02/25/2022 19:51:26 - INFO - codeparrot_training - Step 32647: {'lr': 0.00014463716739786565, 'samples': 16715776, 'steps': 32647, 'loss/train': 0.9794721007347107} +02/25/2022 19:51:30 - INFO - codeparrot_training - Step 32648: {'lr': 0.00014462232930839776, 'samples': 16716288, 'steps': 32648, 'loss/train': 1.3576139211654663} +02/25/2022 19:51:36 - INFO - codeparrot_training - Step 32649: {'lr': 0.00014460749167033432, 'samples': 16716800, 'steps': 32649, 'loss/train': 0.7192365527153015} +02/25/2022 19:51:40 - INFO - codeparrot_training - Step 32650: {'lr': 0.00014459265448373883, 'samples': 16717312, 'steps': 32650, 'loss/train': 2.836864709854126} +02/25/2022 19:51:45 - INFO - codeparrot_training - Step 32651: {'lr': 0.00014457781774867504, 'samples': 16717824, 'steps': 32651, 'loss/train': 0.8483403325080872} +02/25/2022 19:51:49 - INFO - codeparrot_training - Step 32652: {'lr': 0.0001445629814652064, 'samples': 16718336, 'steps': 32652, 'loss/train': 2.5078163146972656} +02/25/2022 19:51:54 - INFO - codeparrot_training - Step 32653: {'lr': 0.00014454814563339647, 'samples': 16718848, 'steps': 32653, 'loss/train': 1.23050057888031} +02/25/2022 19:51:57 - INFO - codeparrot_training - Step 32654: {'lr': 0.0001445333102533088, 'samples': 16719360, 'steps': 32654, 'loss/train': 2.6685192584991455} +02/25/2022 19:52:03 - INFO - codeparrot_training - Step 32655: {'lr': 0.00014451847532500684, 'samples': 16719872, 'steps': 32655, 'loss/train': 1.9983913898468018} +02/25/2022 19:52:06 - INFO - codeparrot_training - Step 32656: {'lr': 0.00014450364084855433, 'samples': 16720384, 'steps': 32656, 'loss/train': 1.031639814376831} +02/25/2022 19:52:12 - INFO - codeparrot_training - Step 32657: {'lr': 0.0001444888068240147, 'samples': 16720896, 'steps': 32657, 'loss/train': 1.767969012260437} +02/25/2022 19:52:15 - INFO - codeparrot_training - Step 32658: {'lr': 0.00014447397325145157, 'samples': 16721408, 'steps': 32658, 'loss/train': 0.7126955389976501} +02/25/2022 19:52:21 - INFO - codeparrot_training - Step 32659: {'lr': 0.0001444591401309283, 'samples': 16721920, 'steps': 32659, 'loss/train': 2.202270984649658} +02/25/2022 19:52:27 - INFO - codeparrot_training - Step 32660: {'lr': 0.00014444430746250866, 'samples': 16722432, 'steps': 32660, 'loss/train': 0.9081114530563354} +02/25/2022 19:52:31 - INFO - codeparrot_training - Step 32661: {'lr': 0.0001444294752462561, 'samples': 16722944, 'steps': 32661, 'loss/train': 1.5721038579940796} +02/25/2022 19:52:36 - INFO - codeparrot_training - Step 32662: {'lr': 0.00014441464348223415, 'samples': 16723456, 'steps': 32662, 'loss/train': 1.8994137048721313} +02/25/2022 19:52:40 - INFO - codeparrot_training - Step 32663: {'lr': 0.00014439981217050625, 'samples': 16723968, 'steps': 32663, 'loss/train': 2.639317512512207} +02/25/2022 19:52:43 - INFO - codeparrot_training - Step 32664: {'lr': 0.0001443849813111361, 'samples': 16724480, 'steps': 32664, 'loss/train': 2.0904910564422607} +02/25/2022 19:52:49 - INFO - codeparrot_training - Step 32665: {'lr': 0.00014437015090418715, 'samples': 16724992, 'steps': 32665, 'loss/train': 1.0360610485076904} +02/25/2022 19:52:52 - INFO - codeparrot_training - Step 32666: {'lr': 0.00014435532094972292, 'samples': 16725504, 'steps': 32666, 'loss/train': 0.766703724861145} +02/25/2022 19:52:58 - INFO - codeparrot_training - Step 32667: {'lr': 0.00014434049144780686, 'samples': 16726016, 'steps': 32667, 'loss/train': 1.8390617370605469} +02/25/2022 19:53:03 - INFO - codeparrot_training - Step 32668: {'lr': 0.0001443256623985027, 'samples': 16726528, 'steps': 32668, 'loss/train': 2.619417428970337} +02/25/2022 19:53:07 - INFO - codeparrot_training - Step 32669: {'lr': 0.00014431083380187377, 'samples': 16727040, 'steps': 32669, 'loss/train': 1.761723518371582} +02/25/2022 19:53:13 - INFO - codeparrot_training - Step 32670: {'lr': 0.00014429600565798385, 'samples': 16727552, 'steps': 32670, 'loss/train': 1.877333164215088} +02/25/2022 19:53:16 - INFO - codeparrot_training - Step 32671: {'lr': 0.00014428117796689606, 'samples': 16728064, 'steps': 32671, 'loss/train': 0.5953896045684814} +02/25/2022 19:53:20 - INFO - codeparrot_training - Step 32672: {'lr': 0.00014426635072867423, 'samples': 16728576, 'steps': 32672, 'loss/train': 0.05128796398639679} +02/25/2022 19:53:25 - INFO - codeparrot_training - Step 32673: {'lr': 0.00014425152394338168, 'samples': 16729088, 'steps': 32673, 'loss/train': 1.9015426635742188} +02/25/2022 19:53:28 - INFO - codeparrot_training - Step 32674: {'lr': 0.00014423669761108222, 'samples': 16729600, 'steps': 32674, 'loss/train': 1.4324573278427124} +02/25/2022 19:53:35 - INFO - codeparrot_training - Step 32675: {'lr': 0.000144221871731839, 'samples': 16730112, 'steps': 32675, 'loss/train': 1.5122604370117188} +02/25/2022 19:53:38 - INFO - codeparrot_training - Step 32676: {'lr': 0.00014420704630571573, 'samples': 16730624, 'steps': 32676, 'loss/train': 2.054382562637329} +02/25/2022 19:53:44 - INFO - codeparrot_training - Step 32677: {'lr': 0.00014419222133277586, 'samples': 16731136, 'steps': 32677, 'loss/train': 0.923971951007843} +02/25/2022 19:53:47 - INFO - codeparrot_training - Step 32678: {'lr': 0.00014417739681308296, 'samples': 16731648, 'steps': 32678, 'loss/train': 2.005750894546509} +02/25/2022 19:53:53 - INFO - codeparrot_training - Step 32679: {'lr': 0.0001441625727467005, 'samples': 16732160, 'steps': 32679, 'loss/train': 0.8481064438819885} +02/25/2022 19:53:56 - INFO - codeparrot_training - Step 32680: {'lr': 0.000144147749133692, 'samples': 16732672, 'steps': 32680, 'loss/train': 2.0719780921936035} +02/25/2022 19:54:02 - INFO - codeparrot_training - Step 32681: {'lr': 0.00014413292597412078, 'samples': 16733184, 'steps': 32681, 'loss/train': 1.6500420570373535} +02/25/2022 19:54:06 - INFO - codeparrot_training - Step 32682: {'lr': 0.00014411810326805065, 'samples': 16733696, 'steps': 32682, 'loss/train': 0.3047272264957428} +02/25/2022 19:54:11 - INFO - codeparrot_training - Step 32683: {'lr': 0.00014410328101554487, 'samples': 16734208, 'steps': 32683, 'loss/train': 1.6220779418945312} +02/25/2022 19:54:15 - INFO - codeparrot_training - Step 32684: {'lr': 0.00014408845921666706, 'samples': 16734720, 'steps': 32684, 'loss/train': 1.9734541177749634} +02/25/2022 19:54:20 - INFO - codeparrot_training - Step 32685: {'lr': 0.00014407363787148053, 'samples': 16735232, 'steps': 32685, 'loss/train': 1.9033491611480713} +02/25/2022 19:54:24 - INFO - codeparrot_training - Step 32686: {'lr': 0.000144058816980049, 'samples': 16735744, 'steps': 32686, 'loss/train': 1.803678274154663} +02/25/2022 19:54:30 - INFO - codeparrot_training - Step 32687: {'lr': 0.0001440439965424359, 'samples': 16736256, 'steps': 32687, 'loss/train': 1.5141916275024414} +02/25/2022 19:54:33 - INFO - codeparrot_training - Step 32688: {'lr': 0.00014402917655870464, 'samples': 16736768, 'steps': 32688, 'loss/train': 0.7240457534790039} +02/25/2022 19:54:39 - INFO - codeparrot_training - Step 32689: {'lr': 0.00014401435702891867, 'samples': 16737280, 'steps': 32689, 'loss/train': 1.481002926826477} +02/25/2022 19:54:42 - INFO - codeparrot_training - Step 32690: {'lr': 0.0001439995379531416, 'samples': 16737792, 'steps': 32690, 'loss/train': 1.670120358467102} +02/25/2022 19:54:48 - INFO - codeparrot_training - Step 32691: {'lr': 0.00014398471933143686, 'samples': 16738304, 'steps': 32691, 'loss/train': 1.852657675743103} +02/25/2022 19:54:51 - INFO - codeparrot_training - Step 32692: {'lr': 0.00014396990116386792, 'samples': 16738816, 'steps': 32692, 'loss/train': 2.0642690658569336} +02/25/2022 19:54:57 - INFO - codeparrot_training - Step 32693: {'lr': 0.0001439550834504982, 'samples': 16739328, 'steps': 32693, 'loss/train': 2.485919713973999} +02/25/2022 19:55:00 - INFO - codeparrot_training - Step 32694: {'lr': 0.00014394026619139128, 'samples': 16739840, 'steps': 32694, 'loss/train': 0.9345873594284058} +02/25/2022 19:55:06 - INFO - codeparrot_training - Step 32695: {'lr': 0.0001439254493866106, 'samples': 16740352, 'steps': 32695, 'loss/train': 1.3722742795944214} +02/25/2022 19:55:09 - INFO - codeparrot_training - Step 32696: {'lr': 0.0001439106330362196, 'samples': 16740864, 'steps': 32696, 'loss/train': 2.4172208309173584} +02/25/2022 19:55:15 - INFO - codeparrot_training - Step 32697: {'lr': 0.00014389581714028166, 'samples': 16741376, 'steps': 32697, 'loss/train': 1.4686102867126465} +02/25/2022 19:55:19 - INFO - codeparrot_training - Step 32698: {'lr': 0.00014388100169886045, 'samples': 16741888, 'steps': 32698, 'loss/train': 1.51478910446167} +02/25/2022 19:55:24 - INFO - codeparrot_training - Step 32699: {'lr': 0.00014386618671201933, 'samples': 16742400, 'steps': 32699, 'loss/train': 1.7136051654815674} +02/25/2022 19:55:28 - INFO - codeparrot_training - Step 32700: {'lr': 0.00014385137217982178, 'samples': 16742912, 'steps': 32700, 'loss/train': 0.48612454533576965} +02/25/2022 19:55:33 - INFO - codeparrot_training - Step 32701: {'lr': 0.00014383655810233125, 'samples': 16743424, 'steps': 32701, 'loss/train': 1.5959501266479492} +02/25/2022 19:55:37 - INFO - codeparrot_training - Step 32702: {'lr': 0.00014382174447961105, 'samples': 16743936, 'steps': 32702, 'loss/train': 1.6617835760116577} +02/25/2022 19:55:42 - INFO - codeparrot_training - Step 32703: {'lr': 0.00014380693131172493, 'samples': 16744448, 'steps': 32703, 'loss/train': 2.0757088661193848} +02/25/2022 19:55:46 - INFO - codeparrot_training - Step 32704: {'lr': 0.0001437921185987361, 'samples': 16744960, 'steps': 32704, 'loss/train': 2.3541574478149414} +02/25/2022 19:55:51 - INFO - codeparrot_training - Step 32705: {'lr': 0.00014377730634070827, 'samples': 16745472, 'steps': 32705, 'loss/train': 1.3640838861465454} +02/25/2022 19:55:55 - INFO - codeparrot_training - Step 32706: {'lr': 0.00014376249453770454, 'samples': 16745984, 'steps': 32706, 'loss/train': 1.4453153610229492} +02/25/2022 19:56:01 - INFO - codeparrot_training - Step 32707: {'lr': 0.00014374768318978865, 'samples': 16746496, 'steps': 32707, 'loss/train': 0.6854455471038818} +02/25/2022 19:56:06 - INFO - codeparrot_training - Step 32708: {'lr': 0.00014373287229702388, 'samples': 16747008, 'steps': 32708, 'loss/train': 1.5794174671173096} +02/25/2022 19:56:10 - INFO - codeparrot_training - Step 32709: {'lr': 0.0001437180618594739, 'samples': 16747520, 'steps': 32709, 'loss/train': 2.129223108291626} +02/25/2022 19:56:15 - INFO - codeparrot_training - Step 32710: {'lr': 0.00014370325187720178, 'samples': 16748032, 'steps': 32710, 'loss/train': 2.1325604915618896} +02/25/2022 19:56:19 - INFO - codeparrot_training - Step 32711: {'lr': 0.00014368844235027135, 'samples': 16748544, 'steps': 32711, 'loss/train': 1.5141938924789429} +02/25/2022 19:56:24 - INFO - codeparrot_training - Step 32712: {'lr': 0.00014367363327874572, 'samples': 16749056, 'steps': 32712, 'loss/train': 1.8307890892028809} +02/25/2022 19:56:28 - INFO - codeparrot_training - Step 32713: {'lr': 0.0001436588246626887, 'samples': 16749568, 'steps': 32713, 'loss/train': 1.2590118646621704} +02/25/2022 19:56:33 - INFO - codeparrot_training - Step 32714: {'lr': 0.0001436440165021633, 'samples': 16750080, 'steps': 32714, 'loss/train': 1.5183470249176025} +02/25/2022 19:56:37 - INFO - codeparrot_training - Step 32715: {'lr': 0.00014362920879723324, 'samples': 16750592, 'steps': 32715, 'loss/train': 2.2522971630096436} +02/25/2022 19:56:42 - INFO - codeparrot_training - Step 32716: {'lr': 0.0001436144015479618, 'samples': 16751104, 'steps': 32716, 'loss/train': 1.300566554069519} +02/25/2022 19:56:46 - INFO - codeparrot_training - Step 32717: {'lr': 0.00014359959475441258, 'samples': 16751616, 'steps': 32717, 'loss/train': 1.5501042604446411} +02/25/2022 19:56:51 - INFO - codeparrot_training - Step 32718: {'lr': 0.0001435847884166489, 'samples': 16752128, 'steps': 32718, 'loss/train': 1.1983247995376587} +02/25/2022 19:56:55 - INFO - codeparrot_training - Step 32719: {'lr': 0.0001435699825347342, 'samples': 16752640, 'steps': 32719, 'loss/train': 1.5109435319900513} +02/25/2022 19:57:00 - INFO - codeparrot_training - Step 32720: {'lr': 0.00014355517710873183, 'samples': 16753152, 'steps': 32720, 'loss/train': 1.50523841381073} +02/25/2022 19:57:04 - INFO - codeparrot_training - Step 32721: {'lr': 0.00014354037213870535, 'samples': 16753664, 'steps': 32721, 'loss/train': 1.6309013366699219} +02/25/2022 19:57:11 - INFO - codeparrot_training - Step 32722: {'lr': 0.0001435255676247181, 'samples': 16754176, 'steps': 32722, 'loss/train': 1.374910593032837} +02/25/2022 19:57:14 - INFO - codeparrot_training - Step 32723: {'lr': 0.0001435107635668335, 'samples': 16754688, 'steps': 32723, 'loss/train': 2.370889186859131} +02/25/2022 19:57:20 - INFO - codeparrot_training - Step 32724: {'lr': 0.00014349595996511493, 'samples': 16755200, 'steps': 32724, 'loss/train': 1.9662412405014038} +02/25/2022 19:57:23 - INFO - codeparrot_training - Step 32725: {'lr': 0.00014348115681962593, 'samples': 16755712, 'steps': 32725, 'loss/train': 0.6598528027534485} +02/25/2022 19:57:29 - INFO - codeparrot_training - Step 32726: {'lr': 0.00014346635413042968, 'samples': 16756224, 'steps': 32726, 'loss/train': 2.1078903675079346} +02/25/2022 19:57:32 - INFO - codeparrot_training - Step 32727: {'lr': 0.00014345155189759003, 'samples': 16756736, 'steps': 32727, 'loss/train': 2.4211621284484863} +02/25/2022 19:57:38 - INFO - codeparrot_training - Step 32728: {'lr': 0.00014343675012116984, 'samples': 16757248, 'steps': 32728, 'loss/train': 1.2968860864639282} +02/25/2022 19:57:41 - INFO - codeparrot_training - Step 32729: {'lr': 0.0001434219488012329, 'samples': 16757760, 'steps': 32729, 'loss/train': 2.9250118732452393} +02/25/2022 19:57:47 - INFO - codeparrot_training - Step 32730: {'lr': 0.00014340714793784233, 'samples': 16758272, 'steps': 32730, 'loss/train': 1.5965840816497803} +02/25/2022 19:57:50 - INFO - codeparrot_training - Step 32731: {'lr': 0.00014339234753106195, 'samples': 16758784, 'steps': 32731, 'loss/train': 1.2012847661972046} +02/25/2022 19:57:56 - INFO - codeparrot_training - Step 32732: {'lr': 0.00014337754758095468, 'samples': 16759296, 'steps': 32732, 'loss/train': 2.1962292194366455} +02/25/2022 19:58:00 - INFO - codeparrot_training - Step 32733: {'lr': 0.00014336274808758427, 'samples': 16759808, 'steps': 32733, 'loss/train': 2.402853488922119} +02/25/2022 19:58:05 - INFO - codeparrot_training - Step 32734: {'lr': 0.00014334794905101396, 'samples': 16760320, 'steps': 32734, 'loss/train': 2.621136426925659} +02/25/2022 19:58:09 - INFO - codeparrot_training - Step 32735: {'lr': 0.00014333315047130712, 'samples': 16760832, 'steps': 32735, 'loss/train': 1.6332625150680542} +02/25/2022 19:58:14 - INFO - codeparrot_training - Step 32736: {'lr': 0.00014331835234852717, 'samples': 16761344, 'steps': 32736, 'loss/train': 2.4094061851501465} +02/25/2022 19:58:18 - INFO - codeparrot_training - Step 32737: {'lr': 0.0001433035546827376, 'samples': 16761856, 'steps': 32737, 'loss/train': 2.4797909259796143} +02/25/2022 19:58:23 - INFO - codeparrot_training - Step 32738: {'lr': 0.00014328875747400172, 'samples': 16762368, 'steps': 32738, 'loss/train': 1.9192359447479248} +02/25/2022 19:58:27 - INFO - codeparrot_training - Step 32739: {'lr': 0.0001432739607223828, 'samples': 16762880, 'steps': 32739, 'loss/train': 2.141568660736084} +02/25/2022 19:58:32 - INFO - codeparrot_training - Step 32740: {'lr': 0.00014325916442794445, 'samples': 16763392, 'steps': 32740, 'loss/train': 2.0738911628723145} +02/25/2022 19:58:36 - INFO - codeparrot_training - Step 32741: {'lr': 0.00014324436859074996, 'samples': 16763904, 'steps': 32741, 'loss/train': 3.309900999069214} +02/25/2022 19:58:42 - INFO - codeparrot_training - Step 32742: {'lr': 0.0001432295732108627, 'samples': 16764416, 'steps': 32742, 'loss/train': 1.4107065200805664} +02/25/2022 19:58:45 - INFO - codeparrot_training - Step 32743: {'lr': 0.00014321477828834595, 'samples': 16764928, 'steps': 32743, 'loss/train': 0.8776313066482544} +02/25/2022 19:58:51 - INFO - codeparrot_training - Step 32744: {'lr': 0.00014319998382326328, 'samples': 16765440, 'steps': 32744, 'loss/train': 1.152510166168213} +02/25/2022 19:58:54 - INFO - codeparrot_training - Step 32745: {'lr': 0.00014318518981567792, 'samples': 16765952, 'steps': 32745, 'loss/train': 1.1839841604232788} +02/25/2022 19:59:00 - INFO - codeparrot_training - Step 32746: {'lr': 0.00014317039626565335, 'samples': 16766464, 'steps': 32746, 'loss/train': 1.3747538328170776} +02/25/2022 19:59:04 - INFO - codeparrot_training - Step 32747: {'lr': 0.00014315560317325275, 'samples': 16766976, 'steps': 32747, 'loss/train': 1.5950841903686523} +02/25/2022 19:59:09 - INFO - codeparrot_training - Step 32748: {'lr': 0.00014314081053853984, 'samples': 16767488, 'steps': 32748, 'loss/train': 1.55852210521698} +02/25/2022 19:59:13 - INFO - codeparrot_training - Step 32749: {'lr': 0.00014312601836157752, 'samples': 16768000, 'steps': 32749, 'loss/train': 2.1252944469451904} +02/25/2022 19:59:18 - INFO - codeparrot_training - Step 32750: {'lr': 0.00014311122664242953, 'samples': 16768512, 'steps': 32750, 'loss/train': 1.4113901853561401} +02/25/2022 19:59:22 - INFO - codeparrot_training - Step 32751: {'lr': 0.00014309643538115902, 'samples': 16769024, 'steps': 32751, 'loss/train': 2.2619760036468506} +02/25/2022 19:59:28 - INFO - codeparrot_training - Step 32752: {'lr': 0.00014308164457782951, 'samples': 16769536, 'steps': 32752, 'loss/train': 1.678704023361206} +02/25/2022 19:59:31 - INFO - codeparrot_training - Step 32753: {'lr': 0.0001430668542325043, 'samples': 16770048, 'steps': 32753, 'loss/train': 0.7408945560455322} +02/25/2022 19:59:37 - INFO - codeparrot_training - Step 32754: {'lr': 0.0001430520643452467, 'samples': 16770560, 'steps': 32754, 'loss/train': 2.108042001724243} +02/25/2022 19:59:40 - INFO - codeparrot_training - Step 32755: {'lr': 0.00014303727491612003, 'samples': 16771072, 'steps': 32755, 'loss/train': 2.2555551528930664} +02/25/2022 19:59:46 - INFO - codeparrot_training - Step 32756: {'lr': 0.0001430224859451878, 'samples': 16771584, 'steps': 32756, 'loss/train': 2.203788995742798} +02/25/2022 19:59:49 - INFO - codeparrot_training - Step 32757: {'lr': 0.0001430076974325133, 'samples': 16772096, 'steps': 32757, 'loss/train': 1.6723406314849854} +02/25/2022 19:59:55 - INFO - codeparrot_training - Step 32758: {'lr': 0.00014299290937815979, 'samples': 16772608, 'steps': 32758, 'loss/train': 1.8768762350082397} +02/25/2022 19:59:58 - INFO - codeparrot_training - Step 32759: {'lr': 0.0001429781217821906, 'samples': 16773120, 'steps': 32759, 'loss/train': 1.6370434761047363} +02/25/2022 20:00:04 - INFO - codeparrot_training - Step 32760: {'lr': 0.00014296333464466928, 'samples': 16773632, 'steps': 32760, 'loss/train': 1.656333088874817} +02/25/2022 20:00:07 - INFO - codeparrot_training - Step 32761: {'lr': 0.00014294854796565904, 'samples': 16774144, 'steps': 32761, 'loss/train': 2.308772563934326} +02/25/2022 20:00:13 - INFO - codeparrot_training - Step 32762: {'lr': 0.00014293376174522322, 'samples': 16774656, 'steps': 32762, 'loss/train': 1.7694058418273926} +02/25/2022 20:00:16 - INFO - codeparrot_training - Step 32763: {'lr': 0.00014291897598342506, 'samples': 16775168, 'steps': 32763, 'loss/train': 0.5264816880226135} +02/25/2022 20:00:22 - INFO - codeparrot_training - Step 32764: {'lr': 0.00014290419068032812, 'samples': 16775680, 'steps': 32764, 'loss/train': 2.178091287612915} +02/25/2022 20:00:25 - INFO - codeparrot_training - Step 32765: {'lr': 0.0001428894058359955, 'samples': 16776192, 'steps': 32765, 'loss/train': 1.227644681930542} +02/25/2022 20:00:31 - INFO - codeparrot_training - Step 32766: {'lr': 0.00014287462145049084, 'samples': 16776704, 'steps': 32766, 'loss/train': 1.404919147491455} +02/25/2022 20:00:34 - INFO - codeparrot_training - Step 32767: {'lr': 0.00014285983752387714, 'samples': 16777216, 'steps': 32767, 'loss/train': 1.2115883827209473} +02/25/2022 20:00:40 - INFO - codeparrot_training - Step 32768: {'lr': 0.00014284505405621795, 'samples': 16777728, 'steps': 32768, 'loss/train': 0.5569152235984802} +02/25/2022 20:00:44 - INFO - codeparrot_training - Step 32769: {'lr': 0.0001428302710475764, 'samples': 16778240, 'steps': 32769, 'loss/train': 1.3821772336959839} +02/25/2022 20:00:49 - INFO - codeparrot_training - Step 32770: {'lr': 0.00014281548849801617, 'samples': 16778752, 'steps': 32770, 'loss/train': 0.4669029712677002} +02/25/2022 20:00:55 - INFO - codeparrot_training - Step 32771: {'lr': 0.00014280070640760011, 'samples': 16779264, 'steps': 32771, 'loss/train': 0.9726061224937439} +02/25/2022 20:00:58 - INFO - codeparrot_training - Step 32772: {'lr': 0.00014278592477639195, 'samples': 16779776, 'steps': 32772, 'loss/train': 1.459582805633545} +02/25/2022 20:01:04 - INFO - codeparrot_training - Step 32773: {'lr': 0.0001427711436044547, 'samples': 16780288, 'steps': 32773, 'loss/train': 3.1283576488494873} +02/25/2022 20:01:07 - INFO - codeparrot_training - Step 32774: {'lr': 0.000142756362891852, 'samples': 16780800, 'steps': 32774, 'loss/train': 0.8975827693939209} +02/25/2022 20:01:13 - INFO - codeparrot_training - Step 32775: {'lr': 0.00014274158263864685, 'samples': 16781312, 'steps': 32775, 'loss/train': 2.010094404220581} +02/25/2022 20:01:16 - INFO - codeparrot_training - Step 32776: {'lr': 0.00014272680284490276, 'samples': 16781824, 'steps': 32776, 'loss/train': 1.9739956855773926} +02/25/2022 20:01:23 - INFO - codeparrot_training - Step 32777: {'lr': 0.00014271202351068287, 'samples': 16782336, 'steps': 32777, 'loss/train': 1.9123178720474243} +02/25/2022 20:01:27 - INFO - codeparrot_training - Step 32778: {'lr': 0.00014269724463605074, 'samples': 16782848, 'steps': 32778, 'loss/train': 1.6754976511001587} +02/25/2022 20:01:32 - INFO - codeparrot_training - Step 32779: {'lr': 0.00014268246622106952, 'samples': 16783360, 'steps': 32779, 'loss/train': 2.1960582733154297} +02/25/2022 20:01:36 - INFO - codeparrot_training - Step 32780: {'lr': 0.00014266768826580255, 'samples': 16783872, 'steps': 32780, 'loss/train': 1.1884649991989136} +02/25/2022 20:01:41 - INFO - codeparrot_training - Step 32781: {'lr': 0.00014265291077031312, 'samples': 16784384, 'steps': 32781, 'loss/train': 1.93251633644104} +02/25/2022 20:01:45 - INFO - codeparrot_training - Step 32782: {'lr': 0.00014263813373466445, 'samples': 16784896, 'steps': 32782, 'loss/train': 1.337907075881958} +02/25/2022 20:01:50 - INFO - codeparrot_training - Step 32783: {'lr': 0.00014262335715891999, 'samples': 16785408, 'steps': 32783, 'loss/train': 0.6538954377174377} +02/25/2022 20:01:54 - INFO - codeparrot_training - Step 32784: {'lr': 0.00014260858104314298, 'samples': 16785920, 'steps': 32784, 'loss/train': 2.4194469451904297} +02/25/2022 20:01:59 - INFO - codeparrot_training - Step 32785: {'lr': 0.0001425938053873967, 'samples': 16786432, 'steps': 32785, 'loss/train': 0.6877073049545288} +02/25/2022 20:02:03 - INFO - codeparrot_training - Step 32786: {'lr': 0.00014257903019174436, 'samples': 16786944, 'steps': 32786, 'loss/train': 2.034318447113037} +02/25/2022 20:02:09 - INFO - codeparrot_training - Step 32787: {'lr': 0.00014256425545624947, 'samples': 16787456, 'steps': 32787, 'loss/train': 0.8257609009742737} +02/25/2022 20:02:13 - INFO - codeparrot_training - Step 32788: {'lr': 0.00014254948118097517, 'samples': 16787968, 'steps': 32788, 'loss/train': 1.5751572847366333} +02/25/2022 20:02:18 - INFO - codeparrot_training - Step 32789: {'lr': 0.00014253470736598478, 'samples': 16788480, 'steps': 32789, 'loss/train': 1.5892513990402222} +02/25/2022 20:02:22 - INFO - codeparrot_training - Step 32790: {'lr': 0.00014251993401134146, 'samples': 16788992, 'steps': 32790, 'loss/train': 1.7945740222930908} +02/25/2022 20:02:27 - INFO - codeparrot_training - Step 32791: {'lr': 0.00014250516111710877, 'samples': 16789504, 'steps': 32791, 'loss/train': 0.5350008010864258} +02/25/2022 20:02:31 - INFO - codeparrot_training - Step 32792: {'lr': 0.0001424903886833498, 'samples': 16790016, 'steps': 32792, 'loss/train': 2.1410818099975586} +02/25/2022 20:02:36 - INFO - codeparrot_training - Step 32793: {'lr': 0.00014247561671012785, 'samples': 16790528, 'steps': 32793, 'loss/train': 1.8263975381851196} +02/25/2022 20:02:40 - INFO - codeparrot_training - Step 32794: {'lr': 0.00014246084519750613, 'samples': 16791040, 'steps': 32794, 'loss/train': 1.4393744468688965} +02/25/2022 20:02:45 - INFO - codeparrot_training - Step 32795: {'lr': 0.0001424460741455481, 'samples': 16791552, 'steps': 32795, 'loss/train': 2.066148281097412} +02/25/2022 20:02:49 - INFO - codeparrot_training - Step 32796: {'lr': 0.00014243130355431693, 'samples': 16792064, 'steps': 32796, 'loss/train': 1.7517191171646118} +02/25/2022 20:02:56 - INFO - codeparrot_training - Step 32797: {'lr': 0.0001424165334238759, 'samples': 16792576, 'steps': 32797, 'loss/train': 1.7475212812423706} +02/25/2022 20:02:59 - INFO - codeparrot_training - Step 32798: {'lr': 0.0001424017637542882, 'samples': 16793088, 'steps': 32798, 'loss/train': 0.4339800775051117} +02/25/2022 20:03:05 - INFO - codeparrot_training - Step 32799: {'lr': 0.00014238699454561726, 'samples': 16793600, 'steps': 32799, 'loss/train': 1.0771853923797607} +02/25/2022 20:03:08 - INFO - codeparrot_training - Step 32800: {'lr': 0.00014237222579792616, 'samples': 16794112, 'steps': 32800, 'loss/train': 0.9307793378829956} +02/25/2022 20:03:14 - INFO - codeparrot_training - Step 32801: {'lr': 0.00014235745751127847, 'samples': 16794624, 'steps': 32801, 'loss/train': 2.038780927658081} +02/25/2022 20:03:17 - INFO - codeparrot_training - Step 32802: {'lr': 0.00014234268968573707, 'samples': 16795136, 'steps': 32802, 'loss/train': 1.3817161321640015} +02/25/2022 20:03:23 - INFO - codeparrot_training - Step 32803: {'lr': 0.00014232792232136548, 'samples': 16795648, 'steps': 32803, 'loss/train': 1.59222412109375} +02/25/2022 20:03:26 - INFO - codeparrot_training - Step 32804: {'lr': 0.00014231315541822682, 'samples': 16796160, 'steps': 32804, 'loss/train': 1.6140148639678955} +02/25/2022 20:03:32 - INFO - codeparrot_training - Step 32805: {'lr': 0.0001422983889763846, 'samples': 16796672, 'steps': 32805, 'loss/train': 0.5027738213539124} +02/25/2022 20:03:35 - INFO - codeparrot_training - Step 32806: {'lr': 0.00014228362299590163, 'samples': 16797184, 'steps': 32806, 'loss/train': 2.640164613723755} +02/25/2022 20:03:39 - INFO - codeparrot_training - Step 32807: {'lr': 0.00014226885747684154, 'samples': 16797696, 'steps': 32807, 'loss/train': 8.759848594665527} +02/25/2022 20:03:45 - INFO - codeparrot_training - Step 32808: {'lr': 0.00014225409241926739, 'samples': 16798208, 'steps': 32808, 'loss/train': 2.3352572917938232} +02/25/2022 20:03:49 - INFO - codeparrot_training - Step 32809: {'lr': 0.00014223932782324268, 'samples': 16798720, 'steps': 32809, 'loss/train': 1.123358130455017} +02/25/2022 20:03:54 - INFO - codeparrot_training - Step 32810: {'lr': 0.00014222456368883024, 'samples': 16799232, 'steps': 32810, 'loss/train': 1.2572427988052368} +02/25/2022 20:03:58 - INFO - codeparrot_training - Step 32811: {'lr': 0.00014220980001609365, 'samples': 16799744, 'steps': 32811, 'loss/train': 2.2566707134246826} +02/25/2022 20:04:04 - INFO - codeparrot_training - Step 32812: {'lr': 0.000142195036805096, 'samples': 16800256, 'steps': 32812, 'loss/train': 1.9251874685287476} +02/25/2022 20:04:07 - INFO - codeparrot_training - Step 32813: {'lr': 0.0001421802740559006, 'samples': 16800768, 'steps': 32813, 'loss/train': 0.09611745923757553} +02/25/2022 20:04:13 - INFO - codeparrot_training - Step 32814: {'lr': 0.00014216551176857072, 'samples': 16801280, 'steps': 32814, 'loss/train': 1.021488904953003} +02/25/2022 20:04:16 - INFO - codeparrot_training - Step 32815: {'lr': 0.00014215074994316956, 'samples': 16801792, 'steps': 32815, 'loss/train': 1.315518856048584} +02/25/2022 20:04:22 - INFO - codeparrot_training - Step 32816: {'lr': 0.00014213598857976023, 'samples': 16802304, 'steps': 32816, 'loss/train': 2.037801504135132} +02/25/2022 20:04:27 - INFO - codeparrot_training - Step 32817: {'lr': 0.00014212122767840615, 'samples': 16802816, 'steps': 32817, 'loss/train': 0.9158303141593933} +02/25/2022 20:04:31 - INFO - codeparrot_training - Step 32818: {'lr': 0.0001421064672391705, 'samples': 16803328, 'steps': 32818, 'loss/train': 2.670583963394165} +02/25/2022 20:04:37 - INFO - codeparrot_training - Step 32819: {'lr': 0.00014209170726211647, 'samples': 16803840, 'steps': 32819, 'loss/train': 1.5096617937088013} +02/25/2022 20:04:41 - INFO - codeparrot_training - Step 32820: {'lr': 0.00014207694774730722, 'samples': 16804352, 'steps': 32820, 'loss/train': 1.8073617219924927} +02/25/2022 20:04:46 - INFO - codeparrot_training - Step 32821: {'lr': 0.00014206218869480613, 'samples': 16804864, 'steps': 32821, 'loss/train': 0.5628405809402466} +02/25/2022 20:04:50 - INFO - codeparrot_training - Step 32822: {'lr': 0.0001420474301046764, 'samples': 16805376, 'steps': 32822, 'loss/train': 1.4855765104293823} +02/25/2022 20:04:55 - INFO - codeparrot_training - Step 32823: {'lr': 0.00014203267197698116, 'samples': 16805888, 'steps': 32823, 'loss/train': 1.5590139627456665} +02/25/2022 20:04:59 - INFO - codeparrot_training - Step 32824: {'lr': 0.00014201791431178357, 'samples': 16806400, 'steps': 32824, 'loss/train': 1.2142512798309326} +02/25/2022 20:05:04 - INFO - codeparrot_training - Step 32825: {'lr': 0.00014200315710914704, 'samples': 16806912, 'steps': 32825, 'loss/train': 0.8279922604560852} +02/25/2022 20:05:08 - INFO - codeparrot_training - Step 32826: {'lr': 0.00014198840036913472, 'samples': 16807424, 'steps': 32826, 'loss/train': 0.8543905019760132} +02/25/2022 20:05:13 - INFO - codeparrot_training - Step 32827: {'lr': 0.00014197364409180977, 'samples': 16807936, 'steps': 32827, 'loss/train': 1.1598032712936401} +02/25/2022 20:05:17 - INFO - codeparrot_training - Step 32828: {'lr': 0.00014195888827723535, 'samples': 16808448, 'steps': 32828, 'loss/train': 1.534693717956543} +02/25/2022 20:05:22 - INFO - codeparrot_training - Step 32829: {'lr': 0.00014194413292547482, 'samples': 16808960, 'steps': 32829, 'loss/train': 1.9712468385696411} +02/25/2022 20:05:25 - INFO - codeparrot_training - Step 32830: {'lr': 0.00014192937803659135, 'samples': 16809472, 'steps': 32830, 'loss/train': 1.5513536930084229} +02/25/2022 20:05:31 - INFO - codeparrot_training - Step 32831: {'lr': 0.00014191462361064805, 'samples': 16809984, 'steps': 32831, 'loss/train': 2.1370108127593994} +02/25/2022 20:05:34 - INFO - codeparrot_training - Step 32832: {'lr': 0.00014189986964770825, 'samples': 16810496, 'steps': 32832, 'loss/train': 1.3149220943450928} +02/25/2022 20:05:40 - INFO - codeparrot_training - Step 32833: {'lr': 0.0001418851161478349, 'samples': 16811008, 'steps': 32833, 'loss/train': 2.668180227279663} +02/25/2022 20:05:44 - INFO - codeparrot_training - Step 32834: {'lr': 0.00014187036311109154, 'samples': 16811520, 'steps': 32834, 'loss/train': 1.194145917892456} +02/25/2022 20:05:50 - INFO - codeparrot_training - Step 32835: {'lr': 0.0001418556105375411, 'samples': 16812032, 'steps': 32835, 'loss/train': 2.2563109397888184} +02/25/2022 20:05:53 - INFO - codeparrot_training - Step 32836: {'lr': 0.00014184085842724708, 'samples': 16812544, 'steps': 32836, 'loss/train': 1.9797940254211426} +02/25/2022 20:05:59 - INFO - codeparrot_training - Step 32837: {'lr': 0.0001418261067802723, 'samples': 16813056, 'steps': 32837, 'loss/train': 2.155033826828003} +02/25/2022 20:06:02 - INFO - codeparrot_training - Step 32838: {'lr': 0.00014181135559668018, 'samples': 16813568, 'steps': 32838, 'loss/train': 0.993283748626709} +02/25/2022 20:06:08 - INFO - codeparrot_training - Step 32839: {'lr': 0.0001417966048765338, 'samples': 16814080, 'steps': 32839, 'loss/train': 1.9749411344528198} +02/25/2022 20:06:11 - INFO - codeparrot_training - Step 32840: {'lr': 0.0001417818546198966, 'samples': 16814592, 'steps': 32840, 'loss/train': 1.293125033378601} +02/25/2022 20:06:17 - INFO - codeparrot_training - Step 32841: {'lr': 0.00014176710482683135, 'samples': 16815104, 'steps': 32841, 'loss/train': 2.366556406021118} +02/25/2022 20:06:20 - INFO - codeparrot_training - Step 32842: {'lr': 0.0001417523554974016, 'samples': 16815616, 'steps': 32842, 'loss/train': 2.071415424346924} +02/25/2022 20:06:26 - INFO - codeparrot_training - Step 32843: {'lr': 0.00014173760663167023, 'samples': 16816128, 'steps': 32843, 'loss/train': 1.687054991722107} +02/25/2022 20:06:30 - INFO - codeparrot_training - Step 32844: {'lr': 0.00014172285822970082, 'samples': 16816640, 'steps': 32844, 'loss/train': 1.7060917615890503} +02/25/2022 20:06:35 - INFO - codeparrot_training - Step 32845: {'lr': 0.00014170811029155613, 'samples': 16817152, 'steps': 32845, 'loss/train': 2.132366418838501} +02/25/2022 20:06:39 - INFO - codeparrot_training - Step 32846: {'lr': 0.00014169336281729956, 'samples': 16817664, 'steps': 32846, 'loss/train': 1.6334660053253174} +02/25/2022 20:06:44 - INFO - codeparrot_training - Step 32847: {'lr': 0.0001416786158069941, 'samples': 16818176, 'steps': 32847, 'loss/train': 1.469781756401062} +02/25/2022 20:06:48 - INFO - codeparrot_training - Step 32848: {'lr': 0.0001416638692607032, 'samples': 16818688, 'steps': 32848, 'loss/train': 2.0682122707366943} +02/25/2022 20:06:53 - INFO - codeparrot_training - Step 32849: {'lr': 0.00014164912317848988, 'samples': 16819200, 'steps': 32849, 'loss/train': 1.5972638130187988} +02/25/2022 20:06:57 - INFO - codeparrot_training - Step 32850: {'lr': 0.0001416343775604173, 'samples': 16819712, 'steps': 32850, 'loss/train': 0.36140212416648865} +02/25/2022 20:07:02 - INFO - codeparrot_training - Step 32851: {'lr': 0.00014161963240654857, 'samples': 16820224, 'steps': 32851, 'loss/train': 2.0866732597351074} +02/25/2022 20:07:06 - INFO - codeparrot_training - Step 32852: {'lr': 0.000141604887716947, 'samples': 16820736, 'steps': 32852, 'loss/train': 1.1813610792160034} +02/25/2022 20:07:11 - INFO - codeparrot_training - Step 32853: {'lr': 0.00014159014349167566, 'samples': 16821248, 'steps': 32853, 'loss/train': 1.0654414892196655} +02/25/2022 20:07:15 - INFO - codeparrot_training - Step 32854: {'lr': 0.0001415753997307977, 'samples': 16821760, 'steps': 32854, 'loss/train': 1.992229700088501} +02/25/2022 20:07:21 - INFO - codeparrot_training - Step 32855: {'lr': 0.00014156065643437628, 'samples': 16822272, 'steps': 32855, 'loss/train': 1.2218401432037354} +02/25/2022 20:07:24 - INFO - codeparrot_training - Step 32856: {'lr': 0.0001415459136024746, 'samples': 16822784, 'steps': 32856, 'loss/train': 1.2177947759628296} +02/25/2022 20:07:30 - INFO - codeparrot_training - Step 32857: {'lr': 0.0001415311712351558, 'samples': 16823296, 'steps': 32857, 'loss/train': 1.235691785812378} +02/25/2022 20:07:33 - INFO - codeparrot_training - Step 32858: {'lr': 0.00014151642933248304, 'samples': 16823808, 'steps': 32858, 'loss/train': 2.2234504222869873} +02/25/2022 20:07:39 - INFO - codeparrot_training - Step 32859: {'lr': 0.00014150168789451933, 'samples': 16824320, 'steps': 32859, 'loss/train': 1.8202614784240723} +02/25/2022 20:07:42 - INFO - codeparrot_training - Step 32860: {'lr': 0.00014148694692132804, 'samples': 16824832, 'steps': 32860, 'loss/train': 2.070849657058716} +02/25/2022 20:07:48 - INFO - codeparrot_training - Step 32861: {'lr': 0.00014147220641297213, 'samples': 16825344, 'steps': 32861, 'loss/train': 0.8616870045661926} +02/25/2022 20:07:51 - INFO - codeparrot_training - Step 32862: {'lr': 0.000141457466369515, 'samples': 16825856, 'steps': 32862, 'loss/train': 0.6561073660850525} +02/25/2022 20:07:57 - INFO - codeparrot_training - Step 32863: {'lr': 0.00014144272679101944, 'samples': 16826368, 'steps': 32863, 'loss/train': 1.1476329565048218} +02/25/2022 20:08:00 - INFO - codeparrot_training - Step 32864: {'lr': 0.00014142798767754886, 'samples': 16826880, 'steps': 32864, 'loss/train': 1.9029687643051147} +02/25/2022 20:08:06 - INFO - codeparrot_training - Step 32865: {'lr': 0.0001414132490291663, 'samples': 16827392, 'steps': 32865, 'loss/train': 1.0413790941238403} +02/25/2022 20:08:09 - INFO - codeparrot_training - Step 32866: {'lr': 0.0001413985108459349, 'samples': 16827904, 'steps': 32866, 'loss/train': 1.1820979118347168} +02/25/2022 20:08:15 - INFO - codeparrot_training - Step 32867: {'lr': 0.00014138377312791772, 'samples': 16828416, 'steps': 32867, 'loss/train': 2.109445095062256} +02/25/2022 20:08:18 - INFO - codeparrot_training - Step 32868: {'lr': 0.00014136903587517804, 'samples': 16828928, 'steps': 32868, 'loss/train': 1.9847474098205566} +02/25/2022 20:08:24 - INFO - codeparrot_training - Step 32869: {'lr': 0.0001413542990877789, 'samples': 16829440, 'steps': 32869, 'loss/train': 1.7672747373580933} +02/25/2022 20:08:28 - INFO - codeparrot_training - Step 32870: {'lr': 0.0001413395627657835, 'samples': 16829952, 'steps': 32870, 'loss/train': 1.5557104349136353} +02/25/2022 20:08:34 - INFO - codeparrot_training - Step 32871: {'lr': 0.00014132482690925476, 'samples': 16830464, 'steps': 32871, 'loss/train': 1.9749342203140259} +02/25/2022 20:08:39 - INFO - codeparrot_training - Step 32872: {'lr': 0.00014131009151825607, 'samples': 16830976, 'steps': 32872, 'loss/train': 1.632596492767334} +02/25/2022 20:08:42 - INFO - codeparrot_training - Step 32873: {'lr': 0.00014129535659285046, 'samples': 16831488, 'steps': 32873, 'loss/train': 1.1747798919677734} +02/25/2022 20:08:48 - INFO - codeparrot_training - Step 32874: {'lr': 0.0001412806221331009, 'samples': 16832000, 'steps': 32874, 'loss/train': 0.8990028500556946} +02/25/2022 20:08:52 - INFO - codeparrot_training - Step 32875: {'lr': 0.0001412658881390707, 'samples': 16832512, 'steps': 32875, 'loss/train': 2.813483238220215} +02/25/2022 20:08:57 - INFO - codeparrot_training - Step 32876: {'lr': 0.00014125115461082293, 'samples': 16833024, 'steps': 32876, 'loss/train': 1.6831914186477661} +02/25/2022 20:09:00 - INFO - codeparrot_training - Step 32877: {'lr': 0.00014123642154842072, 'samples': 16833536, 'steps': 32877, 'loss/train': 2.7684366703033447} +02/25/2022 20:09:07 - INFO - codeparrot_training - Step 32878: {'lr': 0.00014122168895192702, 'samples': 16834048, 'steps': 32878, 'loss/train': 1.8163495063781738} +02/25/2022 20:09:10 - INFO - codeparrot_training - Step 32879: {'lr': 0.00014120695682140529, 'samples': 16834560, 'steps': 32879, 'loss/train': 1.284104347229004} +02/25/2022 20:09:16 - INFO - codeparrot_training - Step 32880: {'lr': 0.00014119222515691815, 'samples': 16835072, 'steps': 32880, 'loss/train': 0.9736930131912231} +02/25/2022 20:09:19 - INFO - codeparrot_training - Step 32881: {'lr': 0.0001411774939585291, 'samples': 16835584, 'steps': 32881, 'loss/train': 1.817723274230957} +02/25/2022 20:09:25 - INFO - codeparrot_training - Step 32882: {'lr': 0.000141162763226301, 'samples': 16836096, 'steps': 32882, 'loss/train': 0.9431028366088867} +02/25/2022 20:09:28 - INFO - codeparrot_training - Step 32883: {'lr': 0.00014114803296029726, 'samples': 16836608, 'steps': 32883, 'loss/train': 1.243801236152649} +02/25/2022 20:09:34 - INFO - codeparrot_training - Step 32884: {'lr': 0.0001411333031605806, 'samples': 16837120, 'steps': 32884, 'loss/train': 1.7301125526428223} +02/25/2022 20:09:37 - INFO - codeparrot_training - Step 32885: {'lr': 0.0001411185738272144, 'samples': 16837632, 'steps': 32885, 'loss/train': 1.5202199220657349} +02/25/2022 20:09:43 - INFO - codeparrot_training - Step 32886: {'lr': 0.00014110384496026157, 'samples': 16838144, 'steps': 32886, 'loss/train': 0.9737198352813721} +02/25/2022 20:09:46 - INFO - codeparrot_training - Step 32887: {'lr': 0.00014108911655978535, 'samples': 16838656, 'steps': 32887, 'loss/train': 1.0447450876235962} +02/25/2022 20:09:50 - INFO - codeparrot_training - Step 32888: {'lr': 0.00014107438862584883, 'samples': 16839168, 'steps': 32888, 'loss/train': 1.9728931188583374} +02/25/2022 20:09:56 - INFO - codeparrot_training - Step 32889: {'lr': 0.00014105966115851497, 'samples': 16839680, 'steps': 32889, 'loss/train': 8.358400344848633} +02/25/2022 20:09:59 - INFO - codeparrot_training - Step 32890: {'lr': 0.00014104493415784686, 'samples': 16840192, 'steps': 32890, 'loss/train': 1.3235503435134888} +02/25/2022 20:10:05 - INFO - codeparrot_training - Step 32891: {'lr': 0.00014103020762390778, 'samples': 16840704, 'steps': 32891, 'loss/train': 3.6748952865600586} +02/25/2022 20:10:09 - INFO - codeparrot_training - Step 32892: {'lr': 0.00014101548155676064, 'samples': 16841216, 'steps': 32892, 'loss/train': 0.6811104416847229} +02/25/2022 20:10:14 - INFO - codeparrot_training - Step 32893: {'lr': 0.0001410007559564686, 'samples': 16841728, 'steps': 32893, 'loss/train': 1.6587862968444824} +02/25/2022 20:10:17 - INFO - codeparrot_training - Step 32894: {'lr': 0.0001409860308230946, 'samples': 16842240, 'steps': 32894, 'loss/train': 2.457082748413086} +02/25/2022 20:10:23 - INFO - codeparrot_training - Step 32895: {'lr': 0.00014097130615670195, 'samples': 16842752, 'steps': 32895, 'loss/train': 0.04452995955944061} +02/25/2022 20:10:27 - INFO - codeparrot_training - Step 32896: {'lr': 0.0001409565819573535, 'samples': 16843264, 'steps': 32896, 'loss/train': 0.7368571162223816} +02/25/2022 20:10:32 - INFO - codeparrot_training - Step 32897: {'lr': 0.0001409418582251126, 'samples': 16843776, 'steps': 32897, 'loss/train': 0.04284963756799698} +02/25/2022 20:10:36 - INFO - codeparrot_training - Step 32898: {'lr': 0.00014092713496004198, 'samples': 16844288, 'steps': 32898, 'loss/train': 2.2399790287017822} +02/25/2022 20:10:41 - INFO - codeparrot_training - Step 32899: {'lr': 0.00014091241216220496, 'samples': 16844800, 'steps': 32899, 'loss/train': 1.9340670108795166} +02/25/2022 20:10:45 - INFO - codeparrot_training - Step 32900: {'lr': 0.00014089768983166444, 'samples': 16845312, 'steps': 32900, 'loss/train': 0.8082156181335449} +02/25/2022 20:10:51 - INFO - codeparrot_training - Step 32901: {'lr': 0.00014088296796848377, 'samples': 16845824, 'steps': 32901, 'loss/train': 2.0896153450012207} +02/25/2022 20:10:55 - INFO - codeparrot_training - Step 32902: {'lr': 0.00014086824657272558, 'samples': 16846336, 'steps': 32902, 'loss/train': 1.1744898557662964} +02/25/2022 20:11:00 - INFO - codeparrot_training - Step 32903: {'lr': 0.0001408535256444533, 'samples': 16846848, 'steps': 32903, 'loss/train': 3.229671001434326} +02/25/2022 20:11:04 - INFO - codeparrot_training - Step 32904: {'lr': 0.00014083880518372976, 'samples': 16847360, 'steps': 32904, 'loss/train': 1.036189317703247} +02/25/2022 20:11:09 - INFO - codeparrot_training - Step 32905: {'lr': 0.00014082408519061835, 'samples': 16847872, 'steps': 32905, 'loss/train': 2.1869750022888184} +02/25/2022 20:11:13 - INFO - codeparrot_training - Step 32906: {'lr': 0.00014080936566518166, 'samples': 16848384, 'steps': 32906, 'loss/train': 1.297544240951538} +02/25/2022 20:11:18 - INFO - codeparrot_training - Step 32907: {'lr': 0.00014079464660748305, 'samples': 16848896, 'steps': 32907, 'loss/train': 1.8179373741149902} +02/25/2022 20:11:22 - INFO - codeparrot_training - Step 32908: {'lr': 0.00014077992801758544, 'samples': 16849408, 'steps': 32908, 'loss/train': 1.12723970413208} +02/25/2022 20:11:27 - INFO - codeparrot_training - Step 32909: {'lr': 0.00014076520989555197, 'samples': 16849920, 'steps': 32909, 'loss/train': 1.931674599647522} +02/25/2022 20:11:31 - INFO - codeparrot_training - Step 32910: {'lr': 0.00014075049224144572, 'samples': 16850432, 'steps': 32910, 'loss/train': 1.2654401063919067} +02/25/2022 20:11:37 - INFO - codeparrot_training - Step 32911: {'lr': 0.00014073577505532964, 'samples': 16850944, 'steps': 32911, 'loss/train': 1.4919214248657227} +02/25/2022 20:11:40 - INFO - codeparrot_training - Step 32912: {'lr': 0.00014072105833726683, 'samples': 16851456, 'steps': 32912, 'loss/train': 1.9069550037384033} +02/25/2022 20:11:46 - INFO - codeparrot_training - Step 32913: {'lr': 0.00014070634208732019, 'samples': 16851968, 'steps': 32913, 'loss/train': 2.5669286251068115} +02/25/2022 20:11:49 - INFO - codeparrot_training - Step 32914: {'lr': 0.000140691626305553, 'samples': 16852480, 'steps': 32914, 'loss/train': 2.8780205249786377} +02/25/2022 20:11:55 - INFO - codeparrot_training - Step 32915: {'lr': 0.00014067691099202813, 'samples': 16852992, 'steps': 32915, 'loss/train': 1.3380640745162964} +02/25/2022 20:11:59 - INFO - codeparrot_training - Step 32916: {'lr': 0.0001406621961468087, 'samples': 16853504, 'steps': 32916, 'loss/train': 3.0907726287841797} +02/25/2022 20:12:04 - INFO - codeparrot_training - Step 32917: {'lr': 0.00014064748176995757, 'samples': 16854016, 'steps': 32917, 'loss/train': 1.5749415159225464} +02/25/2022 20:12:08 - INFO - codeparrot_training - Step 32918: {'lr': 0.000140632767861538, 'samples': 16854528, 'steps': 32918, 'loss/train': 1.6800742149353027} +02/25/2022 20:12:13 - INFO - codeparrot_training - Step 32919: {'lr': 0.00014061805442161296, 'samples': 16855040, 'steps': 32919, 'loss/train': 1.8837531805038452} +02/25/2022 20:12:17 - INFO - codeparrot_training - Step 32920: {'lr': 0.00014060334145024543, 'samples': 16855552, 'steps': 32920, 'loss/train': 1.7325338125228882} +02/25/2022 20:12:22 - INFO - codeparrot_training - Step 32921: {'lr': 0.00014058862894749836, 'samples': 16856064, 'steps': 32921, 'loss/train': 0.8436089158058167} +02/25/2022 20:12:26 - INFO - codeparrot_training - Step 32922: {'lr': 0.00014057391691343492, 'samples': 16856576, 'steps': 32922, 'loss/train': 2.62870192527771} +02/25/2022 20:12:31 - INFO - codeparrot_training - Step 32923: {'lr': 0.0001405592053481181, 'samples': 16857088, 'steps': 32923, 'loss/train': 1.4654548168182373} +02/25/2022 20:12:35 - INFO - codeparrot_training - Step 32924: {'lr': 0.0001405444942516109, 'samples': 16857600, 'steps': 32924, 'loss/train': 1.807477593421936} +02/25/2022 20:12:40 - INFO - codeparrot_training - Step 32925: {'lr': 0.00014052978362397622, 'samples': 16858112, 'steps': 32925, 'loss/train': 1.6300159692764282} +02/25/2022 20:12:44 - INFO - codeparrot_training - Step 32926: {'lr': 0.00014051507346527728, 'samples': 16858624, 'steps': 32926, 'loss/train': 1.9050114154815674} +02/25/2022 20:12:50 - INFO - codeparrot_training - Step 32927: {'lr': 0.00014050036377557702, 'samples': 16859136, 'steps': 32927, 'loss/train': 1.1461583375930786} +02/25/2022 20:12:53 - INFO - codeparrot_training - Step 32928: {'lr': 0.0001404856545549384, 'samples': 16859648, 'steps': 32928, 'loss/train': 1.7911757230758667} +02/25/2022 20:12:59 - INFO - codeparrot_training - Step 32929: {'lr': 0.0001404709458034244, 'samples': 16860160, 'steps': 32929, 'loss/train': 2.375532388687134} +02/25/2022 20:13:02 - INFO - codeparrot_training - Step 32930: {'lr': 0.00014045623752109815, 'samples': 16860672, 'steps': 32930, 'loss/train': 1.6886895895004272} +02/25/2022 20:13:08 - INFO - codeparrot_training - Step 32931: {'lr': 0.00014044152970802264, 'samples': 16861184, 'steps': 32931, 'loss/train': 2.639129638671875} +02/25/2022 20:13:11 - INFO - codeparrot_training - Step 32932: {'lr': 0.0001404268223642608, 'samples': 16861696, 'steps': 32932, 'loss/train': 1.8888440132141113} +02/25/2022 20:13:17 - INFO - codeparrot_training - Step 32933: {'lr': 0.00014041211548987553, 'samples': 16862208, 'steps': 32933, 'loss/train': 1.4811986684799194} +02/25/2022 20:13:20 - INFO - codeparrot_training - Step 32934: {'lr': 0.0001403974090849301, 'samples': 16862720, 'steps': 32934, 'loss/train': 1.5700603723526} +02/25/2022 20:13:26 - INFO - codeparrot_training - Step 32935: {'lr': 0.00014038270314948727, 'samples': 16863232, 'steps': 32935, 'loss/train': 1.983089566230774} +02/25/2022 20:13:29 - INFO - codeparrot_training - Step 32936: {'lr': 0.0001403679976836103, 'samples': 16863744, 'steps': 32936, 'loss/train': 2.0529675483703613} +02/25/2022 20:13:36 - INFO - codeparrot_training - Step 32937: {'lr': 0.00014035329268736186, 'samples': 16864256, 'steps': 32937, 'loss/train': 1.0987694263458252} +02/25/2022 20:13:39 - INFO - codeparrot_training - Step 32938: {'lr': 0.00014033858816080516, 'samples': 16864768, 'steps': 32938, 'loss/train': 1.6088453531265259} +02/25/2022 20:13:45 - INFO - codeparrot_training - Step 32939: {'lr': 0.00014032388410400304, 'samples': 16865280, 'steps': 32939, 'loss/train': 1.8725632429122925} +02/25/2022 20:13:48 - INFO - codeparrot_training - Step 32940: {'lr': 0.00014030918051701876, 'samples': 16865792, 'steps': 32940, 'loss/train': 2.402594566345215} +02/25/2022 20:13:54 - INFO - codeparrot_training - Step 32941: {'lr': 0.00014029447739991496, 'samples': 16866304, 'steps': 32941, 'loss/train': 1.5512654781341553} +02/25/2022 20:13:57 - INFO - codeparrot_training - Step 32942: {'lr': 0.00014027977475275483, 'samples': 16866816, 'steps': 32942, 'loss/train': 0.975470781326294} +02/25/2022 20:14:03 - INFO - codeparrot_training - Step 32943: {'lr': 0.00014026507257560125, 'samples': 16867328, 'steps': 32943, 'loss/train': 1.9085592031478882} +02/25/2022 20:14:06 - INFO - codeparrot_training - Step 32944: {'lr': 0.00014025037086851734, 'samples': 16867840, 'steps': 32944, 'loss/train': 1.1147199869155884} +02/25/2022 20:14:13 - INFO - codeparrot_training - Step 32945: {'lr': 0.00014023566963156596, 'samples': 16868352, 'steps': 32945, 'loss/train': 2.1558568477630615} +02/25/2022 20:14:16 - INFO - codeparrot_training - Step 32946: {'lr': 0.00014022096886481017, 'samples': 16868864, 'steps': 32946, 'loss/train': 2.259960651397705} +02/25/2022 20:14:20 - INFO - codeparrot_training - Step 32947: {'lr': 0.00014020626856831275, 'samples': 16869376, 'steps': 32947, 'loss/train': 1.7982772588729858} +02/25/2022 20:14:26 - INFO - codeparrot_training - Step 32948: {'lr': 0.00014019156874213695, 'samples': 16869888, 'steps': 32948, 'loss/train': 1.83175790309906} +02/25/2022 20:14:29 - INFO - codeparrot_training - Step 32949: {'lr': 0.00014017686938634554, 'samples': 16870400, 'steps': 32949, 'loss/train': 2.2320189476013184} +02/25/2022 20:14:35 - INFO - codeparrot_training - Step 32950: {'lr': 0.0001401621705010016, 'samples': 16870912, 'steps': 32950, 'loss/train': 1.8464229106903076} +02/25/2022 20:14:38 - INFO - codeparrot_training - Step 32951: {'lr': 0.00014014747208616795, 'samples': 16871424, 'steps': 32951, 'loss/train': 1.6401088237762451} +02/25/2022 20:14:44 - INFO - codeparrot_training - Step 32952: {'lr': 0.00014013277414190773, 'samples': 16871936, 'steps': 32952, 'loss/train': 0.16312041878700256} +02/25/2022 20:14:48 - INFO - codeparrot_training - Step 32953: {'lr': 0.0001401180766682838, 'samples': 16872448, 'steps': 32953, 'loss/train': 2.1603598594665527} +02/25/2022 20:14:53 - INFO - codeparrot_training - Step 32954: {'lr': 0.0001401033796653592, 'samples': 16872960, 'steps': 32954, 'loss/train': 2.182615041732788} +02/25/2022 20:14:57 - INFO - codeparrot_training - Step 32955: {'lr': 0.00014008868313319668, 'samples': 16873472, 'steps': 32955, 'loss/train': 1.686958909034729} +02/25/2022 20:15:02 - INFO - codeparrot_training - Step 32956: {'lr': 0.00014007398707185941, 'samples': 16873984, 'steps': 32956, 'loss/train': 2.1222081184387207} +02/25/2022 20:15:05 - INFO - codeparrot_training - Step 32957: {'lr': 0.00014005929148141035, 'samples': 16874496, 'steps': 32957, 'loss/train': 2.1863038539886475} +02/25/2022 20:15:12 - INFO - codeparrot_training - Step 32958: {'lr': 0.0001400445963619123, 'samples': 16875008, 'steps': 32958, 'loss/train': 2.584641933441162} +02/25/2022 20:15:16 - INFO - codeparrot_training - Step 32959: {'lr': 0.00014002990171342833, 'samples': 16875520, 'steps': 32959, 'loss/train': 1.0032398700714111} +02/25/2022 20:15:21 - INFO - codeparrot_training - Step 32960: {'lr': 0.0001400152075360212, 'samples': 16876032, 'steps': 32960, 'loss/train': 1.5014384984970093} +02/25/2022 20:15:25 - INFO - codeparrot_training - Step 32961: {'lr': 0.00014000051382975415, 'samples': 16876544, 'steps': 32961, 'loss/train': 1.2810039520263672} +02/25/2022 20:15:30 - INFO - codeparrot_training - Step 32962: {'lr': 0.00013998582059468996, 'samples': 16877056, 'steps': 32962, 'loss/train': 2.713489055633545} +02/25/2022 20:15:34 - INFO - codeparrot_training - Step 32963: {'lr': 0.00013997112783089156, 'samples': 16877568, 'steps': 32963, 'loss/train': 1.9830602407455444} +02/25/2022 20:15:39 - INFO - codeparrot_training - Step 32964: {'lr': 0.0001399564355384218, 'samples': 16878080, 'steps': 32964, 'loss/train': 1.4365499019622803} +02/25/2022 20:15:43 - INFO - codeparrot_training - Step 32965: {'lr': 0.00013994174371734385, 'samples': 16878592, 'steps': 32965, 'loss/train': 1.7163954973220825} +02/25/2022 20:15:48 - INFO - codeparrot_training - Step 32966: {'lr': 0.00013992705236772052, 'samples': 16879104, 'steps': 32966, 'loss/train': 2.427286386489868} +02/25/2022 20:15:51 - INFO - codeparrot_training - Step 32967: {'lr': 0.00013991236148961473, 'samples': 16879616, 'steps': 32967, 'loss/train': 1.2211790084838867} +02/25/2022 20:15:57 - INFO - codeparrot_training - Step 32968: {'lr': 0.00013989767108308932, 'samples': 16880128, 'steps': 32968, 'loss/train': 1.1106135845184326} +02/25/2022 20:16:01 - INFO - codeparrot_training - Step 32969: {'lr': 0.00013988298114820747, 'samples': 16880640, 'steps': 32969, 'loss/train': 0.5665706396102905} +02/25/2022 20:16:06 - INFO - codeparrot_training - Step 32970: {'lr': 0.00013986829168503184, 'samples': 16881152, 'steps': 32970, 'loss/train': 1.8018169403076172} +02/25/2022 20:16:10 - INFO - codeparrot_training - Step 32971: {'lr': 0.00013985360269362567, 'samples': 16881664, 'steps': 32971, 'loss/train': 2.2217936515808105} +02/25/2022 20:16:15 - INFO - codeparrot_training - Step 32972: {'lr': 0.00013983891417405147, 'samples': 16882176, 'steps': 32972, 'loss/train': 0.04800500348210335} +02/25/2022 20:16:21 - INFO - codeparrot_training - Step 32973: {'lr': 0.00013982422612637252, 'samples': 16882688, 'steps': 32973, 'loss/train': 1.3335580825805664} +02/25/2022 20:16:25 - INFO - codeparrot_training - Step 32974: {'lr': 0.0001398095385506515, 'samples': 16883200, 'steps': 32974, 'loss/train': 1.90020751953125} +02/25/2022 20:16:30 - INFO - codeparrot_training - Step 32975: {'lr': 0.0001397948514469516, 'samples': 16883712, 'steps': 32975, 'loss/train': 1.9262281656265259} +02/25/2022 20:16:34 - INFO - codeparrot_training - Step 32976: {'lr': 0.0001397801648153354, 'samples': 16884224, 'steps': 32976, 'loss/train': 2.8658032417297363} +02/25/2022 20:16:39 - INFO - codeparrot_training - Step 32977: {'lr': 0.00013976547865586603, 'samples': 16884736, 'steps': 32977, 'loss/train': 0.4628658592700958} +02/25/2022 20:16:43 - INFO - codeparrot_training - Step 32978: {'lr': 0.00013975079296860626, 'samples': 16885248, 'steps': 32978, 'loss/train': 2.484701156616211} +02/25/2022 20:16:48 - INFO - codeparrot_training - Step 32979: {'lr': 0.00013973610775361932, 'samples': 16885760, 'steps': 32979, 'loss/train': 1.6312931776046753} +02/25/2022 20:16:52 - INFO - codeparrot_training - Step 32980: {'lr': 0.00013972142301096763, 'samples': 16886272, 'steps': 32980, 'loss/train': 1.5839051008224487} +02/25/2022 20:16:57 - INFO - codeparrot_training - Step 32981: {'lr': 0.00013970673874071448, 'samples': 16886784, 'steps': 32981, 'loss/train': 2.063230276107788} +02/25/2022 20:17:01 - INFO - codeparrot_training - Step 32982: {'lr': 0.00013969205494292254, 'samples': 16887296, 'steps': 32982, 'loss/train': 1.2427608966827393} +02/25/2022 20:17:07 - INFO - codeparrot_training - Step 32983: {'lr': 0.00013967737161765486, 'samples': 16887808, 'steps': 32983, 'loss/train': 0.2305237203836441} +02/25/2022 20:17:10 - INFO - codeparrot_training - Step 32984: {'lr': 0.00013966268876497434, 'samples': 16888320, 'steps': 32984, 'loss/train': 2.375281572341919} +02/25/2022 20:17:16 - INFO - codeparrot_training - Step 32985: {'lr': 0.00013964800638494385, 'samples': 16888832, 'steps': 32985, 'loss/train': 1.9276641607284546} +02/25/2022 20:17:19 - INFO - codeparrot_training - Step 32986: {'lr': 0.00013963332447762612, 'samples': 16889344, 'steps': 32986, 'loss/train': 1.2853120565414429} +02/25/2022 20:17:25 - INFO - codeparrot_training - Step 32987: {'lr': 0.00013961864304308427, 'samples': 16889856, 'steps': 32987, 'loss/train': 2.719148874282837} +02/25/2022 20:17:28 - INFO - codeparrot_training - Step 32988: {'lr': 0.0001396039620813811, 'samples': 16890368, 'steps': 32988, 'loss/train': 1.9276344776153564} +02/25/2022 20:17:34 - INFO - codeparrot_training - Step 32989: {'lr': 0.00013958928159257954, 'samples': 16890880, 'steps': 32989, 'loss/train': 1.7885204553604126} +02/25/2022 20:17:37 - INFO - codeparrot_training - Step 32990: {'lr': 0.0001395746015767423, 'samples': 16891392, 'steps': 32990, 'loss/train': 1.6885907649993896} +02/25/2022 20:17:43 - INFO - codeparrot_training - Step 32991: {'lr': 0.00013955992203393253, 'samples': 16891904, 'steps': 32991, 'loss/train': 1.5914902687072754} +02/25/2022 20:17:46 - INFO - codeparrot_training - Step 32992: {'lr': 0.000139545242964213, 'samples': 16892416, 'steps': 32992, 'loss/train': 1.9701309204101562} +02/25/2022 20:17:52 - INFO - codeparrot_training - Step 32993: {'lr': 0.00013953056436764654, 'samples': 16892928, 'steps': 32993, 'loss/train': 1.6841686964035034} +02/25/2022 20:17:56 - INFO - codeparrot_training - Step 32994: {'lr': 0.000139515886244296, 'samples': 16893440, 'steps': 32994, 'loss/train': 1.2654300928115845} +02/25/2022 20:18:01 - INFO - codeparrot_training - Step 32995: {'lr': 0.00013950120859422438, 'samples': 16893952, 'steps': 32995, 'loss/train': 1.7574375867843628} +02/25/2022 20:18:05 - INFO - codeparrot_training - Step 32996: {'lr': 0.0001394865314174945, 'samples': 16894464, 'steps': 32996, 'loss/train': 1.605767011642456} +02/25/2022 20:18:11 - INFO - codeparrot_training - Step 32997: {'lr': 0.00013947185471416927, 'samples': 16894976, 'steps': 32997, 'loss/train': 1.5172046422958374} +02/25/2022 20:18:14 - INFO - codeparrot_training - Step 32998: {'lr': 0.0001394571784843114, 'samples': 16895488, 'steps': 32998, 'loss/train': 2.2089178562164307} +02/25/2022 20:18:20 - INFO - codeparrot_training - Step 32999: {'lr': 0.00013944250272798393, 'samples': 16896000, 'steps': 32999, 'loss/train': 1.2068666219711304} +02/25/2022 20:18:20 - INFO - codeparrot_training - Evaluating and saving model checkpoint