diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -22999,3 +22999,7472 @@ Use FP16 precision: False 03/04/2022 07:04:27 - INFO - codeparrot_training - Step 14998: {'lr': 0.000490544541905651, 'samples': 7679488, 'steps': 14998, 'loss/train': 2.165583372116089} 03/04/2022 07:04:31 - INFO - codeparrot_training - Step 14999: {'lr': 0.0004905430961831242, 'samples': 7680000, 'steps': 14999, 'loss/train': 1.8511422872543335} 03/04/2022 07:04:31 - INFO - codeparrot_training - Evaluating and saving model checkpoint +03/04/2022 07:04:44 - WARNING - huggingface_hub.repository - Several commits (3) will be pushed upstream. +03/04/2022 07:04:44 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +03/04/2022 07:05:08 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/cm_code_clippy + ad512a7..7d4fba8 glowing-puddle-3 -> glowing-puddle-3 + +03/04/2022 07:05:11 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/04/2022 07:05:15 - INFO - codeparrot_training - Step 15000: {'lr': 0.0004905416503522123, 'samples': 7680512, 'steps': 15000, 'loss/train': 1.124108910560608} +03/04/2022 07:05:18 - INFO - codeparrot_training - Step 15001: {'lr': 0.0004905402044129162, 'samples': 7681024, 'steps': 15001, 'loss/train': 2.0032455921173096} +03/04/2022 07:05:19 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 07:05:23 - INFO - codeparrot_training - Step 15002: {'lr': 0.0004905387583652363, 'samples': 7681536, 'steps': 15002, 'loss/train': 2.7251927852630615} +03/04/2022 07:05:26 - INFO - codeparrot_training - Step 15003: {'lr': 0.0004905373122091734, 'samples': 7682048, 'steps': 15003, 'loss/train': 1.965111255645752} +03/04/2022 07:05:27 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 07:05:31 - INFO - codeparrot_training - Step 15004: {'lr': 0.0004905358659447281, 'samples': 7682560, 'steps': 15004, 'loss/train': 1.788163423538208} +03/04/2022 07:05:34 - INFO - codeparrot_training - Step 15005: {'lr': 0.000490534419571901, 'samples': 7683072, 'steps': 15005, 'loss/train': 1.9516544342041016} +03/04/2022 07:05:36 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/04/2022 07:05:40 - INFO - codeparrot_training - Step 15006: {'lr': 0.0004905329730906929, 'samples': 7683584, 'steps': 15006, 'loss/train': 2.2833268642425537} +03/04/2022 07:05:43 - INFO - codeparrot_training - Step 15007: {'lr': 0.0004905315265011043, 'samples': 7684096, 'steps': 15007, 'loss/train': 2.0812675952911377} +03/04/2022 07:05:45 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 07:05:48 - INFO - codeparrot_training - Step 15008: {'lr': 0.0004905300798031359, 'samples': 7684608, 'steps': 15008, 'loss/train': 1.2740222215652466} +03/04/2022 07:05:52 - INFO - codeparrot_training - Step 15009: {'lr': 0.0004905286329967883, 'samples': 7685120, 'steps': 15009, 'loss/train': 2.039226531982422} +03/04/2022 07:05:54 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 07:05:57 - INFO - codeparrot_training - Step 15010: {'lr': 0.0004905271860820622, 'samples': 7685632, 'steps': 15010, 'loss/train': 1.8363571166992188} +03/04/2022 07:06:00 - INFO - codeparrot_training - Step 15011: {'lr': 0.0004905257390589585, 'samples': 7686144, 'steps': 15011, 'loss/train': 0.8878414034843445} +03/04/2022 07:06:02 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 07:06:05 - INFO - codeparrot_training - Step 15012: {'lr': 0.0004905242919274774, 'samples': 7686656, 'steps': 15012, 'loss/train': 1.7550570964813232} +03/04/2022 07:06:08 - INFO - codeparrot_training - Step 15013: {'lr': 0.0004905228446876197, 'samples': 7687168, 'steps': 15013, 'loss/train': 1.8136323690414429} +03/04/2022 07:06:10 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 07:06:14 - INFO - codeparrot_training - Step 15014: {'lr': 0.0004905213973393863, 'samples': 7687680, 'steps': 15014, 'loss/train': 1.7531710863113403} +03/04/2022 07:06:17 - INFO - codeparrot_training - Step 15015: {'lr': 0.0004905199498827776, 'samples': 7688192, 'steps': 15015, 'loss/train': 1.9604685306549072} +03/04/2022 07:06:18 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/04/2022 07:06:22 - INFO - codeparrot_training - Step 15016: {'lr': 0.0004905185023177942, 'samples': 7688704, 'steps': 15016, 'loss/train': 2.5374135971069336} +03/04/2022 07:06:25 - INFO - codeparrot_training - Step 15017: {'lr': 0.0004905170546444371, 'samples': 7689216, 'steps': 15017, 'loss/train': 1.3926631212234497} +03/04/2022 07:06:27 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 07:06:31 - INFO - codeparrot_training - Step 15018: {'lr': 0.0004905156068627065, 'samples': 7689728, 'steps': 15018, 'loss/train': 1.8392575979232788} +03/04/2022 07:06:34 - INFO - codeparrot_training - Step 15019: {'lr': 0.0004905141589726035, 'samples': 7690240, 'steps': 15019, 'loss/train': 1.5275486707687378} +03/04/2022 07:06:36 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 07:06:39 - INFO - codeparrot_training - Step 15020: {'lr': 0.0004905127109741284, 'samples': 7690752, 'steps': 15020, 'loss/train': 2.386080026626587} +03/04/2022 07:06:42 - INFO - codeparrot_training - Step 15021: {'lr': 0.000490511262867282, 'samples': 7691264, 'steps': 15021, 'loss/train': 1.7695971727371216} +03/04/2022 07:06:44 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 07:06:47 - INFO - codeparrot_training - Step 15022: {'lr': 0.000490509814652065, 'samples': 7691776, 'steps': 15022, 'loss/train': 2.126218795776367} +03/04/2022 07:06:51 - INFO - codeparrot_training - Step 15023: {'lr': 0.0004905083663284779, 'samples': 7692288, 'steps': 15023, 'loss/train': 2.6205484867095947} +03/04/2022 07:06:52 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 07:06:56 - INFO - codeparrot_training - Step 15024: {'lr': 0.0004905069178965214, 'samples': 7692800, 'steps': 15024, 'loss/train': 1.3500251770019531} +03/04/2022 07:06:59 - INFO - codeparrot_training - Step 15025: {'lr': 0.0004905054693561963, 'samples': 7693312, 'steps': 15025, 'loss/train': 2.3454055786132812} +03/04/2022 07:07:01 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 07:07:04 - INFO - codeparrot_training - Step 15026: {'lr': 0.0004905040207075032, 'samples': 7693824, 'steps': 15026, 'loss/train': 1.996700644493103} +03/04/2022 07:07:07 - INFO - codeparrot_training - Step 15027: {'lr': 0.0004905025719504426, 'samples': 7694336, 'steps': 15027, 'loss/train': 2.2744147777557373} +03/04/2022 07:07:09 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/04/2022 07:07:13 - INFO - codeparrot_training - Step 15028: {'lr': 0.0004905011230850152, 'samples': 7694848, 'steps': 15028, 'loss/train': 0.8628334403038025} +03/04/2022 07:07:16 - INFO - codeparrot_training - Step 15029: {'lr': 0.0004904996741112218, 'samples': 7695360, 'steps': 15029, 'loss/train': 1.7831729650497437} +03/04/2022 07:07:18 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 07:07:22 - INFO - codeparrot_training - Step 15030: {'lr': 0.0004904982250290629, 'samples': 7695872, 'steps': 15030, 'loss/train': 1.9099750518798828} +03/04/2022 07:07:25 - INFO - codeparrot_training - Step 15031: {'lr': 0.0004904967758385393, 'samples': 7696384, 'steps': 15031, 'loss/train': 1.6613608598709106} +03/04/2022 07:07:26 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 07:07:30 - INFO - codeparrot_training - Step 15032: {'lr': 0.0004904953265396515, 'samples': 7696896, 'steps': 15032, 'loss/train': 1.2869060039520264} +03/04/2022 07:07:33 - INFO - codeparrot_training - Step 15033: {'lr': 0.0004904938771324002, 'samples': 7697408, 'steps': 15033, 'loss/train': 2.502641439437866} +03/04/2022 07:07:35 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/04/2022 07:07:38 - INFO - codeparrot_training - Step 15034: {'lr': 0.0004904924276167861, 'samples': 7697920, 'steps': 15034, 'loss/train': 1.7009127140045166} +03/04/2022 07:07:42 - INFO - codeparrot_training - Step 15035: {'lr': 0.0004904909779928099, 'samples': 7698432, 'steps': 15035, 'loss/train': 2.0681939125061035} +03/04/2022 07:07:43 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 07:07:47 - INFO - codeparrot_training - Step 15036: {'lr': 0.000490489528260472, 'samples': 7698944, 'steps': 15036, 'loss/train': 2.3724069595336914} +03/04/2022 07:07:50 - INFO - codeparrot_training - Step 15037: {'lr': 0.0004904880784197734, 'samples': 7699456, 'steps': 15037, 'loss/train': 1.9611097574234009} +03/04/2022 07:07:51 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 07:07:55 - INFO - codeparrot_training - Step 15038: {'lr': 0.0004904866284707144, 'samples': 7699968, 'steps': 15038, 'loss/train': 1.6469544172286987} +03/04/2022 07:07:58 - INFO - codeparrot_training - Step 15039: {'lr': 0.000490485178413296, 'samples': 7700480, 'steps': 15039, 'loss/train': 1.4382404088974} +03/04/2022 07:07:59 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/04/2022 07:08:04 - INFO - codeparrot_training - Step 15040: {'lr': 0.0004904837282475186, 'samples': 7700992, 'steps': 15040, 'loss/train': 2.009895086288452} +03/04/2022 07:08:07 - INFO - codeparrot_training - Step 15041: {'lr': 0.000490482277973383, 'samples': 7701504, 'steps': 15041, 'loss/train': 2.740320920944214} +03/04/2022 07:08:08 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/04/2022 07:08:12 - INFO - codeparrot_training - Step 15042: {'lr': 0.0004904808275908898, 'samples': 7702016, 'steps': 15042, 'loss/train': 1.9641858339309692} +03/04/2022 07:08:15 - INFO - codeparrot_training - Step 15043: {'lr': 0.0004904793771000396, 'samples': 7702528, 'steps': 15043, 'loss/train': 1.9127044677734375} +03/04/2022 07:08:17 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 07:08:21 - INFO - codeparrot_training - Step 15044: {'lr': 0.0004904779265008331, 'samples': 7703040, 'steps': 15044, 'loss/train': 1.3630226850509644} +03/04/2022 07:08:24 - INFO - codeparrot_training - Step 15045: {'lr': 0.000490476475793271, 'samples': 7703552, 'steps': 15045, 'loss/train': 2.223031759262085} +03/04/2022 07:08:26 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 07:08:29 - INFO - codeparrot_training - Step 15046: {'lr': 0.0004904750249773538, 'samples': 7704064, 'steps': 15046, 'loss/train': 2.304105758666992} +03/04/2022 07:08:32 - INFO - codeparrot_training - Step 15047: {'lr': 0.0004904735740530825, 'samples': 7704576, 'steps': 15047, 'loss/train': 1.2682045698165894} +03/04/2022 07:08:34 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 07:08:37 - INFO - codeparrot_training - Step 15048: {'lr': 0.0004904721230204573, 'samples': 7705088, 'steps': 15048, 'loss/train': 1.1306997537612915} +03/04/2022 07:08:41 - INFO - codeparrot_training - Step 15049: {'lr': 0.0004904706718794791, 'samples': 7705600, 'steps': 15049, 'loss/train': 2.649277925491333} +03/04/2022 07:08:42 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 07:08:46 - INFO - codeparrot_training - Step 15050: {'lr': 0.0004904692206301487, 'samples': 7706112, 'steps': 15050, 'loss/train': 2.3671743869781494} +03/04/2022 07:08:49 - INFO - codeparrot_training - Step 15051: {'lr': 0.0004904677692724664, 'samples': 7706624, 'steps': 15051, 'loss/train': 1.6050739288330078} +03/04/2022 07:08:51 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 07:08:54 - INFO - codeparrot_training - Step 15052: {'lr': 0.000490466317806433, 'samples': 7707136, 'steps': 15052, 'loss/train': 2.0828144550323486} +03/04/2022 07:08:57 - INFO - codeparrot_training - Step 15053: {'lr': 0.0004904648662320493, 'samples': 7707648, 'steps': 15053, 'loss/train': 1.9346091747283936} +03/04/2022 07:08:59 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 07:09:03 - INFO - codeparrot_training - Step 15054: {'lr': 0.0004904634145493159, 'samples': 7708160, 'steps': 15054, 'loss/train': 1.2007808685302734} +03/04/2022 07:09:06 - INFO - codeparrot_training - Step 15055: {'lr': 0.0004904619627582332, 'samples': 7708672, 'steps': 15055, 'loss/train': 1.6872395277023315} +03/04/2022 07:09:09 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/04/2022 07:09:12 - INFO - codeparrot_training - Step 15056: {'lr': 0.0004904605108588023, 'samples': 7709184, 'steps': 15056, 'loss/train': 1.869847297668457} +03/04/2022 07:09:15 - INFO - codeparrot_training - Step 15057: {'lr': 0.0004904590588510234, 'samples': 7709696, 'steps': 15057, 'loss/train': 2.2120299339294434} +03/04/2022 07:09:17 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 07:09:20 - INFO - codeparrot_training - Step 15058: {'lr': 0.0004904576067348975, 'samples': 7710208, 'steps': 15058, 'loss/train': 1.7059688568115234} +03/04/2022 07:09:23 - INFO - codeparrot_training - Step 15059: {'lr': 0.000490456154510425, 'samples': 7710720, 'steps': 15059, 'loss/train': 2.4730095863342285} +03/04/2022 07:09:26 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 07:09:28 - INFO - codeparrot_training - Step 15060: {'lr': 0.0004904547021776067, 'samples': 7711232, 'steps': 15060, 'loss/train': 1.04810631275177} +03/04/2022 07:09:32 - INFO - codeparrot_training - Step 15061: {'lr': 0.0004904532497364432, 'samples': 7711744, 'steps': 15061, 'loss/train': 2.8565568923950195} +03/04/2022 07:09:34 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 07:09:37 - INFO - codeparrot_training - Step 15062: {'lr': 0.0004904517971869352, 'samples': 7712256, 'steps': 15062, 'loss/train': 2.3585734367370605} +03/04/2022 07:09:40 - INFO - codeparrot_training - Step 15063: {'lr': 0.0004904503445290833, 'samples': 7712768, 'steps': 15063, 'loss/train': 1.2566827535629272} +03/04/2022 07:09:42 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 07:09:45 - INFO - codeparrot_training - Step 15064: {'lr': 0.0004904488917628882, 'samples': 7713280, 'steps': 15064, 'loss/train': 1.9569579362869263} +03/04/2022 07:09:48 - INFO - codeparrot_training - Step 15065: {'lr': 0.0004904474388883507, 'samples': 7713792, 'steps': 15065, 'loss/train': 2.10269832611084} +03/04/2022 07:09:51 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 07:09:54 - INFO - codeparrot_training - Step 15066: {'lr': 0.000490445985905471, 'samples': 7714304, 'steps': 15066, 'loss/train': 1.148998737335205} +03/04/2022 07:09:57 - INFO - codeparrot_training - Step 15067: {'lr': 0.0004904445328142503, 'samples': 7714816, 'steps': 15067, 'loss/train': 2.0510916709899902} +03/04/2022 07:09:59 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 07:10:02 - INFO - codeparrot_training - Step 15068: {'lr': 0.0004904430796146889, 'samples': 7715328, 'steps': 15068, 'loss/train': 1.3475439548492432} +03/04/2022 07:10:05 - INFO - codeparrot_training - Step 15069: {'lr': 0.0004904416263067876, 'samples': 7715840, 'steps': 15069, 'loss/train': 2.5242676734924316} +03/04/2022 07:10:08 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/04/2022 07:10:11 - INFO - codeparrot_training - Step 15070: {'lr': 0.0004904401728905469, 'samples': 7716352, 'steps': 15070, 'loss/train': 1.4060122966766357} +03/04/2022 07:10:14 - INFO - codeparrot_training - Step 15071: {'lr': 0.0004904387193659677, 'samples': 7716864, 'steps': 15071, 'loss/train': 1.8636971712112427} +03/04/2022 07:10:16 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/04/2022 07:10:19 - INFO - codeparrot_training - Step 15072: {'lr': 0.0004904372657330504, 'samples': 7717376, 'steps': 15072, 'loss/train': 1.5916359424591064} +03/04/2022 07:10:23 - INFO - codeparrot_training - Step 15073: {'lr': 0.0004904358119917959, 'samples': 7717888, 'steps': 15073, 'loss/train': 1.9074276685714722} +03/04/2022 07:10:26 - INFO - codeparrot_training - Step 15074: {'lr': 0.0004904343581422047, 'samples': 7718400, 'steps': 15074, 'loss/train': 3.2441749572753906} +03/04/2022 07:10:26 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/04/2022 07:10:31 - INFO - codeparrot_training - Step 15075: {'lr': 0.0004904329041842774, 'samples': 7718912, 'steps': 15075, 'loss/train': 2.0777430534362793} +03/04/2022 07:10:34 - INFO - codeparrot_training - Step 15076: {'lr': 0.0004904314501180148, 'samples': 7719424, 'steps': 15076, 'loss/train': 1.8914417028427124} +03/04/2022 07:10:35 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 07:10:39 - INFO - codeparrot_training - Step 15077: {'lr': 0.0004904299959434175, 'samples': 7719936, 'steps': 15077, 'loss/train': 2.005387783050537} +03/04/2022 07:10:43 - INFO - codeparrot_training - Step 15078: {'lr': 0.0004904285416604862, 'samples': 7720448, 'steps': 15078, 'loss/train': 1.587920904159546} +03/04/2022 07:10:43 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 07:10:48 - INFO - codeparrot_training - Step 15079: {'lr': 0.0004904270872692215, 'samples': 7720960, 'steps': 15079, 'loss/train': 2.0889852046966553} +03/04/2022 07:10:51 - INFO - codeparrot_training - Step 15080: {'lr': 0.0004904256327696241, 'samples': 7721472, 'steps': 15080, 'loss/train': 1.4562541246414185} +03/04/2022 07:10:51 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 07:10:56 - INFO - codeparrot_training - Step 15081: {'lr': 0.0004904241781616945, 'samples': 7721984, 'steps': 15081, 'loss/train': 1.8364921808242798} +03/04/2022 07:10:59 - INFO - codeparrot_training - Step 15082: {'lr': 0.0004904227234454335, 'samples': 7722496, 'steps': 15082, 'loss/train': 1.363698124885559} +03/04/2022 07:11:00 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 07:11:05 - INFO - codeparrot_training - Step 15083: {'lr': 0.0004904212686208418, 'samples': 7723008, 'steps': 15083, 'loss/train': 1.8146157264709473} +03/04/2022 07:11:08 - INFO - codeparrot_training - Step 15084: {'lr': 0.00049041981368792, 'samples': 7723520, 'steps': 15084, 'loss/train': 2.31183123588562} +03/04/2022 07:11:08 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 07:11:13 - INFO - codeparrot_training - Step 15085: {'lr': 0.0004904183586466686, 'samples': 7724032, 'steps': 15085, 'loss/train': 2.5919792652130127} +03/04/2022 07:11:16 - INFO - codeparrot_training - Step 15086: {'lr': 0.0004904169034970885, 'samples': 7724544, 'steps': 15086, 'loss/train': 1.306950569152832} +03/04/2022 07:11:17 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 07:11:22 - INFO - codeparrot_training - Step 15087: {'lr': 0.0004904154482391803, 'samples': 7725056, 'steps': 15087, 'loss/train': 1.560609221458435} +03/04/2022 07:11:25 - INFO - codeparrot_training - Step 15088: {'lr': 0.0004904139928729445, 'samples': 7725568, 'steps': 15088, 'loss/train': 1.7392244338989258} +03/04/2022 07:11:25 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 07:11:30 - INFO - codeparrot_training - Step 15089: {'lr': 0.0004904125373983819, 'samples': 7726080, 'steps': 15089, 'loss/train': 2.120112419128418} +03/04/2022 07:11:33 - INFO - codeparrot_training - Step 15090: {'lr': 0.0004904110818154931, 'samples': 7726592, 'steps': 15090, 'loss/train': 1.361201286315918} +03/04/2022 07:11:33 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 07:11:38 - INFO - codeparrot_training - Step 15091: {'lr': 0.0004904096261242789, 'samples': 7727104, 'steps': 15091, 'loss/train': 1.7765558958053589} +03/04/2022 07:11:42 - INFO - codeparrot_training - Step 15092: {'lr': 0.0004904081703247397, 'samples': 7727616, 'steps': 15092, 'loss/train': 1.2364274263381958} +03/04/2022 07:11:42 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 07:11:47 - INFO - codeparrot_training - Step 15093: {'lr': 0.0004904067144168763, 'samples': 7728128, 'steps': 15093, 'loss/train': 1.9350498914718628} +03/04/2022 07:11:50 - INFO - codeparrot_training - Step 15094: {'lr': 0.0004904052584006895, 'samples': 7728640, 'steps': 15094, 'loss/train': 1.9972333908081055} +03/04/2022 07:11:50 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 07:11:55 - INFO - codeparrot_training - Step 15095: {'lr': 0.0004904038022761797, 'samples': 7729152, 'steps': 15095, 'loss/train': 2.5483715534210205} +03/04/2022 07:11:59 - INFO - codeparrot_training - Step 15096: {'lr': 0.0004904023460433475, 'samples': 7729664, 'steps': 15096, 'loss/train': 2.1004207134246826} +03/04/2022 07:11:59 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 07:12:04 - INFO - codeparrot_training - Step 15097: {'lr': 0.0004904008897021939, 'samples': 7730176, 'steps': 15097, 'loss/train': 2.2273781299591064} +03/04/2022 07:12:07 - INFO - codeparrot_training - Step 15098: {'lr': 0.0004903994332527193, 'samples': 7730688, 'steps': 15098, 'loss/train': 2.102306365966797} +03/04/2022 07:12:07 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 07:12:12 - INFO - codeparrot_training - Step 15099: {'lr': 0.0004903979766949244, 'samples': 7731200, 'steps': 15099, 'loss/train': 1.6808604001998901} +03/04/2022 07:12:16 - INFO - codeparrot_training - Step 15100: {'lr': 0.00049039652002881, 'samples': 7731712, 'steps': 15100, 'loss/train': 1.13270103931427} +03/04/2022 07:12:16 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 07:12:21 - INFO - codeparrot_training - Step 15101: {'lr': 0.0004903950632543766, 'samples': 7732224, 'steps': 15101, 'loss/train': 1.835746169090271} +03/04/2022 07:12:24 - INFO - codeparrot_training - Step 15102: {'lr': 0.0004903936063716248, 'samples': 7732736, 'steps': 15102, 'loss/train': 1.6485700607299805} +03/04/2022 07:12:24 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 07:12:29 - INFO - codeparrot_training - Step 15103: {'lr': 0.0004903921493805554, 'samples': 7733248, 'steps': 15103, 'loss/train': 2.1256768703460693} +03/04/2022 07:12:32 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 07:12:35 - INFO - codeparrot_training - Step 15104: {'lr': 0.000490390692281169, 'samples': 7733760, 'steps': 15104, 'loss/train': 0.5741967558860779} +03/04/2022 07:12:38 - INFO - codeparrot_training - Step 15105: {'lr': 0.0004903892350734663, 'samples': 7734272, 'steps': 15105, 'loss/train': 2.141782522201538} +03/04/2022 07:12:41 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/04/2022 07:12:43 - INFO - codeparrot_training - Step 15106: {'lr': 0.0004903877777574479, 'samples': 7734784, 'steps': 15106, 'loss/train': 1.3447749614715576} +03/04/2022 07:12:46 - INFO - codeparrot_training - Step 15107: {'lr': 0.0004903863203331145, 'samples': 7735296, 'steps': 15107, 'loss/train': 2.221787929534912} +03/04/2022 07:12:49 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 07:12:51 - INFO - codeparrot_training - Step 15108: {'lr': 0.0004903848628004667, 'samples': 7735808, 'steps': 15108, 'loss/train': 1.5001084804534912} +03/04/2022 07:12:55 - INFO - codeparrot_training - Step 15109: {'lr': 0.0004903834051595052, 'samples': 7736320, 'steps': 15109, 'loss/train': 2.3787012100219727} +03/04/2022 07:12:58 - INFO - codeparrot_training - Step 15110: {'lr': 0.0004903819474102306, 'samples': 7736832, 'steps': 15110, 'loss/train': 1.1772141456604004} +03/04/2022 07:12:58 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 07:13:03 - INFO - codeparrot_training - Step 15111: {'lr': 0.0004903804895526437, 'samples': 7737344, 'steps': 15111, 'loss/train': 1.8408877849578857} +03/04/2022 07:13:06 - INFO - codeparrot_training - Step 15112: {'lr': 0.0004903790315867449, 'samples': 7737856, 'steps': 15112, 'loss/train': 1.5420091152191162} +03/04/2022 07:13:06 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 07:13:12 - INFO - codeparrot_training - Step 15113: {'lr': 0.0004903775735125352, 'samples': 7738368, 'steps': 15113, 'loss/train': 2.2399837970733643} +03/04/2022 07:13:15 - INFO - codeparrot_training - Step 15114: {'lr': 0.0004903761153300149, 'samples': 7738880, 'steps': 15114, 'loss/train': 2.094606876373291} +03/04/2022 07:13:17 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/04/2022 07:13:20 - INFO - codeparrot_training - Step 15115: {'lr': 0.000490374657039185, 'samples': 7739392, 'steps': 15115, 'loss/train': 1.7523351907730103} +03/04/2022 07:13:23 - INFO - codeparrot_training - Step 15116: {'lr': 0.0004903731986400459, 'samples': 7739904, 'steps': 15116, 'loss/train': 0.9121655225753784} +03/04/2022 07:13:25 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/04/2022 07:13:29 - INFO - codeparrot_training - Step 15117: {'lr': 0.0004903717401325983, 'samples': 7740416, 'steps': 15117, 'loss/train': 1.7191216945648193} +03/04/2022 07:13:32 - INFO - codeparrot_training - Step 15118: {'lr': 0.000490370281516843, 'samples': 7740928, 'steps': 15118, 'loss/train': 1.6949079036712646} +03/04/2022 07:13:34 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 07:13:37 - INFO - codeparrot_training - Step 15119: {'lr': 0.0004903688227927806, 'samples': 7741440, 'steps': 15119, 'loss/train': 2.0688724517822266} +03/04/2022 07:13:40 - INFO - codeparrot_training - Step 15120: {'lr': 0.0004903673639604116, 'samples': 7741952, 'steps': 15120, 'loss/train': 1.5784456729888916} +03/04/2022 07:13:42 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 07:13:46 - INFO - codeparrot_training - Step 15121: {'lr': 0.0004903659050197369, 'samples': 7742464, 'steps': 15121, 'loss/train': 1.3689708709716797} +03/04/2022 07:13:49 - INFO - codeparrot_training - Step 15122: {'lr': 0.0004903644459707569, 'samples': 7742976, 'steps': 15122, 'loss/train': 2.1300764083862305} +03/04/2022 07:13:51 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 07:13:54 - INFO - codeparrot_training - Step 15123: {'lr': 0.0004903629868134725, 'samples': 7743488, 'steps': 15123, 'loss/train': 2.0668346881866455} +03/04/2022 07:13:57 - INFO - codeparrot_training - Step 15124: {'lr': 0.0004903615275478841, 'samples': 7744000, 'steps': 15124, 'loss/train': 1.5456273555755615} +03/04/2022 07:14:00 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 07:14:03 - INFO - codeparrot_training - Step 15125: {'lr': 0.0004903600681739926, 'samples': 7744512, 'steps': 15125, 'loss/train': 1.9291473627090454} +03/04/2022 07:14:06 - INFO - codeparrot_training - Step 15126: {'lr': 0.0004903586086917986, 'samples': 7745024, 'steps': 15126, 'loss/train': 2.6652748584747314} +03/04/2022 07:14:08 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 07:14:11 - INFO - codeparrot_training - Step 15127: {'lr': 0.0004903571491013027, 'samples': 7745536, 'steps': 15127, 'loss/train': 1.7841682434082031} +03/04/2022 07:14:14 - INFO - codeparrot_training - Step 15128: {'lr': 0.0004903556894025055, 'samples': 7746048, 'steps': 15128, 'loss/train': 1.5957554578781128} +03/04/2022 07:14:16 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 07:14:20 - INFO - codeparrot_training - Step 15129: {'lr': 0.0004903542295954077, 'samples': 7746560, 'steps': 15129, 'loss/train': 1.7252914905548096} +03/04/2022 07:14:23 - INFO - codeparrot_training - Step 15130: {'lr': 0.0004903527696800102, 'samples': 7747072, 'steps': 15130, 'loss/train': 2.686516284942627} +03/04/2022 07:14:24 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 07:14:28 - INFO - codeparrot_training - Step 15131: {'lr': 0.0004903513096563133, 'samples': 7747584, 'steps': 15131, 'loss/train': 1.9319047927856445} +03/04/2022 07:14:31 - INFO - codeparrot_training - Step 15132: {'lr': 0.0004903498495243178, 'samples': 7748096, 'steps': 15132, 'loss/train': 1.4640710353851318} +03/04/2022 07:14:33 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 07:14:36 - INFO - codeparrot_training - Step 15133: {'lr': 0.0004903483892840244, 'samples': 7748608, 'steps': 15133, 'loss/train': 2.272279739379883} +03/04/2022 07:14:40 - INFO - codeparrot_training - Step 15134: {'lr': 0.0004903469289354338, 'samples': 7749120, 'steps': 15134, 'loss/train': 3.2980268001556396} +03/04/2022 07:14:42 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/04/2022 07:14:45 - INFO - codeparrot_training - Step 15135: {'lr': 0.0004903454684785465, 'samples': 7749632, 'steps': 15135, 'loss/train': 1.8776437044143677} +03/04/2022 07:14:48 - INFO - codeparrot_training - Step 15136: {'lr': 0.0004903440079133633, 'samples': 7750144, 'steps': 15136, 'loss/train': 2.0369532108306885} +03/04/2022 07:14:50 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 07:14:53 - INFO - codeparrot_training - Step 15137: {'lr': 0.0004903425472398846, 'samples': 7750656, 'steps': 15137, 'loss/train': 2.3640620708465576} +03/04/2022 07:14:57 - INFO - codeparrot_training - Step 15138: {'lr': 0.0004903410864581115, 'samples': 7751168, 'steps': 15138, 'loss/train': 2.666942834854126} +03/04/2022 07:14:58 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 07:15:02 - INFO - codeparrot_training - Step 15139: {'lr': 0.0004903396255680443, 'samples': 7751680, 'steps': 15139, 'loss/train': 2.244220018386841} +03/04/2022 07:15:05 - INFO - codeparrot_training - Step 15140: {'lr': 0.0004903381645696838, 'samples': 7752192, 'steps': 15140, 'loss/train': 1.952545404434204} +03/04/2022 07:15:07 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 07:15:10 - INFO - codeparrot_training - Step 15141: {'lr': 0.0004903367034630307, 'samples': 7752704, 'steps': 15141, 'loss/train': 1.6053801774978638} +03/04/2022 07:15:13 - INFO - codeparrot_training - Step 15142: {'lr': 0.0004903352422480855, 'samples': 7753216, 'steps': 15142, 'loss/train': 2.145606756210327} +03/04/2022 07:15:15 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 07:15:19 - INFO - codeparrot_training - Step 15143: {'lr': 0.000490333780924849, 'samples': 7753728, 'steps': 15143, 'loss/train': 1.378994345664978} +03/04/2022 07:15:22 - INFO - codeparrot_training - Step 15144: {'lr': 0.0004903323194933218, 'samples': 7754240, 'steps': 15144, 'loss/train': 1.8914250135421753} +03/04/2022 07:15:23 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/04/2022 07:15:27 - INFO - codeparrot_training - Step 15145: {'lr': 0.0004903308579535045, 'samples': 7754752, 'steps': 15145, 'loss/train': 1.6645804643630981} +03/04/2022 07:15:30 - INFO - codeparrot_training - Step 15146: {'lr': 0.0004903293963053979, 'samples': 7755264, 'steps': 15146, 'loss/train': 1.6760786771774292} +03/04/2022 07:15:31 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/04/2022 07:15:36 - INFO - codeparrot_training - Step 15147: {'lr': 0.0004903279345490026, 'samples': 7755776, 'steps': 15147, 'loss/train': 0.5476749539375305} +03/04/2022 07:15:39 - INFO - codeparrot_training - Step 15148: {'lr': 0.0004903264726843191, 'samples': 7756288, 'steps': 15148, 'loss/train': 2.1113502979278564} +03/04/2022 07:15:40 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 07:15:44 - INFO - codeparrot_training - Step 15149: {'lr': 0.0004903250107113483, 'samples': 7756800, 'steps': 15149, 'loss/train': 0.5562407374382019} +03/04/2022 07:15:47 - INFO - codeparrot_training - Step 15150: {'lr': 0.0004903235486300908, 'samples': 7757312, 'steps': 15150, 'loss/train': 2.553931713104248} +03/04/2022 07:15:48 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 07:15:52 - INFO - codeparrot_training - Step 15151: {'lr': 0.0004903220864405471, 'samples': 7757824, 'steps': 15151, 'loss/train': 1.8831093311309814} +03/04/2022 07:15:56 - INFO - codeparrot_training - Step 15152: {'lr': 0.000490320624142718, 'samples': 7758336, 'steps': 15152, 'loss/train': 2.189178466796875} +03/04/2022 07:15:56 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 07:16:01 - INFO - codeparrot_training - Step 15153: {'lr': 0.0004903191617366043, 'samples': 7758848, 'steps': 15153, 'loss/train': 1.3438587188720703} +03/04/2022 07:16:04 - INFO - codeparrot_training - Step 15154: {'lr': 0.0004903176992222063, 'samples': 7759360, 'steps': 15154, 'loss/train': 2.5476737022399902} +03/04/2022 07:16:05 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 07:16:09 - INFO - codeparrot_training - Step 15155: {'lr': 0.000490316236599525, 'samples': 7759872, 'steps': 15155, 'loss/train': 0.9849637746810913} +03/04/2022 07:16:12 - INFO - codeparrot_training - Step 15156: {'lr': 0.0004903147738685609, 'samples': 7760384, 'steps': 15156, 'loss/train': 2.612017869949341} +03/04/2022 07:16:13 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/04/2022 07:16:18 - INFO - codeparrot_training - Step 15157: {'lr': 0.0004903133110293145, 'samples': 7760896, 'steps': 15157, 'loss/train': 3.3260271549224854} +03/04/2022 07:16:21 - INFO - codeparrot_training - Step 15158: {'lr': 0.0004903118480817868, 'samples': 7761408, 'steps': 15158, 'loss/train': 1.8378442525863647} +03/04/2022 07:16:22 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/04/2022 07:16:26 - INFO - codeparrot_training - Step 15159: {'lr': 0.0004903103850259781, 'samples': 7761920, 'steps': 15159, 'loss/train': 1.5548557043075562} +03/04/2022 07:16:29 - INFO - codeparrot_training - Step 15160: {'lr': 0.0004903089218618895, 'samples': 7762432, 'steps': 15160, 'loss/train': 1.8729041814804077} +03/04/2022 07:16:30 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 07:16:35 - INFO - codeparrot_training - Step 15161: {'lr': 0.0004903074585895212, 'samples': 7762944, 'steps': 15161, 'loss/train': 1.6403844356536865} +03/04/2022 07:16:38 - INFO - codeparrot_training - Step 15162: {'lr': 0.0004903059952088742, 'samples': 7763456, 'steps': 15162, 'loss/train': 2.155118465423584} +03/04/2022 07:16:38 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 07:16:43 - INFO - codeparrot_training - Step 15163: {'lr': 0.0004903045317199489, 'samples': 7763968, 'steps': 15163, 'loss/train': 2.757357120513916} +03/04/2022 07:16:46 - INFO - codeparrot_training - Step 15164: {'lr': 0.0004903030681227463, 'samples': 7764480, 'steps': 15164, 'loss/train': 0.7976189255714417} +03/04/2022 07:16:47 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/04/2022 07:16:51 - INFO - codeparrot_training - Step 15165: {'lr': 0.0004903016044172666, 'samples': 7764992, 'steps': 15165, 'loss/train': 1.812813639640808} +03/04/2022 07:16:55 - INFO - codeparrot_training - Step 15166: {'lr': 0.0004903001406035109, 'samples': 7765504, 'steps': 15166, 'loss/train': 1.7326570749282837} +03/04/2022 07:16:55 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 07:17:00 - INFO - codeparrot_training - Step 15167: {'lr': 0.0004902986766814795, 'samples': 7766016, 'steps': 15167, 'loss/train': 1.8663133382797241} +03/04/2022 07:17:03 - INFO - codeparrot_training - Step 15168: {'lr': 0.0004902972126511734, 'samples': 7766528, 'steps': 15168, 'loss/train': 1.5511295795440674} +03/04/2022 07:17:04 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/04/2022 07:17:08 - INFO - codeparrot_training - Step 15169: {'lr': 0.0004902957485125929, 'samples': 7767040, 'steps': 15169, 'loss/train': 1.2507461309432983} +03/04/2022 07:17:12 - INFO - codeparrot_training - Step 15170: {'lr': 0.0004902942842657389, 'samples': 7767552, 'steps': 15170, 'loss/train': 1.5924980640411377} +03/04/2022 07:17:13 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/04/2022 07:17:17 - INFO - codeparrot_training - Step 15171: {'lr': 0.0004902928199106121, 'samples': 7768064, 'steps': 15171, 'loss/train': 0.4915942847728729} +03/04/2022 07:17:20 - INFO - codeparrot_training - Step 15172: {'lr': 0.000490291355447213, 'samples': 7768576, 'steps': 15172, 'loss/train': 3.7962846755981445} +03/04/2022 07:17:21 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 07:17:25 - INFO - codeparrot_training - Step 15173: {'lr': 0.0004902898908755424, 'samples': 7769088, 'steps': 15173, 'loss/train': 1.7255077362060547} +03/04/2022 07:17:29 - INFO - codeparrot_training - Step 15174: {'lr': 0.0004902884261956007, 'samples': 7769600, 'steps': 15174, 'loss/train': 1.0302788019180298} +03/04/2022 07:17:30 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 07:17:34 - INFO - codeparrot_training - Step 15175: {'lr': 0.0004902869614073889, 'samples': 7770112, 'steps': 15175, 'loss/train': 1.2291743755340576} +03/04/2022 07:17:37 - INFO - codeparrot_training - Step 15176: {'lr': 0.0004902854965109074, 'samples': 7770624, 'steps': 15176, 'loss/train': 1.9349719285964966} +03/04/2022 07:17:39 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 07:17:42 - INFO - codeparrot_training - Step 15177: {'lr': 0.0004902840315061571, 'samples': 7771136, 'steps': 15177, 'loss/train': 0.698824942111969} +03/04/2022 07:17:45 - INFO - codeparrot_training - Step 15178: {'lr': 0.0004902825663931384, 'samples': 7771648, 'steps': 15178, 'loss/train': 1.9414786100387573} +03/04/2022 07:17:47 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 07:17:51 - INFO - codeparrot_training - Step 15179: {'lr': 0.0004902811011718521, 'samples': 7772160, 'steps': 15179, 'loss/train': 1.5617390871047974} +03/04/2022 07:17:54 - INFO - codeparrot_training - Step 15180: {'lr': 0.0004902796358422989, 'samples': 7772672, 'steps': 15180, 'loss/train': 2.312124490737915} +03/04/2022 07:17:56 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 07:17:59 - INFO - codeparrot_training - Step 15181: {'lr': 0.0004902781704044793, 'samples': 7773184, 'steps': 15181, 'loss/train': 1.7238904237747192} +03/04/2022 07:18:02 - INFO - codeparrot_training - Step 15182: {'lr': 0.0004902767048583942, 'samples': 7773696, 'steps': 15182, 'loss/train': 2.102057695388794} +03/04/2022 07:18:04 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 07:18:08 - INFO - codeparrot_training - Step 15183: {'lr': 0.000490275239204044, 'samples': 7774208, 'steps': 15183, 'loss/train': 1.9617153406143188} +03/04/2022 07:18:11 - INFO - codeparrot_training - Step 15184: {'lr': 0.0004902737734414296, 'samples': 7774720, 'steps': 15184, 'loss/train': 1.735923171043396} +03/04/2022 07:18:13 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/04/2022 07:18:16 - INFO - codeparrot_training - Step 15185: {'lr': 0.0004902723075705514, 'samples': 7775232, 'steps': 15185, 'loss/train': 1.4334698915481567} +03/04/2022 07:18:19 - INFO - codeparrot_training - Step 15186: {'lr': 0.0004902708415914103, 'samples': 7775744, 'steps': 15186, 'loss/train': 2.086953639984131} +03/04/2022 07:18:21 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 07:18:25 - INFO - codeparrot_training - Step 15187: {'lr': 0.0004902693755040069, 'samples': 7776256, 'steps': 15187, 'loss/train': 2.099219799041748} +03/04/2022 07:18:28 - INFO - codeparrot_training - Step 15188: {'lr': 0.0004902679093083418, 'samples': 7776768, 'steps': 15188, 'loss/train': 1.505436897277832} +03/04/2022 07:18:30 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 07:18:33 - INFO - codeparrot_training - Step 15189: {'lr': 0.0004902664430044156, 'samples': 7777280, 'steps': 15189, 'loss/train': 2.5259106159210205} +03/04/2022 07:18:36 - INFO - codeparrot_training - Step 15190: {'lr': 0.0004902649765922292, 'samples': 7777792, 'steps': 15190, 'loss/train': 1.666769027709961} +03/04/2022 07:18:38 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 07:18:41 - INFO - codeparrot_training - Step 15191: {'lr': 0.0004902635100717831, 'samples': 7778304, 'steps': 15191, 'loss/train': 2.411850929260254} +03/04/2022 07:18:45 - INFO - codeparrot_training - Step 15192: {'lr': 0.0004902620434430778, 'samples': 7778816, 'steps': 15192, 'loss/train': 1.8869235515594482} +03/04/2022 07:18:47 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/04/2022 07:18:50 - INFO - codeparrot_training - Step 15193: {'lr': 0.0004902605767061142, 'samples': 7779328, 'steps': 15193, 'loss/train': 2.333179235458374} +03/04/2022 07:18:53 - INFO - codeparrot_training - Step 15194: {'lr': 0.000490259109860893, 'samples': 7779840, 'steps': 15194, 'loss/train': 2.038902997970581} +03/04/2022 07:18:55 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 07:18:58 - INFO - codeparrot_training - Step 15195: {'lr': 0.0004902576429074146, 'samples': 7780352, 'steps': 15195, 'loss/train': 2.3260610103607178} +03/04/2022 07:19:01 - INFO - codeparrot_training - Step 15196: {'lr': 0.0004902561758456799, 'samples': 7780864, 'steps': 15196, 'loss/train': 1.5010385513305664} +03/04/2022 07:19:03 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/04/2022 07:19:07 - INFO - codeparrot_training - Step 15197: {'lr': 0.0004902547086756895, 'samples': 7781376, 'steps': 15197, 'loss/train': 1.5812582969665527} +03/04/2022 07:19:10 - INFO - codeparrot_training - Step 15198: {'lr': 0.000490253241397444, 'samples': 7781888, 'steps': 15198, 'loss/train': 1.8837867975234985} +03/04/2022 07:19:12 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 07:19:15 - INFO - codeparrot_training - Step 15199: {'lr': 0.0004902517740109441, 'samples': 7782400, 'steps': 15199, 'loss/train': 0.7866479754447937} +03/04/2022 07:19:18 - INFO - codeparrot_training - Step 15200: {'lr': 0.0004902503065161905, 'samples': 7782912, 'steps': 15200, 'loss/train': 1.841648817062378} +03/04/2022 07:19:20 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/04/2022 07:19:24 - INFO - codeparrot_training - Step 15201: {'lr': 0.0004902488389131837, 'samples': 7783424, 'steps': 15201, 'loss/train': 2.3344342708587646} +03/04/2022 07:19:27 - INFO - codeparrot_training - Step 15202: {'lr': 0.0004902473712019246, 'samples': 7783936, 'steps': 15202, 'loss/train': 2.5264275074005127} +03/04/2022 07:19:29 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 07:19:32 - INFO - codeparrot_training - Step 15203: {'lr': 0.0004902459033824137, 'samples': 7784448, 'steps': 15203, 'loss/train': 1.5581485033035278} +03/04/2022 07:19:35 - INFO - codeparrot_training - Step 15204: {'lr': 0.0004902444354546516, 'samples': 7784960, 'steps': 15204, 'loss/train': 2.0676255226135254} +03/04/2022 07:19:37 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/04/2022 07:19:40 - INFO - codeparrot_training - Step 15205: {'lr': 0.0004902429674186392, 'samples': 7785472, 'steps': 15205, 'loss/train': 1.9360315799713135} +03/04/2022 07:19:43 - INFO - codeparrot_training - Step 15206: {'lr': 0.000490241499274377, 'samples': 7785984, 'steps': 15206, 'loss/train': 2.9299957752227783} +03/04/2022 07:19:45 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 07:19:49 - INFO - codeparrot_training - Step 15207: {'lr': 0.0004902400310218657, 'samples': 7786496, 'steps': 15207, 'loss/train': 1.4535551071166992} +03/04/2022 07:19:52 - INFO - codeparrot_training - Step 15208: {'lr': 0.0004902385626611059, 'samples': 7787008, 'steps': 15208, 'loss/train': 2.0935304164886475} +03/04/2022 07:19:54 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 07:19:57 - INFO - codeparrot_training - Step 15209: {'lr': 0.0004902370941920984, 'samples': 7787520, 'steps': 15209, 'loss/train': 1.5534124374389648} +03/04/2022 07:20:00 - INFO - codeparrot_training - Step 15210: {'lr': 0.0004902356256148437, 'samples': 7788032, 'steps': 15210, 'loss/train': 1.4026484489440918} +03/04/2022 07:20:02 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 07:20:06 - INFO - codeparrot_training - Step 15211: {'lr': 0.0004902341569293425, 'samples': 7788544, 'steps': 15211, 'loss/train': 1.85818350315094} +03/04/2022 07:20:09 - INFO - codeparrot_training - Step 15212: {'lr': 0.0004902326881355955, 'samples': 7789056, 'steps': 15212, 'loss/train': 0.7760187387466431} +03/04/2022 07:20:11 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 07:20:14 - INFO - codeparrot_training - Step 15213: {'lr': 0.0004902312192336034, 'samples': 7789568, 'steps': 15213, 'loss/train': 2.8424904346466064} +03/04/2022 07:20:17 - INFO - codeparrot_training - Step 15214: {'lr': 0.000490229750223367, 'samples': 7790080, 'steps': 15214, 'loss/train': 2.1905300617218018} +03/04/2022 07:20:19 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/04/2022 07:20:22 - INFO - codeparrot_training - Step 15215: {'lr': 0.0004902282811048864, 'samples': 7790592, 'steps': 15215, 'loss/train': 2.2173264026641846} +03/04/2022 07:20:26 - INFO - codeparrot_training - Step 15216: {'lr': 0.000490226811878163, 'samples': 7791104, 'steps': 15216, 'loss/train': 2.5282797813415527} +03/04/2022 07:20:28 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 07:20:31 - INFO - codeparrot_training - Step 15217: {'lr': 0.0004902253425431969, 'samples': 7791616, 'steps': 15217, 'loss/train': 1.6504093408584595} +03/04/2022 07:20:34 - INFO - codeparrot_training - Step 15218: {'lr': 0.000490223873099989, 'samples': 7792128, 'steps': 15218, 'loss/train': 2.87551212310791} +03/04/2022 07:20:36 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 07:20:40 - INFO - codeparrot_training - Step 15219: {'lr': 0.00049022240354854, 'samples': 7792640, 'steps': 15219, 'loss/train': 2.115551471710205} +03/04/2022 07:20:43 - INFO - codeparrot_training - Step 15220: {'lr': 0.0004902209338888503, 'samples': 7793152, 'steps': 15220, 'loss/train': 2.09633207321167} +03/04/2022 07:20:45 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 07:20:48 - INFO - codeparrot_training - Step 15221: {'lr': 0.000490219464120921, 'samples': 7793664, 'steps': 15221, 'loss/train': 2.0706825256347656} +03/04/2022 07:20:51 - INFO - codeparrot_training - Step 15222: {'lr': 0.0004902179942447524, 'samples': 7794176, 'steps': 15222, 'loss/train': 0.9063794612884521} +03/04/2022 07:20:53 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 07:20:56 - INFO - codeparrot_training - Step 15223: {'lr': 0.0004902165242603452, 'samples': 7794688, 'steps': 15223, 'loss/train': 2.0035009384155273} +03/04/2022 07:21:00 - INFO - codeparrot_training - Step 15224: {'lr': 0.0004902150541677003, 'samples': 7795200, 'steps': 15224, 'loss/train': 1.4303995370864868} +03/04/2022 07:21:01 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 07:21:05 - INFO - codeparrot_training - Step 15225: {'lr': 0.0004902135839668181, 'samples': 7795712, 'steps': 15225, 'loss/train': 1.6400343179702759} +03/04/2022 07:21:08 - INFO - codeparrot_training - Step 15226: {'lr': 0.0004902121136576994, 'samples': 7796224, 'steps': 15226, 'loss/train': 2.183572769165039} +03/04/2022 07:21:10 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 07:21:13 - INFO - codeparrot_training - Step 15227: {'lr': 0.0004902106432403448, 'samples': 7796736, 'steps': 15227, 'loss/train': 1.891182541847229} +03/04/2022 07:21:16 - INFO - codeparrot_training - Step 15228: {'lr': 0.0004902091727147551, 'samples': 7797248, 'steps': 15228, 'loss/train': 1.5757231712341309} +03/04/2022 07:21:18 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 07:21:22 - INFO - codeparrot_training - Step 15229: {'lr': 0.0004902077020809307, 'samples': 7797760, 'steps': 15229, 'loss/train': 1.731397032737732} +03/04/2022 07:21:25 - INFO - codeparrot_training - Step 15230: {'lr': 0.0004902062313388725, 'samples': 7798272, 'steps': 15230, 'loss/train': 1.678324818611145} +03/04/2022 07:21:27 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 07:21:30 - INFO - codeparrot_training - Step 15231: {'lr': 0.0004902047604885811, 'samples': 7798784, 'steps': 15231, 'loss/train': 1.8555573225021362} +03/04/2022 07:21:33 - INFO - codeparrot_training - Step 15232: {'lr': 0.0004902032895300571, 'samples': 7799296, 'steps': 15232, 'loss/train': 1.5739105939865112} +03/04/2022 07:21:37 - INFO - codeparrot_training - Step 15233: {'lr': 0.0004902018184633012, 'samples': 7799808, 'steps': 15233, 'loss/train': 1.8048126697540283} +03/04/2022 07:21:37 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 07:21:42 - INFO - codeparrot_training - Step 15234: {'lr': 0.0004902003472883141, 'samples': 7800320, 'steps': 15234, 'loss/train': 1.7564748525619507} +03/04/2022 07:21:45 - INFO - codeparrot_training - Step 15235: {'lr': 0.0004901988760050964, 'samples': 7800832, 'steps': 15235, 'loss/train': 3.1761510372161865} +03/04/2022 07:21:45 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 07:21:50 - INFO - codeparrot_training - Step 15236: {'lr': 0.0004901974046136488, 'samples': 7801344, 'steps': 15236, 'loss/train': 1.9835566282272339} +03/04/2022 07:21:53 - INFO - codeparrot_training - Step 15237: {'lr': 0.000490195933113972, 'samples': 7801856, 'steps': 15237, 'loss/train': 2.1465821266174316} +03/04/2022 07:21:53 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 07:21:59 - INFO - codeparrot_training - Step 15238: {'lr': 0.0004901944615060665, 'samples': 7802368, 'steps': 15238, 'loss/train': 2.3163886070251465} +03/04/2022 07:22:02 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 07:22:04 - INFO - codeparrot_training - Step 15239: {'lr': 0.0004901929897899331, 'samples': 7802880, 'steps': 15239, 'loss/train': 0.7243784666061401} +03/04/2022 07:22:07 - INFO - codeparrot_training - Step 15240: {'lr': 0.0004901915179655726, 'samples': 7803392, 'steps': 15240, 'loss/train': 2.2140231132507324} +03/04/2022 07:22:10 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 07:22:12 - INFO - codeparrot_training - Step 15241: {'lr': 0.0004901900460329853, 'samples': 7803904, 'steps': 15241, 'loss/train': 1.2285170555114746} +03/04/2022 07:22:16 - INFO - codeparrot_training - Step 15242: {'lr': 0.0004901885739921723, 'samples': 7804416, 'steps': 15242, 'loss/train': 2.7111494541168213} +03/04/2022 07:22:18 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 07:22:21 - INFO - codeparrot_training - Step 15243: {'lr': 0.0004901871018431339, 'samples': 7804928, 'steps': 15243, 'loss/train': 1.1326370239257812} +03/04/2022 07:22:24 - INFO - codeparrot_training - Step 15244: {'lr': 0.0004901856295858708, 'samples': 7805440, 'steps': 15244, 'loss/train': 2.541508913040161} +03/04/2022 07:22:27 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 07:22:29 - INFO - codeparrot_training - Step 15245: {'lr': 0.0004901841572203839, 'samples': 7805952, 'steps': 15245, 'loss/train': 2.1089820861816406} +03/04/2022 07:22:32 - INFO - codeparrot_training - Step 15246: {'lr': 0.0004901826847466738, 'samples': 7806464, 'steps': 15246, 'loss/train': 1.3867634534835815} +03/04/2022 07:22:35 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/04/2022 07:22:38 - INFO - codeparrot_training - Step 15247: {'lr': 0.000490181212164741, 'samples': 7806976, 'steps': 15247, 'loss/train': 1.8241404294967651} +03/04/2022 07:22:41 - INFO - codeparrot_training - Step 15248: {'lr': 0.0004901797394745861, 'samples': 7807488, 'steps': 15248, 'loss/train': 2.1299917697906494} +03/04/2022 07:22:44 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 07:22:46 - INFO - codeparrot_training - Step 15249: {'lr': 0.0004901782666762102, 'samples': 7808000, 'steps': 15249, 'loss/train': 1.1770910024642944} +03/04/2022 07:22:49 - INFO - codeparrot_training - Step 15250: {'lr': 0.0004901767937696135, 'samples': 7808512, 'steps': 15250, 'loss/train': 1.8715301752090454} +03/04/2022 07:22:52 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 07:22:55 - INFO - codeparrot_training - Step 15251: {'lr': 0.0004901753207547969, 'samples': 7809024, 'steps': 15251, 'loss/train': 0.9937017560005188} +03/04/2022 07:22:58 - INFO - codeparrot_training - Step 15252: {'lr': 0.000490173847631761, 'samples': 7809536, 'steps': 15252, 'loss/train': 1.591740369796753} +03/04/2022 07:23:01 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 07:23:03 - INFO - codeparrot_training - Step 15253: {'lr': 0.0004901723744005065, 'samples': 7810048, 'steps': 15253, 'loss/train': 1.4574079513549805} +03/04/2022 07:23:06 - INFO - codeparrot_training - Step 15254: {'lr': 0.0004901709010610339, 'samples': 7810560, 'steps': 15254, 'loss/train': 1.7175101041793823} +03/04/2022 07:23:09 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 07:23:12 - INFO - codeparrot_training - Step 15255: {'lr': 0.0004901694276133441, 'samples': 7811072, 'steps': 15255, 'loss/train': 1.7223721742630005} +03/04/2022 07:23:15 - INFO - codeparrot_training - Step 15256: {'lr': 0.0004901679540574377, 'samples': 7811584, 'steps': 15256, 'loss/train': 1.258499026298523} +03/04/2022 07:23:18 - INFO - codeparrot_training - Step 15257: {'lr': 0.0004901664803933153, 'samples': 7812096, 'steps': 15257, 'loss/train': 3.2945361137390137} +03/04/2022 07:23:19 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 07:23:23 - INFO - codeparrot_training - Step 15258: {'lr': 0.0004901650066209775, 'samples': 7812608, 'steps': 15258, 'loss/train': 1.926669716835022} +03/04/2022 07:23:26 - INFO - codeparrot_training - Step 15259: {'lr': 0.0004901635327404252, 'samples': 7813120, 'steps': 15259, 'loss/train': 2.186293601989746} +03/04/2022 07:23:27 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/04/2022 07:23:32 - INFO - codeparrot_training - Step 15260: {'lr': 0.0004901620587516587, 'samples': 7813632, 'steps': 15260, 'loss/train': 2.044063091278076} +03/04/2022 07:23:35 - INFO - codeparrot_training - Step 15261: {'lr': 0.0004901605846546791, 'samples': 7814144, 'steps': 15261, 'loss/train': 1.447873830795288} +03/04/2022 07:23:35 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/04/2022 07:23:40 - INFO - codeparrot_training - Step 15262: {'lr': 0.0004901591104494868, 'samples': 7814656, 'steps': 15262, 'loss/train': 1.7271538972854614} +03/04/2022 07:23:43 - INFO - codeparrot_training - Step 15263: {'lr': 0.0004901576361360825, 'samples': 7815168, 'steps': 15263, 'loss/train': 1.806506872177124} +03/04/2022 07:23:44 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 07:23:49 - INFO - codeparrot_training - Step 15264: {'lr': 0.0004901561617144667, 'samples': 7815680, 'steps': 15264, 'loss/train': 2.1292619705200195} +03/04/2022 07:23:52 - INFO - codeparrot_training - Step 15265: {'lr': 0.0004901546871846405, 'samples': 7816192, 'steps': 15265, 'loss/train': 1.0907188653945923} +03/04/2022 07:23:53 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 07:23:57 - INFO - codeparrot_training - Step 15266: {'lr': 0.0004901532125466041, 'samples': 7816704, 'steps': 15266, 'loss/train': 1.791675090789795} +03/04/2022 07:24:00 - INFO - codeparrot_training - Step 15267: {'lr': 0.0004901517378003584, 'samples': 7817216, 'steps': 15267, 'loss/train': 1.4815144538879395} +03/04/2022 07:24:01 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 07:24:06 - INFO - codeparrot_training - Step 15268: {'lr': 0.0004901502629459042, 'samples': 7817728, 'steps': 15268, 'loss/train': 1.456441879272461} +03/04/2022 07:24:09 - INFO - codeparrot_training - Step 15269: {'lr': 0.000490148787983242, 'samples': 7818240, 'steps': 15269, 'loss/train': 1.6347016096115112} +03/04/2022 07:24:09 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 07:24:14 - INFO - codeparrot_training - Step 15270: {'lr': 0.0004901473129123723, 'samples': 7818752, 'steps': 15270, 'loss/train': 2.2746646404266357} +03/04/2022 07:24:17 - INFO - codeparrot_training - Step 15271: {'lr': 0.0004901458377332959, 'samples': 7819264, 'steps': 15271, 'loss/train': 1.4392486810684204} +03/04/2022 07:24:18 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 07:24:22 - INFO - codeparrot_training - Step 15272: {'lr': 0.0004901443624460136, 'samples': 7819776, 'steps': 15272, 'loss/train': 2.8615505695343018} +03/04/2022 07:24:26 - INFO - codeparrot_training - Step 15273: {'lr': 0.000490142887050526, 'samples': 7820288, 'steps': 15273, 'loss/train': 1.342346429824829} +03/04/2022 07:24:26 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 07:24:31 - INFO - codeparrot_training - Step 15274: {'lr': 0.0004901414115468335, 'samples': 7820800, 'steps': 15274, 'loss/train': 2.0299603939056396} +03/04/2022 07:24:34 - INFO - codeparrot_training - Step 15275: {'lr': 0.0004901399359349372, 'samples': 7821312, 'steps': 15275, 'loss/train': 2.2227683067321777} +03/04/2022 07:24:34 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 07:24:39 - INFO - codeparrot_training - Step 15276: {'lr': 0.0004901384602148376, 'samples': 7821824, 'steps': 15276, 'loss/train': 2.1070261001586914} +03/04/2022 07:24:42 - INFO - codeparrot_training - Step 15277: {'lr': 0.0004901369843865351, 'samples': 7822336, 'steps': 15277, 'loss/train': 1.4880017042160034} +03/04/2022 07:24:43 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 07:24:48 - INFO - codeparrot_training - Step 15278: {'lr': 0.0004901355084500307, 'samples': 7822848, 'steps': 15278, 'loss/train': 3.137118339538574} +03/04/2022 07:24:51 - INFO - codeparrot_training - Step 15279: {'lr': 0.000490134032405325, 'samples': 7823360, 'steps': 15279, 'loss/train': 2.436488628387451} +03/04/2022 07:24:51 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 07:24:56 - INFO - codeparrot_training - Step 15280: {'lr': 0.0004901325562524185, 'samples': 7823872, 'steps': 15280, 'loss/train': 1.637918472290039} +03/04/2022 07:24:59 - INFO - codeparrot_training - Step 15281: {'lr': 0.0004901310799913121, 'samples': 7824384, 'steps': 15281, 'loss/train': 2.729994535446167} +03/04/2022 07:25:00 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 07:25:04 - INFO - codeparrot_training - Step 15282: {'lr': 0.0004901296036220062, 'samples': 7824896, 'steps': 15282, 'loss/train': 2.30092453956604} +03/04/2022 07:25:08 - INFO - codeparrot_training - Step 15283: {'lr': 0.0004901281271445016, 'samples': 7825408, 'steps': 15283, 'loss/train': 2.0814621448516846} +03/04/2022 07:25:08 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/04/2022 07:25:13 - INFO - codeparrot_training - Step 15284: {'lr': 0.000490126650558799, 'samples': 7825920, 'steps': 15284, 'loss/train': 1.6219745874404907} +03/04/2022 07:25:16 - INFO - codeparrot_training - Step 15285: {'lr': 0.000490125173864899, 'samples': 7826432, 'steps': 15285, 'loss/train': 1.4268916845321655} +03/04/2022 07:25:17 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 07:25:21 - INFO - codeparrot_training - Step 15286: {'lr': 0.0004901236970628024, 'samples': 7826944, 'steps': 15286, 'loss/train': 1.961130976676941} +03/04/2022 07:25:25 - INFO - codeparrot_training - Step 15287: {'lr': 0.0004901222201525099, 'samples': 7827456, 'steps': 15287, 'loss/train': 1.8257182836532593} +03/04/2022 07:25:26 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/04/2022 07:25:30 - INFO - codeparrot_training - Step 15288: {'lr': 0.0004901207431340218, 'samples': 7827968, 'steps': 15288, 'loss/train': 2.2725532054901123} +03/04/2022 07:25:33 - INFO - codeparrot_training - Step 15289: {'lr': 0.000490119266007339, 'samples': 7828480, 'steps': 15289, 'loss/train': 1.7536238431930542} +03/04/2022 07:25:34 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 07:25:38 - INFO - codeparrot_training - Step 15290: {'lr': 0.0004901177887724623, 'samples': 7828992, 'steps': 15290, 'loss/train': 2.088576555252075} +03/04/2022 07:25:42 - INFO - codeparrot_training - Step 15291: {'lr': 0.0004901163114293921, 'samples': 7829504, 'steps': 15291, 'loss/train': 2.578240394592285} +03/04/2022 07:25:43 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/04/2022 07:25:47 - INFO - codeparrot_training - Step 15292: {'lr': 0.0004901148339781293, 'samples': 7830016, 'steps': 15292, 'loss/train': 1.7349385023117065} +03/04/2022 07:25:50 - INFO - codeparrot_training - Step 15293: {'lr': 0.0004901133564186744, 'samples': 7830528, 'steps': 15293, 'loss/train': 1.563867211341858} +03/04/2022 07:25:51 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 07:25:55 - INFO - codeparrot_training - Step 15294: {'lr': 0.0004901118787510281, 'samples': 7831040, 'steps': 15294, 'loss/train': 1.7346563339233398} +03/04/2022 07:25:58 - INFO - codeparrot_training - Step 15295: {'lr': 0.0004901104009751912, 'samples': 7831552, 'steps': 15295, 'loss/train': 2.137028694152832} +03/04/2022 07:25:59 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 07:26:04 - INFO - codeparrot_training - Step 15296: {'lr': 0.0004901089230911642, 'samples': 7832064, 'steps': 15296, 'loss/train': 2.3368046283721924} +03/04/2022 07:26:07 - INFO - codeparrot_training - Step 15297: {'lr': 0.0004901074450989479, 'samples': 7832576, 'steps': 15297, 'loss/train': 1.988429069519043} +03/04/2022 07:26:08 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/04/2022 07:26:12 - INFO - codeparrot_training - Step 15298: {'lr': 0.0004901059669985427, 'samples': 7833088, 'steps': 15298, 'loss/train': 2.413472890853882} +03/04/2022 07:26:15 - INFO - codeparrot_training - Step 15299: {'lr': 0.0004901044887899496, 'samples': 7833600, 'steps': 15299, 'loss/train': 1.4780139923095703} +03/04/2022 07:26:16 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 07:26:20 - INFO - codeparrot_training - Step 15300: {'lr': 0.0004901030104731691, 'samples': 7834112, 'steps': 15300, 'loss/train': 1.8444883823394775} +03/04/2022 07:26:24 - INFO - codeparrot_training - Step 15301: {'lr': 0.0004901015320482019, 'samples': 7834624, 'steps': 15301, 'loss/train': 1.8762239217758179} +03/04/2022 07:26:25 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 07:26:29 - INFO - codeparrot_training - Step 15302: {'lr': 0.0004901000535150486, 'samples': 7835136, 'steps': 15302, 'loss/train': 2.056536912918091} +03/04/2022 07:26:32 - INFO - codeparrot_training - Step 15303: {'lr': 0.0004900985748737101, 'samples': 7835648, 'steps': 15303, 'loss/train': 2.1779062747955322} +03/04/2022 07:26:33 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 07:26:38 - INFO - codeparrot_training - Step 15304: {'lr': 0.0004900970961241866, 'samples': 7836160, 'steps': 15304, 'loss/train': 2.4206621646881104} +03/04/2022 07:26:41 - INFO - codeparrot_training - Step 15305: {'lr': 0.0004900956172664792, 'samples': 7836672, 'steps': 15305, 'loss/train': 0.9039513468742371} +03/04/2022 07:26:43 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 07:26:46 - INFO - codeparrot_training - Step 15306: {'lr': 0.0004900941383005884, 'samples': 7837184, 'steps': 15306, 'loss/train': 1.9638017416000366} +03/04/2022 07:26:49 - INFO - codeparrot_training - Step 15307: {'lr': 0.0004900926592265149, 'samples': 7837696, 'steps': 15307, 'loss/train': 1.99547278881073} +03/04/2022 07:26:52 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/04/2022 07:26:54 - INFO - codeparrot_training - Step 15308: {'lr': 0.0004900911800442593, 'samples': 7838208, 'steps': 15308, 'loss/train': 2.008004903793335} +03/04/2022 07:26:58 - INFO - codeparrot_training - Step 15309: {'lr': 0.0004900897007538225, 'samples': 7838720, 'steps': 15309, 'loss/train': 2.3296170234680176} +03/04/2022 07:27:00 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 07:27:03 - INFO - codeparrot_training - Step 15310: {'lr': 0.0004900882213552049, 'samples': 7839232, 'steps': 15310, 'loss/train': 1.9096734523773193} +03/04/2022 07:27:06 - INFO - codeparrot_training - Step 15311: {'lr': 0.0004900867418484072, 'samples': 7839744, 'steps': 15311, 'loss/train': 0.9354264736175537} +03/04/2022 07:27:08 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/04/2022 07:27:12 - INFO - codeparrot_training - Step 15312: {'lr': 0.0004900852622334301, 'samples': 7840256, 'steps': 15312, 'loss/train': 1.0957728624343872} +03/04/2022 07:27:15 - INFO - codeparrot_training - Step 15313: {'lr': 0.0004900837825102743, 'samples': 7840768, 'steps': 15313, 'loss/train': 1.95063316822052} +03/04/2022 07:27:17 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 07:27:20 - INFO - codeparrot_training - Step 15314: {'lr': 0.0004900823026789405, 'samples': 7841280, 'steps': 15314, 'loss/train': 1.5769211053848267} +03/04/2022 07:27:23 - INFO - codeparrot_training - Step 15315: {'lr': 0.0004900808227394293, 'samples': 7841792, 'steps': 15315, 'loss/train': 1.635298252105713} +03/04/2022 07:27:25 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 07:27:29 - INFO - codeparrot_training - Step 15316: {'lr': 0.0004900793426917412, 'samples': 7842304, 'steps': 15316, 'loss/train': 2.0013387203216553} +03/04/2022 07:27:32 - INFO - codeparrot_training - Step 15317: {'lr': 0.0004900778625358774, 'samples': 7842816, 'steps': 15317, 'loss/train': 1.2605314254760742} +03/04/2022 07:27:34 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 07:27:37 - INFO - codeparrot_training - Step 15318: {'lr': 0.000490076382271838, 'samples': 7843328, 'steps': 15318, 'loss/train': 2.136317253112793} +03/04/2022 07:27:40 - INFO - codeparrot_training - Step 15319: {'lr': 0.0004900749018996238, 'samples': 7843840, 'steps': 15319, 'loss/train': 2.025815725326538} +03/04/2022 07:27:43 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/04/2022 07:27:45 - INFO - codeparrot_training - Step 15320: {'lr': 0.0004900734214192358, 'samples': 7844352, 'steps': 15320, 'loss/train': 2.649696111679077} +03/04/2022 07:27:49 - INFO - codeparrot_training - Step 15321: {'lr': 0.0004900719408306743, 'samples': 7844864, 'steps': 15321, 'loss/train': 1.9660518169403076} +03/04/2022 07:27:52 - INFO - codeparrot_training - Step 15322: {'lr': 0.0004900704601339401, 'samples': 7845376, 'steps': 15322, 'loss/train': 0.16374270617961884} +03/04/2022 07:27:52 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 07:27:57 - INFO - codeparrot_training - Step 15323: {'lr': 0.0004900689793290339, 'samples': 7845888, 'steps': 15323, 'loss/train': 1.5246270895004272} +03/04/2022 07:28:00 - INFO - codeparrot_training - Step 15324: {'lr': 0.0004900674984159562, 'samples': 7846400, 'steps': 15324, 'loss/train': 1.683797001838684} +03/04/2022 07:28:00 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 07:28:06 - INFO - codeparrot_training - Step 15325: {'lr': 0.0004900660173947079, 'samples': 7846912, 'steps': 15325, 'loss/train': 2.071974277496338} +03/04/2022 07:28:08 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 07:28:11 - INFO - codeparrot_training - Step 15326: {'lr': 0.0004900645362652895, 'samples': 7847424, 'steps': 15326, 'loss/train': 1.0616647005081177} +03/04/2022 07:28:14 - INFO - codeparrot_training - Step 15327: {'lr': 0.0004900630550277018, 'samples': 7847936, 'steps': 15327, 'loss/train': 2.631700277328491} +03/04/2022 07:28:17 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 07:28:19 - INFO - codeparrot_training - Step 15328: {'lr': 0.0004900615736819452, 'samples': 7848448, 'steps': 15328, 'loss/train': 2.0920910835266113} +03/04/2022 07:28:23 - INFO - codeparrot_training - Step 15329: {'lr': 0.0004900600922280207, 'samples': 7848960, 'steps': 15329, 'loss/train': 2.014713764190674} +03/04/2022 07:28:25 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 07:28:28 - INFO - codeparrot_training - Step 15330: {'lr': 0.0004900586106659289, 'samples': 7849472, 'steps': 15330, 'loss/train': 2.2079737186431885} +03/04/2022 07:28:31 - INFO - codeparrot_training - Step 15331: {'lr': 0.0004900571289956703, 'samples': 7849984, 'steps': 15331, 'loss/train': 2.026646375656128} +03/04/2022 07:28:34 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 07:28:37 - INFO - codeparrot_training - Step 15332: {'lr': 0.0004900556472172457, 'samples': 7850496, 'steps': 15332, 'loss/train': 2.5494980812072754} +03/04/2022 07:28:40 - INFO - codeparrot_training - Step 15333: {'lr': 0.0004900541653306557, 'samples': 7851008, 'steps': 15333, 'loss/train': 2.3377397060394287} +03/04/2022 07:28:43 - INFO - codeparrot_training - Step 15334: {'lr': 0.0004900526833359009, 'samples': 7851520, 'steps': 15334, 'loss/train': 2.220517635345459} +03/04/2022 07:28:43 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 07:28:48 - INFO - codeparrot_training - Step 15335: {'lr': 0.0004900512012329822, 'samples': 7852032, 'steps': 15335, 'loss/train': 2.1473395824432373} +03/04/2022 07:28:51 - INFO - codeparrot_training - Step 15336: {'lr': 0.0004900497190219002, 'samples': 7852544, 'steps': 15336, 'loss/train': 1.8074777126312256} +03/04/2022 07:28:51 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/04/2022 07:28:57 - INFO - codeparrot_training - Step 15337: {'lr': 0.0004900482367026554, 'samples': 7853056, 'steps': 15337, 'loss/train': 2.5616025924682617} +03/04/2022 07:29:00 - INFO - codeparrot_training - Step 15338: {'lr': 0.0004900467542752485, 'samples': 7853568, 'steps': 15338, 'loss/train': 0.3747663199901581} +03/04/2022 07:29:00 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 07:29:05 - INFO - codeparrot_training - Step 15339: {'lr': 0.0004900452717396803, 'samples': 7854080, 'steps': 15339, 'loss/train': 0.3845232427120209} +03/04/2022 07:29:08 - INFO - codeparrot_training - Step 15340: {'lr': 0.0004900437890959515, 'samples': 7854592, 'steps': 15340, 'loss/train': 2.1470634937286377} +03/04/2022 07:29:08 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 07:29:13 - INFO - codeparrot_training - Step 15341: {'lr': 0.0004900423063440625, 'samples': 7855104, 'steps': 15341, 'loss/train': 3.0711188316345215} +03/04/2022 07:29:17 - INFO - codeparrot_training - Step 15342: {'lr': 0.0004900408234840142, 'samples': 7855616, 'steps': 15342, 'loss/train': 2.0878663063049316} +03/04/2022 07:29:17 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/04/2022 07:29:22 - INFO - codeparrot_training - Step 15343: {'lr': 0.0004900393405158073, 'samples': 7856128, 'steps': 15343, 'loss/train': 2.511601448059082} +03/04/2022 07:29:25 - INFO - codeparrot_training - Step 15344: {'lr': 0.0004900378574394423, 'samples': 7856640, 'steps': 15344, 'loss/train': 2.314175844192505} +03/04/2022 07:29:26 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 07:29:30 - INFO - codeparrot_training - Step 15345: {'lr': 0.00049003637425492, 'samples': 7857152, 'steps': 15345, 'loss/train': 2.021688938140869} +03/04/2022 07:29:34 - INFO - codeparrot_training - Step 15346: {'lr': 0.0004900348909622409, 'samples': 7857664, 'steps': 15346, 'loss/train': 1.5831284523010254} +03/04/2022 07:29:34 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/04/2022 07:29:39 - INFO - codeparrot_training - Step 15347: {'lr': 0.0004900334075614059, 'samples': 7858176, 'steps': 15347, 'loss/train': 2.6837236881256104} +03/04/2022 07:29:42 - INFO - codeparrot_training - Step 15348: {'lr': 0.0004900319240524155, 'samples': 7858688, 'steps': 15348, 'loss/train': 1.692237377166748} +03/04/2022 07:29:42 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 07:29:47 - INFO - codeparrot_training - Step 15349: {'lr': 0.0004900304404352704, 'samples': 7859200, 'steps': 15349, 'loss/train': 2.2119593620300293} +03/04/2022 07:29:50 - INFO - codeparrot_training - Step 15350: {'lr': 0.0004900289567099713, 'samples': 7859712, 'steps': 15350, 'loss/train': 1.4525253772735596} +03/04/2022 07:29:51 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/04/2022 07:29:56 - INFO - codeparrot_training - Step 15351: {'lr': 0.000490027472876519, 'samples': 7860224, 'steps': 15351, 'loss/train': 0.9180548191070557} +03/04/2022 07:29:59 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 07:30:01 - INFO - codeparrot_training - Step 15352: {'lr': 0.0004900259889349138, 'samples': 7860736, 'steps': 15352, 'loss/train': 2.1855111122131348} +03/04/2022 07:30:05 - INFO - codeparrot_training - Step 15353: {'lr': 0.0004900245048851567, 'samples': 7861248, 'steps': 15353, 'loss/train': 2.287900924682617} +03/04/2022 07:30:08 - INFO - codeparrot_training - Step 15354: {'lr': 0.0004900230207272483, 'samples': 7861760, 'steps': 15354, 'loss/train': 0.17259082198143005} +03/04/2022 07:30:08 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 07:30:13 - INFO - codeparrot_training - Step 15355: {'lr': 0.000490021536461189, 'samples': 7862272, 'steps': 15355, 'loss/train': 1.9748649597167969} +03/04/2022 07:30:16 - INFO - codeparrot_training - Step 15356: {'lr': 0.00049002005208698, 'samples': 7862784, 'steps': 15356, 'loss/train': 0.2705856263637543} +03/04/2022 07:30:17 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 07:30:21 - INFO - codeparrot_training - Step 15357: {'lr': 0.0004900185676046214, 'samples': 7863296, 'steps': 15357, 'loss/train': 1.4590259790420532} +03/04/2022 07:30:25 - INFO - codeparrot_training - Step 15358: {'lr': 0.0004900170830141144, 'samples': 7863808, 'steps': 15358, 'loss/train': 1.8637088537216187} +03/04/2022 07:30:26 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/04/2022 07:30:30 - INFO - codeparrot_training - Step 15359: {'lr': 0.0004900155983154592, 'samples': 7864320, 'steps': 15359, 'loss/train': 2.0526022911071777} +03/04/2022 07:30:33 - INFO - codeparrot_training - Step 15360: {'lr': 0.0004900141135086569, 'samples': 7864832, 'steps': 15360, 'loss/train': 1.3982640504837036} +03/04/2022 07:30:34 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 07:30:38 - INFO - codeparrot_training - Step 15361: {'lr': 0.0004900126285937077, 'samples': 7865344, 'steps': 15361, 'loss/train': 1.1787586212158203} +03/04/2022 07:30:41 - INFO - codeparrot_training - Step 15362: {'lr': 0.0004900111435706127, 'samples': 7865856, 'steps': 15362, 'loss/train': 1.76839017868042} +03/04/2022 07:30:42 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 07:30:47 - INFO - codeparrot_training - Step 15363: {'lr': 0.0004900096584393723, 'samples': 7866368, 'steps': 15363, 'loss/train': 1.76022469997406} +03/04/2022 07:30:50 - INFO - codeparrot_training - Step 15364: {'lr': 0.0004900081731999872, 'samples': 7866880, 'steps': 15364, 'loss/train': 1.8010162115097046} +03/04/2022 07:30:50 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 07:30:55 - INFO - codeparrot_training - Step 15365: {'lr': 0.0004900066878524582, 'samples': 7867392, 'steps': 15365, 'loss/train': 1.8669334650039673} +03/04/2022 07:30:58 - INFO - codeparrot_training - Step 15366: {'lr': 0.0004900052023967859, 'samples': 7867904, 'steps': 15366, 'loss/train': 2.011361598968506} +03/04/2022 07:30:59 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 07:31:04 - INFO - codeparrot_training - Step 15367: {'lr': 0.0004900037168329709, 'samples': 7868416, 'steps': 15367, 'loss/train': 2.5831313133239746} +03/04/2022 07:31:07 - INFO - codeparrot_training - Step 15368: {'lr': 0.000490002231161014, 'samples': 7868928, 'steps': 15368, 'loss/train': 1.6952158212661743} +03/04/2022 07:31:07 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 07:31:12 - INFO - codeparrot_training - Step 15369: {'lr': 0.0004900007453809157, 'samples': 7869440, 'steps': 15369, 'loss/train': 2.6012558937072754} +03/04/2022 07:31:15 - INFO - codeparrot_training - Step 15370: {'lr': 0.0004899992594926769, 'samples': 7869952, 'steps': 15370, 'loss/train': 2.082909107208252} +03/04/2022 07:31:16 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 07:31:21 - INFO - codeparrot_training - Step 15371: {'lr': 0.000489997773496298, 'samples': 7870464, 'steps': 15371, 'loss/train': 1.5804387331008911} +03/04/2022 07:31:24 - INFO - codeparrot_training - Step 15372: {'lr': 0.0004899962873917798, 'samples': 7870976, 'steps': 15372, 'loss/train': 1.3597859144210815} +03/04/2022 07:31:25 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/04/2022 07:31:29 - INFO - codeparrot_training - Step 15373: {'lr': 0.000489994801179123, 'samples': 7871488, 'steps': 15373, 'loss/train': 0.5767632722854614} +03/04/2022 07:31:32 - INFO - codeparrot_training - Step 15374: {'lr': 0.0004899933148583284, 'samples': 7872000, 'steps': 15374, 'loss/train': 0.2697375416755676} +03/04/2022 07:31:33 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 07:31:37 - INFO - codeparrot_training - Step 15375: {'lr': 0.0004899918284293964, 'samples': 7872512, 'steps': 15375, 'loss/train': 1.9592632055282593} +03/04/2022 07:31:41 - INFO - codeparrot_training - Step 15376: {'lr': 0.0004899903418923278, 'samples': 7873024, 'steps': 15376, 'loss/train': 1.408479928970337} +03/04/2022 07:31:42 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 07:31:46 - INFO - codeparrot_training - Step 15377: {'lr': 0.0004899888552471232, 'samples': 7873536, 'steps': 15377, 'loss/train': 1.783188819885254} +03/04/2022 07:31:49 - INFO - codeparrot_training - Step 15378: {'lr': 0.0004899873684937833, 'samples': 7874048, 'steps': 15378, 'loss/train': 1.876893162727356} +03/04/2022 07:31:50 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 07:31:54 - INFO - codeparrot_training - Step 15379: {'lr': 0.0004899858816323089, 'samples': 7874560, 'steps': 15379, 'loss/train': 1.3946398496627808} +03/04/2022 07:31:57 - INFO - codeparrot_training - Step 15380: {'lr': 0.0004899843946627006, 'samples': 7875072, 'steps': 15380, 'loss/train': 1.9439603090286255} +03/04/2022 07:31:58 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 07:32:03 - INFO - codeparrot_training - Step 15381: {'lr': 0.0004899829075849589, 'samples': 7875584, 'steps': 15381, 'loss/train': 1.363639235496521} +03/04/2022 07:32:06 - INFO - codeparrot_training - Step 15382: {'lr': 0.0004899814203990847, 'samples': 7876096, 'steps': 15382, 'loss/train': 2.6532938480377197} +03/04/2022 07:32:07 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 07:32:11 - INFO - codeparrot_training - Step 15383: {'lr': 0.0004899799331050785, 'samples': 7876608, 'steps': 15383, 'loss/train': 1.9514384269714355} +03/04/2022 07:32:14 - INFO - codeparrot_training - Step 15384: {'lr': 0.0004899784457029411, 'samples': 7877120, 'steps': 15384, 'loss/train': 1.534258484840393} +03/04/2022 07:32:15 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/04/2022 07:32:19 - INFO - codeparrot_training - Step 15385: {'lr': 0.000489976958192673, 'samples': 7877632, 'steps': 15385, 'loss/train': 2.0465333461761475} +03/04/2022 07:32:23 - INFO - codeparrot_training - Step 15386: {'lr': 0.0004899754705742752, 'samples': 7878144, 'steps': 15386, 'loss/train': 1.786224126815796} +03/04/2022 07:32:24 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 07:32:28 - INFO - codeparrot_training - Step 15387: {'lr': 0.0004899739828477481, 'samples': 7878656, 'steps': 15387, 'loss/train': 1.5553171634674072} +03/04/2022 07:32:31 - INFO - codeparrot_training - Step 15388: {'lr': 0.0004899724950130923, 'samples': 7879168, 'steps': 15388, 'loss/train': 2.878305435180664} +03/04/2022 07:32:32 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 07:32:36 - INFO - codeparrot_training - Step 15389: {'lr': 0.0004899710070703087, 'samples': 7879680, 'steps': 15389, 'loss/train': 1.093400001525879} +03/04/2022 07:32:40 - INFO - codeparrot_training - Step 15390: {'lr': 0.0004899695190193978, 'samples': 7880192, 'steps': 15390, 'loss/train': 1.1266734600067139} +03/04/2022 07:32:41 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 07:32:45 - INFO - codeparrot_training - Step 15391: {'lr': 0.0004899680308603604, 'samples': 7880704, 'steps': 15391, 'loss/train': 2.5061869621276855} +03/04/2022 07:32:48 - INFO - codeparrot_training - Step 15392: {'lr': 0.000489966542593197, 'samples': 7881216, 'steps': 15392, 'loss/train': 0.8983432650566101} +03/04/2022 07:32:49 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 07:32:53 - INFO - codeparrot_training - Step 15393: {'lr': 0.0004899650542179085, 'samples': 7881728, 'steps': 15393, 'loss/train': 1.6177502870559692} +03/04/2022 07:32:56 - INFO - codeparrot_training - Step 15394: {'lr': 0.0004899635657344954, 'samples': 7882240, 'steps': 15394, 'loss/train': 1.330331563949585} +03/04/2022 07:32:58 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 07:33:02 - INFO - codeparrot_training - Step 15395: {'lr': 0.0004899620771429585, 'samples': 7882752, 'steps': 15395, 'loss/train': 3.236412763595581} +03/04/2022 07:33:05 - INFO - codeparrot_training - Step 15396: {'lr': 0.0004899605884432983, 'samples': 7883264, 'steps': 15396, 'loss/train': 2.59488582611084} +03/04/2022 07:33:06 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 07:33:10 - INFO - codeparrot_training - Step 15397: {'lr': 0.0004899590996355155, 'samples': 7883776, 'steps': 15397, 'loss/train': 1.7290860414505005} +03/04/2022 07:33:13 - INFO - codeparrot_training - Step 15398: {'lr': 0.000489957610719611, 'samples': 7884288, 'steps': 15398, 'loss/train': 1.3584386110305786} +03/04/2022 07:33:14 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 07:33:19 - INFO - codeparrot_training - Step 15399: {'lr': 0.0004899561216955852, 'samples': 7884800, 'steps': 15399, 'loss/train': 1.5643391609191895} +03/04/2022 07:33:22 - INFO - codeparrot_training - Step 15400: {'lr': 0.0004899546325634388, 'samples': 7885312, 'steps': 15400, 'loss/train': 2.9252898693084717} +03/04/2022 07:33:24 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/04/2022 07:33:27 - INFO - codeparrot_training - Step 15401: {'lr': 0.0004899531433231728, 'samples': 7885824, 'steps': 15401, 'loss/train': 0.5506502389907837} +03/04/2022 07:33:30 - INFO - codeparrot_training - Step 15402: {'lr': 0.0004899516539747874, 'samples': 7886336, 'steps': 15402, 'loss/train': 1.3804658651351929} +03/04/2022 07:33:32 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/04/2022 07:33:36 - INFO - codeparrot_training - Step 15403: {'lr': 0.0004899501645182835, 'samples': 7886848, 'steps': 15403, 'loss/train': 1.5797665119171143} +03/04/2022 07:33:39 - INFO - codeparrot_training - Step 15404: {'lr': 0.0004899486749536618, 'samples': 7887360, 'steps': 15404, 'loss/train': 1.5813759565353394} +03/04/2022 07:33:41 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 07:33:44 - INFO - codeparrot_training - Step 15405: {'lr': 0.000489947185280923, 'samples': 7887872, 'steps': 15405, 'loss/train': 1.7318525314331055} +03/04/2022 07:33:47 - INFO - codeparrot_training - Step 15406: {'lr': 0.0004899456955000676, 'samples': 7888384, 'steps': 15406, 'loss/train': 1.0347015857696533} +03/04/2022 07:33:49 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/04/2022 07:33:53 - INFO - codeparrot_training - Step 15407: {'lr': 0.0004899442056110964, 'samples': 7888896, 'steps': 15407, 'loss/train': 1.2970328330993652} +03/04/2022 07:33:56 - INFO - codeparrot_training - Step 15408: {'lr': 0.00048994271561401, 'samples': 7889408, 'steps': 15408, 'loss/train': 1.0163860321044922} +03/04/2022 07:33:58 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 07:34:01 - INFO - codeparrot_training - Step 15409: {'lr': 0.0004899412255088091, 'samples': 7889920, 'steps': 15409, 'loss/train': 1.7994736433029175} +03/04/2022 07:34:04 - INFO - codeparrot_training - Step 15410: {'lr': 0.0004899397352954945, 'samples': 7890432, 'steps': 15410, 'loss/train': 2.405808210372925} +03/04/2022 07:34:06 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 07:34:10 - INFO - codeparrot_training - Step 15411: {'lr': 0.0004899382449740667, 'samples': 7890944, 'steps': 15411, 'loss/train': 2.838749408721924} +03/04/2022 07:34:13 - INFO - codeparrot_training - Step 15412: {'lr': 0.0004899367545445264, 'samples': 7891456, 'steps': 15412, 'loss/train': 2.049316883087158} +03/04/2022 07:34:14 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 07:34:19 - INFO - codeparrot_training - Step 15413: {'lr': 0.0004899352640068743, 'samples': 7891968, 'steps': 15413, 'loss/train': 2.0771126747131348} +03/04/2022 07:34:22 - INFO - codeparrot_training - Step 15414: {'lr': 0.0004899337733611113, 'samples': 7892480, 'steps': 15414, 'loss/train': 2.6827025413513184} +03/04/2022 07:34:25 - INFO - codeparrot_training - Step 15415: {'lr': 0.0004899322826072375, 'samples': 7892992, 'steps': 15415, 'loss/train': 2.4132609367370605} +03/04/2022 07:34:25 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/04/2022 07:34:30 - INFO - codeparrot_training - Step 15416: {'lr': 0.0004899307917452542, 'samples': 7893504, 'steps': 15416, 'loss/train': 2.190967082977295} +03/04/2022 07:34:34 - INFO - codeparrot_training - Step 15417: {'lr': 0.0004899293007751616, 'samples': 7894016, 'steps': 15417, 'loss/train': 1.7037417888641357} +03/04/2022 07:34:34 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 07:34:39 - INFO - codeparrot_training - Step 15418: {'lr': 0.0004899278096969605, 'samples': 7894528, 'steps': 15418, 'loss/train': 1.549236536026001} +03/04/2022 07:34:42 - INFO - codeparrot_training - Step 15419: {'lr': 0.0004899263185106518, 'samples': 7895040, 'steps': 15419, 'loss/train': 2.7990338802337646} +03/04/2022 07:34:43 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 07:34:47 - INFO - codeparrot_training - Step 15420: {'lr': 0.000489924827216236, 'samples': 7895552, 'steps': 15420, 'loss/train': 1.5396349430084229} +03/04/2022 07:34:50 - INFO - codeparrot_training - Step 15421: {'lr': 0.0004899233358137137, 'samples': 7896064, 'steps': 15421, 'loss/train': 1.5125325918197632} +03/04/2022 07:34:51 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 07:34:56 - INFO - codeparrot_training - Step 15422: {'lr': 0.0004899218443030857, 'samples': 7896576, 'steps': 15422, 'loss/train': 3.9291932582855225} +03/04/2022 07:34:59 - INFO - codeparrot_training - Step 15423: {'lr': 0.0004899203526843526, 'samples': 7897088, 'steps': 15423, 'loss/train': 1.889095425605774} +03/04/2022 07:35:00 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 07:35:04 - INFO - codeparrot_training - Step 15424: {'lr': 0.000489918860957515, 'samples': 7897600, 'steps': 15424, 'loss/train': 1.970321774482727} +03/04/2022 07:35:07 - INFO - codeparrot_training - Step 15425: {'lr': 0.0004899173691225737, 'samples': 7898112, 'steps': 15425, 'loss/train': 1.9425466060638428} +03/04/2022 07:35:08 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 07:35:12 - INFO - codeparrot_training - Step 15426: {'lr': 0.0004899158771795295, 'samples': 7898624, 'steps': 15426, 'loss/train': 1.9016437530517578} +03/04/2022 07:35:16 - INFO - codeparrot_training - Step 15427: {'lr': 0.0004899143851283827, 'samples': 7899136, 'steps': 15427, 'loss/train': 2.6441962718963623} +03/04/2022 07:35:16 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 07:35:21 - INFO - codeparrot_training - Step 15428: {'lr': 0.0004899128929691343, 'samples': 7899648, 'steps': 15428, 'loss/train': 2.0410842895507812} +03/04/2022 07:35:24 - INFO - codeparrot_training - Step 15429: {'lr': 0.0004899114007017849, 'samples': 7900160, 'steps': 15429, 'loss/train': 2.4312095642089844} +03/04/2022 07:35:25 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 07:35:29 - INFO - codeparrot_training - Step 15430: {'lr': 0.000489909908326335, 'samples': 7900672, 'steps': 15430, 'loss/train': 2.036289930343628} +03/04/2022 07:35:33 - INFO - codeparrot_training - Step 15431: {'lr': 0.0004899084158427855, 'samples': 7901184, 'steps': 15431, 'loss/train': 1.9077401161193848} +03/04/2022 07:35:33 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/04/2022 07:35:38 - INFO - codeparrot_training - Step 15432: {'lr': 0.0004899069232511368, 'samples': 7901696, 'steps': 15432, 'loss/train': 2.345599889755249} +03/04/2022 07:35:41 - INFO - codeparrot_training - Step 15433: {'lr': 0.0004899054305513899, 'samples': 7902208, 'steps': 15433, 'loss/train': 1.9886152744293213} +03/04/2022 07:35:42 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/04/2022 07:35:46 - INFO - codeparrot_training - Step 15434: {'lr': 0.0004899039377435452, 'samples': 7902720, 'steps': 15434, 'loss/train': 1.7461305856704712} +03/04/2022 07:35:49 - INFO - codeparrot_training - Step 15435: {'lr': 0.0004899024448276036, 'samples': 7903232, 'steps': 15435, 'loss/train': 2.007394313812256} +03/04/2022 07:35:50 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 07:35:55 - INFO - codeparrot_training - Step 15436: {'lr': 0.0004899009518035657, 'samples': 7903744, 'steps': 15436, 'loss/train': 1.881616473197937} +03/04/2022 07:35:58 - INFO - codeparrot_training - Step 15437: {'lr': 0.000489899458671432, 'samples': 7904256, 'steps': 15437, 'loss/train': 2.014139413833618} +03/04/2022 07:35:59 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 07:36:03 - INFO - codeparrot_training - Step 15438: {'lr': 0.0004898979654312034, 'samples': 7904768, 'steps': 15438, 'loss/train': 2.6880035400390625} +03/04/2022 07:36:06 - INFO - codeparrot_training - Step 15439: {'lr': 0.0004898964720828804, 'samples': 7905280, 'steps': 15439, 'loss/train': 1.6997483968734741} +03/04/2022 07:36:08 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 07:36:12 - INFO - codeparrot_training - Step 15440: {'lr': 0.0004898949786264638, 'samples': 7905792, 'steps': 15440, 'loss/train': 0.6220483779907227} +03/04/2022 07:36:15 - INFO - codeparrot_training - Step 15441: {'lr': 0.0004898934850619542, 'samples': 7906304, 'steps': 15441, 'loss/train': 2.204728603363037} +03/04/2022 07:36:16 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 07:36:20 - INFO - codeparrot_training - Step 15442: {'lr': 0.0004898919913893522, 'samples': 7906816, 'steps': 15442, 'loss/train': 1.9425610303878784} +03/04/2022 07:36:23 - INFO - codeparrot_training - Step 15443: {'lr': 0.0004898904976086588, 'samples': 7907328, 'steps': 15443, 'loss/train': 1.3312140703201294} +03/04/2022 07:36:24 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 07:36:29 - INFO - codeparrot_training - Step 15444: {'lr': 0.0004898890037198743, 'samples': 7907840, 'steps': 15444, 'loss/train': 2.002432107925415} +03/04/2022 07:36:32 - INFO - codeparrot_training - Step 15445: {'lr': 0.0004898875097229995, 'samples': 7908352, 'steps': 15445, 'loss/train': 2.4981448650360107} +03/04/2022 07:36:33 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 07:36:37 - INFO - codeparrot_training - Step 15446: {'lr': 0.0004898860156180351, 'samples': 7908864, 'steps': 15446, 'loss/train': 2.014364004135132} +03/04/2022 07:36:40 - INFO - codeparrot_training - Step 15447: {'lr': 0.0004898845214049818, 'samples': 7909376, 'steps': 15447, 'loss/train': 1.142008662223816} +03/04/2022 07:36:41 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 07:36:45 - INFO - codeparrot_training - Step 15448: {'lr': 0.0004898830270838403, 'samples': 7909888, 'steps': 15448, 'loss/train': 1.0710009336471558} +03/04/2022 07:36:49 - INFO - codeparrot_training - Step 15449: {'lr': 0.0004898815326546111, 'samples': 7910400, 'steps': 15449, 'loss/train': 2.6670305728912354} +03/04/2022 07:36:50 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 07:36:54 - INFO - codeparrot_training - Step 15450: {'lr': 0.0004898800381172951, 'samples': 7910912, 'steps': 15450, 'loss/train': 1.7864562273025513} +03/04/2022 07:36:57 - INFO - codeparrot_training - Step 15451: {'lr': 0.0004898785434718927, 'samples': 7911424, 'steps': 15451, 'loss/train': 1.459734320640564} +03/04/2022 07:36:58 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 07:37:02 - INFO - codeparrot_training - Step 15452: {'lr': 0.0004898770487184047, 'samples': 7911936, 'steps': 15452, 'loss/train': 1.8777679204940796} +03/04/2022 07:37:05 - INFO - codeparrot_training - Step 15453: {'lr': 0.000489875553856832, 'samples': 7912448, 'steps': 15453, 'loss/train': 2.6495299339294434} +03/04/2022 07:37:07 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 07:37:11 - INFO - codeparrot_training - Step 15454: {'lr': 0.000489874058887175, 'samples': 7912960, 'steps': 15454, 'loss/train': 0.30565372109413147} +03/04/2022 07:37:14 - INFO - codeparrot_training - Step 15455: {'lr': 0.0004898725638094345, 'samples': 7913472, 'steps': 15455, 'loss/train': 2.2005231380462646} +03/04/2022 07:37:15 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 07:37:19 - INFO - codeparrot_training - Step 15456: {'lr': 0.0004898710686236109, 'samples': 7913984, 'steps': 15456, 'loss/train': 2.09761381149292} +03/04/2022 07:37:22 - INFO - codeparrot_training - Step 15457: {'lr': 0.0004898695733297054, 'samples': 7914496, 'steps': 15457, 'loss/train': 1.9442355632781982} +03/04/2022 07:37:23 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/04/2022 07:37:28 - INFO - codeparrot_training - Step 15458: {'lr': 0.0004898680779277182, 'samples': 7915008, 'steps': 15458, 'loss/train': 2.0893824100494385} +03/04/2022 07:37:31 - INFO - codeparrot_training - Step 15459: {'lr': 0.0004898665824176502, 'samples': 7915520, 'steps': 15459, 'loss/train': 2.2771191596984863} +03/04/2022 07:37:31 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 07:37:36 - INFO - codeparrot_training - Step 15460: {'lr': 0.000489865086799502, 'samples': 7916032, 'steps': 15460, 'loss/train': 2.5538787841796875} +03/04/2022 07:37:39 - INFO - codeparrot_training - Step 15461: {'lr': 0.0004898635910732743, 'samples': 7916544, 'steps': 15461, 'loss/train': 2.42826247215271} +03/04/2022 07:37:40 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 07:37:44 - INFO - codeparrot_training - Step 15462: {'lr': 0.0004898620952389677, 'samples': 7917056, 'steps': 15462, 'loss/train': 1.670270562171936} +03/04/2022 07:37:48 - INFO - codeparrot_training - Step 15463: {'lr': 0.000489860599296583, 'samples': 7917568, 'steps': 15463, 'loss/train': 2.1151483058929443} +03/04/2022 07:37:49 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 07:37:53 - INFO - codeparrot_training - Step 15464: {'lr': 0.0004898591032461208, 'samples': 7918080, 'steps': 15464, 'loss/train': 1.593345046043396} +03/04/2022 07:37:56 - INFO - codeparrot_training - Step 15465: {'lr': 0.0004898576070875818, 'samples': 7918592, 'steps': 15465, 'loss/train': 2.0916292667388916} +03/04/2022 07:37:57 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 07:38:01 - INFO - codeparrot_training - Step 15466: {'lr': 0.0004898561108209667, 'samples': 7919104, 'steps': 15466, 'loss/train': 1.96178138256073} +03/04/2022 07:38:05 - INFO - codeparrot_training - Step 15467: {'lr': 0.0004898546144462762, 'samples': 7919616, 'steps': 15467, 'loss/train': 2.278851270675659} +03/04/2022 07:38:06 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 07:38:10 - INFO - codeparrot_training - Step 15468: {'lr': 0.0004898531179635108, 'samples': 7920128, 'steps': 15468, 'loss/train': 2.491122245788574} +03/04/2022 07:38:13 - INFO - codeparrot_training - Step 15469: {'lr': 0.0004898516213726712, 'samples': 7920640, 'steps': 15469, 'loss/train': 1.9041359424591064} +03/04/2022 07:38:15 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 07:38:18 - INFO - codeparrot_training - Step 15470: {'lr': 0.0004898501246737583, 'samples': 7921152, 'steps': 15470, 'loss/train': 2.9109950065612793} +03/04/2022 07:38:21 - INFO - codeparrot_training - Step 15471: {'lr': 0.0004898486278667725, 'samples': 7921664, 'steps': 15471, 'loss/train': 1.9082738161087036} +03/04/2022 07:38:23 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 07:38:27 - INFO - codeparrot_training - Step 15472: {'lr': 0.0004898471309517148, 'samples': 7922176, 'steps': 15472, 'loss/train': 1.2683079242706299} +03/04/2022 07:38:30 - INFO - codeparrot_training - Step 15473: {'lr': 0.0004898456339285857, 'samples': 7922688, 'steps': 15473, 'loss/train': 2.021400213241577} +03/04/2022 07:38:31 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/04/2022 07:38:35 - INFO - codeparrot_training - Step 15474: {'lr': 0.0004898441367973856, 'samples': 7923200, 'steps': 15474, 'loss/train': 1.680795431137085} +03/04/2022 07:38:38 - INFO - codeparrot_training - Step 15475: {'lr': 0.0004898426395581156, 'samples': 7923712, 'steps': 15475, 'loss/train': 1.2884926795959473} +03/04/2022 07:38:39 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 07:38:44 - INFO - codeparrot_training - Step 15476: {'lr': 0.0004898411422107762, 'samples': 7924224, 'steps': 15476, 'loss/train': 1.8410600423812866} +03/04/2022 07:38:47 - INFO - codeparrot_training - Step 15477: {'lr': 0.0004898396447553681, 'samples': 7924736, 'steps': 15477, 'loss/train': 2.328235626220703} +03/04/2022 07:38:48 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 07:38:52 - INFO - codeparrot_training - Step 15478: {'lr': 0.000489838147191892, 'samples': 7925248, 'steps': 15478, 'loss/train': 1.4466079473495483} +03/04/2022 07:38:55 - INFO - codeparrot_training - Step 15479: {'lr': 0.0004898366495203483, 'samples': 7925760, 'steps': 15479, 'loss/train': 2.0889716148376465} +03/04/2022 07:38:56 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 07:39:00 - INFO - codeparrot_training - Step 15480: {'lr': 0.0004898351517407381, 'samples': 7926272, 'steps': 15480, 'loss/train': 2.57910418510437} +03/04/2022 07:39:04 - INFO - codeparrot_training - Step 15481: {'lr': 0.0004898336538530619, 'samples': 7926784, 'steps': 15481, 'loss/train': 0.9258764386177063} +03/04/2022 07:39:04 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 07:39:09 - INFO - codeparrot_training - Step 15482: {'lr': 0.0004898321558573203, 'samples': 7927296, 'steps': 15482, 'loss/train': 1.7317770719528198} +03/04/2022 07:39:12 - INFO - codeparrot_training - Step 15483: {'lr': 0.000489830657753514, 'samples': 7927808, 'steps': 15483, 'loss/train': 2.154567003250122} +03/04/2022 07:39:13 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 07:39:17 - INFO - codeparrot_training - Step 15484: {'lr': 0.0004898291595416438, 'samples': 7928320, 'steps': 15484, 'loss/train': 1.4654713869094849} +03/04/2022 07:39:20 - INFO - codeparrot_training - Step 15485: {'lr': 0.0004898276612217102, 'samples': 7928832, 'steps': 15485, 'loss/train': 2.106961727142334} +03/04/2022 07:39:21 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 07:39:26 - INFO - codeparrot_training - Step 15486: {'lr': 0.0004898261627937139, 'samples': 7929344, 'steps': 15486, 'loss/train': 1.6929203271865845} +03/04/2022 07:39:29 - INFO - codeparrot_training - Step 15487: {'lr': 0.0004898246642576559, 'samples': 7929856, 'steps': 15487, 'loss/train': 0.299540251493454} +03/04/2022 07:39:30 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 07:39:35 - INFO - codeparrot_training - Step 15488: {'lr': 0.0004898231656135362, 'samples': 7930368, 'steps': 15488, 'loss/train': 2.2357938289642334} +03/04/2022 07:39:38 - INFO - codeparrot_training - Step 15489: {'lr': 0.0004898216668613562, 'samples': 7930880, 'steps': 15489, 'loss/train': 1.817880630493164} +03/04/2022 07:39:39 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 07:39:43 - INFO - codeparrot_training - Step 15490: {'lr': 0.0004898201680011161, 'samples': 7931392, 'steps': 15490, 'loss/train': 1.8166682720184326} +03/04/2022 07:39:46 - INFO - codeparrot_training - Step 15491: {'lr': 0.0004898186690328168, 'samples': 7931904, 'steps': 15491, 'loss/train': 1.9559389352798462} +03/04/2022 07:39:48 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 07:39:51 - INFO - codeparrot_training - Step 15492: {'lr': 0.000489817169956459, 'samples': 7932416, 'steps': 15492, 'loss/train': 2.202038526535034} +03/04/2022 07:39:55 - INFO - codeparrot_training - Step 15493: {'lr': 0.0004898156707720432, 'samples': 7932928, 'steps': 15493, 'loss/train': 0.9259675741195679} +03/04/2022 07:39:56 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/04/2022 07:40:00 - INFO - codeparrot_training - Step 15494: {'lr': 0.0004898141714795701, 'samples': 7933440, 'steps': 15494, 'loss/train': 2.0608153343200684} +03/04/2022 07:40:03 - INFO - codeparrot_training - Step 15495: {'lr': 0.0004898126720790405, 'samples': 7933952, 'steps': 15495, 'loss/train': 1.8343485593795776} +03/04/2022 07:40:05 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/04/2022 07:40:08 - INFO - codeparrot_training - Step 15496: {'lr': 0.0004898111725704549, 'samples': 7934464, 'steps': 15496, 'loss/train': 2.832853078842163} +03/04/2022 07:40:11 - INFO - codeparrot_training - Step 15497: {'lr': 0.0004898096729538142, 'samples': 7934976, 'steps': 15497, 'loss/train': 1.620359182357788} +03/04/2022 07:40:13 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 07:40:17 - INFO - codeparrot_training - Step 15498: {'lr': 0.000489808173229119, 'samples': 7935488, 'steps': 15498, 'loss/train': 1.658172369003296} +03/04/2022 07:40:20 - INFO - codeparrot_training - Step 15499: {'lr': 0.0004898066733963699, 'samples': 7936000, 'steps': 15499, 'loss/train': 0.7932114005088806} +03/04/2022 07:40:21 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 07:40:25 - INFO - codeparrot_training - Step 15500: {'lr': 0.0004898051734555676, 'samples': 7936512, 'steps': 15500, 'loss/train': 1.9625887870788574} +03/04/2022 07:40:28 - INFO - codeparrot_training - Step 15501: {'lr': 0.0004898036734067127, 'samples': 7937024, 'steps': 15501, 'loss/train': 1.7892743349075317} +03/04/2022 07:40:30 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/04/2022 07:40:33 - INFO - codeparrot_training - Step 15502: {'lr': 0.000489802173249806, 'samples': 7937536, 'steps': 15502, 'loss/train': 1.9243348836898804} +03/04/2022 07:40:37 - INFO - codeparrot_training - Step 15503: {'lr': 0.0004898006729848482, 'samples': 7938048, 'steps': 15503, 'loss/train': 1.8306759595870972} +03/04/2022 07:40:38 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/04/2022 07:40:42 - INFO - codeparrot_training - Step 15504: {'lr': 0.0004897991726118399, 'samples': 7938560, 'steps': 15504, 'loss/train': 2.0525686740875244} +03/04/2022 07:40:45 - INFO - codeparrot_training - Step 15505: {'lr': 0.0004897976721307818, 'samples': 7939072, 'steps': 15505, 'loss/train': 1.6747798919677734} +03/04/2022 07:40:47 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 07:40:51 - INFO - codeparrot_training - Step 15506: {'lr': 0.0004897961715416746, 'samples': 7939584, 'steps': 15506, 'loss/train': 1.4539427757263184} +03/04/2022 07:40:54 - INFO - codeparrot_training - Step 15507: {'lr': 0.0004897946708445189, 'samples': 7940096, 'steps': 15507, 'loss/train': 1.5788847208023071} +03/04/2022 07:40:57 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/04/2022 07:41:00 - INFO - codeparrot_training - Step 15508: {'lr': 0.0004897931700393154, 'samples': 7940608, 'steps': 15508, 'loss/train': 2.2925736904144287} +03/04/2022 07:41:03 - INFO - codeparrot_training - Step 15509: {'lr': 0.0004897916691260648, 'samples': 7941120, 'steps': 15509, 'loss/train': 1.3521226644515991} +03/04/2022 07:41:06 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/04/2022 07:41:08 - INFO - codeparrot_training - Step 15510: {'lr': 0.0004897901681047679, 'samples': 7941632, 'steps': 15510, 'loss/train': 2.5287580490112305} +03/04/2022 07:41:12 - INFO - codeparrot_training - Step 15511: {'lr': 0.0004897886669754251, 'samples': 7942144, 'steps': 15511, 'loss/train': 0.21695557236671448} +03/04/2022 07:41:14 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 07:41:17 - INFO - codeparrot_training - Step 15512: {'lr': 0.0004897871657380373, 'samples': 7942656, 'steps': 15512, 'loss/train': 1.4420907497406006} +03/04/2022 07:41:20 - INFO - codeparrot_training - Step 15513: {'lr': 0.0004897856643926051, 'samples': 7943168, 'steps': 15513, 'loss/train': 1.1334095001220703} +03/04/2022 07:41:23 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/04/2022 07:41:25 - INFO - codeparrot_training - Step 15514: {'lr': 0.0004897841629391291, 'samples': 7943680, 'steps': 15514, 'loss/train': 1.4499120712280273} +03/04/2022 07:41:29 - INFO - codeparrot_training - Step 15515: {'lr': 0.0004897826613776101, 'samples': 7944192, 'steps': 15515, 'loss/train': 2.596832513809204} +03/04/2022 07:41:31 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/04/2022 07:41:34 - INFO - codeparrot_training - Step 15516: {'lr': 0.0004897811597080488, 'samples': 7944704, 'steps': 15516, 'loss/train': 2.5342442989349365} +03/04/2022 07:41:37 - INFO - codeparrot_training - Step 15517: {'lr': 0.0004897796579304458, 'samples': 7945216, 'steps': 15517, 'loss/train': 1.6077481508255005} +03/04/2022 07:41:39 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 07:41:42 - INFO - codeparrot_training - Step 15518: {'lr': 0.0004897781560448017, 'samples': 7945728, 'steps': 15518, 'loss/train': 1.6803473234176636} +03/04/2022 07:41:45 - INFO - codeparrot_training - Step 15519: {'lr': 0.0004897766540511173, 'samples': 7946240, 'steps': 15519, 'loss/train': 2.1792116165161133} +03/04/2022 07:41:48 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 07:41:51 - INFO - codeparrot_training - Step 15520: {'lr': 0.0004897751519493933, 'samples': 7946752, 'steps': 15520, 'loss/train': 2.3816514015197754} +03/04/2022 07:41:54 - INFO - codeparrot_training - Step 15521: {'lr': 0.0004897736497396303, 'samples': 7947264, 'steps': 15521, 'loss/train': 2.8611607551574707} +03/04/2022 07:41:56 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 07:41:59 - INFO - codeparrot_training - Step 15522: {'lr': 0.000489772147421829, 'samples': 7947776, 'steps': 15522, 'loss/train': 1.8840316534042358} +03/04/2022 07:42:02 - INFO - codeparrot_training - Step 15523: {'lr': 0.0004897706449959899, 'samples': 7948288, 'steps': 15523, 'loss/train': 1.8905023336410522} +03/04/2022 07:42:05 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 07:42:08 - INFO - codeparrot_training - Step 15524: {'lr': 0.000489769142462114, 'samples': 7948800, 'steps': 15524, 'loss/train': 1.2950785160064697} +03/04/2022 07:42:11 - INFO - codeparrot_training - Step 15525: {'lr': 0.0004897676398202018, 'samples': 7949312, 'steps': 15525, 'loss/train': 2.2493841648101807} +03/04/2022 07:42:13 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 07:42:16 - INFO - codeparrot_training - Step 15526: {'lr': 0.000489766137070254, 'samples': 7949824, 'steps': 15526, 'loss/train': 2.226500988006592} +03/04/2022 07:42:19 - INFO - codeparrot_training - Step 15527: {'lr': 0.0004897646342122713, 'samples': 7950336, 'steps': 15527, 'loss/train': 1.69699227809906} +03/04/2022 07:42:21 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 07:42:25 - INFO - codeparrot_training - Step 15528: {'lr': 0.0004897631312462544, 'samples': 7950848, 'steps': 15528, 'loss/train': 1.94154953956604} +03/04/2022 07:42:28 - INFO - codeparrot_training - Step 15529: {'lr': 0.0004897616281722038, 'samples': 7951360, 'steps': 15529, 'loss/train': 1.7297444343566895} +03/04/2022 07:42:31 - INFO - codeparrot_training - Step 15530: {'lr': 0.0004897601249901204, 'samples': 7951872, 'steps': 15530, 'loss/train': 2.2708733081817627} +03/04/2022 07:42:32 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 07:42:36 - INFO - codeparrot_training - Step 15531: {'lr': 0.0004897586217000047, 'samples': 7952384, 'steps': 15531, 'loss/train': 1.6525484323501587} +03/04/2022 07:42:40 - INFO - codeparrot_training - Step 15532: {'lr': 0.0004897571183018576, 'samples': 7952896, 'steps': 15532, 'loss/train': 1.5343036651611328} +03/04/2022 07:42:40 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/04/2022 07:42:45 - INFO - codeparrot_training - Step 15533: {'lr': 0.0004897556147956796, 'samples': 7953408, 'steps': 15533, 'loss/train': 2.1478469371795654} +03/04/2022 07:42:48 - INFO - codeparrot_training - Step 15534: {'lr': 0.0004897541111814714, 'samples': 7953920, 'steps': 15534, 'loss/train': 1.7728590965270996} +03/04/2022 07:42:49 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 07:42:53 - INFO - codeparrot_training - Step 15535: {'lr': 0.0004897526074592337, 'samples': 7954432, 'steps': 15535, 'loss/train': 0.5322246551513672} +03/04/2022 07:42:56 - INFO - codeparrot_training - Step 15536: {'lr': 0.0004897511036289671, 'samples': 7954944, 'steps': 15536, 'loss/train': 2.214263677597046} +03/04/2022 07:42:57 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 07:43:02 - INFO - codeparrot_training - Step 15537: {'lr': 0.0004897495996906725, 'samples': 7955456, 'steps': 15537, 'loss/train': 1.684836983680725} +03/04/2022 07:43:05 - INFO - codeparrot_training - Step 15538: {'lr': 0.0004897480956443503, 'samples': 7955968, 'steps': 15538, 'loss/train': 2.200009822845459} +03/04/2022 07:43:06 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 07:43:10 - INFO - codeparrot_training - Step 15539: {'lr': 0.0004897465914900013, 'samples': 7956480, 'steps': 15539, 'loss/train': 1.9549001455307007} +03/04/2022 07:43:13 - INFO - codeparrot_training - Step 15540: {'lr': 0.0004897450872276263, 'samples': 7956992, 'steps': 15540, 'loss/train': 1.8357181549072266} +03/04/2022 07:43:14 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 07:43:18 - INFO - codeparrot_training - Step 15541: {'lr': 0.0004897435828572258, 'samples': 7957504, 'steps': 15541, 'loss/train': 2.2722294330596924} +03/04/2022 07:43:22 - INFO - codeparrot_training - Step 15542: {'lr': 0.0004897420783788006, 'samples': 7958016, 'steps': 15542, 'loss/train': 1.7276039123535156} +03/04/2022 07:43:22 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 07:43:27 - INFO - codeparrot_training - Step 15543: {'lr': 0.0004897405737923511, 'samples': 7958528, 'steps': 15543, 'loss/train': 1.77358078956604} +03/04/2022 07:43:30 - INFO - codeparrot_training - Step 15544: {'lr': 0.0004897390690978785, 'samples': 7959040, 'steps': 15544, 'loss/train': 2.437380075454712} +03/04/2022 07:43:31 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 07:43:35 - INFO - codeparrot_training - Step 15545: {'lr': 0.000489737564295383, 'samples': 7959552, 'steps': 15545, 'loss/train': 1.2213376760482788} +03/04/2022 07:43:38 - INFO - codeparrot_training - Step 15546: {'lr': 0.0004897360593848655, 'samples': 7960064, 'steps': 15546, 'loss/train': 0.9281087517738342} +03/04/2022 07:43:39 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 07:43:44 - INFO - codeparrot_training - Step 15547: {'lr': 0.0004897345543663266, 'samples': 7960576, 'steps': 15547, 'loss/train': 1.7189769744873047} +03/04/2022 07:43:47 - INFO - codeparrot_training - Step 15548: {'lr': 0.000489733049239767, 'samples': 7961088, 'steps': 15548, 'loss/train': 2.868016004562378} +03/04/2022 07:43:49 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 07:43:53 - INFO - codeparrot_training - Step 15549: {'lr': 0.0004897315440051874, 'samples': 7961600, 'steps': 15549, 'loss/train': 2.4554531574249268} +03/04/2022 07:43:56 - INFO - codeparrot_training - Step 15550: {'lr': 0.0004897300386625885, 'samples': 7962112, 'steps': 15550, 'loss/train': 0.8456427454948425} +03/04/2022 07:43:57 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 07:44:01 - INFO - codeparrot_training - Step 15551: {'lr': 0.0004897285332119709, 'samples': 7962624, 'steps': 15551, 'loss/train': 1.7276837825775146} +03/04/2022 07:44:04 - INFO - codeparrot_training - Step 15552: {'lr': 0.0004897270276533355, 'samples': 7963136, 'steps': 15552, 'loss/train': 3.0288171768188477} +03/04/2022 07:44:07 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 07:44:10 - INFO - codeparrot_training - Step 15553: {'lr': 0.0004897255219866825, 'samples': 7963648, 'steps': 15553, 'loss/train': 1.9529637098312378} +03/04/2022 07:44:13 - INFO - codeparrot_training - Step 15554: {'lr': 0.000489724016212013, 'samples': 7964160, 'steps': 15554, 'loss/train': 1.773878812789917} +03/04/2022 07:44:15 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 07:44:18 - INFO - codeparrot_training - Step 15555: {'lr': 0.0004897225103293277, 'samples': 7964672, 'steps': 15555, 'loss/train': 2.08489727973938} +03/04/2022 07:44:21 - INFO - codeparrot_training - Step 15556: {'lr': 0.0004897210043386269, 'samples': 7965184, 'steps': 15556, 'loss/train': 2.656689405441284} +03/04/2022 07:44:24 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/04/2022 07:44:27 - INFO - codeparrot_training - Step 15557: {'lr': 0.0004897194982399117, 'samples': 7965696, 'steps': 15557, 'loss/train': 2.133646011352539} +03/04/2022 07:44:30 - INFO - codeparrot_training - Step 15558: {'lr': 0.0004897179920331826, 'samples': 7966208, 'steps': 15558, 'loss/train': 2.2503154277801514} +03/04/2022 07:44:32 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/04/2022 07:44:35 - INFO - codeparrot_training - Step 15559: {'lr': 0.0004897164857184401, 'samples': 7966720, 'steps': 15559, 'loss/train': 3.2825610637664795} +03/04/2022 07:44:38 - INFO - codeparrot_training - Step 15560: {'lr': 0.0004897149792956852, 'samples': 7967232, 'steps': 15560, 'loss/train': 1.1401220560073853} +03/04/2022 07:44:41 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/04/2022 07:44:43 - INFO - codeparrot_training - Step 15561: {'lr': 0.0004897134727649184, 'samples': 7967744, 'steps': 15561, 'loss/train': 2.068169355392456} +03/04/2022 07:44:47 - INFO - codeparrot_training - Step 15562: {'lr': 0.0004897119661261403, 'samples': 7968256, 'steps': 15562, 'loss/train': 2.204420328140259} +03/04/2022 07:44:49 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 07:44:52 - INFO - codeparrot_training - Step 15563: {'lr': 0.0004897104593793518, 'samples': 7968768, 'steps': 15563, 'loss/train': 2.25547456741333} +03/04/2022 07:44:55 - INFO - codeparrot_training - Step 15564: {'lr': 0.0004897089525245535, 'samples': 7969280, 'steps': 15564, 'loss/train': 1.3469637632369995} +03/04/2022 07:44:58 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 07:45:00 - INFO - codeparrot_training - Step 15565: {'lr': 0.000489707445561746, 'samples': 7969792, 'steps': 15565, 'loss/train': 2.262488603591919} +03/04/2022 07:45:04 - INFO - codeparrot_training - Step 15566: {'lr': 0.0004897059384909299, 'samples': 7970304, 'steps': 15566, 'loss/train': 2.519568920135498} +03/04/2022 07:45:06 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 07:45:09 - INFO - codeparrot_training - Step 15567: {'lr': 0.0004897044313121061, 'samples': 7970816, 'steps': 15567, 'loss/train': 2.0533998012542725} +03/04/2022 07:45:12 - INFO - codeparrot_training - Step 15568: {'lr': 0.0004897029240252753, 'samples': 7971328, 'steps': 15568, 'loss/train': 2.0569255352020264} +03/04/2022 07:45:14 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 07:45:17 - INFO - codeparrot_training - Step 15569: {'lr': 0.000489701416630438, 'samples': 7971840, 'steps': 15569, 'loss/train': 1.7303447723388672} +03/04/2022 07:45:21 - INFO - codeparrot_training - Step 15570: {'lr': 0.0004896999091275948, 'samples': 7972352, 'steps': 15570, 'loss/train': 0.8770133256912231} +03/04/2022 07:45:23 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/04/2022 07:45:26 - INFO - codeparrot_training - Step 15571: {'lr': 0.0004896984015167466, 'samples': 7972864, 'steps': 15571, 'loss/train': 0.6278535723686218} +03/04/2022 07:45:29 - INFO - codeparrot_training - Step 15572: {'lr': 0.0004896968937978941, 'samples': 7973376, 'steps': 15572, 'loss/train': 1.5002636909484863} +03/04/2022 07:45:32 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 07:45:34 - INFO - codeparrot_training - Step 15573: {'lr': 0.0004896953859710379, 'samples': 7973888, 'steps': 15573, 'loss/train': 1.0698113441467285} +03/04/2022 07:45:38 - INFO - codeparrot_training - Step 15574: {'lr': 0.0004896938780361784, 'samples': 7974400, 'steps': 15574, 'loss/train': 1.5110538005828857} +03/04/2022 07:45:40 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 07:45:43 - INFO - codeparrot_training - Step 15575: {'lr': 0.0004896923699933167, 'samples': 7974912, 'steps': 15575, 'loss/train': 1.6775097846984863} +03/04/2022 07:45:46 - INFO - codeparrot_training - Step 15576: {'lr': 0.0004896908618424533, 'samples': 7975424, 'steps': 15576, 'loss/train': 1.170007586479187} +03/04/2022 07:45:48 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 07:45:51 - INFO - codeparrot_training - Step 15577: {'lr': 0.0004896893535835889, 'samples': 7975936, 'steps': 15577, 'loss/train': 1.8936434984207153} +03/04/2022 07:45:54 - INFO - codeparrot_training - Step 15578: {'lr': 0.0004896878452167241, 'samples': 7976448, 'steps': 15578, 'loss/train': 2.204287528991699} +03/04/2022 07:45:57 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 07:46:00 - INFO - codeparrot_training - Step 15579: {'lr': 0.0004896863367418598, 'samples': 7976960, 'steps': 15579, 'loss/train': 1.9621821641921997} +03/04/2022 07:46:03 - INFO - codeparrot_training - Step 15580: {'lr': 0.0004896848281589966, 'samples': 7977472, 'steps': 15580, 'loss/train': 2.264508008956909} +03/04/2022 07:46:06 - INFO - codeparrot_training - Step 15581: {'lr': 0.0004896833194681349, 'samples': 7977984, 'steps': 15581, 'loss/train': 2.0917906761169434} +03/04/2022 07:46:07 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/04/2022 07:46:11 - INFO - codeparrot_training - Step 15582: {'lr': 0.0004896818106692757, 'samples': 7978496, 'steps': 15582, 'loss/train': 1.7354539632797241} +03/04/2022 07:46:15 - INFO - codeparrot_training - Step 15583: {'lr': 0.0004896803017624196, 'samples': 7979008, 'steps': 15583, 'loss/train': 1.9600088596343994} +03/04/2022 07:46:15 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 07:46:20 - INFO - codeparrot_training - Step 15584: {'lr': 0.0004896787927475671, 'samples': 7979520, 'steps': 15584, 'loss/train': 1.9290682077407837} +03/04/2022 07:46:23 - INFO - codeparrot_training - Step 15585: {'lr': 0.0004896772836247192, 'samples': 7980032, 'steps': 15585, 'loss/train': 2.10720157623291} +03/04/2022 07:46:25 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 07:46:29 - INFO - codeparrot_training - Step 15586: {'lr': 0.0004896757743938764, 'samples': 7980544, 'steps': 15586, 'loss/train': 1.5047657489776611} +03/04/2022 07:46:32 - INFO - codeparrot_training - Step 15587: {'lr': 0.0004896742650550393, 'samples': 7981056, 'steps': 15587, 'loss/train': 1.5336370468139648} +03/04/2022 07:46:34 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 07:46:37 - INFO - codeparrot_training - Step 15588: {'lr': 0.0004896727556082086, 'samples': 7981568, 'steps': 15588, 'loss/train': 1.5767669677734375} +03/04/2022 07:46:40 - INFO - codeparrot_training - Step 15589: {'lr': 0.0004896712460533854, 'samples': 7982080, 'steps': 15589, 'loss/train': 1.7325730323791504} +03/04/2022 07:46:43 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 07:46:46 - INFO - codeparrot_training - Step 15590: {'lr': 0.0004896697363905697, 'samples': 7982592, 'steps': 15590, 'loss/train': 1.9023256301879883} +03/04/2022 07:46:49 - INFO - codeparrot_training - Step 15591: {'lr': 0.0004896682266197626, 'samples': 7983104, 'steps': 15591, 'loss/train': 1.7700974941253662} +03/04/2022 07:46:51 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 07:46:54 - INFO - codeparrot_training - Step 15592: {'lr': 0.0004896667167409648, 'samples': 7983616, 'steps': 15592, 'loss/train': 1.6174067258834839} +03/04/2022 07:46:57 - INFO - codeparrot_training - Step 15593: {'lr': 0.0004896652067541767, 'samples': 7984128, 'steps': 15593, 'loss/train': 1.9236781597137451} +03/04/2022 07:47:03 - INFO - codeparrot_training - Step 15594: {'lr': 0.0004896636966593993, 'samples': 7984640, 'steps': 15594, 'loss/train': 1.7120556831359863} +03/04/2022 07:47:06 - INFO - codeparrot_training - Step 15595: {'lr': 0.0004896621864566331, 'samples': 7985152, 'steps': 15595, 'loss/train': 2.080111265182495} +03/04/2022 07:47:09 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 07:47:11 - INFO - codeparrot_training - Step 15596: {'lr': 0.0004896606761458788, 'samples': 7985664, 'steps': 15596, 'loss/train': 2.073698043823242} +03/04/2022 07:47:14 - INFO - codeparrot_training - Step 15597: {'lr': 0.0004896591657271371, 'samples': 7986176, 'steps': 15597, 'loss/train': 2.1522998809814453} +03/04/2022 07:47:17 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 07:47:19 - INFO - codeparrot_training - Step 15598: {'lr': 0.0004896576552004087, 'samples': 7986688, 'steps': 15598, 'loss/train': 2.25711727142334} +03/04/2022 07:47:23 - INFO - codeparrot_training - Step 15599: {'lr': 0.0004896561445656943, 'samples': 7987200, 'steps': 15599, 'loss/train': 1.7083323001861572} +03/04/2022 07:47:25 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/04/2022 07:47:28 - INFO - codeparrot_training - Step 15600: {'lr': 0.0004896546338229945, 'samples': 7987712, 'steps': 15600, 'loss/train': 1.8336783647537231} +03/04/2022 07:47:31 - INFO - codeparrot_training - Step 15601: {'lr': 0.00048965312297231, 'samples': 7988224, 'steps': 15601, 'loss/train': 2.040998697280884} +03/04/2022 07:47:34 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 07:47:37 - INFO - codeparrot_training - Step 15602: {'lr': 0.0004896516120136415, 'samples': 7988736, 'steps': 15602, 'loss/train': 1.830528974533081} +03/04/2022 07:47:40 - INFO - codeparrot_training - Step 15603: {'lr': 0.0004896501009469896, 'samples': 7989248, 'steps': 15603, 'loss/train': 3.5036025047302246} +03/04/2022 07:47:43 - INFO - codeparrot_training - Step 15604: {'lr': 0.0004896485897723552, 'samples': 7989760, 'steps': 15604, 'loss/train': 1.9894554615020752} +03/04/2022 07:47:44 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 07:47:48 - INFO - codeparrot_training - Step 15605: {'lr': 0.0004896470784897388, 'samples': 7990272, 'steps': 15605, 'loss/train': 1.2023062705993652} +03/04/2022 07:47:52 - INFO - codeparrot_training - Step 15606: {'lr': 0.0004896455670991411, 'samples': 7990784, 'steps': 15606, 'loss/train': 1.3464702367782593} +03/04/2022 07:47:52 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 07:47:57 - INFO - codeparrot_training - Step 15607: {'lr': 0.0004896440556005628, 'samples': 7991296, 'steps': 15607, 'loss/train': 6.8068528175354} +03/04/2022 07:48:00 - INFO - codeparrot_training - Step 15608: {'lr': 0.0004896425439940047, 'samples': 7991808, 'steps': 15608, 'loss/train': 2.566437244415283} +03/04/2022 07:48:02 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 07:48:05 - INFO - codeparrot_training - Step 15609: {'lr': 0.0004896410322794673, 'samples': 7992320, 'steps': 15609, 'loss/train': 1.656408667564392} +03/04/2022 07:48:08 - INFO - codeparrot_training - Step 15610: {'lr': 0.0004896395204569512, 'samples': 7992832, 'steps': 15610, 'loss/train': 2.533376932144165} +03/04/2022 07:48:10 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 07:48:14 - INFO - codeparrot_training - Step 15611: {'lr': 0.0004896380085264573, 'samples': 7993344, 'steps': 15611, 'loss/train': 1.599376916885376} +03/04/2022 07:48:17 - INFO - codeparrot_training - Step 15612: {'lr': 0.0004896364964879864, 'samples': 7993856, 'steps': 15612, 'loss/train': 1.5482906103134155} +03/04/2022 07:48:18 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 07:48:22 - INFO - codeparrot_training - Step 15613: {'lr': 0.0004896349843415389, 'samples': 7994368, 'steps': 15613, 'loss/train': 1.8338699340820312} +03/04/2022 07:48:25 - INFO - codeparrot_training - Step 15614: {'lr': 0.0004896334720871156, 'samples': 7994880, 'steps': 15614, 'loss/train': 2.9976632595062256} +03/04/2022 07:48:27 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 07:48:30 - INFO - codeparrot_training - Step 15615: {'lr': 0.0004896319597247169, 'samples': 7995392, 'steps': 15615, 'loss/train': 1.7623733282089233} +03/04/2022 07:48:34 - INFO - codeparrot_training - Step 15616: {'lr': 0.0004896304472543439, 'samples': 7995904, 'steps': 15616, 'loss/train': 2.2343811988830566} +03/04/2022 07:48:35 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 07:48:39 - INFO - codeparrot_training - Step 15617: {'lr': 0.0004896289346759973, 'samples': 7996416, 'steps': 15617, 'loss/train': 1.7282114028930664} +03/04/2022 07:48:42 - INFO - codeparrot_training - Step 15618: {'lr': 0.0004896274219896773, 'samples': 7996928, 'steps': 15618, 'loss/train': 3.023613214492798} +03/04/2022 07:48:44 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 07:48:47 - INFO - codeparrot_training - Step 15619: {'lr': 0.000489625909195385, 'samples': 7997440, 'steps': 15619, 'loss/train': 2.0036978721618652} +03/04/2022 07:48:51 - INFO - codeparrot_training - Step 15620: {'lr': 0.0004896243962931211, 'samples': 7997952, 'steps': 15620, 'loss/train': 2.3277180194854736} +03/04/2022 07:48:52 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 07:48:56 - INFO - codeparrot_training - Step 15621: {'lr': 0.0004896228832828861, 'samples': 7998464, 'steps': 15621, 'loss/train': 2.0538904666900635} +03/04/2022 07:48:59 - INFO - codeparrot_training - Step 15622: {'lr': 0.0004896213701646806, 'samples': 7998976, 'steps': 15622, 'loss/train': 1.7036488056182861} +03/04/2022 07:49:00 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/04/2022 07:49:04 - INFO - codeparrot_training - Step 15623: {'lr': 0.0004896198569385055, 'samples': 7999488, 'steps': 15623, 'loss/train': 1.2380073070526123} +03/04/2022 07:49:08 - INFO - codeparrot_training - Step 15624: {'lr': 0.0004896183436043613, 'samples': 8000000, 'steps': 15624, 'loss/train': 0.34877222776412964} +03/04/2022 07:49:09 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 07:49:13 - INFO - codeparrot_training - Step 15625: {'lr': 0.0004896168301622488, 'samples': 8000512, 'steps': 15625, 'loss/train': 2.1807491779327393} +03/04/2022 07:49:16 - INFO - codeparrot_training - Step 15626: {'lr': 0.0004896153166121688, 'samples': 8001024, 'steps': 15626, 'loss/train': 1.338117241859436} +03/04/2022 07:49:17 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/04/2022 07:49:21 - INFO - codeparrot_training - Step 15627: {'lr': 0.0004896138029541217, 'samples': 8001536, 'steps': 15627, 'loss/train': 2.3398923873901367} +03/04/2022 07:49:24 - INFO - codeparrot_training - Step 15628: {'lr': 0.0004896122891881083, 'samples': 8002048, 'steps': 15628, 'loss/train': 1.6659098863601685} +03/04/2022 07:49:26 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/04/2022 07:49:29 - INFO - codeparrot_training - Step 15629: {'lr': 0.0004896107753141293, 'samples': 8002560, 'steps': 15629, 'loss/train': 0.8720934987068176} +03/04/2022 07:49:33 - INFO - codeparrot_training - Step 15630: {'lr': 0.0004896092613321854, 'samples': 8003072, 'steps': 15630, 'loss/train': 1.6802074909210205} +03/04/2022 07:49:34 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 07:49:38 - INFO - codeparrot_training - Step 15631: {'lr': 0.0004896077472422773, 'samples': 8003584, 'steps': 15631, 'loss/train': 2.2948222160339355} +03/04/2022 07:49:41 - INFO - codeparrot_training - Step 15632: {'lr': 0.0004896062330444057, 'samples': 8004096, 'steps': 15632, 'loss/train': 2.5763370990753174} +03/04/2022 07:49:44 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 07:49:46 - INFO - codeparrot_training - Step 15633: {'lr': 0.0004896047187385711, 'samples': 8004608, 'steps': 15633, 'loss/train': 2.338783025741577} +03/04/2022 07:49:50 - INFO - codeparrot_training - Step 15634: {'lr': 0.0004896032043247744, 'samples': 8005120, 'steps': 15634, 'loss/train': 2.18501877784729} +03/04/2022 07:49:52 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 07:49:55 - INFO - codeparrot_training - Step 15635: {'lr': 0.0004896016898030161, 'samples': 8005632, 'steps': 15635, 'loss/train': 1.0636016130447388} +03/04/2022 07:49:58 - INFO - codeparrot_training - Step 15636: {'lr': 0.0004896001751732971, 'samples': 8006144, 'steps': 15636, 'loss/train': 1.1967425346374512} +03/04/2022 07:50:01 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 07:50:03 - INFO - codeparrot_training - Step 15637: {'lr': 0.0004895986604356178, 'samples': 8006656, 'steps': 15637, 'loss/train': 0.7863867878913879} +03/04/2022 07:50:07 - INFO - codeparrot_training - Step 15638: {'lr': 0.0004895971455899792, 'samples': 8007168, 'steps': 15638, 'loss/train': 1.7963286638259888} +03/04/2022 07:50:09 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 07:50:12 - INFO - codeparrot_training - Step 15639: {'lr': 0.0004895956306363818, 'samples': 8007680, 'steps': 15639, 'loss/train': 1.2012642621994019} +03/04/2022 07:50:15 - INFO - codeparrot_training - Step 15640: {'lr': 0.0004895941155748263, 'samples': 8008192, 'steps': 15640, 'loss/train': 1.476168155670166} +03/04/2022 07:50:17 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 07:50:20 - INFO - codeparrot_training - Step 15641: {'lr': 0.0004895926004053133, 'samples': 8008704, 'steps': 15641, 'loss/train': 1.5905035734176636} +03/04/2022 07:50:23 - INFO - codeparrot_training - Step 15642: {'lr': 0.0004895910851278436, 'samples': 8009216, 'steps': 15642, 'loss/train': 1.6183972358703613} +03/04/2022 07:50:25 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 07:50:29 - INFO - codeparrot_training - Step 15643: {'lr': 0.0004895895697424179, 'samples': 8009728, 'steps': 15643, 'loss/train': 1.5347144603729248} +03/04/2022 07:50:32 - INFO - codeparrot_training - Step 15644: {'lr': 0.0004895880542490369, 'samples': 8010240, 'steps': 15644, 'loss/train': 1.1086368560791016} +03/04/2022 07:50:34 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 07:50:37 - INFO - codeparrot_training - Step 15645: {'lr': 0.0004895865386477011, 'samples': 8010752, 'steps': 15645, 'loss/train': 0.9567164182662964} +03/04/2022 07:50:40 - INFO - codeparrot_training - Step 15646: {'lr': 0.0004895850229384113, 'samples': 8011264, 'steps': 15646, 'loss/train': 2.146369457244873} +03/04/2022 07:50:42 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/04/2022 07:50:46 - INFO - codeparrot_training - Step 15647: {'lr': 0.0004895835071211682, 'samples': 8011776, 'steps': 15647, 'loss/train': 1.4812657833099365} +03/04/2022 07:50:49 - INFO - codeparrot_training - Step 15648: {'lr': 0.0004895819911959725, 'samples': 8012288, 'steps': 15648, 'loss/train': 2.689929246902466} +03/04/2022 07:50:51 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/04/2022 07:50:54 - INFO - codeparrot_training - Step 15649: {'lr': 0.0004895804751628249, 'samples': 8012800, 'steps': 15649, 'loss/train': 2.492647409439087} +03/04/2022 07:50:58 - INFO - codeparrot_training - Step 15650: {'lr': 0.0004895789590217259, 'samples': 8013312, 'steps': 15650, 'loss/train': 2.0614447593688965} +03/04/2022 07:51:01 - INFO - codeparrot_training - Step 15651: {'lr': 0.0004895774427726764, 'samples': 8013824, 'steps': 15651, 'loss/train': 2.0234296321868896} +03/04/2022 07:51:01 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/04/2022 07:51:06 - INFO - codeparrot_training - Step 15652: {'lr': 0.000489575926415677, 'samples': 8014336, 'steps': 15652, 'loss/train': 2.566713809967041} +03/04/2022 07:51:09 - INFO - codeparrot_training - Step 15653: {'lr': 0.0004895744099507284, 'samples': 8014848, 'steps': 15653, 'loss/train': 1.3444267511367798} +03/04/2022 07:51:10 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 07:51:14 - INFO - codeparrot_training - Step 15654: {'lr': 0.0004895728933778313, 'samples': 8015360, 'steps': 15654, 'loss/train': 1.556229591369629} +03/04/2022 07:51:18 - INFO - codeparrot_training - Step 15655: {'lr': 0.0004895713766969863, 'samples': 8015872, 'steps': 15655, 'loss/train': 1.6726371049880981} +03/04/2022 07:51:18 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 07:51:23 - INFO - codeparrot_training - Step 15656: {'lr': 0.0004895698599081942, 'samples': 8016384, 'steps': 15656, 'loss/train': 3.325284719467163} +03/04/2022 07:51:26 - INFO - codeparrot_training - Step 15657: {'lr': 0.0004895683430114555, 'samples': 8016896, 'steps': 15657, 'loss/train': 2.0596282482147217} +03/04/2022 07:51:26 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 07:51:31 - INFO - codeparrot_training - Step 15658: {'lr': 0.0004895668260067711, 'samples': 8017408, 'steps': 15658, 'loss/train': 1.5727821588516235} +03/04/2022 07:51:34 - INFO - codeparrot_training - Step 15659: {'lr': 0.0004895653088941416, 'samples': 8017920, 'steps': 15659, 'loss/train': 1.684291124343872} +03/04/2022 07:51:35 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/04/2022 07:51:40 - INFO - codeparrot_training - Step 15660: {'lr': 0.0004895637916735675, 'samples': 8018432, 'steps': 15660, 'loss/train': 1.999837875366211} +03/04/2022 07:51:43 - INFO - codeparrot_training - Step 15661: {'lr': 0.0004895622743450497, 'samples': 8018944, 'steps': 15661, 'loss/train': 2.152054786682129} +03/04/2022 07:51:43 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 07:51:48 - INFO - codeparrot_training - Step 15662: {'lr': 0.000489560756908589, 'samples': 8019456, 'steps': 15662, 'loss/train': 1.985174298286438} +03/04/2022 07:51:51 - INFO - codeparrot_training - Step 15663: {'lr': 0.0004895592393641858, 'samples': 8019968, 'steps': 15663, 'loss/train': 2.06432843208313} +03/04/2022 07:51:51 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 07:51:57 - INFO - codeparrot_training - Step 15664: {'lr': 0.0004895577217118408, 'samples': 8020480, 'steps': 15664, 'loss/train': 1.4724737405776978} +03/04/2022 07:52:00 - INFO - codeparrot_training - Step 15665: {'lr': 0.000489556203951555, 'samples': 8020992, 'steps': 15665, 'loss/train': 2.0836777687072754} +03/04/2022 07:52:02 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 07:52:05 - INFO - codeparrot_training - Step 15666: {'lr': 0.0004895546860833287, 'samples': 8021504, 'steps': 15666, 'loss/train': 1.4378376007080078} +03/04/2022 07:52:09 - INFO - codeparrot_training - Step 15667: {'lr': 0.000489553168107163, 'samples': 8022016, 'steps': 15667, 'loss/train': 2.417998790740967} +03/04/2022 07:52:11 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/04/2022 07:52:14 - INFO - codeparrot_training - Step 15668: {'lr': 0.0004895516500230581, 'samples': 8022528, 'steps': 15668, 'loss/train': 1.929077386856079} +03/04/2022 07:52:17 - INFO - codeparrot_training - Step 15669: {'lr': 0.000489550131831015, 'samples': 8023040, 'steps': 15669, 'loss/train': 2.634594202041626} +03/04/2022 07:52:20 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 07:52:22 - INFO - codeparrot_training - Step 15670: {'lr': 0.0004895486135310343, 'samples': 8023552, 'steps': 15670, 'loss/train': 1.6383463144302368} +03/04/2022 07:52:26 - INFO - codeparrot_training - Step 15671: {'lr': 0.0004895470951231166, 'samples': 8024064, 'steps': 15671, 'loss/train': 1.5963491201400757} +03/04/2022 07:52:28 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/04/2022 07:52:31 - INFO - codeparrot_training - Step 15672: {'lr': 0.0004895455766072629, 'samples': 8024576, 'steps': 15672, 'loss/train': 2.2744712829589844} +03/04/2022 07:52:34 - INFO - codeparrot_training - Step 15673: {'lr': 0.0004895440579834736, 'samples': 8025088, 'steps': 15673, 'loss/train': 1.329055905342102} +03/04/2022 07:52:36 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 07:52:39 - INFO - codeparrot_training - Step 15674: {'lr': 0.0004895425392517493, 'samples': 8025600, 'steps': 15674, 'loss/train': 2.1262319087982178} +03/04/2022 07:52:42 - INFO - codeparrot_training - Step 15675: {'lr': 0.0004895410204120909, 'samples': 8026112, 'steps': 15675, 'loss/train': 0.7481274604797363} +03/04/2022 07:52:45 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 07:52:48 - INFO - codeparrot_training - Step 15676: {'lr': 0.000489539501464499, 'samples': 8026624, 'steps': 15676, 'loss/train': 1.6115937232971191} +03/04/2022 07:52:51 - INFO - codeparrot_training - Step 15677: {'lr': 0.0004895379824089743, 'samples': 8027136, 'steps': 15677, 'loss/train': 0.3066820800304413} +03/04/2022 07:52:53 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 07:52:56 - INFO - codeparrot_training - Step 15678: {'lr': 0.0004895364632455175, 'samples': 8027648, 'steps': 15678, 'loss/train': 1.414406418800354} +03/04/2022 07:52:59 - INFO - codeparrot_training - Step 15679: {'lr': 0.0004895349439741292, 'samples': 8028160, 'steps': 15679, 'loss/train': 1.7060343027114868} +03/04/2022 07:53:02 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 07:53:05 - INFO - codeparrot_training - Step 15680: {'lr': 0.0004895334245948103, 'samples': 8028672, 'steps': 15680, 'loss/train': 1.9586430788040161} +03/04/2022 07:53:08 - INFO - codeparrot_training - Step 15681: {'lr': 0.0004895319051075612, 'samples': 8029184, 'steps': 15681, 'loss/train': 2.3563432693481445} +03/04/2022 07:53:10 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 07:53:13 - INFO - codeparrot_training - Step 15682: {'lr': 0.0004895303855123828, 'samples': 8029696, 'steps': 15682, 'loss/train': 2.764169216156006} +03/04/2022 07:53:16 - INFO - codeparrot_training - Step 15683: {'lr': 0.0004895288658092757, 'samples': 8030208, 'steps': 15683, 'loss/train': 1.7268319129943848} +03/04/2022 07:53:19 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 07:53:22 - INFO - codeparrot_training - Step 15684: {'lr': 0.0004895273459982406, 'samples': 8030720, 'steps': 15684, 'loss/train': 2.6820268630981445} +03/04/2022 07:53:25 - INFO - codeparrot_training - Step 15685: {'lr': 0.0004895258260792781, 'samples': 8031232, 'steps': 15685, 'loss/train': 1.7235958576202393} +03/04/2022 07:53:27 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 07:53:30 - INFO - codeparrot_training - Step 15686: {'lr': 0.0004895243060523889, 'samples': 8031744, 'steps': 15686, 'loss/train': 1.5525797605514526} +03/04/2022 07:53:33 - INFO - codeparrot_training - Step 15687: {'lr': 0.0004895227859175739, 'samples': 8032256, 'steps': 15687, 'loss/train': 2.6901612281799316} +03/04/2022 07:53:36 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 07:53:38 - INFO - codeparrot_training - Step 15688: {'lr': 0.0004895212656748336, 'samples': 8032768, 'steps': 15688, 'loss/train': 1.9821029901504517} +03/04/2022 07:53:42 - INFO - codeparrot_training - Step 15689: {'lr': 0.0004895197453241687, 'samples': 8033280, 'steps': 15689, 'loss/train': 2.3617098331451416} +03/04/2022 07:53:44 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 07:53:47 - INFO - codeparrot_training - Step 15690: {'lr': 0.0004895182248655798, 'samples': 8033792, 'steps': 15690, 'loss/train': 1.5950372219085693} +03/04/2022 07:53:50 - INFO - codeparrot_training - Step 15691: {'lr': 0.0004895167042990678, 'samples': 8034304, 'steps': 15691, 'loss/train': 2.5935111045837402} +03/04/2022 07:53:52 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 07:53:55 - INFO - codeparrot_training - Step 15692: {'lr': 0.0004895151836246332, 'samples': 8034816, 'steps': 15692, 'loss/train': 1.9146080017089844} +03/04/2022 07:53:59 - INFO - codeparrot_training - Step 15693: {'lr': 0.0004895136628422767, 'samples': 8035328, 'steps': 15693, 'loss/train': 1.6679564714431763} +03/04/2022 07:54:01 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 07:54:04 - INFO - codeparrot_training - Step 15694: {'lr': 0.0004895121419519992, 'samples': 8035840, 'steps': 15694, 'loss/train': 1.7771307229995728} +03/04/2022 07:54:07 - INFO - codeparrot_training - Step 15695: {'lr': 0.0004895106209538011, 'samples': 8036352, 'steps': 15695, 'loss/train': 2.3171160221099854} +03/04/2022 07:54:09 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 07:54:12 - INFO - codeparrot_training - Step 15696: {'lr': 0.0004895090998476833, 'samples': 8036864, 'steps': 15696, 'loss/train': 2.1088979244232178} +03/04/2022 07:54:15 - INFO - codeparrot_training - Step 15697: {'lr': 0.0004895075786336463, 'samples': 8037376, 'steps': 15697, 'loss/train': 1.5757842063903809} +03/04/2022 07:54:18 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 07:54:21 - INFO - codeparrot_training - Step 15698: {'lr': 0.000489506057311691, 'samples': 8037888, 'steps': 15698, 'loss/train': 2.090244770050049} +03/04/2022 07:54:24 - INFO - codeparrot_training - Step 15699: {'lr': 0.0004895045358818179, 'samples': 8038400, 'steps': 15699, 'loss/train': 1.1505576372146606} +03/04/2022 07:54:27 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/04/2022 07:54:29 - INFO - codeparrot_training - Step 15700: {'lr': 0.0004895030143440278, 'samples': 8038912, 'steps': 15700, 'loss/train': 2.5268542766571045} +03/04/2022 07:54:32 - INFO - codeparrot_training - Step 15701: {'lr': 0.0004895014926983212, 'samples': 8039424, 'steps': 15701, 'loss/train': 0.5695185661315918} +03/04/2022 07:54:35 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 07:54:38 - INFO - codeparrot_training - Step 15702: {'lr': 0.0004894999709446991, 'samples': 8039936, 'steps': 15702, 'loss/train': 1.5199710130691528} +03/04/2022 07:54:41 - INFO - codeparrot_training - Step 15703: {'lr': 0.0004894984490831619, 'samples': 8040448, 'steps': 15703, 'loss/train': 0.9801062941551208} +03/04/2022 07:54:43 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 07:54:46 - INFO - codeparrot_training - Step 15704: {'lr': 0.0004894969271137104, 'samples': 8040960, 'steps': 15704, 'loss/train': 1.1070969104766846} +03/04/2022 07:54:49 - INFO - codeparrot_training - Step 15705: {'lr': 0.0004894954050363452, 'samples': 8041472, 'steps': 15705, 'loss/train': 1.569886565208435} +03/04/2022 07:54:52 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 07:54:55 - INFO - codeparrot_training - Step 15706: {'lr': 0.0004894938828510672, 'samples': 8041984, 'steps': 15706, 'loss/train': 2.23795747756958} +03/04/2022 07:54:58 - INFO - codeparrot_training - Step 15707: {'lr': 0.000489492360557877, 'samples': 8042496, 'steps': 15707, 'loss/train': 2.299255132675171} +03/04/2022 07:55:00 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 07:55:03 - INFO - codeparrot_training - Step 15708: {'lr': 0.0004894908381567751, 'samples': 8043008, 'steps': 15708, 'loss/train': 2.358010768890381} +03/04/2022 07:55:06 - INFO - codeparrot_training - Step 15709: {'lr': 0.0004894893156477623, 'samples': 8043520, 'steps': 15709, 'loss/train': 1.6785175800323486} +03/04/2022 07:55:08 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 07:55:11 - INFO - codeparrot_training - Step 15710: {'lr': 0.0004894877930308395, 'samples': 8044032, 'steps': 15710, 'loss/train': 0.8974571824073792} +03/04/2022 07:55:15 - INFO - codeparrot_training - Step 15711: {'lr': 0.0004894862703060071, 'samples': 8044544, 'steps': 15711, 'loss/train': 1.5870919227600098} +03/04/2022 07:55:17 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 07:55:20 - INFO - codeparrot_training - Step 15712: {'lr': 0.0004894847474732658, 'samples': 8045056, 'steps': 15712, 'loss/train': 1.1885066032409668} +03/04/2022 07:55:23 - INFO - codeparrot_training - Step 15713: {'lr': 0.0004894832245326165, 'samples': 8045568, 'steps': 15713, 'loss/train': 2.2328274250030518} +03/04/2022 07:55:25 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 07:55:28 - INFO - codeparrot_training - Step 15714: {'lr': 0.0004894817014840597, 'samples': 8046080, 'steps': 15714, 'loss/train': 5.130954265594482} +03/04/2022 07:55:32 - INFO - codeparrot_training - Step 15715: {'lr': 0.0004894801783275961, 'samples': 8046592, 'steps': 15715, 'loss/train': 2.535412311553955} +03/04/2022 07:55:33 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 07:55:37 - INFO - codeparrot_training - Step 15716: {'lr': 0.0004894786550632264, 'samples': 8047104, 'steps': 15716, 'loss/train': 1.7470682859420776} +03/04/2022 07:55:40 - INFO - codeparrot_training - Step 15717: {'lr': 0.0004894771316909514, 'samples': 8047616, 'steps': 15717, 'loss/train': 1.9062572717666626} +03/04/2022 07:55:42 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 07:55:45 - INFO - codeparrot_training - Step 15718: {'lr': 0.0004894756082107717, 'samples': 8048128, 'steps': 15718, 'loss/train': 1.096044898033142} +03/04/2022 07:55:49 - INFO - codeparrot_training - Step 15719: {'lr': 0.0004894740846226879, 'samples': 8048640, 'steps': 15719, 'loss/train': 2.2670860290527344} +03/04/2022 07:55:50 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 07:55:54 - INFO - codeparrot_training - Step 15720: {'lr': 0.0004894725609267009, 'samples': 8049152, 'steps': 15720, 'loss/train': 1.387764573097229} +03/04/2022 07:55:57 - INFO - codeparrot_training - Step 15721: {'lr': 0.0004894710371228111, 'samples': 8049664, 'steps': 15721, 'loss/train': 1.4563462734222412} +03/04/2022 07:55:59 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 07:56:02 - INFO - codeparrot_training - Step 15722: {'lr': 0.0004894695132110196, 'samples': 8050176, 'steps': 15722, 'loss/train': 2.169600009918213} +03/04/2022 07:56:06 - INFO - codeparrot_training - Step 15723: {'lr': 0.0004894679891913266, 'samples': 8050688, 'steps': 15723, 'loss/train': 6.802032470703125} +03/04/2022 07:56:08 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 07:56:11 - INFO - codeparrot_training - Step 15724: {'lr': 0.000489466465063733, 'samples': 8051200, 'steps': 15724, 'loss/train': 2.100257635116577} +03/04/2022 07:56:14 - INFO - codeparrot_training - Step 15725: {'lr': 0.0004894649408282396, 'samples': 8051712, 'steps': 15725, 'loss/train': 1.6503151655197144} +03/04/2022 07:56:17 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 07:56:19 - INFO - codeparrot_training - Step 15726: {'lr': 0.000489463416484847, 'samples': 8052224, 'steps': 15726, 'loss/train': 1.1871337890625} +03/04/2022 07:56:22 - INFO - codeparrot_training - Step 15727: {'lr': 0.0004894618920335558, 'samples': 8052736, 'steps': 15727, 'loss/train': 2.119513511657715} +03/04/2022 07:56:25 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 07:56:28 - INFO - codeparrot_training - Step 15728: {'lr': 0.0004894603674743668, 'samples': 8053248, 'steps': 15728, 'loss/train': 1.731230616569519} +03/04/2022 07:56:31 - INFO - codeparrot_training - Step 15729: {'lr': 0.0004894588428072808, 'samples': 8053760, 'steps': 15729, 'loss/train': 0.736242413520813} +03/04/2022 07:56:34 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 07:56:36 - INFO - codeparrot_training - Step 15730: {'lr': 0.0004894573180322982, 'samples': 8054272, 'steps': 15730, 'loss/train': 1.8481392860412598} +03/04/2022 07:56:39 - INFO - codeparrot_training - Step 15731: {'lr': 0.0004894557931494199, 'samples': 8054784, 'steps': 15731, 'loss/train': 1.7223693132400513} +03/04/2022 07:56:43 - INFO - codeparrot_training - Step 15732: {'lr': 0.0004894542681586465, 'samples': 8055296, 'steps': 15732, 'loss/train': 1.4309290647506714} +03/04/2022 07:56:43 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 07:56:48 - INFO - codeparrot_training - Step 15733: {'lr': 0.0004894527430599786, 'samples': 8055808, 'steps': 15733, 'loss/train': 2.109006643295288} +03/04/2022 07:56:51 - INFO - codeparrot_training - Step 15734: {'lr': 0.0004894512178534171, 'samples': 8056320, 'steps': 15734, 'loss/train': 1.0732314586639404} +03/04/2022 07:56:51 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 07:56:56 - INFO - codeparrot_training - Step 15735: {'lr': 0.0004894496925389625, 'samples': 8056832, 'steps': 15735, 'loss/train': 0.8656599521636963} +03/04/2022 07:57:00 - INFO - codeparrot_training - Step 15736: {'lr': 0.0004894481671166155, 'samples': 8057344, 'steps': 15736, 'loss/train': 0.9400997757911682} +03/04/2022 07:57:00 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 07:57:05 - INFO - codeparrot_training - Step 15737: {'lr': 0.0004894466415863771, 'samples': 8057856, 'steps': 15737, 'loss/train': 2.0122127532958984} +03/04/2022 07:57:08 - INFO - codeparrot_training - Step 15738: {'lr': 0.0004894451159482476, 'samples': 8058368, 'steps': 15738, 'loss/train': 2.140281915664673} +03/04/2022 07:57:08 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/04/2022 07:57:13 - INFO - codeparrot_training - Step 15739: {'lr': 0.0004894435902022277, 'samples': 8058880, 'steps': 15739, 'loss/train': 2.9429759979248047} +03/04/2022 07:57:17 - INFO - codeparrot_training - Step 15740: {'lr': 0.0004894420643483184, 'samples': 8059392, 'steps': 15740, 'loss/train': 1.9873096942901611} +03/04/2022 07:57:17 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/04/2022 07:57:22 - INFO - codeparrot_training - Step 15741: {'lr': 0.0004894405383865201, 'samples': 8059904, 'steps': 15741, 'loss/train': 1.499707818031311} +03/04/2022 07:57:25 - INFO - codeparrot_training - Step 15742: {'lr': 0.0004894390123168337, 'samples': 8060416, 'steps': 15742, 'loss/train': 1.6385310888290405} +03/04/2022 07:57:25 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 07:57:30 - INFO - codeparrot_training - Step 15743: {'lr': 0.0004894374861392596, 'samples': 8060928, 'steps': 15743, 'loss/train': 2.066291570663452} +03/04/2022 07:57:33 - INFO - codeparrot_training - Step 15744: {'lr': 0.0004894359598537987, 'samples': 8061440, 'steps': 15744, 'loss/train': 0.9673469066619873} +03/04/2022 07:57:33 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 07:57:39 - INFO - codeparrot_training - Step 15745: {'lr': 0.0004894344334604517, 'samples': 8061952, 'steps': 15745, 'loss/train': 2.314256191253662} +03/04/2022 07:57:42 - INFO - codeparrot_training - Step 15746: {'lr': 0.0004894329069592192, 'samples': 8062464, 'steps': 15746, 'loss/train': 0.258719265460968} +03/04/2022 07:57:42 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/04/2022 07:57:47 - INFO - codeparrot_training - Step 15747: {'lr': 0.000489431380350102, 'samples': 8062976, 'steps': 15747, 'loss/train': 1.5270055532455444} +03/04/2022 07:57:51 - INFO - codeparrot_training - Step 15748: {'lr': 0.0004894298536331007, 'samples': 8063488, 'steps': 15748, 'loss/train': 2.2317898273468018} +03/04/2022 07:57:52 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 07:57:56 - INFO - codeparrot_training - Step 15749: {'lr': 0.000489428326808216, 'samples': 8064000, 'steps': 15749, 'loss/train': 2.160482883453369} +03/04/2022 07:57:59 - INFO - codeparrot_training - Step 15750: {'lr': 0.0004894267998754486, 'samples': 8064512, 'steps': 15750, 'loss/train': 1.7678260803222656} +03/04/2022 07:58:00 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 07:58:04 - INFO - codeparrot_training - Step 15751: {'lr': 0.0004894252728347992, 'samples': 8065024, 'steps': 15751, 'loss/train': 2.3406460285186768} +03/04/2022 07:58:08 - INFO - codeparrot_training - Step 15752: {'lr': 0.0004894237456862684, 'samples': 8065536, 'steps': 15752, 'loss/train': 1.9627060890197754} +03/04/2022 07:58:08 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/04/2022 07:58:13 - INFO - codeparrot_training - Step 15753: {'lr': 0.000489422218429857, 'samples': 8066048, 'steps': 15753, 'loss/train': 1.8094896078109741} +03/04/2022 07:58:16 - INFO - codeparrot_training - Step 15754: {'lr': 0.0004894206910655656, 'samples': 8066560, 'steps': 15754, 'loss/train': 0.9984133839607239} +03/04/2022 07:58:16 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 07:58:21 - INFO - codeparrot_training - Step 15755: {'lr': 0.0004894191635933949, 'samples': 8067072, 'steps': 15755, 'loss/train': 1.1861696243286133} +03/04/2022 07:58:24 - INFO - codeparrot_training - Step 15756: {'lr': 0.0004894176360133456, 'samples': 8067584, 'steps': 15756, 'loss/train': 1.8479070663452148} +03/04/2022 07:58:24 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 07:58:30 - INFO - codeparrot_training - Step 15757: {'lr': 0.0004894161083254186, 'samples': 8068096, 'steps': 15757, 'loss/train': 2.597443103790283} +03/04/2022 07:58:33 - INFO - codeparrot_training - Step 15758: {'lr': 0.0004894145805296143, 'samples': 8068608, 'steps': 15758, 'loss/train': 1.9176812171936035} +03/04/2022 07:58:33 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 07:58:39 - INFO - codeparrot_training - Step 15759: {'lr': 0.0004894130526259334, 'samples': 8069120, 'steps': 15759, 'loss/train': 2.292680025100708} +03/04/2022 07:58:42 - INFO - codeparrot_training - Step 15760: {'lr': 0.0004894115246143768, 'samples': 8069632, 'steps': 15760, 'loss/train': 1.7781001329421997} +03/04/2022 07:58:45 - INFO - codeparrot_training - Step 15761: {'lr': 0.0004894099964949449, 'samples': 8070144, 'steps': 15761, 'loss/train': 2.007512092590332} +03/04/2022 07:58:46 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 07:58:50 - INFO - codeparrot_training - Step 15762: {'lr': 0.0004894084682676387, 'samples': 8070656, 'steps': 15762, 'loss/train': 1.850338101387024} +03/04/2022 07:58:54 - INFO - codeparrot_training - Step 15763: {'lr': 0.0004894069399324586, 'samples': 8071168, 'steps': 15763, 'loss/train': 1.583418607711792} +03/04/2022 07:58:55 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/04/2022 07:58:59 - INFO - codeparrot_training - Step 15764: {'lr': 0.0004894054114894055, 'samples': 8071680, 'steps': 15764, 'loss/train': 2.399912118911743} +03/04/2022 07:59:02 - INFO - codeparrot_training - Step 15765: {'lr': 0.00048940388293848, 'samples': 8072192, 'steps': 15765, 'loss/train': 1.6176056861877441} +03/04/2022 07:59:04 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 07:59:07 - INFO - codeparrot_training - Step 15766: {'lr': 0.000489402354279683, 'samples': 8072704, 'steps': 15766, 'loss/train': 2.1097328662872314} +03/04/2022 07:59:11 - INFO - codeparrot_training - Step 15767: {'lr': 0.0004894008255130147, 'samples': 8073216, 'steps': 15767, 'loss/train': 1.6398531198501587} +03/04/2022 07:59:12 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/04/2022 07:59:16 - INFO - codeparrot_training - Step 15768: {'lr': 0.0004893992966384762, 'samples': 8073728, 'steps': 15768, 'loss/train': 0.8948653936386108} +03/04/2022 07:59:19 - INFO - codeparrot_training - Step 15769: {'lr': 0.0004893977676560682, 'samples': 8074240, 'steps': 15769, 'loss/train': 2.3008790016174316} +03/04/2022 07:59:21 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 07:59:24 - INFO - codeparrot_training - Step 15770: {'lr': 0.000489396238565791, 'samples': 8074752, 'steps': 15770, 'loss/train': 1.6504114866256714} +03/04/2022 07:59:27 - INFO - codeparrot_training - Step 15771: {'lr': 0.0004893947093676458, 'samples': 8075264, 'steps': 15771, 'loss/train': 1.8950122594833374} +03/04/2022 07:59:29 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 07:59:33 - INFO - codeparrot_training - Step 15772: {'lr': 0.0004893931800616329, 'samples': 8075776, 'steps': 15772, 'loss/train': 1.6442188024520874} +03/04/2022 07:59:36 - INFO - codeparrot_training - Step 15773: {'lr': 0.0004893916506477532, 'samples': 8076288, 'steps': 15773, 'loss/train': 1.806784749031067} +03/04/2022 07:59:38 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 07:59:41 - INFO - codeparrot_training - Step 15774: {'lr': 0.0004893901211260073, 'samples': 8076800, 'steps': 15774, 'loss/train': 1.3734625577926636} +03/04/2022 07:59:44 - INFO - codeparrot_training - Step 15775: {'lr': 0.0004893885914963958, 'samples': 8077312, 'steps': 15775, 'loss/train': 2.3755300045013428} +03/04/2022 07:59:46 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/04/2022 07:59:50 - INFO - codeparrot_training - Step 15776: {'lr': 0.0004893870617589196, 'samples': 8077824, 'steps': 15776, 'loss/train': 0.982387363910675} +03/04/2022 07:59:53 - INFO - codeparrot_training - Step 15777: {'lr': 0.0004893855319135791, 'samples': 8078336, 'steps': 15777, 'loss/train': 2.390859365463257} +03/04/2022 07:59:55 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 07:59:58 - INFO - codeparrot_training - Step 15778: {'lr': 0.0004893840019603754, 'samples': 8078848, 'steps': 15778, 'loss/train': 2.067556142807007} +03/04/2022 08:00:01 - INFO - codeparrot_training - Step 15779: {'lr': 0.0004893824718993088, 'samples': 8079360, 'steps': 15779, 'loss/train': 2.096879720687866} +03/04/2022 08:00:03 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 08:00:06 - INFO - codeparrot_training - Step 15780: {'lr': 0.0004893809417303803, 'samples': 8079872, 'steps': 15780, 'loss/train': 1.7250200510025024} +03/04/2022 08:00:10 - INFO - codeparrot_training - Step 15781: {'lr': 0.0004893794114535905, 'samples': 8080384, 'steps': 15781, 'loss/train': 1.2458003759384155} +03/04/2022 08:00:11 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/04/2022 08:00:15 - INFO - codeparrot_training - Step 15782: {'lr': 0.0004893778810689399, 'samples': 8080896, 'steps': 15782, 'loss/train': 1.8468230962753296} +03/04/2022 08:00:18 - INFO - codeparrot_training - Step 15783: {'lr': 0.0004893763505764292, 'samples': 8081408, 'steps': 15783, 'loss/train': 3.7919137477874756} +03/04/2022 08:00:20 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/04/2022 08:00:23 - INFO - codeparrot_training - Step 15784: {'lr': 0.0004893748199760594, 'samples': 8081920, 'steps': 15784, 'loss/train': 1.166268229484558} +03/04/2022 08:00:26 - INFO - codeparrot_training - Step 15785: {'lr': 0.0004893732892678309, 'samples': 8082432, 'steps': 15785, 'loss/train': 2.209667444229126} +03/04/2022 08:00:28 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 08:00:32 - INFO - codeparrot_training - Step 15786: {'lr': 0.0004893717584517445, 'samples': 8082944, 'steps': 15786, 'loss/train': 2.6296679973602295} +03/04/2022 08:00:35 - INFO - codeparrot_training - Step 15787: {'lr': 0.000489370227527801, 'samples': 8083456, 'steps': 15787, 'loss/train': 1.7829806804656982} +03/04/2022 08:00:38 - INFO - codeparrot_training - Step 15788: {'lr': 0.0004893686964960009, 'samples': 8083968, 'steps': 15788, 'loss/train': 2.3239595890045166} +03/04/2022 08:00:38 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 08:00:44 - INFO - codeparrot_training - Step 15789: {'lr': 0.0004893671653563448, 'samples': 8084480, 'steps': 15789, 'loss/train': 1.8203243017196655} +03/04/2022 08:00:47 - INFO - codeparrot_training - Step 15790: {'lr': 0.0004893656341088338, 'samples': 8084992, 'steps': 15790, 'loss/train': 2.1572787761688232} +03/04/2022 08:00:48 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 08:00:52 - INFO - codeparrot_training - Step 15791: {'lr': 0.0004893641027534682, 'samples': 8085504, 'steps': 15791, 'loss/train': 2.2781121730804443} +03/04/2022 08:00:55 - INFO - codeparrot_training - Step 15792: {'lr': 0.0004893625712902489, 'samples': 8086016, 'steps': 15792, 'loss/train': 1.9550840854644775} +03/04/2022 08:00:56 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 08:01:01 - INFO - codeparrot_training - Step 15793: {'lr': 0.0004893610397191764, 'samples': 8086528, 'steps': 15793, 'loss/train': 1.6182414293289185} +03/04/2022 08:01:04 - INFO - codeparrot_training - Step 15794: {'lr': 0.0004893595080402517, 'samples': 8087040, 'steps': 15794, 'loss/train': 1.7665551900863647} +03/04/2022 08:01:05 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 08:01:09 - INFO - codeparrot_training - Step 15795: {'lr': 0.0004893579762534751, 'samples': 8087552, 'steps': 15795, 'loss/train': 1.4938420057296753} +03/04/2022 08:01:12 - INFO - codeparrot_training - Step 15796: {'lr': 0.0004893564443588476, 'samples': 8088064, 'steps': 15796, 'loss/train': 2.1501986980438232} +03/04/2022 08:01:13 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 08:01:17 - INFO - codeparrot_training - Step 15797: {'lr': 0.0004893549123563697, 'samples': 8088576, 'steps': 15797, 'loss/train': 0.7328019738197327} +03/04/2022 08:01:21 - INFO - codeparrot_training - Step 15798: {'lr': 0.0004893533802460422, 'samples': 8089088, 'steps': 15798, 'loss/train': 2.7890186309814453} +03/04/2022 08:01:21 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 08:01:26 - INFO - codeparrot_training - Step 15799: {'lr': 0.0004893518480278658, 'samples': 8089600, 'steps': 15799, 'loss/train': 1.7985717058181763} +03/04/2022 08:01:29 - INFO - codeparrot_training - Step 15800: {'lr': 0.0004893503157018412, 'samples': 8090112, 'steps': 15800, 'loss/train': 2.0957748889923096} +03/04/2022 08:01:30 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 08:01:34 - INFO - codeparrot_training - Step 15801: {'lr': 0.000489348783267969, 'samples': 8090624, 'steps': 15801, 'loss/train': 1.6135627031326294} +03/04/2022 08:01:38 - INFO - codeparrot_training - Step 15802: {'lr': 0.0004893472507262499, 'samples': 8091136, 'steps': 15802, 'loss/train': 2.281519889831543} +03/04/2022 08:01:38 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 08:01:43 - INFO - codeparrot_training - Step 15803: {'lr': 0.0004893457180766846, 'samples': 8091648, 'steps': 15803, 'loss/train': 0.18596215546131134} +03/04/2022 08:01:46 - INFO - codeparrot_training - Step 15804: {'lr': 0.0004893441853192739, 'samples': 8092160, 'steps': 15804, 'loss/train': 1.4705119132995605} +03/04/2022 08:01:46 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 08:01:51 - INFO - codeparrot_training - Step 15805: {'lr': 0.0004893426524540183, 'samples': 8092672, 'steps': 15805, 'loss/train': 2.06527042388916} +03/04/2022 08:01:54 - INFO - codeparrot_training - Step 15806: {'lr': 0.0004893411194809186, 'samples': 8093184, 'steps': 15806, 'loss/train': 2.124040365219116} +03/04/2022 08:01:54 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 08:02:00 - INFO - codeparrot_training - Step 15807: {'lr': 0.0004893395863999755, 'samples': 8093696, 'steps': 15807, 'loss/train': 1.6741931438446045} +03/04/2022 08:02:03 - INFO - codeparrot_training - Step 15808: {'lr': 0.0004893380532111898, 'samples': 8094208, 'steps': 15808, 'loss/train': 2.011415719985962} +03/04/2022 08:02:03 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/04/2022 08:02:08 - INFO - codeparrot_training - Step 15809: {'lr': 0.0004893365199145619, 'samples': 8094720, 'steps': 15809, 'loss/train': 1.4917219877243042} +03/04/2022 08:02:11 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 08:02:13 - INFO - codeparrot_training - Step 15810: {'lr': 0.0004893349865100927, 'samples': 8095232, 'steps': 15810, 'loss/train': 1.5114916563034058} +03/04/2022 08:02:17 - INFO - codeparrot_training - Step 15811: {'lr': 0.0004893334529977828, 'samples': 8095744, 'steps': 15811, 'loss/train': 1.9902334213256836} +03/04/2022 08:02:20 - INFO - codeparrot_training - Step 15812: {'lr': 0.0004893319193776331, 'samples': 8096256, 'steps': 15812, 'loss/train': 1.152863621711731} +03/04/2022 08:02:20 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/04/2022 08:02:25 - INFO - codeparrot_training - Step 15813: {'lr': 0.000489330385649644, 'samples': 8096768, 'steps': 15813, 'loss/train': 1.5612592697143555} +03/04/2022 08:02:28 - INFO - codeparrot_training - Step 15814: {'lr': 0.0004893288518138163, 'samples': 8097280, 'steps': 15814, 'loss/train': 2.08412766456604} +03/04/2022 08:02:28 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 08:02:34 - INFO - codeparrot_training - Step 15815: {'lr': 0.0004893273178701508, 'samples': 8097792, 'steps': 15815, 'loss/train': 3.6469032764434814} +03/04/2022 08:02:36 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 08:02:39 - INFO - codeparrot_training - Step 15816: {'lr': 0.0004893257838186481, 'samples': 8098304, 'steps': 15816, 'loss/train': 1.4669967889785767} +03/04/2022 08:02:42 - INFO - codeparrot_training - Step 15817: {'lr': 0.0004893242496593089, 'samples': 8098816, 'steps': 15817, 'loss/train': 2.26248836517334} +03/04/2022 08:02:45 - INFO - codeparrot_training - Step 15818: {'lr': 0.0004893227153921338, 'samples': 8099328, 'steps': 15818, 'loss/train': 6.722436428070068} +03/04/2022 08:02:46 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 08:02:51 - INFO - codeparrot_training - Step 15819: {'lr': 0.0004893211810171237, 'samples': 8099840, 'steps': 15819, 'loss/train': 2.182323455810547} +03/04/2022 08:02:54 - INFO - codeparrot_training - Step 15820: {'lr': 0.0004893196465342791, 'samples': 8100352, 'steps': 15820, 'loss/train': 1.7550013065338135} +03/04/2022 08:02:55 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 08:02:59 - INFO - codeparrot_training - Step 15821: {'lr': 0.0004893181119436007, 'samples': 8100864, 'steps': 15821, 'loss/train': 2.352924346923828} +03/04/2022 08:03:02 - INFO - codeparrot_training - Step 15822: {'lr': 0.0004893165772450893, 'samples': 8101376, 'steps': 15822, 'loss/train': 1.8123749494552612} +03/04/2022 08:03:03 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/04/2022 08:03:08 - INFO - codeparrot_training - Step 15823: {'lr': 0.0004893150424387456, 'samples': 8101888, 'steps': 15823, 'loss/train': 1.9320213794708252} +03/04/2022 08:03:11 - INFO - codeparrot_training - Step 15824: {'lr': 0.0004893135075245702, 'samples': 8102400, 'steps': 15824, 'loss/train': 0.6169528365135193} +03/04/2022 08:03:11 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 08:03:16 - INFO - codeparrot_training - Step 15825: {'lr': 0.0004893119725025639, 'samples': 8102912, 'steps': 15825, 'loss/train': 1.7955065965652466} +03/04/2022 08:03:19 - INFO - codeparrot_training - Step 15826: {'lr': 0.0004893104373727272, 'samples': 8103424, 'steps': 15826, 'loss/train': 1.3975342512130737} +03/04/2022 08:03:20 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/04/2022 08:03:25 - INFO - codeparrot_training - Step 15827: {'lr': 0.0004893089021350609, 'samples': 8103936, 'steps': 15827, 'loss/train': 1.7311853170394897} +03/04/2022 08:03:28 - INFO - codeparrot_training - Step 15828: {'lr': 0.0004893073667895658, 'samples': 8104448, 'steps': 15828, 'loss/train': 0.5723534822463989} +03/04/2022 08:03:28 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 08:03:33 - INFO - codeparrot_training - Step 15829: {'lr': 0.0004893058313362424, 'samples': 8104960, 'steps': 15829, 'loss/train': 2.4262561798095703} +03/04/2022 08:03:36 - INFO - codeparrot_training - Step 15830: {'lr': 0.0004893042957750916, 'samples': 8105472, 'steps': 15830, 'loss/train': 1.6608119010925293} +03/04/2022 08:03:37 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 08:03:42 - INFO - codeparrot_training - Step 15831: {'lr': 0.0004893027601061138, 'samples': 8105984, 'steps': 15831, 'loss/train': 2.279754161834717} +03/04/2022 08:03:45 - INFO - codeparrot_training - Step 15832: {'lr': 0.00048930122432931, 'samples': 8106496, 'steps': 15832, 'loss/train': 2.34913969039917} +03/04/2022 08:03:46 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 08:03:50 - INFO - codeparrot_training - Step 15833: {'lr': 0.0004892996884446807, 'samples': 8107008, 'steps': 15833, 'loss/train': 1.6687400341033936} +03/04/2022 08:03:54 - INFO - codeparrot_training - Step 15834: {'lr': 0.0004892981524522267, 'samples': 8107520, 'steps': 15834, 'loss/train': 1.4493680000305176} +03/04/2022 08:03:55 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 08:03:59 - INFO - codeparrot_training - Step 15835: {'lr': 0.0004892966163519487, 'samples': 8108032, 'steps': 15835, 'loss/train': 2.158994197845459} +03/04/2022 08:04:02 - INFO - codeparrot_training - Step 15836: {'lr': 0.0004892950801438472, 'samples': 8108544, 'steps': 15836, 'loss/train': 2.445634603500366} +03/04/2022 08:04:04 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 08:04:08 - INFO - codeparrot_training - Step 15837: {'lr': 0.0004892935438279231, 'samples': 8109056, 'steps': 15837, 'loss/train': 2.107288360595703} +03/04/2022 08:04:11 - INFO - codeparrot_training - Step 15838: {'lr': 0.0004892920074041771, 'samples': 8109568, 'steps': 15838, 'loss/train': 1.5856229066848755} +03/04/2022 08:04:14 - INFO - codeparrot_training - Step 15839: {'lr': 0.0004892904708726096, 'samples': 8110080, 'steps': 15839, 'loss/train': 2.4564788341522217} +03/04/2022 08:04:16 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 08:04:20 - INFO - codeparrot_training - Step 15840: {'lr': 0.0004892889342332218, 'samples': 8110592, 'steps': 15840, 'loss/train': 2.463087797164917} +03/04/2022 08:04:23 - INFO - codeparrot_training - Step 15841: {'lr': 0.000489287397486014, 'samples': 8111104, 'steps': 15841, 'loss/train': 2.2456350326538086} +03/04/2022 08:04:24 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 08:04:28 - INFO - codeparrot_training - Step 15842: {'lr': 0.0004892858606309868, 'samples': 8111616, 'steps': 15842, 'loss/train': 2.015606164932251} +03/04/2022 08:04:31 - INFO - codeparrot_training - Step 15843: {'lr': 0.0004892843236681412, 'samples': 8112128, 'steps': 15843, 'loss/train': 2.396704912185669} +03/04/2022 08:04:33 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 08:04:37 - INFO - codeparrot_training - Step 15844: {'lr': 0.0004892827865974779, 'samples': 8112640, 'steps': 15844, 'loss/train': 2.3219165802001953} +03/04/2022 08:04:40 - INFO - codeparrot_training - Step 15845: {'lr': 0.0004892812494189973, 'samples': 8113152, 'steps': 15845, 'loss/train': 2.236128330230713} +03/04/2022 08:04:41 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 08:04:45 - INFO - codeparrot_training - Step 15846: {'lr': 0.0004892797121327003, 'samples': 8113664, 'steps': 15846, 'loss/train': 1.9846620559692383} +03/04/2022 08:04:48 - INFO - codeparrot_training - Step 15847: {'lr': 0.0004892781747385876, 'samples': 8114176, 'steps': 15847, 'loss/train': 1.8854482173919678} +03/04/2022 08:04:49 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 08:04:53 - INFO - codeparrot_training - Step 15848: {'lr': 0.0004892766372366598, 'samples': 8114688, 'steps': 15848, 'loss/train': 2.14094877243042} +03/04/2022 08:04:57 - INFO - codeparrot_training - Step 15849: {'lr': 0.0004892750996269177, 'samples': 8115200, 'steps': 15849, 'loss/train': 1.92839777469635} +03/04/2022 08:04:58 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 08:05:02 - INFO - codeparrot_training - Step 15850: {'lr': 0.0004892735619093618, 'samples': 8115712, 'steps': 15850, 'loss/train': 1.8320256471633911} +03/04/2022 08:05:05 - INFO - codeparrot_training - Step 15851: {'lr': 0.0004892720240839931, 'samples': 8116224, 'steps': 15851, 'loss/train': 1.133046269416809} +03/04/2022 08:05:06 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 08:05:10 - INFO - codeparrot_training - Step 15852: {'lr': 0.0004892704861508121, 'samples': 8116736, 'steps': 15852, 'loss/train': 1.5446724891662598} +03/04/2022 08:05:14 - INFO - codeparrot_training - Step 15853: {'lr': 0.0004892689481098193, 'samples': 8117248, 'steps': 15853, 'loss/train': 0.6465367674827576} +03/04/2022 08:05:15 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 08:05:19 - INFO - codeparrot_training - Step 15854: {'lr': 0.0004892674099610158, 'samples': 8117760, 'steps': 15854, 'loss/train': 2.0504391193389893} +03/04/2022 08:05:22 - INFO - codeparrot_training - Step 15855: {'lr': 0.000489265871704402, 'samples': 8118272, 'steps': 15855, 'loss/train': 1.7675758600234985} +03/04/2022 08:05:23 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 08:05:27 - INFO - codeparrot_training - Step 15856: {'lr': 0.0004892643333399788, 'samples': 8118784, 'steps': 15856, 'loss/train': 1.8653327226638794} +03/04/2022 08:05:31 - INFO - codeparrot_training - Step 15857: {'lr': 0.0004892627948677467, 'samples': 8119296, 'steps': 15857, 'loss/train': 1.6817375421524048} +03/04/2022 08:05:32 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/04/2022 08:05:36 - INFO - codeparrot_training - Step 15858: {'lr': 0.0004892612562877066, 'samples': 8119808, 'steps': 15858, 'loss/train': 2.5121445655822754} +03/04/2022 08:05:39 - INFO - codeparrot_training - Step 15859: {'lr': 0.0004892597175998589, 'samples': 8120320, 'steps': 15859, 'loss/train': 2.230720281600952} +03/04/2022 08:05:40 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/04/2022 08:05:44 - INFO - codeparrot_training - Step 15860: {'lr': 0.0004892581788042045, 'samples': 8120832, 'steps': 15860, 'loss/train': 1.6992627382278442} +03/04/2022 08:05:47 - INFO - codeparrot_training - Step 15861: {'lr': 0.0004892566399007441, 'samples': 8121344, 'steps': 15861, 'loss/train': 2.367680788040161} +03/04/2022 08:05:48 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 08:05:53 - INFO - codeparrot_training - Step 15862: {'lr': 0.0004892551008894784, 'samples': 8121856, 'steps': 15862, 'loss/train': 1.3575867414474487} +03/04/2022 08:05:56 - INFO - codeparrot_training - Step 15863: {'lr': 0.0004892535617704079, 'samples': 8122368, 'steps': 15863, 'loss/train': 1.7326768636703491} +03/04/2022 08:05:57 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 08:06:01 - INFO - codeparrot_training - Step 15864: {'lr': 0.0004892520225435336, 'samples': 8122880, 'steps': 15864, 'loss/train': 2.236912488937378} +03/04/2022 08:06:04 - INFO - codeparrot_training - Step 15865: {'lr': 0.000489250483208856, 'samples': 8123392, 'steps': 15865, 'loss/train': 1.875346064567566} +03/04/2022 08:06:05 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 08:06:10 - INFO - codeparrot_training - Step 15866: {'lr': 0.0004892489437663758, 'samples': 8123904, 'steps': 15866, 'loss/train': 1.5783551931381226} +03/04/2022 08:06:13 - INFO - codeparrot_training - Step 15867: {'lr': 0.0004892474042160936, 'samples': 8124416, 'steps': 15867, 'loss/train': 0.7856854200363159} +03/04/2022 08:06:16 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 08:06:18 - INFO - codeparrot_training - Step 15868: {'lr': 0.0004892458645580103, 'samples': 8124928, 'steps': 15868, 'loss/train': 1.161672592163086} +03/04/2022 08:06:21 - INFO - codeparrot_training - Step 15869: {'lr': 0.0004892443247921265, 'samples': 8125440, 'steps': 15869, 'loss/train': 2.6213581562042236} +03/04/2022 08:06:24 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/04/2022 08:06:27 - INFO - codeparrot_training - Step 15870: {'lr': 0.0004892427849184428, 'samples': 8125952, 'steps': 15870, 'loss/train': 2.2652716636657715} +03/04/2022 08:06:30 - INFO - codeparrot_training - Step 15871: {'lr': 0.0004892412449369602, 'samples': 8126464, 'steps': 15871, 'loss/train': 1.668884038925171} +03/04/2022 08:06:32 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 08:06:35 - INFO - codeparrot_training - Step 15872: {'lr': 0.0004892397048476791, 'samples': 8126976, 'steps': 15872, 'loss/train': 2.220470666885376} +03/04/2022 08:06:38 - INFO - codeparrot_training - Step 15873: {'lr': 0.0004892381646506002, 'samples': 8127488, 'steps': 15873, 'loss/train': 1.9844043254852295} +03/04/2022 08:06:42 - INFO - codeparrot_training - Step 15874: {'lr': 0.0004892366243457244, 'samples': 8128000, 'steps': 15874, 'loss/train': 1.0987316370010376} +03/04/2022 08:06:42 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/04/2022 08:06:47 - INFO - codeparrot_training - Step 15875: {'lr': 0.0004892350839330522, 'samples': 8128512, 'steps': 15875, 'loss/train': 1.033462405204773} +03/04/2022 08:06:50 - INFO - codeparrot_training - Step 15876: {'lr': 0.0004892335434125844, 'samples': 8129024, 'steps': 15876, 'loss/train': 1.4013376235961914} +03/04/2022 08:06:50 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 08:06:55 - INFO - codeparrot_training - Step 15877: {'lr': 0.0004892320027843216, 'samples': 8129536, 'steps': 15877, 'loss/train': 0.6777030229568481} +03/04/2022 08:06:59 - INFO - codeparrot_training - Step 15878: {'lr': 0.0004892304620482646, 'samples': 8130048, 'steps': 15878, 'loss/train': 1.9155230522155762} +03/04/2022 08:06:59 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/04/2022 08:07:04 - INFO - codeparrot_training - Step 15879: {'lr': 0.000489228921204414, 'samples': 8130560, 'steps': 15879, 'loss/train': 1.9339735507965088} +03/04/2022 08:07:07 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 08:07:09 - INFO - codeparrot_training - Step 15880: {'lr': 0.0004892273802527706, 'samples': 8131072, 'steps': 15880, 'loss/train': 2.108027219772339} +03/04/2022 08:07:12 - INFO - codeparrot_training - Step 15881: {'lr': 0.000489225839193335, 'samples': 8131584, 'steps': 15881, 'loss/train': 1.762894630432129} +03/04/2022 08:07:15 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/04/2022 08:07:17 - INFO - codeparrot_training - Step 15882: {'lr': 0.0004892242980261079, 'samples': 8132096, 'steps': 15882, 'loss/train': 2.032097816467285} +03/04/2022 08:07:21 - INFO - codeparrot_training - Step 15883: {'lr': 0.0004892227567510901, 'samples': 8132608, 'steps': 15883, 'loss/train': 1.822227120399475} +03/04/2022 08:07:23 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 08:07:26 - INFO - codeparrot_training - Step 15884: {'lr': 0.0004892212153682822, 'samples': 8133120, 'steps': 15884, 'loss/train': 1.586495280265808} +03/04/2022 08:07:30 - INFO - codeparrot_training - Step 15885: {'lr': 0.0004892196738776848, 'samples': 8133632, 'steps': 15885, 'loss/train': 1.091257929801941} +03/04/2022 08:07:33 - INFO - codeparrot_training - Step 15886: {'lr': 0.0004892181322792989, 'samples': 8134144, 'steps': 15886, 'loss/train': 2.4290037155151367} +03/04/2022 08:07:34 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 08:07:38 - INFO - codeparrot_training - Step 15887: {'lr': 0.0004892165905731248, 'samples': 8134656, 'steps': 15887, 'loss/train': 1.8909389972686768} +03/04/2022 08:07:41 - INFO - codeparrot_training - Step 15888: {'lr': 0.0004892150487591635, 'samples': 8135168, 'steps': 15888, 'loss/train': 1.070261836051941} +03/04/2022 08:07:43 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 08:07:46 - INFO - codeparrot_training - Step 15889: {'lr': 0.0004892135068374156, 'samples': 8135680, 'steps': 15889, 'loss/train': 1.9358346462249756} +03/04/2022 08:07:50 - INFO - codeparrot_training - Step 15890: {'lr': 0.0004892119648078817, 'samples': 8136192, 'steps': 15890, 'loss/train': 2.5113108158111572} +03/04/2022 08:07:51 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 08:07:55 - INFO - codeparrot_training - Step 15891: {'lr': 0.0004892104226705627, 'samples': 8136704, 'steps': 15891, 'loss/train': 1.4813250303268433} +03/04/2022 08:07:58 - INFO - codeparrot_training - Step 15892: {'lr': 0.0004892088804254591, 'samples': 8137216, 'steps': 15892, 'loss/train': 1.735444188117981} +03/04/2022 08:08:00 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 08:08:03 - INFO - codeparrot_training - Step 15893: {'lr': 0.0004892073380725716, 'samples': 8137728, 'steps': 15893, 'loss/train': 0.21478936076164246} +03/04/2022 08:08:06 - INFO - codeparrot_training - Step 15894: {'lr': 0.0004892057956119012, 'samples': 8138240, 'steps': 15894, 'loss/train': 2.0290284156799316} +03/04/2022 08:08:08 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 08:08:12 - INFO - codeparrot_training - Step 15895: {'lr': 0.0004892042530434482, 'samples': 8138752, 'steps': 15895, 'loss/train': 1.847808599472046} +03/04/2022 08:08:15 - INFO - codeparrot_training - Step 15896: {'lr': 0.0004892027103672134, 'samples': 8139264, 'steps': 15896, 'loss/train': 2.3561031818389893} +03/04/2022 08:08:17 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 08:08:20 - INFO - codeparrot_training - Step 15897: {'lr': 0.0004892011675831976, 'samples': 8139776, 'steps': 15897, 'loss/train': 1.825137972831726} +03/04/2022 08:08:23 - INFO - codeparrot_training - Step 15898: {'lr': 0.0004891996246914014, 'samples': 8140288, 'steps': 15898, 'loss/train': 2.7217257022857666} +03/04/2022 08:08:25 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 08:08:28 - INFO - codeparrot_training - Step 15899: {'lr': 0.0004891980816918257, 'samples': 8140800, 'steps': 15899, 'loss/train': 2.161484956741333} +03/04/2022 08:08:32 - INFO - codeparrot_training - Step 15900: {'lr': 0.0004891965385844709, 'samples': 8141312, 'steps': 15900, 'loss/train': 1.3729290962219238} +03/04/2022 08:08:33 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 08:08:37 - INFO - codeparrot_training - Step 15901: {'lr': 0.0004891949953693378, 'samples': 8141824, 'steps': 15901, 'loss/train': 1.8371444940567017} +03/04/2022 08:08:40 - INFO - codeparrot_training - Step 15902: {'lr': 0.0004891934520464273, 'samples': 8142336, 'steps': 15902, 'loss/train': 1.0400646924972534} +03/04/2022 08:08:42 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 08:08:45 - INFO - codeparrot_training - Step 15903: {'lr': 0.0004891919086157398, 'samples': 8142848, 'steps': 15903, 'loss/train': 1.1689378023147583} +03/04/2022 08:08:49 - INFO - codeparrot_training - Step 15904: {'lr': 0.000489190365077276, 'samples': 8143360, 'steps': 15904, 'loss/train': 1.826979637145996} +03/04/2022 08:08:50 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 08:08:54 - INFO - codeparrot_training - Step 15905: {'lr': 0.0004891888214310369, 'samples': 8143872, 'steps': 15905, 'loss/train': 2.569539785385132} +03/04/2022 08:08:57 - INFO - codeparrot_training - Step 15906: {'lr': 0.000489187277677023, 'samples': 8144384, 'steps': 15906, 'loss/train': 2.307072639465332} +03/04/2022 08:08:58 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 08:09:03 - INFO - codeparrot_training - Step 15907: {'lr': 0.000489185733815235, 'samples': 8144896, 'steps': 15907, 'loss/train': 1.8718996047973633} +03/04/2022 08:09:06 - INFO - codeparrot_training - Step 15908: {'lr': 0.0004891841898456735, 'samples': 8145408, 'steps': 15908, 'loss/train': 2.5906143188476562} +03/04/2022 08:09:08 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/04/2022 08:09:11 - INFO - codeparrot_training - Step 15909: {'lr': 0.0004891826457683394, 'samples': 8145920, 'steps': 15909, 'loss/train': 2.35233473777771} +03/04/2022 08:09:14 - INFO - codeparrot_training - Step 15910: {'lr': 0.0004891811015832332, 'samples': 8146432, 'steps': 15910, 'loss/train': 2.5454862117767334} +03/04/2022 08:09:16 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 08:09:19 - INFO - codeparrot_training - Step 15911: {'lr': 0.0004891795572903557, 'samples': 8146944, 'steps': 15911, 'loss/train': 2.1073851585388184} +03/04/2022 08:09:22 - INFO - codeparrot_training - Step 15912: {'lr': 0.0004891780128897077, 'samples': 8147456, 'steps': 15912, 'loss/train': 1.6469091176986694} +03/04/2022 08:09:24 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 08:09:28 - INFO - codeparrot_training - Step 15913: {'lr': 0.0004891764683812896, 'samples': 8147968, 'steps': 15913, 'loss/train': 1.3148953914642334} +03/04/2022 08:09:31 - INFO - codeparrot_training - Step 15914: {'lr': 0.0004891749237651024, 'samples': 8148480, 'steps': 15914, 'loss/train': 1.747546911239624} +03/04/2022 08:09:33 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 08:09:36 - INFO - codeparrot_training - Step 15915: {'lr': 0.0004891733790411466, 'samples': 8148992, 'steps': 15915, 'loss/train': 2.2069268226623535} +03/04/2022 08:09:39 - INFO - codeparrot_training - Step 15916: {'lr': 0.000489171834209423, 'samples': 8149504, 'steps': 15916, 'loss/train': 1.907037615776062} +03/04/2022 08:09:41 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 08:09:44 - INFO - codeparrot_training - Step 15917: {'lr': 0.0004891702892699323, 'samples': 8150016, 'steps': 15917, 'loss/train': 2.838202476501465} +03/04/2022 08:09:48 - INFO - codeparrot_training - Step 15918: {'lr': 0.0004891687442226751, 'samples': 8150528, 'steps': 15918, 'loss/train': 1.1139270067214966} +03/04/2022 08:09:49 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 08:09:53 - INFO - codeparrot_training - Step 15919: {'lr': 0.0004891671990676522, 'samples': 8151040, 'steps': 15919, 'loss/train': 1.6333458423614502} +03/04/2022 08:09:56 - INFO - codeparrot_training - Step 15920: {'lr': 0.0004891656538048642, 'samples': 8151552, 'steps': 15920, 'loss/train': 0.8280512690544128} +03/04/2022 08:09:58 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 08:10:01 - INFO - codeparrot_training - Step 15921: {'lr': 0.0004891641084343118, 'samples': 8152064, 'steps': 15921, 'loss/train': 0.4806496798992157} +03/04/2022 08:10:04 - INFO - codeparrot_training - Step 15922: {'lr': 0.0004891625629559959, 'samples': 8152576, 'steps': 15922, 'loss/train': 1.7383266687393188} +03/04/2022 08:10:06 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 08:10:10 - INFO - codeparrot_training - Step 15923: {'lr': 0.0004891610173699169, 'samples': 8153088, 'steps': 15923, 'loss/train': 0.9154915809631348} +03/04/2022 08:10:13 - INFO - codeparrot_training - Step 15924: {'lr': 0.0004891594716760757, 'samples': 8153600, 'steps': 15924, 'loss/train': 1.2812516689300537} +03/04/2022 08:10:15 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 08:10:18 - INFO - codeparrot_training - Step 15925: {'lr': 0.0004891579258744728, 'samples': 8154112, 'steps': 15925, 'loss/train': 1.7019883394241333} +03/04/2022 08:10:21 - INFO - codeparrot_training - Step 15926: {'lr': 0.0004891563799651092, 'samples': 8154624, 'steps': 15926, 'loss/train': 1.9730263948440552} +03/04/2022 08:10:23 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 08:10:27 - INFO - codeparrot_training - Step 15927: {'lr': 0.0004891548339479854, 'samples': 8155136, 'steps': 15927, 'loss/train': 2.4476823806762695} +03/04/2022 08:10:30 - INFO - codeparrot_training - Step 15928: {'lr': 0.0004891532878231021, 'samples': 8155648, 'steps': 15928, 'loss/train': 0.9744095802307129} +03/04/2022 08:10:32 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 08:10:35 - INFO - codeparrot_training - Step 15929: {'lr': 0.00048915174159046, 'samples': 8156160, 'steps': 15929, 'loss/train': 2.172797441482544} +03/04/2022 08:10:38 - INFO - codeparrot_training - Step 15930: {'lr': 0.0004891501952500599, 'samples': 8156672, 'steps': 15930, 'loss/train': 1.408158302307129} +03/04/2022 08:10:40 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/04/2022 08:10:43 - INFO - codeparrot_training - Step 15931: {'lr': 0.0004891486488019023, 'samples': 8157184, 'steps': 15931, 'loss/train': 2.1941165924072266} +03/04/2022 08:10:47 - INFO - codeparrot_training - Step 15932: {'lr': 0.000489147102245988, 'samples': 8157696, 'steps': 15932, 'loss/train': 1.90802800655365} +03/04/2022 08:10:48 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 08:10:52 - INFO - codeparrot_training - Step 15933: {'lr': 0.0004891455555823179, 'samples': 8158208, 'steps': 15933, 'loss/train': 1.3605841398239136} +03/04/2022 08:10:55 - INFO - codeparrot_training - Step 15934: {'lr': 0.0004891440088108923, 'samples': 8158720, 'steps': 15934, 'loss/train': 2.020583391189575} +03/04/2022 08:10:57 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 08:11:01 - INFO - codeparrot_training - Step 15935: {'lr': 0.0004891424619317121, 'samples': 8159232, 'steps': 15935, 'loss/train': 0.9181253910064697} +03/04/2022 08:11:04 - INFO - codeparrot_training - Step 15936: {'lr': 0.000489140914944778, 'samples': 8159744, 'steps': 15936, 'loss/train': 1.0891193151474} +03/04/2022 08:11:06 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 08:11:09 - INFO - codeparrot_training - Step 15937: {'lr': 0.0004891393678500909, 'samples': 8160256, 'steps': 15937, 'loss/train': 1.5200639963150024} +03/04/2022 08:11:12 - INFO - codeparrot_training - Step 15938: {'lr': 0.0004891378206476511, 'samples': 8160768, 'steps': 15938, 'loss/train': 1.6848254203796387} +03/04/2022 08:11:14 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 08:11:17 - INFO - codeparrot_training - Step 15939: {'lr': 0.0004891362733374595, 'samples': 8161280, 'steps': 15939, 'loss/train': 1.5263921022415161} +03/04/2022 08:11:21 - INFO - codeparrot_training - Step 15940: {'lr': 0.0004891347259195168, 'samples': 8161792, 'steps': 15940, 'loss/train': 1.1496570110321045} +03/04/2022 08:11:23 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 08:11:26 - INFO - codeparrot_training - Step 15941: {'lr': 0.0004891331783938238, 'samples': 8162304, 'steps': 15941, 'loss/train': 1.8480569124221802} +03/04/2022 08:11:29 - INFO - codeparrot_training - Step 15942: {'lr': 0.000489131630760381, 'samples': 8162816, 'steps': 15942, 'loss/train': 0.8411359190940857} +03/04/2022 08:11:31 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/04/2022 08:11:34 - INFO - codeparrot_training - Step 15943: {'lr': 0.000489130083019189, 'samples': 8163328, 'steps': 15943, 'loss/train': 2.0692622661590576} +03/04/2022 08:11:37 - INFO - codeparrot_training - Step 15944: {'lr': 0.000489128535170249, 'samples': 8163840, 'steps': 15944, 'loss/train': 2.2139692306518555} +03/04/2022 08:11:40 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 08:11:43 - INFO - codeparrot_training - Step 15945: {'lr': 0.0004891269872135611, 'samples': 8164352, 'steps': 15945, 'loss/train': 1.6871144771575928} +03/04/2022 08:11:46 - INFO - codeparrot_training - Step 15946: {'lr': 0.0004891254391491264, 'samples': 8164864, 'steps': 15946, 'loss/train': 1.8881088495254517} +03/04/2022 08:11:48 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 08:11:51 - INFO - codeparrot_training - Step 15947: {'lr': 0.0004891238909769454, 'samples': 8165376, 'steps': 15947, 'loss/train': 1.7407503128051758} +03/04/2022 08:11:54 - INFO - codeparrot_training - Step 15948: {'lr': 0.0004891223426970189, 'samples': 8165888, 'steps': 15948, 'loss/train': 2.3193178176879883} +03/04/2022 08:11:56 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/04/2022 08:12:00 - INFO - codeparrot_training - Step 15949: {'lr': 0.0004891207943093476, 'samples': 8166400, 'steps': 15949, 'loss/train': 1.7809083461761475} +03/04/2022 08:12:03 - INFO - codeparrot_training - Step 15950: {'lr': 0.000489119245813932, 'samples': 8166912, 'steps': 15950, 'loss/train': 1.8434104919433594} +03/04/2022 08:12:05 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 08:12:08 - INFO - codeparrot_training - Step 15951: {'lr': 0.0004891176972107731, 'samples': 8167424, 'steps': 15951, 'loss/train': 2.105038642883301} +03/04/2022 08:12:11 - INFO - codeparrot_training - Step 15952: {'lr': 0.0004891161484998715, 'samples': 8167936, 'steps': 15952, 'loss/train': 1.9321300983428955} +03/04/2022 08:12:13 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/04/2022 08:12:17 - INFO - codeparrot_training - Step 15953: {'lr': 0.0004891145996812279, 'samples': 8168448, 'steps': 15953, 'loss/train': 2.2044837474823} +03/04/2022 08:12:20 - INFO - codeparrot_training - Step 15954: {'lr': 0.0004891130507548427, 'samples': 8168960, 'steps': 15954, 'loss/train': 1.4614242315292358} +03/04/2022 08:12:22 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/04/2022 08:12:25 - INFO - codeparrot_training - Step 15955: {'lr': 0.000489111501720717, 'samples': 8169472, 'steps': 15955, 'loss/train': 1.4192380905151367} +03/04/2022 08:12:28 - INFO - codeparrot_training - Step 15956: {'lr': 0.0004891099525788514, 'samples': 8169984, 'steps': 15956, 'loss/train': 2.2037250995635986} +03/04/2022 08:12:30 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 08:12:33 - INFO - codeparrot_training - Step 15957: {'lr': 0.0004891084033292464, 'samples': 8170496, 'steps': 15957, 'loss/train': 1.7308428287506104} +03/04/2022 08:12:37 - INFO - codeparrot_training - Step 15958: {'lr': 0.0004891068539719031, 'samples': 8171008, 'steps': 15958, 'loss/train': 2.250924825668335} +03/04/2022 08:12:39 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 08:12:42 - INFO - codeparrot_training - Step 15959: {'lr': 0.0004891053045068217, 'samples': 8171520, 'steps': 15959, 'loss/train': 2.1372478008270264} +03/04/2022 08:12:45 - INFO - codeparrot_training - Step 15960: {'lr': 0.0004891037549340032, 'samples': 8172032, 'steps': 15960, 'loss/train': 1.8430269956588745} +03/04/2022 08:12:48 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 08:12:50 - INFO - codeparrot_training - Step 15961: {'lr': 0.0004891022052534482, 'samples': 8172544, 'steps': 15961, 'loss/train': 1.8960281610488892} +03/04/2022 08:12:54 - INFO - codeparrot_training - Step 15962: {'lr': 0.0004891006554651574, 'samples': 8173056, 'steps': 15962, 'loss/train': 1.3815077543258667} +03/04/2022 08:12:56 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 08:12:59 - INFO - codeparrot_training - Step 15963: {'lr': 0.0004890991055691318, 'samples': 8173568, 'steps': 15963, 'loss/train': 1.8064050674438477} +03/04/2022 08:13:02 - INFO - codeparrot_training - Step 15964: {'lr': 0.0004890975555653716, 'samples': 8174080, 'steps': 15964, 'loss/train': 1.4979465007781982} +03/04/2022 08:13:04 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 08:13:07 - INFO - codeparrot_training - Step 15965: {'lr': 0.0004890960054538778, 'samples': 8174592, 'steps': 15965, 'loss/train': 2.520284652709961} +03/04/2022 08:13:11 - INFO - codeparrot_training - Step 15966: {'lr': 0.000489094455234651, 'samples': 8175104, 'steps': 15966, 'loss/train': 2.3158254623413086} +03/04/2022 08:13:13 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/04/2022 08:13:16 - INFO - codeparrot_training - Step 15967: {'lr': 0.0004890929049076919, 'samples': 8175616, 'steps': 15967, 'loss/train': 1.9096425771713257} +03/04/2022 08:13:19 - INFO - codeparrot_training - Step 15968: {'lr': 0.0004890913544730013, 'samples': 8176128, 'steps': 15968, 'loss/train': 1.879757046699524} +03/04/2022 08:13:21 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 08:13:24 - INFO - codeparrot_training - Step 15969: {'lr': 0.0004890898039305798, 'samples': 8176640, 'steps': 15969, 'loss/train': 1.1629698276519775} +03/04/2022 08:13:27 - INFO - codeparrot_training - Step 15970: {'lr': 0.000489088253280428, 'samples': 8177152, 'steps': 15970, 'loss/train': 1.643641710281372} +03/04/2022 08:13:30 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 08:13:33 - INFO - codeparrot_training - Step 15971: {'lr': 0.0004890867025225469, 'samples': 8177664, 'steps': 15971, 'loss/train': 1.6386851072311401} +03/04/2022 08:13:36 - INFO - codeparrot_training - Step 15972: {'lr': 0.000489085151656937, 'samples': 8178176, 'steps': 15972, 'loss/train': 1.796078085899353} +03/04/2022 08:13:38 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 08:13:41 - INFO - codeparrot_training - Step 15973: {'lr': 0.000489083600683599, 'samples': 8178688, 'steps': 15973, 'loss/train': 1.964971661567688} +03/04/2022 08:13:44 - INFO - codeparrot_training - Step 15974: {'lr': 0.0004890820496025335, 'samples': 8179200, 'steps': 15974, 'loss/train': 2.150369167327881} +03/04/2022 08:13:47 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 08:13:50 - INFO - codeparrot_training - Step 15975: {'lr': 0.0004890804984137415, 'samples': 8179712, 'steps': 15975, 'loss/train': 1.9185349941253662} +03/04/2022 08:13:53 - INFO - codeparrot_training - Step 15976: {'lr': 0.0004890789471172233, 'samples': 8180224, 'steps': 15976, 'loss/train': 1.7790374755859375} +03/04/2022 08:13:56 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 08:13:59 - INFO - codeparrot_training - Step 15977: {'lr': 0.00048907739571298, 'samples': 8180736, 'steps': 15977, 'loss/train': 1.7202320098876953} +03/04/2022 08:14:02 - INFO - codeparrot_training - Step 15978: {'lr': 0.000489075844201012, 'samples': 8181248, 'steps': 15978, 'loss/train': 1.1985440254211426} +03/04/2022 08:14:05 - INFO - codeparrot_training - Step 15979: {'lr': 0.0004890742925813202, 'samples': 8181760, 'steps': 15979, 'loss/train': 2.2064850330352783} +03/04/2022 08:14:08 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 08:14:10 - INFO - codeparrot_training - Step 15980: {'lr': 0.0004890727408539051, 'samples': 8182272, 'steps': 15980, 'loss/train': 2.1215715408325195} +03/04/2022 08:14:14 - INFO - codeparrot_training - Step 15981: {'lr': 0.0004890711890187676, 'samples': 8182784, 'steps': 15981, 'loss/train': 1.9274007081985474} +03/04/2022 08:14:16 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 08:14:19 - INFO - codeparrot_training - Step 15982: {'lr': 0.0004890696370759085, 'samples': 8183296, 'steps': 15982, 'loss/train': 2.704514503479004} +03/04/2022 08:14:22 - INFO - codeparrot_training - Step 15983: {'lr': 0.0004890680850253281, 'samples': 8183808, 'steps': 15983, 'loss/train': 2.16925048828125} +03/04/2022 08:14:24 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 08:14:27 - INFO - codeparrot_training - Step 15984: {'lr': 0.0004890665328670273, 'samples': 8184320, 'steps': 15984, 'loss/train': 2.0490660667419434} +03/04/2022 08:14:30 - INFO - codeparrot_training - Step 15985: {'lr': 0.0004890649806010067, 'samples': 8184832, 'steps': 15985, 'loss/train': 1.6507622003555298} +03/04/2022 08:14:32 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 08:14:36 - INFO - codeparrot_training - Step 15986: {'lr': 0.0004890634282272673, 'samples': 8185344, 'steps': 15986, 'loss/train': 1.192713975906372} +03/04/2022 08:14:39 - INFO - codeparrot_training - Step 15987: {'lr': 0.0004890618757458096, 'samples': 8185856, 'steps': 15987, 'loss/train': 2.0257797241210938} +03/04/2022 08:14:41 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 08:14:44 - INFO - codeparrot_training - Step 15988: {'lr': 0.0004890603231566343, 'samples': 8186368, 'steps': 15988, 'loss/train': 2.0535452365875244} +03/04/2022 08:14:47 - INFO - codeparrot_training - Step 15989: {'lr': 0.000489058770459742, 'samples': 8186880, 'steps': 15989, 'loss/train': 1.711564302444458} +03/04/2022 08:14:49 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/04/2022 08:14:53 - INFO - codeparrot_training - Step 15990: {'lr': 0.0004890572176551337, 'samples': 8187392, 'steps': 15990, 'loss/train': 1.0720800161361694} +03/04/2022 08:14:56 - INFO - codeparrot_training - Step 15991: {'lr': 0.0004890556647428097, 'samples': 8187904, 'steps': 15991, 'loss/train': 2.0773980617523193} +03/04/2022 08:14:57 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 08:15:01 - INFO - codeparrot_training - Step 15992: {'lr': 0.0004890541117227711, 'samples': 8188416, 'steps': 15992, 'loss/train': 2.21012282371521} +03/04/2022 08:15:04 - INFO - codeparrot_training - Step 15993: {'lr': 0.0004890525585950181, 'samples': 8188928, 'steps': 15993, 'loss/train': 2.4045891761779785} +03/04/2022 08:15:06 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 08:15:09 - INFO - codeparrot_training - Step 15994: {'lr': 0.000489051005359552, 'samples': 8189440, 'steps': 15994, 'loss/train': 1.3287371397018433} +03/04/2022 08:15:13 - INFO - codeparrot_training - Step 15995: {'lr': 0.0004890494520163731, 'samples': 8189952, 'steps': 15995, 'loss/train': 2.5904393196105957} +03/04/2022 08:15:14 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 08:15:18 - INFO - codeparrot_training - Step 15996: {'lr': 0.0004890478985654823, 'samples': 8190464, 'steps': 15996, 'loss/train': 2.031301736831665} +03/04/2022 08:15:21 - INFO - codeparrot_training - Step 15997: {'lr': 0.0004890463450068801, 'samples': 8190976, 'steps': 15997, 'loss/train': 2.444502353668213} +03/04/2022 08:15:22 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 08:15:26 - INFO - codeparrot_training - Step 15998: {'lr': 0.0004890447913405673, 'samples': 8191488, 'steps': 15998, 'loss/train': 1.9029850959777832} +03/04/2022 08:15:30 - INFO - codeparrot_training - Step 15999: {'lr': 0.0004890432375665447, 'samples': 8192000, 'steps': 15999, 'loss/train': 1.806227207183838} +03/04/2022 08:15:31 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 08:15:35 - INFO - codeparrot_training - Step 16000: {'lr': 0.0004890416836848127, 'samples': 8192512, 'steps': 16000, 'loss/train': 2.837110996246338} +03/04/2022 08:15:38 - INFO - codeparrot_training - Step 16001: {'lr': 0.0004890401296953723, 'samples': 8193024, 'steps': 16001, 'loss/train': 1.0709452629089355} +03/04/2022 08:15:40 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 08:15:44 - INFO - codeparrot_training - Step 16002: {'lr': 0.0004890385755982243, 'samples': 8193536, 'steps': 16002, 'loss/train': 2.1455135345458984} +03/04/2022 08:15:47 - INFO - codeparrot_training - Step 16003: {'lr': 0.0004890370213933691, 'samples': 8194048, 'steps': 16003, 'loss/train': 2.4262654781341553} +03/04/2022 08:15:49 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 08:15:52 - INFO - codeparrot_training - Step 16004: {'lr': 0.0004890354670808074, 'samples': 8194560, 'steps': 16004, 'loss/train': 2.000058174133301} +03/04/2022 08:15:55 - INFO - codeparrot_training - Step 16005: {'lr': 0.0004890339126605401, 'samples': 8195072, 'steps': 16005, 'loss/train': 2.374931573867798} +03/04/2022 08:15:57 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 08:16:01 - INFO - codeparrot_training - Step 16006: {'lr': 0.0004890323581325677, 'samples': 8195584, 'steps': 16006, 'loss/train': 1.7162998914718628} +03/04/2022 08:16:04 - INFO - codeparrot_training - Step 16007: {'lr': 0.0004890308034968911, 'samples': 8196096, 'steps': 16007, 'loss/train': 2.157804012298584} +03/04/2022 08:16:06 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 08:16:09 - INFO - codeparrot_training - Step 16008: {'lr': 0.0004890292487535108, 'samples': 8196608, 'steps': 16008, 'loss/train': 1.023560643196106} +03/04/2022 08:16:12 - INFO - codeparrot_training - Step 16009: {'lr': 0.0004890276939024278, 'samples': 8197120, 'steps': 16009, 'loss/train': 2.151146173477173} +03/04/2022 08:16:14 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 08:16:18 - INFO - codeparrot_training - Step 16010: {'lr': 0.0004890261389436424, 'samples': 8197632, 'steps': 16010, 'loss/train': 2.4966530799865723} +03/04/2022 08:16:21 - INFO - codeparrot_training - Step 16011: {'lr': 0.0004890245838771557, 'samples': 8198144, 'steps': 16011, 'loss/train': 1.8235560655593872} +03/04/2022 08:16:22 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 08:16:26 - INFO - codeparrot_training - Step 16012: {'lr': 0.0004890230287029681, 'samples': 8198656, 'steps': 16012, 'loss/train': 2.27071475982666} +03/04/2022 08:16:29 - INFO - codeparrot_training - Step 16013: {'lr': 0.0004890214734210805, 'samples': 8199168, 'steps': 16013, 'loss/train': 1.4055893421173096} +03/04/2022 08:16:31 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/04/2022 08:16:35 - INFO - codeparrot_training - Step 16014: {'lr': 0.0004890199180314935, 'samples': 8199680, 'steps': 16014, 'loss/train': 1.869050145149231} +03/04/2022 08:16:38 - INFO - codeparrot_training - Step 16015: {'lr': 0.0004890183625342078, 'samples': 8200192, 'steps': 16015, 'loss/train': 1.9164482355117798} +03/04/2022 08:16:40 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 08:16:43 - INFO - codeparrot_training - Step 16016: {'lr': 0.0004890168069292241, 'samples': 8200704, 'steps': 16016, 'loss/train': 1.8110665082931519} +03/04/2022 08:16:46 - INFO - codeparrot_training - Step 16017: {'lr': 0.000489015251216543, 'samples': 8201216, 'steps': 16017, 'loss/train': 2.0182888507843018} +03/04/2022 08:16:48 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 08:16:51 - INFO - codeparrot_training - Step 16018: {'lr': 0.0004890136953961654, 'samples': 8201728, 'steps': 16018, 'loss/train': 2.110051393508911} +03/04/2022 08:16:55 - INFO - codeparrot_training - Step 16019: {'lr': 0.000489012139468092, 'samples': 8202240, 'steps': 16019, 'loss/train': 1.598767638206482} +03/04/2022 08:16:56 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/04/2022 08:17:00 - INFO - codeparrot_training - Step 16020: {'lr': 0.0004890105834323233, 'samples': 8202752, 'steps': 16020, 'loss/train': 1.3805426359176636} +03/04/2022 08:17:03 - INFO - codeparrot_training - Step 16021: {'lr': 0.0004890090272888602, 'samples': 8203264, 'steps': 16021, 'loss/train': 1.526781678199768} +03/04/2022 08:17:04 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/04/2022 08:17:08 - INFO - codeparrot_training - Step 16022: {'lr': 0.0004890074710377033, 'samples': 8203776, 'steps': 16022, 'loss/train': 1.666969656944275} +03/04/2022 08:17:11 - INFO - codeparrot_training - Step 16023: {'lr': 0.0004890059146788532, 'samples': 8204288, 'steps': 16023, 'loss/train': 1.8739378452301025} +03/04/2022 08:17:13 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 08:17:17 - INFO - codeparrot_training - Step 16024: {'lr': 0.000489004358212311, 'samples': 8204800, 'steps': 16024, 'loss/train': 2.3105900287628174} +03/04/2022 08:17:20 - INFO - codeparrot_training - Step 16025: {'lr': 0.0004890028016380769, 'samples': 8205312, 'steps': 16025, 'loss/train': 2.232114553451538} +03/04/2022 08:17:21 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 08:17:25 - INFO - codeparrot_training - Step 16026: {'lr': 0.0004890012449561518, 'samples': 8205824, 'steps': 16026, 'loss/train': 1.22097647190094} +03/04/2022 08:17:28 - INFO - codeparrot_training - Step 16027: {'lr': 0.0004889996881665366, 'samples': 8206336, 'steps': 16027, 'loss/train': 2.9566426277160645} +03/04/2022 08:17:29 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 08:17:33 - INFO - codeparrot_training - Step 16028: {'lr': 0.0004889981312692317, 'samples': 8206848, 'steps': 16028, 'loss/train': 1.058837652206421} +03/04/2022 08:17:37 - INFO - codeparrot_training - Step 16029: {'lr': 0.000488996574264238, 'samples': 8207360, 'steps': 16029, 'loss/train': 1.7570722103118896} +03/04/2022 08:17:38 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 08:17:42 - INFO - codeparrot_training - Step 16030: {'lr': 0.000488995017151556, 'samples': 8207872, 'steps': 16030, 'loss/train': 1.798453450202942} +03/04/2022 08:17:45 - INFO - codeparrot_training - Step 16031: {'lr': 0.0004889934599311867, 'samples': 8208384, 'steps': 16031, 'loss/train': 1.3538175821304321} +03/04/2022 08:17:46 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 08:17:50 - INFO - codeparrot_training - Step 16032: {'lr': 0.0004889919026031306, 'samples': 8208896, 'steps': 16032, 'loss/train': 2.4699196815490723} +03/04/2022 08:17:53 - INFO - codeparrot_training - Step 16033: {'lr': 0.0004889903451673884, 'samples': 8209408, 'steps': 16033, 'loss/train': 1.386980652809143} +03/04/2022 08:17:54 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/04/2022 08:17:59 - INFO - codeparrot_training - Step 16034: {'lr': 0.0004889887876239608, 'samples': 8209920, 'steps': 16034, 'loss/train': 1.71080482006073} +03/04/2022 08:18:02 - INFO - codeparrot_training - Step 16035: {'lr': 0.0004889872299728486, 'samples': 8210432, 'steps': 16035, 'loss/train': 2.1239185333251953} +03/04/2022 08:18:03 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 08:18:07 - INFO - codeparrot_training - Step 16036: {'lr': 0.0004889856722140525, 'samples': 8210944, 'steps': 16036, 'loss/train': 1.1463099718093872} +03/04/2022 08:18:10 - INFO - codeparrot_training - Step 16037: {'lr': 0.000488984114347573, 'samples': 8211456, 'steps': 16037, 'loss/train': 2.139702796936035} +03/04/2022 08:18:11 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 08:18:16 - INFO - codeparrot_training - Step 16038: {'lr': 0.000488982556373411, 'samples': 8211968, 'steps': 16038, 'loss/train': 1.1784212589263916} +03/04/2022 08:18:19 - INFO - codeparrot_training - Step 16039: {'lr': 0.0004889809982915672, 'samples': 8212480, 'steps': 16039, 'loss/train': 1.8799773454666138} +03/04/2022 08:18:20 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 08:18:24 - INFO - codeparrot_training - Step 16040: {'lr': 0.0004889794401020422, 'samples': 8212992, 'steps': 16040, 'loss/train': 1.89878249168396} +03/04/2022 08:18:27 - INFO - codeparrot_training - Step 16041: {'lr': 0.0004889778818048368, 'samples': 8213504, 'steps': 16041, 'loss/train': 1.3498899936676025} +03/04/2022 08:18:29 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 08:18:33 - INFO - codeparrot_training - Step 16042: {'lr': 0.0004889763233999516, 'samples': 8214016, 'steps': 16042, 'loss/train': 2.2422473430633545} +03/04/2022 08:18:36 - INFO - codeparrot_training - Step 16043: {'lr': 0.0004889747648873874, 'samples': 8214528, 'steps': 16043, 'loss/train': 2.0304276943206787} +03/04/2022 08:18:37 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/04/2022 08:18:41 - INFO - codeparrot_training - Step 16044: {'lr': 0.0004889732062671448, 'samples': 8215040, 'steps': 16044, 'loss/train': 1.253030776977539} +03/04/2022 08:18:44 - INFO - codeparrot_training - Step 16045: {'lr': 0.0004889716475392247, 'samples': 8215552, 'steps': 16045, 'loss/train': 2.686518907546997} +03/04/2022 08:18:46 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/04/2022 08:18:50 - INFO - codeparrot_training - Step 16046: {'lr': 0.0004889700887036275, 'samples': 8216064, 'steps': 16046, 'loss/train': 2.4120290279388428} +03/04/2022 08:18:53 - INFO - codeparrot_training - Step 16047: {'lr': 0.0004889685297603541, 'samples': 8216576, 'steps': 16047, 'loss/train': 1.7913895845413208} +03/04/2022 08:18:54 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 08:18:58 - INFO - codeparrot_training - Step 16048: {'lr': 0.0004889669707094052, 'samples': 8217088, 'steps': 16048, 'loss/train': 2.4196865558624268} +03/04/2022 08:19:01 - INFO - codeparrot_training - Step 16049: {'lr': 0.0004889654115507815, 'samples': 8217600, 'steps': 16049, 'loss/train': 2.511270523071289} +03/04/2022 08:19:03 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 08:19:06 - INFO - codeparrot_training - Step 16050: {'lr': 0.0004889638522844836, 'samples': 8218112, 'steps': 16050, 'loss/train': 2.014045000076294} +03/04/2022 08:19:10 - INFO - codeparrot_training - Step 16051: {'lr': 0.0004889622929105123, 'samples': 8218624, 'steps': 16051, 'loss/train': 1.580773115158081} +03/04/2022 08:19:11 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 08:19:15 - INFO - codeparrot_training - Step 16052: {'lr': 0.0004889607334288683, 'samples': 8219136, 'steps': 16052, 'loss/train': 1.9482122659683228} +03/04/2022 08:19:18 - INFO - codeparrot_training - Step 16053: {'lr': 0.0004889591738395522, 'samples': 8219648, 'steps': 16053, 'loss/train': 1.421478509902954} +03/04/2022 08:19:20 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 08:19:23 - INFO - codeparrot_training - Step 16054: {'lr': 0.0004889576141425649, 'samples': 8220160, 'steps': 16054, 'loss/train': 0.7449761033058167} +03/04/2022 08:19:26 - INFO - codeparrot_training - Step 16055: {'lr': 0.0004889560543379069, 'samples': 8220672, 'steps': 16055, 'loss/train': 2.2041666507720947} +03/04/2022 08:19:28 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 08:19:32 - INFO - codeparrot_training - Step 16056: {'lr': 0.000488954494425579, 'samples': 8221184, 'steps': 16056, 'loss/train': 1.9152884483337402} +03/04/2022 08:19:35 - INFO - codeparrot_training - Step 16057: {'lr': 0.000488952934405582, 'samples': 8221696, 'steps': 16057, 'loss/train': 2.0389585494995117} +03/04/2022 08:19:36 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/04/2022 08:19:40 - INFO - codeparrot_training - Step 16058: {'lr': 0.0004889513742779164, 'samples': 8222208, 'steps': 16058, 'loss/train': 1.678281545639038} +03/04/2022 08:19:43 - INFO - codeparrot_training - Step 16059: {'lr': 0.0004889498140425829, 'samples': 8222720, 'steps': 16059, 'loss/train': 0.624750018119812} +03/04/2022 08:19:45 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 08:19:49 - INFO - codeparrot_training - Step 16060: {'lr': 0.0004889482536995825, 'samples': 8223232, 'steps': 16060, 'loss/train': 1.8798847198486328} +03/04/2022 08:19:52 - INFO - codeparrot_training - Step 16061: {'lr': 0.0004889466932489157, 'samples': 8223744, 'steps': 16061, 'loss/train': 1.7294416427612305} +03/04/2022 08:19:53 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 08:19:57 - INFO - codeparrot_training - Step 16062: {'lr': 0.0004889451326905831, 'samples': 8224256, 'steps': 16062, 'loss/train': 0.885642945766449} +03/04/2022 08:20:00 - INFO - codeparrot_training - Step 16063: {'lr': 0.0004889435720245855, 'samples': 8224768, 'steps': 16063, 'loss/train': 1.6908111572265625} +03/04/2022 08:20:01 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 08:20:05 - INFO - codeparrot_training - Step 16064: {'lr': 0.0004889420112509237, 'samples': 8225280, 'steps': 16064, 'loss/train': 2.0448262691497803} +03/04/2022 08:20:09 - INFO - codeparrot_training - Step 16065: {'lr': 0.0004889404503695983, 'samples': 8225792, 'steps': 16065, 'loss/train': 1.0186573266983032} +03/04/2022 08:20:10 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 08:20:14 - INFO - codeparrot_training - Step 16066: {'lr': 0.0004889388893806099, 'samples': 8226304, 'steps': 16066, 'loss/train': 1.3194963932037354} +03/04/2022 08:20:17 - INFO - codeparrot_training - Step 16067: {'lr': 0.0004889373282839594, 'samples': 8226816, 'steps': 16067, 'loss/train': 1.5543700456619263} +03/04/2022 08:20:18 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/04/2022 08:20:23 - INFO - codeparrot_training - Step 16068: {'lr': 0.0004889357670796474, 'samples': 8227328, 'steps': 16068, 'loss/train': 2.5440874099731445} +03/04/2022 08:20:26 - INFO - codeparrot_training - Step 16069: {'lr': 0.0004889342057676748, 'samples': 8227840, 'steps': 16069, 'loss/train': 1.768068552017212} +03/04/2022 08:20:27 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 08:20:31 - INFO - codeparrot_training - Step 16070: {'lr': 0.000488932644348042, 'samples': 8228352, 'steps': 16070, 'loss/train': 0.25640177726745605} +03/04/2022 08:20:34 - INFO - codeparrot_training - Step 16071: {'lr': 0.0004889310828207498, 'samples': 8228864, 'steps': 16071, 'loss/train': 1.7538979053497314} +03/04/2022 08:20:35 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 08:20:39 - INFO - codeparrot_training - Step 16072: {'lr': 0.000488929521185799, 'samples': 8229376, 'steps': 16072, 'loss/train': 2.57619047164917} +03/04/2022 08:20:43 - INFO - codeparrot_training - Step 16073: {'lr': 0.0004889279594431903, 'samples': 8229888, 'steps': 16073, 'loss/train': 2.501298427581787} +03/04/2022 08:20:44 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 08:20:48 - INFO - codeparrot_training - Step 16074: {'lr': 0.0004889263975929242, 'samples': 8230400, 'steps': 16074, 'loss/train': 2.206531524658203} +03/04/2022 08:20:51 - INFO - codeparrot_training - Step 16075: {'lr': 0.0004889248356350016, 'samples': 8230912, 'steps': 16075, 'loss/train': 0.863510012626648} +03/04/2022 08:20:52 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/04/2022 08:20:56 - INFO - codeparrot_training - Step 16076: {'lr': 0.0004889232735694232, 'samples': 8231424, 'steps': 16076, 'loss/train': 2.0025265216827393} +03/04/2022 08:20:59 - INFO - codeparrot_training - Step 16077: {'lr': 0.0004889217113961896, 'samples': 8231936, 'steps': 16077, 'loss/train': 1.8470087051391602} +03/04/2022 08:21:00 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/04/2022 08:21:05 - INFO - codeparrot_training - Step 16078: {'lr': 0.0004889201491153016, 'samples': 8232448, 'steps': 16078, 'loss/train': 1.2813292741775513} +03/04/2022 08:21:08 - INFO - codeparrot_training - Step 16079: {'lr': 0.0004889185867267599, 'samples': 8232960, 'steps': 16079, 'loss/train': 2.0163514614105225} +03/04/2022 08:21:09 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 08:21:13 - INFO - codeparrot_training - Step 16080: {'lr': 0.0004889170242305652, 'samples': 8233472, 'steps': 16080, 'loss/train': 1.738868236541748} +03/04/2022 08:21:16 - INFO - codeparrot_training - Step 16081: {'lr': 0.0004889154616267181, 'samples': 8233984, 'steps': 16081, 'loss/train': 2.1362087726593018} +03/04/2022 08:21:17 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 08:21:21 - INFO - codeparrot_training - Step 16082: {'lr': 0.0004889138989152194, 'samples': 8234496, 'steps': 16082, 'loss/train': 2.1465413570404053} +03/04/2022 08:21:25 - INFO - codeparrot_training - Step 16083: {'lr': 0.0004889123360960698, 'samples': 8235008, 'steps': 16083, 'loss/train': 2.540071487426758} +03/04/2022 08:21:26 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/04/2022 08:21:30 - INFO - codeparrot_training - Step 16084: {'lr': 0.0004889107731692699, 'samples': 8235520, 'steps': 16084, 'loss/train': 2.0456109046936035} +03/04/2022 08:21:33 - INFO - codeparrot_training - Step 16085: {'lr': 0.0004889092101348206, 'samples': 8236032, 'steps': 16085, 'loss/train': 1.906584620475769} +03/04/2022 08:21:35 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/04/2022 08:21:38 - INFO - codeparrot_training - Step 16086: {'lr': 0.0004889076469927225, 'samples': 8236544, 'steps': 16086, 'loss/train': 1.867908239364624} +03/04/2022 08:21:42 - INFO - codeparrot_training - Step 16087: {'lr': 0.0004889060837429762, 'samples': 8237056, 'steps': 16087, 'loss/train': 1.7816317081451416} +03/04/2022 08:21:43 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 08:21:47 - INFO - codeparrot_training - Step 16088: {'lr': 0.0004889045203855826, 'samples': 8237568, 'steps': 16088, 'loss/train': 1.3998560905456543} +03/04/2022 08:21:50 - INFO - codeparrot_training - Step 16089: {'lr': 0.0004889029569205423, 'samples': 8238080, 'steps': 16089, 'loss/train': 1.6776505708694458} +03/04/2022 08:21:52 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 08:21:55 - INFO - codeparrot_training - Step 16090: {'lr': 0.0004889013933478559, 'samples': 8238592, 'steps': 16090, 'loss/train': 2.1155169010162354} +03/04/2022 08:21:59 - INFO - codeparrot_training - Step 16091: {'lr': 0.0004888998296675243, 'samples': 8239104, 'steps': 16091, 'loss/train': 1.6578240394592285} +03/04/2022 08:22:00 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 08:22:04 - INFO - codeparrot_training - Step 16092: {'lr': 0.0004888982658795482, 'samples': 8239616, 'steps': 16092, 'loss/train': 1.7067776918411255} +03/04/2022 08:22:07 - INFO - codeparrot_training - Step 16093: {'lr': 0.0004888967019839282, 'samples': 8240128, 'steps': 16093, 'loss/train': 1.858924388885498} +03/04/2022 08:22:09 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 08:22:12 - INFO - codeparrot_training - Step 16094: {'lr': 0.000488895137980665, 'samples': 8240640, 'steps': 16094, 'loss/train': 2.258974313735962} +03/04/2022 08:22:16 - INFO - codeparrot_training - Step 16095: {'lr': 0.0004888935738697593, 'samples': 8241152, 'steps': 16095, 'loss/train': 1.5699955224990845} +03/04/2022 08:22:18 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/04/2022 08:22:21 - INFO - codeparrot_training - Step 16096: {'lr': 0.0004888920096512118, 'samples': 8241664, 'steps': 16096, 'loss/train': 1.866520881652832} +03/04/2022 08:22:24 - INFO - codeparrot_training - Step 16097: {'lr': 0.0004888904453250233, 'samples': 8242176, 'steps': 16097, 'loss/train': 2.166182279586792} +03/04/2022 08:22:26 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 08:22:29 - INFO - codeparrot_training - Step 16098: {'lr': 0.0004888888808911946, 'samples': 8242688, 'steps': 16098, 'loss/train': 1.2927037477493286} +03/04/2022 08:22:32 - INFO - codeparrot_training - Step 16099: {'lr': 0.0004888873163497261, 'samples': 8243200, 'steps': 16099, 'loss/train': 1.735762596130371} +03/04/2022 08:22:35 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 08:22:38 - INFO - codeparrot_training - Step 16100: {'lr': 0.0004888857517006186, 'samples': 8243712, 'steps': 16100, 'loss/train': 2.1306138038635254} +03/04/2022 08:22:41 - INFO - codeparrot_training - Step 16101: {'lr': 0.000488884186943873, 'samples': 8244224, 'steps': 16101, 'loss/train': 1.062048316001892} +03/04/2022 08:22:43 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/04/2022 08:22:46 - INFO - codeparrot_training - Step 16102: {'lr': 0.0004888826220794899, 'samples': 8244736, 'steps': 16102, 'loss/train': 1.2356011867523193} +03/04/2022 08:22:49 - INFO - codeparrot_training - Step 16103: {'lr': 0.0004888810571074698, 'samples': 8245248, 'steps': 16103, 'loss/train': 0.996645450592041} +03/04/2022 08:22:51 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/04/2022 08:22:54 - INFO - codeparrot_training - Step 16104: {'lr': 0.0004888794920278137, 'samples': 8245760, 'steps': 16104, 'loss/train': 1.9622377157211304} +03/04/2022 08:22:58 - INFO - codeparrot_training - Step 16105: {'lr': 0.0004888779268405223, 'samples': 8246272, 'steps': 16105, 'loss/train': 1.7462842464447021} +03/04/2022 08:22:59 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/04/2022 08:23:03 - INFO - codeparrot_training - Step 16106: {'lr': 0.0004888763615455959, 'samples': 8246784, 'steps': 16106, 'loss/train': 2.2172703742980957} +03/04/2022 08:23:06 - INFO - codeparrot_training - Step 16107: {'lr': 0.0004888747961430358, 'samples': 8247296, 'steps': 16107, 'loss/train': 2.3934457302093506} +03/04/2022 08:23:08 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 08:23:12 - INFO - codeparrot_training - Step 16108: {'lr': 0.0004888732306328422, 'samples': 8247808, 'steps': 16108, 'loss/train': 2.0733537673950195} +03/04/2022 08:23:15 - INFO - codeparrot_training - Step 16109: {'lr': 0.000488871665015016, 'samples': 8248320, 'steps': 16109, 'loss/train': 1.424755573272705} +03/04/2022 08:23:16 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/04/2022 08:23:20 - INFO - codeparrot_training - Step 16110: {'lr': 0.0004888700992895581, 'samples': 8248832, 'steps': 16110, 'loss/train': 1.8470808267593384} +03/04/2022 08:23:23 - INFO - codeparrot_training - Step 16111: {'lr': 0.0004888685334564688, 'samples': 8249344, 'steps': 16111, 'loss/train': 1.7087559700012207} +03/04/2022 08:23:25 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/04/2022 08:23:28 - INFO - codeparrot_training - Step 16112: {'lr': 0.0004888669675157492, 'samples': 8249856, 'steps': 16112, 'loss/train': 1.4280099868774414} +03/04/2022 08:23:32 - INFO - codeparrot_training - Step 16113: {'lr': 0.0004888654014673998, 'samples': 8250368, 'steps': 16113, 'loss/train': 1.9675606489181519} +03/04/2022 08:23:33 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 08:23:37 - INFO - codeparrot_training - Step 16114: {'lr': 0.0004888638353114212, 'samples': 8250880, 'steps': 16114, 'loss/train': 1.131265640258789} +03/04/2022 08:23:40 - INFO - codeparrot_training - Step 16115: {'lr': 0.0004888622690478144, 'samples': 8251392, 'steps': 16115, 'loss/train': 1.437040090560913} +03/04/2022 08:23:42 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/04/2022 08:23:45 - INFO - codeparrot_training - Step 16116: {'lr': 0.0004888607026765799, 'samples': 8251904, 'steps': 16116, 'loss/train': 1.9726862907409668} +03/04/2022 08:23:49 - INFO - codeparrot_training - Step 16117: {'lr': 0.0004888591361977184, 'samples': 8252416, 'steps': 16117, 'loss/train': 2.1556813716888428} +03/04/2022 08:23:50 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 08:23:54 - INFO - codeparrot_training - Step 16118: {'lr': 0.0004888575696112308, 'samples': 8252928, 'steps': 16118, 'loss/train': 1.6974804401397705} +03/04/2022 08:23:57 - INFO - codeparrot_training - Step 16119: {'lr': 0.0004888560029171175, 'samples': 8253440, 'steps': 16119, 'loss/train': 2.380051612854004} +03/04/2022 08:23:58 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/04/2022 08:24:02 - INFO - codeparrot_training - Step 16120: {'lr': 0.0004888544361153794, 'samples': 8253952, 'steps': 16120, 'loss/train': 1.5583192110061646} +03/04/2022 08:24:05 - INFO - codeparrot_training - Step 16121: {'lr': 0.0004888528692060173, 'samples': 8254464, 'steps': 16121, 'loss/train': 2.51651668548584} +03/04/2022 08:24:07 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/04/2022 08:24:11 - INFO - codeparrot_training - Step 16122: {'lr': 0.0004888513021890316, 'samples': 8254976, 'steps': 16122, 'loss/train': 1.7720905542373657} +03/04/2022 08:24:14 - INFO - codeparrot_training - Step 16123: {'lr': 0.0004888497350644234, 'samples': 8255488, 'steps': 16123, 'loss/train': 1.6488277912139893} +03/04/2022 08:24:15 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 08:24:19 - INFO - codeparrot_training - Step 16124: {'lr': 0.000488848167832193, 'samples': 8256000, 'steps': 16124, 'loss/train': 2.496272087097168} +03/04/2022 08:24:22 - INFO - codeparrot_training - Step 16125: {'lr': 0.0004888466004923413, 'samples': 8256512, 'steps': 16125, 'loss/train': 3.7929582595825195} +03/04/2022 08:24:24 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/04/2022 08:24:28 - INFO - codeparrot_training - Step 16126: {'lr': 0.0004888450330448692, 'samples': 8257024, 'steps': 16126, 'loss/train': 2.3383591175079346} +03/04/2022 08:24:31 - INFO - codeparrot_training - Step 16127: {'lr': 0.000488843465489777, 'samples': 8257536, 'steps': 16127, 'loss/train': 1.2399309873580933} +03/04/2022 08:24:32 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 08:24:36 - INFO - codeparrot_training - Step 16128: {'lr': 0.0004888418978270658, 'samples': 8258048, 'steps': 16128, 'loss/train': 2.023458957672119} +03/04/2022 08:24:39 - INFO - codeparrot_training - Step 16129: {'lr': 0.000488840330056736, 'samples': 8258560, 'steps': 16129, 'loss/train': 0.867405116558075} +03/04/2022 08:24:40 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 08:24:44 - INFO - codeparrot_training - Step 16130: {'lr': 0.0004888387621787885, 'samples': 8259072, 'steps': 16130, 'loss/train': 1.700221300125122} +03/04/2022 08:24:47 - INFO - codeparrot_training - Step 16131: {'lr': 0.0004888371941932239, 'samples': 8259584, 'steps': 16131, 'loss/train': 2.6374945640563965} +03/04/2022 08:24:49 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/04/2022 08:24:53 - INFO - codeparrot_training - Step 16132: {'lr': 0.000488835626100043, 'samples': 8260096, 'steps': 16132, 'loss/train': 1.8760226964950562} +03/04/2022 08:24:56 - INFO - codeparrot_training - Step 16133: {'lr': 0.0004888340578992464, 'samples': 8260608, 'steps': 16133, 'loss/train': 2.160445213317871} +03/04/2022 08:24:57 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 08:25:01 - INFO - codeparrot_training - Step 16134: {'lr': 0.0004888324895908349, 'samples': 8261120, 'steps': 16134, 'loss/train': 1.2084697484970093} +03/04/2022 08:25:04 - INFO - codeparrot_training - Step 16135: {'lr': 0.0004888309211748091, 'samples': 8261632, 'steps': 16135, 'loss/train': 2.1752138137817383} +03/04/2022 08:25:06 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 08:25:09 - INFO - codeparrot_training - Step 16136: {'lr': 0.0004888293526511697, 'samples': 8262144, 'steps': 16136, 'loss/train': 1.3664453029632568} +03/04/2022 08:25:13 - INFO - codeparrot_training - Step 16137: {'lr': 0.0004888277840199177, 'samples': 8262656, 'steps': 16137, 'loss/train': 1.881345510482788} +03/04/2022 08:25:14 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 08:25:18 - INFO - codeparrot_training - Step 16138: {'lr': 0.0004888262152810534, 'samples': 8263168, 'steps': 16138, 'loss/train': 2.372832775115967} +03/04/2022 08:25:21 - INFO - codeparrot_training - Step 16139: {'lr': 0.0004888246464345779, 'samples': 8263680, 'steps': 16139, 'loss/train': 1.2288739681243896} +03/04/2022 08:25:23 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 08:25:26 - INFO - codeparrot_training - Step 16140: {'lr': 0.0004888230774804915, 'samples': 8264192, 'steps': 16140, 'loss/train': 1.8634742498397827} +03/04/2022 08:25:30 - INFO - codeparrot_training - Step 16141: {'lr': 0.0004888215084187952, 'samples': 8264704, 'steps': 16141, 'loss/train': 1.641205072402954} +03/04/2022 08:25:31 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 08:25:35 - INFO - codeparrot_training - Step 16142: {'lr': 0.0004888199392494896, 'samples': 8265216, 'steps': 16142, 'loss/train': 1.8594779968261719} +03/04/2022 08:25:38 - INFO - codeparrot_training - Step 16143: {'lr': 0.0004888183699725755, 'samples': 8265728, 'steps': 16143, 'loss/train': 1.2768545150756836} +03/04/2022 08:25:39 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 08:25:43 - INFO - codeparrot_training - Step 16144: {'lr': 0.0004888168005880533, 'samples': 8266240, 'steps': 16144, 'loss/train': 2.348010540008545} +03/04/2022 08:25:46 - INFO - codeparrot_training - Step 16145: {'lr': 0.0004888152310959242, 'samples': 8266752, 'steps': 16145, 'loss/train': 2.1777427196502686} +03/04/2022 08:25:48 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/04/2022 08:25:52 - INFO - codeparrot_training - Step 16146: {'lr': 0.0004888136614961885, 'samples': 8267264, 'steps': 16146, 'loss/train': 2.066293716430664} +03/04/2022 08:25:55 - INFO - codeparrot_training - Step 16147: {'lr': 0.000488812091788847, 'samples': 8267776, 'steps': 16147, 'loss/train': 2.361811399459839} +03/04/2022 08:25:56 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 08:26:00 - INFO - codeparrot_training - Step 16148: {'lr': 0.0004888105219739005, 'samples': 8268288, 'steps': 16148, 'loss/train': 1.4620476961135864} +03/04/2022 08:26:03 - INFO - codeparrot_training - Step 16149: {'lr': 0.0004888089520513497, 'samples': 8268800, 'steps': 16149, 'loss/train': 1.6388620138168335} +03/04/2022 08:26:04 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 08:26:09 - INFO - codeparrot_training - Step 16150: {'lr': 0.0004888073820211952, 'samples': 8269312, 'steps': 16150, 'loss/train': 1.579231858253479} +03/04/2022 08:26:12 - INFO - codeparrot_training - Step 16151: {'lr': 0.0004888058118834379, 'samples': 8269824, 'steps': 16151, 'loss/train': 1.775742769241333} +03/04/2022 08:26:13 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 08:26:17 - INFO - codeparrot_training - Step 16152: {'lr': 0.0004888042416380784, 'samples': 8270336, 'steps': 16152, 'loss/train': 2.6505253314971924} +03/04/2022 08:26:21 - INFO - codeparrot_training - Step 16153: {'lr': 0.0004888026712851172, 'samples': 8270848, 'steps': 16153, 'loss/train': 1.5418152809143066} +03/04/2022 08:26:21 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 08:26:26 - INFO - codeparrot_training - Step 16154: {'lr': 0.0004888011008245554, 'samples': 8271360, 'steps': 16154, 'loss/train': 2.11429500579834} +03/04/2022 08:26:29 - INFO - codeparrot_training - Step 16155: {'lr': 0.0004887995302563934, 'samples': 8271872, 'steps': 16155, 'loss/train': 2.352250576019287} +03/04/2022 08:26:30 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/04/2022 08:26:34 - INFO - codeparrot_training - Step 16156: {'lr': 0.000488797959580632, 'samples': 8272384, 'steps': 16156, 'loss/train': 1.414434552192688} +03/04/2022 08:26:37 - INFO - codeparrot_training - Step 16157: {'lr': 0.000488796388797272, 'samples': 8272896, 'steps': 16157, 'loss/train': 0.6504292488098145} +03/04/2022 08:26:39 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/04/2022 08:26:43 - INFO - codeparrot_training - Step 16158: {'lr': 0.0004887948179063139, 'samples': 8273408, 'steps': 16158, 'loss/train': 1.564260482788086} +03/04/2022 08:26:46 - INFO - codeparrot_training - Step 16159: {'lr': 0.0004887932469077587, 'samples': 8273920, 'steps': 16159, 'loss/train': 1.779106855392456} +03/04/2022 08:26:47 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 08:26:51 - INFO - codeparrot_training - Step 16160: {'lr': 0.0004887916758016069, 'samples': 8274432, 'steps': 16160, 'loss/train': 1.971617579460144} +03/04/2022 08:26:54 - INFO - codeparrot_training - Step 16161: {'lr': 0.0004887901045878592, 'samples': 8274944, 'steps': 16161, 'loss/train': 2.1870310306549072} +03/04/2022 08:26:55 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 08:27:00 - INFO - codeparrot_training - Step 16162: {'lr': 0.0004887885332665165, 'samples': 8275456, 'steps': 16162, 'loss/train': 1.8949812650680542} +03/04/2022 08:27:03 - INFO - codeparrot_training - Step 16163: {'lr': 0.0004887869618375793, 'samples': 8275968, 'steps': 16163, 'loss/train': 1.8161934614181519} +03/04/2022 08:27:04 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/04/2022 08:27:08 - INFO - codeparrot_training - Step 16164: {'lr': 0.0004887853903010483, 'samples': 8276480, 'steps': 16164, 'loss/train': 2.1300110816955566} +03/04/2022 08:27:11 - INFO - codeparrot_training - Step 16165: {'lr': 0.0004887838186569244, 'samples': 8276992, 'steps': 16165, 'loss/train': 1.3764344453811646} +03/04/2022 08:27:12 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 08:27:17 - INFO - codeparrot_training - Step 16166: {'lr': 0.0004887822469052081, 'samples': 8277504, 'steps': 16166, 'loss/train': 1.259840488433838} +03/04/2022 08:27:20 - INFO - codeparrot_training - Step 16167: {'lr': 0.0004887806750459002, 'samples': 8278016, 'steps': 16167, 'loss/train': 1.6597380638122559} +03/04/2022 08:27:21 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 08:27:25 - INFO - codeparrot_training - Step 16168: {'lr': 0.0004887791030790016, 'samples': 8278528, 'steps': 16168, 'loss/train': 1.7345441579818726} +03/04/2022 08:27:28 - INFO - codeparrot_training - Step 16169: {'lr': 0.0004887775310045126, 'samples': 8279040, 'steps': 16169, 'loss/train': 2.454425096511841} +03/04/2022 08:27:29 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 08:27:34 - INFO - codeparrot_training - Step 16170: {'lr': 0.0004887759588224342, 'samples': 8279552, 'steps': 16170, 'loss/train': 2.3924410343170166} +03/04/2022 08:27:37 - INFO - codeparrot_training - Step 16171: {'lr': 0.000488774386532767, 'samples': 8280064, 'steps': 16171, 'loss/train': 1.7405668497085571} +03/04/2022 08:27:38 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 08:27:42 - INFO - codeparrot_training - Step 16172: {'lr': 0.0004887728141355118, 'samples': 8280576, 'steps': 16172, 'loss/train': 2.053586721420288} +03/04/2022 08:27:45 - INFO - codeparrot_training - Step 16173: {'lr': 0.0004887712416306693, 'samples': 8281088, 'steps': 16173, 'loss/train': 1.695051670074463} +03/04/2022 08:27:47 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 08:27:50 - INFO - codeparrot_training - Step 16174: {'lr': 0.00048876966901824, 'samples': 8281600, 'steps': 16174, 'loss/train': 1.518394947052002} +03/04/2022 08:27:54 - INFO - codeparrot_training - Step 16175: {'lr': 0.0004887680962982249, 'samples': 8282112, 'steps': 16175, 'loss/train': 2.001528739929199} +03/04/2022 08:27:55 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 08:27:59 - INFO - codeparrot_training - Step 16176: {'lr': 0.0004887665234706247, 'samples': 8282624, 'steps': 16176, 'loss/train': 2.5772359371185303} +03/04/2022 08:28:02 - INFO - codeparrot_training - Step 16177: {'lr': 0.0004887649505354398, 'samples': 8283136, 'steps': 16177, 'loss/train': 0.8842549324035645} +03/04/2022 08:28:04 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 08:28:08 - INFO - codeparrot_training - Step 16178: {'lr': 0.000488763377492671, 'samples': 8283648, 'steps': 16178, 'loss/train': 1.412381649017334} +03/04/2022 08:28:11 - INFO - codeparrot_training - Step 16179: {'lr': 0.0004887618043423194, 'samples': 8284160, 'steps': 16179, 'loss/train': 1.6233022212982178} +03/04/2022 08:28:13 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 08:28:16 - INFO - codeparrot_training - Step 16180: {'lr': 0.0004887602310843852, 'samples': 8284672, 'steps': 16180, 'loss/train': 2.275810956954956} +03/04/2022 08:28:19 - INFO - codeparrot_training - Step 16181: {'lr': 0.0004887586577188694, 'samples': 8285184, 'steps': 16181, 'loss/train': 2.532498836517334} +03/04/2022 08:28:22 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 08:28:24 - INFO - codeparrot_training - Step 16182: {'lr': 0.0004887570842457726, 'samples': 8285696, 'steps': 16182, 'loss/train': 1.252089500427246} +03/04/2022 08:28:28 - INFO - codeparrot_training - Step 16183: {'lr': 0.0004887555106650956, 'samples': 8286208, 'steps': 16183, 'loss/train': 1.5297998189926147} +03/04/2022 08:28:30 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 08:28:33 - INFO - codeparrot_training - Step 16184: {'lr': 0.000488753936976839, 'samples': 8286720, 'steps': 16184, 'loss/train': 1.6619635820388794} +03/04/2022 08:28:36 - INFO - codeparrot_training - Step 16185: {'lr': 0.0004887523631810036, 'samples': 8287232, 'steps': 16185, 'loss/train': 1.5206786394119263} +03/04/2022 08:28:39 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/04/2022 08:28:41 - INFO - codeparrot_training - Step 16186: {'lr': 0.00048875078927759, 'samples': 8287744, 'steps': 16186, 'loss/train': 2.6936628818511963} +03/04/2022 08:28:45 - INFO - codeparrot_training - Step 16187: {'lr': 0.000488749215266599, 'samples': 8288256, 'steps': 16187, 'loss/train': 1.905018925666809} +03/04/2022 08:28:47 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 08:28:50 - INFO - codeparrot_training - Step 16188: {'lr': 0.0004887476411480314, 'samples': 8288768, 'steps': 16188, 'loss/train': 1.834274411201477} +03/04/2022 08:28:53 - INFO - codeparrot_training - Step 16189: {'lr': 0.0004887460669218877, 'samples': 8289280, 'steps': 16189, 'loss/train': 1.703017234802246} +03/04/2022 08:28:56 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/04/2022 08:28:58 - INFO - codeparrot_training - Step 16190: {'lr': 0.0004887444925881688, 'samples': 8289792, 'steps': 16190, 'loss/train': 2.132237195968628} +03/04/2022 08:29:01 - INFO - codeparrot_training - Step 16191: {'lr': 0.0004887429181468752, 'samples': 8290304, 'steps': 16191, 'loss/train': 2.9703009128570557} +03/04/2022 08:29:04 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 08:29:07 - INFO - codeparrot_training - Step 16192: {'lr': 0.0004887413435980077, 'samples': 8290816, 'steps': 16192, 'loss/train': 1.3766413927078247} +03/04/2022 08:29:11 - INFO - codeparrot_training - Step 16193: {'lr': 0.0004887397689415672, 'samples': 8291328, 'steps': 16193, 'loss/train': 0.9252809882164001} +03/04/2022 08:29:14 - INFO - codeparrot_training - Step 16194: {'lr': 0.0004887381941775541, 'samples': 8291840, 'steps': 16194, 'loss/train': 2.2073724269866943} +03/04/2022 08:29:16 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 08:29:19 - INFO - codeparrot_training - Step 16195: {'lr': 0.0004887366193059693, 'samples': 8292352, 'steps': 16195, 'loss/train': 1.4966994524002075} +03/04/2022 08:29:22 - INFO - codeparrot_training - Step 16196: {'lr': 0.0004887350443268134, 'samples': 8292864, 'steps': 16196, 'loss/train': 1.479994297027588} +03/04/2022 08:29:24 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 08:29:27 - INFO - codeparrot_training - Step 16197: {'lr': 0.0004887334692400872, 'samples': 8293376, 'steps': 16197, 'loss/train': 2.2081053256988525} +03/04/2022 08:29:30 - INFO - codeparrot_training - Step 16198: {'lr': 0.0004887318940457915, 'samples': 8293888, 'steps': 16198, 'loss/train': 1.5185822248458862} +03/04/2022 08:29:32 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 08:29:36 - INFO - codeparrot_training - Step 16199: {'lr': 0.0004887303187439267, 'samples': 8294400, 'steps': 16199, 'loss/train': 1.6668519973754883} +03/04/2022 08:29:39 - INFO - codeparrot_training - Step 16200: {'lr': 0.0004887287433344939, 'samples': 8294912, 'steps': 16200, 'loss/train': 1.736337661743164} +03/04/2022 08:29:42 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 08:29:44 - INFO - codeparrot_training - Step 16201: {'lr': 0.0004887271678174935, 'samples': 8295424, 'steps': 16201, 'loss/train': 2.709927797317505} +03/04/2022 08:29:48 - INFO - codeparrot_training - Step 16202: {'lr': 0.0004887255921929264, 'samples': 8295936, 'steps': 16202, 'loss/train': 2.1274397373199463} +03/04/2022 08:29:50 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 08:29:53 - INFO - codeparrot_training - Step 16203: {'lr': 0.0004887240164607931, 'samples': 8296448, 'steps': 16203, 'loss/train': 2.7359538078308105} +03/04/2022 08:29:56 - INFO - codeparrot_training - Step 16204: {'lr': 0.0004887224406210945, 'samples': 8296960, 'steps': 16204, 'loss/train': 2.073945999145508} +03/04/2022 08:29:59 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/04/2022 08:30:01 - INFO - codeparrot_training - Step 16205: {'lr': 0.0004887208646738312, 'samples': 8297472, 'steps': 16205, 'loss/train': 1.4177926778793335} +03/04/2022 08:30:05 - INFO - codeparrot_training - Step 16206: {'lr': 0.000488719288619004, 'samples': 8297984, 'steps': 16206, 'loss/train': 0.8719953894615173} +03/04/2022 08:30:07 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 08:30:10 - INFO - codeparrot_training - Step 16207: {'lr': 0.0004887177124566136, 'samples': 8298496, 'steps': 16207, 'loss/train': 1.8617607355117798} +03/04/2022 08:30:13 - INFO - codeparrot_training - Step 16208: {'lr': 0.0004887161361866607, 'samples': 8299008, 'steps': 16208, 'loss/train': 6.998103141784668} +03/04/2022 08:30:15 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 08:30:18 - INFO - codeparrot_training - Step 16209: {'lr': 0.000488714559809146, 'samples': 8299520, 'steps': 16209, 'loss/train': 1.85304594039917} +03/04/2022 08:30:21 - INFO - codeparrot_training - Step 16210: {'lr': 0.0004887129833240703, 'samples': 8300032, 'steps': 16210, 'loss/train': 2.4427542686462402} +03/04/2022 08:30:23 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 08:30:27 - INFO - codeparrot_training - Step 16211: {'lr': 0.000488711406731434, 'samples': 8300544, 'steps': 16211, 'loss/train': 1.6055783033370972} +03/04/2022 08:30:30 - INFO - codeparrot_training - Step 16212: {'lr': 0.0004887098300312381, 'samples': 8301056, 'steps': 16212, 'loss/train': 1.6247010231018066} +03/04/2022 08:30:32 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 08:30:35 - INFO - codeparrot_training - Step 16213: {'lr': 0.0004887082532234832, 'samples': 8301568, 'steps': 16213, 'loss/train': 1.6986756324768066} +03/04/2022 08:30:38 - INFO - codeparrot_training - Step 16214: {'lr': 0.0004887066763081702, 'samples': 8302080, 'steps': 16214, 'loss/train': 1.5028762817382812} +03/04/2022 08:30:40 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 08:30:44 - INFO - codeparrot_training - Step 16215: {'lr': 0.0004887050992852995, 'samples': 8302592, 'steps': 16215, 'loss/train': 2.176476240158081} +03/04/2022 08:30:47 - INFO - codeparrot_training - Step 16216: {'lr': 0.000488703522154872, 'samples': 8303104, 'steps': 16216, 'loss/train': 1.727066159248352} +03/04/2022 08:30:49 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 08:30:52 - INFO - codeparrot_training - Step 16217: {'lr': 0.0004887019449168884, 'samples': 8303616, 'steps': 16217, 'loss/train': 0.5155454874038696} +03/04/2022 08:30:55 - INFO - codeparrot_training - Step 16218: {'lr': 0.0004887003675713493, 'samples': 8304128, 'steps': 16218, 'loss/train': 2.167361259460449} +03/04/2022 08:30:57 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 08:31:01 - INFO - codeparrot_training - Step 16219: {'lr': 0.0004886987901182556, 'samples': 8304640, 'steps': 16219, 'loss/train': 1.8261597156524658} +03/04/2022 08:31:04 - INFO - codeparrot_training - Step 16220: {'lr': 0.0004886972125576079, 'samples': 8305152, 'steps': 16220, 'loss/train': 1.7096847295761108} +03/04/2022 08:31:06 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/04/2022 08:31:09 - INFO - codeparrot_training - Step 16221: {'lr': 0.0004886956348894069, 'samples': 8305664, 'steps': 16221, 'loss/train': 2.0300941467285156} +03/04/2022 08:31:12 - INFO - codeparrot_training - Step 16222: {'lr': 0.0004886940571136533, 'samples': 8306176, 'steps': 16222, 'loss/train': 2.0135064125061035} +03/04/2022 08:31:14 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/04/2022 08:31:18 - INFO - codeparrot_training - Step 16223: {'lr': 0.0004886924792303479, 'samples': 8306688, 'steps': 16223, 'loss/train': 2.2782342433929443} +03/04/2022 08:31:21 - INFO - codeparrot_training - Step 16224: {'lr': 0.0004886909012394913, 'samples': 8307200, 'steps': 16224, 'loss/train': 1.7928155660629272} +03/04/2022 08:31:23 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 08:31:26 - INFO - codeparrot_training - Step 16225: {'lr': 0.0004886893231410844, 'samples': 8307712, 'steps': 16225, 'loss/train': 1.1453008651733398} +03/04/2022 08:31:29 - INFO - codeparrot_training - Step 16226: {'lr': 0.0004886877449351276, 'samples': 8308224, 'steps': 16226, 'loss/train': 1.9457550048828125} +03/04/2022 08:31:32 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 08:31:35 - INFO - codeparrot_training - Step 16227: {'lr': 0.0004886861666216219, 'samples': 8308736, 'steps': 16227, 'loss/train': 2.231879472732544} +03/04/2022 08:31:38 - INFO - codeparrot_training - Step 16228: {'lr': 0.0004886845882005679, 'samples': 8309248, 'steps': 16228, 'loss/train': 2.0991926193237305} +03/04/2022 08:31:40 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 08:31:43 - INFO - codeparrot_training - Step 16229: {'lr': 0.0004886830096719662, 'samples': 8309760, 'steps': 16229, 'loss/train': 2.0748748779296875} +03/04/2022 08:31:46 - INFO - codeparrot_training - Step 16230: {'lr': 0.0004886814310358176, 'samples': 8310272, 'steps': 16230, 'loss/train': 1.8112804889678955} +03/04/2022 08:31:48 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 08:31:51 - INFO - codeparrot_training - Step 16231: {'lr': 0.000488679852292123, 'samples': 8310784, 'steps': 16231, 'loss/train': 1.3747007846832275} +03/04/2022 08:31:55 - INFO - codeparrot_training - Step 16232: {'lr': 0.0004886782734408828, 'samples': 8311296, 'steps': 16232, 'loss/train': 1.6902848482131958} +03/04/2022 08:31:57 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 08:32:00 - INFO - codeparrot_training - Step 16233: {'lr': 0.0004886766944820979, 'samples': 8311808, 'steps': 16233, 'loss/train': 1.6983884572982788} +03/04/2022 08:32:03 - INFO - codeparrot_training - Step 16234: {'lr': 0.0004886751154157689, 'samples': 8312320, 'steps': 16234, 'loss/train': 1.8751786947250366} +03/04/2022 08:32:05 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/04/2022 08:32:08 - INFO - codeparrot_training - Step 16235: {'lr': 0.0004886735362418967, 'samples': 8312832, 'steps': 16235, 'loss/train': 0.4464772939682007} +03/04/2022 08:32:11 - INFO - codeparrot_training - Step 16236: {'lr': 0.0004886719569604818, 'samples': 8313344, 'steps': 16236, 'loss/train': 1.763805627822876} +03/04/2022 08:32:14 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 08:32:17 - INFO - codeparrot_training - Step 16237: {'lr': 0.000488670377571525, 'samples': 8313856, 'steps': 16237, 'loss/train': 1.3114837408065796} +03/04/2022 08:32:20 - INFO - codeparrot_training - Step 16238: {'lr': 0.0004886687980750271, 'samples': 8314368, 'steps': 16238, 'loss/train': 2.2473745346069336} +03/04/2022 08:32:22 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 08:32:25 - INFO - codeparrot_training - Step 16239: {'lr': 0.0004886672184709886, 'samples': 8314880, 'steps': 16239, 'loss/train': 2.284607410430908} +03/04/2022 08:32:28 - INFO - codeparrot_training - Step 16240: {'lr': 0.0004886656387594104, 'samples': 8315392, 'steps': 16240, 'loss/train': 2.138957977294922} +03/04/2022 08:32:30 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 08:32:34 - INFO - codeparrot_training - Step 16241: {'lr': 0.0004886640589402932, 'samples': 8315904, 'steps': 16241, 'loss/train': 1.9323773384094238} +03/04/2022 08:32:37 - INFO - codeparrot_training - Step 16242: {'lr': 0.0004886624790136375, 'samples': 8316416, 'steps': 16242, 'loss/train': 1.8334922790527344} +03/04/2022 08:32:39 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 08:32:42 - INFO - codeparrot_training - Step 16243: {'lr': 0.0004886608989794443, 'samples': 8316928, 'steps': 16243, 'loss/train': 2.8203155994415283} +03/04/2022 08:32:45 - INFO - codeparrot_training - Step 16244: {'lr': 0.0004886593188377142, 'samples': 8317440, 'steps': 16244, 'loss/train': 1.8669228553771973} +03/04/2022 08:32:47 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 08:32:51 - INFO - codeparrot_training - Step 16245: {'lr': 0.0004886577385884478, 'samples': 8317952, 'steps': 16245, 'loss/train': 2.733999013900757} +03/04/2022 08:32:54 - INFO - codeparrot_training - Step 16246: {'lr': 0.0004886561582316458, 'samples': 8318464, 'steps': 16246, 'loss/train': 2.389343023300171} +03/04/2022 08:32:57 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/04/2022 08:32:59 - INFO - codeparrot_training - Step 16247: {'lr': 0.0004886545777673093, 'samples': 8318976, 'steps': 16247, 'loss/train': 1.3846372365951538} +03/04/2022 08:33:02 - INFO - codeparrot_training - Step 16248: {'lr': 0.0004886529971954385, 'samples': 8319488, 'steps': 16248, 'loss/train': 2.1152260303497314} +03/04/2022 08:33:05 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 08:33:08 - INFO - codeparrot_training - Step 16249: {'lr': 0.0004886514165160345, 'samples': 8320000, 'steps': 16249, 'loss/train': 1.9478179216384888} +03/04/2022 08:33:11 - INFO - codeparrot_training - Step 16250: {'lr': 0.0004886498357290979, 'samples': 8320512, 'steps': 16250, 'loss/train': 2.284682273864746} +03/04/2022 08:33:13 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 08:33:16 - INFO - codeparrot_training - Step 16251: {'lr': 0.0004886482548346291, 'samples': 8321024, 'steps': 16251, 'loss/train': 1.765131950378418} +03/04/2022 08:33:19 - INFO - codeparrot_training - Step 16252: {'lr': 0.0004886466738326293, 'samples': 8321536, 'steps': 16252, 'loss/train': 2.0178256034851074} +03/04/2022 08:33:22 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 08:33:25 - INFO - codeparrot_training - Step 16253: {'lr': 0.000488645092723099, 'samples': 8322048, 'steps': 16253, 'loss/train': 2.428711414337158} +03/04/2022 08:33:28 - INFO - codeparrot_training - Step 16254: {'lr': 0.0004886435115060388, 'samples': 8322560, 'steps': 16254, 'loss/train': 2.002218008041382} +03/04/2022 08:33:30 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 08:33:33 - INFO - codeparrot_training - Step 16255: {'lr': 0.0004886419301814495, 'samples': 8323072, 'steps': 16255, 'loss/train': 1.0418208837509155} +03/04/2022 08:33:36 - INFO - codeparrot_training - Step 16256: {'lr': 0.0004886403487493319, 'samples': 8323584, 'steps': 16256, 'loss/train': 1.7002395391464233} +03/04/2022 08:33:38 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 08:33:41 - INFO - codeparrot_training - Step 16257: {'lr': 0.0004886387672096866, 'samples': 8324096, 'steps': 16257, 'loss/train': 2.1541051864624023} +03/04/2022 08:33:45 - INFO - codeparrot_training - Step 16258: {'lr': 0.0004886371855625143, 'samples': 8324608, 'steps': 16258, 'loss/train': 1.1288487911224365} +03/04/2022 08:33:47 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 08:33:50 - INFO - codeparrot_training - Step 16259: {'lr': 0.0004886356038078159, 'samples': 8325120, 'steps': 16259, 'loss/train': 1.866613507270813} +03/04/2022 08:33:53 - INFO - codeparrot_training - Step 16260: {'lr': 0.0004886340219455919, 'samples': 8325632, 'steps': 16260, 'loss/train': 2.0085225105285645} +03/04/2022 08:33:55 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/04/2022 08:33:58 - INFO - codeparrot_training - Step 16261: {'lr': 0.0004886324399758431, 'samples': 8326144, 'steps': 16261, 'loss/train': 2.328104257583618} +03/04/2022 08:34:01 - INFO - codeparrot_training - Step 16262: {'lr': 0.0004886308578985702, 'samples': 8326656, 'steps': 16262, 'loss/train': 0.9662577509880066} +03/04/2022 08:34:03 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 08:34:07 - INFO - codeparrot_training - Step 16263: {'lr': 0.0004886292757137739, 'samples': 8327168, 'steps': 16263, 'loss/train': 1.5578356981277466} +03/04/2022 08:34:10 - INFO - codeparrot_training - Step 16264: {'lr': 0.0004886276934214551, 'samples': 8327680, 'steps': 16264, 'loss/train': 1.5682373046875} +03/04/2022 08:34:13 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 08:34:15 - INFO - codeparrot_training - Step 16265: {'lr': 0.0004886261110216141, 'samples': 8328192, 'steps': 16265, 'loss/train': 1.538379192352295} +03/04/2022 08:34:18 - INFO - codeparrot_training - Step 16266: {'lr': 0.000488624528514252, 'samples': 8328704, 'steps': 16266, 'loss/train': 1.9214611053466797} +03/04/2022 08:34:21 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/04/2022 08:34:24 - INFO - codeparrot_training - Step 16267: {'lr': 0.0004886229458993693, 'samples': 8329216, 'steps': 16267, 'loss/train': 1.781991720199585} +03/04/2022 08:34:27 - INFO - codeparrot_training - Step 16268: {'lr': 0.0004886213631769669, 'samples': 8329728, 'steps': 16268, 'loss/train': 2.1212308406829834} +03/04/2022 08:34:30 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 08:34:32 - INFO - codeparrot_training - Step 16269: {'lr': 0.0004886197803470453, 'samples': 8330240, 'steps': 16269, 'loss/train': 1.99233078956604} +03/04/2022 08:34:35 - INFO - codeparrot_training - Step 16270: {'lr': 0.0004886181974096052, 'samples': 8330752, 'steps': 16270, 'loss/train': 1.4627552032470703} +03/04/2022 08:34:38 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 08:34:41 - INFO - codeparrot_training - Step 16271: {'lr': 0.0004886166143646476, 'samples': 8331264, 'steps': 16271, 'loss/train': 1.8879719972610474} +03/04/2022 08:34:44 - INFO - codeparrot_training - Step 16272: {'lr': 0.000488615031212173, 'samples': 8331776, 'steps': 16272, 'loss/train': 1.8473461866378784} +03/04/2022 08:34:47 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 08:34:49 - INFO - codeparrot_training - Step 16273: {'lr': 0.0004886134479521821, 'samples': 8332288, 'steps': 16273, 'loss/train': 1.276752233505249} +03/04/2022 08:34:52 - INFO - codeparrot_training - Step 16274: {'lr': 0.0004886118645846757, 'samples': 8332800, 'steps': 16274, 'loss/train': 1.794073462486267} +03/04/2022 08:34:55 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 08:34:57 - INFO - codeparrot_training - Step 16275: {'lr': 0.0004886102811096544, 'samples': 8333312, 'steps': 16275, 'loss/train': 1.6298946142196655} +03/04/2022 08:35:01 - INFO - codeparrot_training - Step 16276: {'lr': 0.0004886086975271191, 'samples': 8333824, 'steps': 16276, 'loss/train': 2.318837881088257} +03/04/2022 08:35:03 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 08:35:06 - INFO - codeparrot_training - Step 16277: {'lr': 0.0004886071138370704, 'samples': 8334336, 'steps': 16277, 'loss/train': 2.6901960372924805} +03/04/2022 08:35:09 - INFO - codeparrot_training - Step 16278: {'lr': 0.000488605530039509, 'samples': 8334848, 'steps': 16278, 'loss/train': 2.326103448867798} +03/04/2022 08:35:11 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 08:35:14 - INFO - codeparrot_training - Step 16279: {'lr': 0.0004886039461344356, 'samples': 8335360, 'steps': 16279, 'loss/train': 2.022749900817871} +03/04/2022 08:35:17 - INFO - codeparrot_training - Step 16280: {'lr': 0.0004886023621218509, 'samples': 8335872, 'steps': 16280, 'loss/train': 1.428017497062683} +03/04/2022 08:35:20 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 08:35:23 - INFO - codeparrot_training - Step 16281: {'lr': 0.0004886007780017557, 'samples': 8336384, 'steps': 16281, 'loss/train': 2.040410280227661} +03/04/2022 08:35:26 - INFO - codeparrot_training - Step 16282: {'lr': 0.0004885991937741506, 'samples': 8336896, 'steps': 16282, 'loss/train': 2.6690139770507812} +03/04/2022 08:35:28 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 08:35:31 - INFO - codeparrot_training - Step 16283: {'lr': 0.0004885976094390366, 'samples': 8337408, 'steps': 16283, 'loss/train': 2.343161106109619} +03/04/2022 08:35:34 - INFO - codeparrot_training - Step 16284: {'lr': 0.000488596024996414, 'samples': 8337920, 'steps': 16284, 'loss/train': 1.8221739530563354} +03/04/2022 08:35:36 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/04/2022 08:35:40 - INFO - codeparrot_training - Step 16285: {'lr': 0.0004885944404462838, 'samples': 8338432, 'steps': 16285, 'loss/train': 2.3313353061676025} +03/04/2022 08:35:43 - INFO - codeparrot_training - Step 16286: {'lr': 0.0004885928557886466, 'samples': 8338944, 'steps': 16286, 'loss/train': 1.553094506263733} +03/04/2022 08:35:46 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 08:35:48 - INFO - codeparrot_training - Step 16287: {'lr': 0.0004885912710235031, 'samples': 8339456, 'steps': 16287, 'loss/train': 1.7495057582855225} +03/04/2022 08:35:51 - INFO - codeparrot_training - Step 16288: {'lr': 0.0004885896861508541, 'samples': 8339968, 'steps': 16288, 'loss/train': 1.5911059379577637} +03/04/2022 08:35:54 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 08:35:57 - INFO - codeparrot_training - Step 16289: {'lr': 0.0004885881011707003, 'samples': 8340480, 'steps': 16289, 'loss/train': 1.7349072694778442} +03/04/2022 08:36:00 - INFO - codeparrot_training - Step 16290: {'lr': 0.0004885865160830422, 'samples': 8340992, 'steps': 16290, 'loss/train': 1.7129225730895996} +03/04/2022 08:36:02 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/04/2022 08:36:05 - INFO - codeparrot_training - Step 16291: {'lr': 0.0004885849308878809, 'samples': 8341504, 'steps': 16291, 'loss/train': 1.9480869770050049} +03/04/2022 08:36:09 - INFO - codeparrot_training - Step 16292: {'lr': 0.0004885833455852169, 'samples': 8342016, 'steps': 16292, 'loss/train': 1.8706939220428467} +03/04/2022 08:36:12 - INFO - codeparrot_training - Step 16293: {'lr': 0.0004885817601750509, 'samples': 8342528, 'steps': 16293, 'loss/train': 0.9647485017776489} +03/04/2022 08:36:12 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 08:36:17 - INFO - codeparrot_training - Step 16294: {'lr': 0.0004885801746573836, 'samples': 8343040, 'steps': 16294, 'loss/train': 1.9916930198669434} +03/04/2022 08:36:20 - INFO - codeparrot_training - Step 16295: {'lr': 0.0004885785890322158, 'samples': 8343552, 'steps': 16295, 'loss/train': 3.2068467140197754} +03/04/2022 08:36:21 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/04/2022 08:36:25 - INFO - codeparrot_training - Step 16296: {'lr': 0.0004885770032995482, 'samples': 8344064, 'steps': 16296, 'loss/train': 0.7710683345794678} +03/04/2022 08:36:29 - INFO - codeparrot_training - Step 16297: {'lr': 0.0004885754174593814, 'samples': 8344576, 'steps': 16297, 'loss/train': 1.5819766521453857} +03/04/2022 08:36:29 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/04/2022 08:36:34 - INFO - codeparrot_training - Step 16298: {'lr': 0.0004885738315117162, 'samples': 8345088, 'steps': 16298, 'loss/train': 1.5976943969726562} +03/04/2022 08:36:37 - INFO - codeparrot_training - Step 16299: {'lr': 0.0004885722454565534, 'samples': 8345600, 'steps': 16299, 'loss/train': 1.8697959184646606} +03/04/2022 08:36:37 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 08:36:42 - INFO - codeparrot_training - Step 16300: {'lr': 0.0004885706592938936, 'samples': 8346112, 'steps': 16300, 'loss/train': 2.616769313812256} +03/04/2022 08:36:45 - INFO - codeparrot_training - Step 16301: {'lr': 0.0004885690730237375, 'samples': 8346624, 'steps': 16301, 'loss/train': 1.8084639310836792} +03/04/2022 08:36:46 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 08:36:51 - INFO - codeparrot_training - Step 16302: {'lr': 0.0004885674866460858, 'samples': 8347136, 'steps': 16302, 'loss/train': 2.3747527599334717} +03/04/2022 08:36:54 - INFO - codeparrot_training - Step 16303: {'lr': 0.0004885659001609393, 'samples': 8347648, 'steps': 16303, 'loss/train': 1.0494694709777832} +03/04/2022 08:36:54 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 08:36:59 - INFO - codeparrot_training - Step 16304: {'lr': 0.0004885643135682987, 'samples': 8348160, 'steps': 16304, 'loss/train': 1.2134062051773071} +03/04/2022 08:37:02 - INFO - codeparrot_training - Step 16305: {'lr': 0.0004885627268681648, 'samples': 8348672, 'steps': 16305, 'loss/train': 1.5553817749023438} +03/04/2022 08:37:02 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/04/2022 08:37:08 - INFO - codeparrot_training - Step 16306: {'lr': 0.0004885611400605381, 'samples': 8349184, 'steps': 16306, 'loss/train': 0.7348319888114929} +03/04/2022 08:37:11 - INFO - codeparrot_training - Step 16307: {'lr': 0.0004885595531454195, 'samples': 8349696, 'steps': 16307, 'loss/train': 1.392432689666748} +03/04/2022 08:37:11 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 08:37:16 - INFO - codeparrot_training - Step 16308: {'lr': 0.0004885579661228097, 'samples': 8350208, 'steps': 16308, 'loss/train': 1.9520777463912964} +03/04/2022 08:37:19 - INFO - codeparrot_training - Step 16309: {'lr': 0.0004885563789927092, 'samples': 8350720, 'steps': 16309, 'loss/train': 2.1646621227264404} +03/04/2022 08:37:20 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 08:37:25 - INFO - codeparrot_training - Step 16310: {'lr': 0.0004885547917551189, 'samples': 8351232, 'steps': 16310, 'loss/train': 2.200105905532837} +03/04/2022 08:37:28 - INFO - codeparrot_training - Step 16311: {'lr': 0.0004885532044100396, 'samples': 8351744, 'steps': 16311, 'loss/train': 2.1152734756469727} +03/04/2022 08:37:31 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 08:37:34 - INFO - codeparrot_training - Step 16312: {'lr': 0.0004885516169574719, 'samples': 8352256, 'steps': 16312, 'loss/train': 1.840437412261963} +03/04/2022 08:37:37 - INFO - codeparrot_training - Step 16313: {'lr': 0.0004885500293974165, 'samples': 8352768, 'steps': 16313, 'loss/train': 0.478522390127182} +03/04/2022 08:37:39 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 08:37:42 - INFO - codeparrot_training - Step 16314: {'lr': 0.0004885484417298741, 'samples': 8353280, 'steps': 16314, 'loss/train': 2.347452163696289} +03/04/2022 08:37:45 - INFO - codeparrot_training - Step 16315: {'lr': 0.0004885468539548455, 'samples': 8353792, 'steps': 16315, 'loss/train': 1.9225770235061646} +03/04/2022 08:37:48 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 08:37:50 - INFO - codeparrot_training - Step 16316: {'lr': 0.0004885452660723313, 'samples': 8354304, 'steps': 16316, 'loss/train': 1.8872768878936768} +03/04/2022 08:37:54 - INFO - codeparrot_training - Step 16317: {'lr': 0.0004885436780823324, 'samples': 8354816, 'steps': 16317, 'loss/train': 1.2461172342300415} +03/04/2022 08:37:57 - INFO - codeparrot_training - Step 16318: {'lr': 0.0004885420899848492, 'samples': 8355328, 'steps': 16318, 'loss/train': 1.4898695945739746} +03/04/2022 08:37:57 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/04/2022 08:38:02 - INFO - codeparrot_training - Step 16319: {'lr': 0.0004885405017798828, 'samples': 8355840, 'steps': 16319, 'loss/train': 2.015568733215332} +03/04/2022 08:38:05 - INFO - codeparrot_training - Step 16320: {'lr': 0.0004885389134674337, 'samples': 8356352, 'steps': 16320, 'loss/train': 1.9870113134384155} +03/04/2022 08:38:05 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 08:38:11 - INFO - codeparrot_training - Step 16321: {'lr': 0.0004885373250475026, 'samples': 8356864, 'steps': 16321, 'loss/train': 0.9783211350440979} +03/04/2022 08:38:14 - INFO - codeparrot_training - Step 16322: {'lr': 0.0004885357365200903, 'samples': 8357376, 'steps': 16322, 'loss/train': 2.743126392364502} +03/04/2022 08:38:16 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 08:38:19 - INFO - codeparrot_training - Step 16323: {'lr': 0.0004885341478851975, 'samples': 8357888, 'steps': 16323, 'loss/train': 0.46866661310195923} +03/04/2022 08:38:23 - INFO - codeparrot_training - Step 16324: {'lr': 0.0004885325591428248, 'samples': 8358400, 'steps': 16324, 'loss/train': 1.577775239944458} +03/04/2022 08:38:24 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/04/2022 08:38:28 - INFO - codeparrot_training - Step 16325: {'lr': 0.0004885309702929731, 'samples': 8358912, 'steps': 16325, 'loss/train': 1.9194586277008057} +03/04/2022 08:38:31 - INFO - codeparrot_training - Step 16326: {'lr': 0.000488529381335643, 'samples': 8359424, 'steps': 16326, 'loss/train': 1.9354419708251953} +03/04/2022 08:38:33 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/04/2022 08:38:37 - INFO - codeparrot_training - Step 16327: {'lr': 0.0004885277922708352, 'samples': 8359936, 'steps': 16327, 'loss/train': 1.9065040349960327} +03/04/2022 08:38:40 - INFO - codeparrot_training - Step 16328: {'lr': 0.0004885262030985504, 'samples': 8360448, 'steps': 16328, 'loss/train': 2.034669876098633} +03/04/2022 08:38:42 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/04/2022 08:38:45 - INFO - codeparrot_training - Step 16329: {'lr': 0.0004885246138187896, 'samples': 8360960, 'steps': 16329, 'loss/train': 2.1502139568328857} +03/04/2022 08:38:48 - INFO - codeparrot_training - Step 16330: {'lr': 0.0004885230244315531, 'samples': 8361472, 'steps': 16330, 'loss/train': 1.6941417455673218} +03/04/2022 08:38:51 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/04/2022 08:38:54 - INFO - codeparrot_training - Step 16331: {'lr': 0.0004885214349368419, 'samples': 8361984, 'steps': 16331, 'loss/train': 2.103154182434082} +03/04/2022 08:38:57 - INFO - codeparrot_training - Step 16332: {'lr': 0.0004885198453346565, 'samples': 8362496, 'steps': 16332, 'loss/train': 2.4901154041290283} +03/04/2022 08:39:00 - INFO - codeparrot_training - Step 16333: {'lr': 0.0004885182556249978, 'samples': 8363008, 'steps': 16333, 'loss/train': 2.248621702194214} +03/04/2022 08:39:00 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 08:39:05 - INFO - codeparrot_training - Step 16334: {'lr': 0.0004885166658078666, 'samples': 8363520, 'steps': 16334, 'loss/train': 1.976453185081482} +03/04/2022 08:39:08 - INFO - codeparrot_training - Step 16335: {'lr': 0.0004885150758832632, 'samples': 8364032, 'steps': 16335, 'loss/train': 0.9693822264671326} +03/04/2022 08:39:08 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 08:39:14 - INFO - codeparrot_training - Step 16336: {'lr': 0.0004885134858511888, 'samples': 8364544, 'steps': 16336, 'loss/train': 2.021921157836914} +03/04/2022 08:39:17 - INFO - codeparrot_training - Step 16337: {'lr': 0.0004885118957116438, 'samples': 8365056, 'steps': 16337, 'loss/train': 2.153299570083618} +03/04/2022 08:39:17 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 08:39:22 - INFO - codeparrot_training - Step 16338: {'lr': 0.000488510305464629, 'samples': 8365568, 'steps': 16338, 'loss/train': 0.4131595194339752} +03/04/2022 08:39:25 - INFO - codeparrot_training - Step 16339: {'lr': 0.0004885087151101453, 'samples': 8366080, 'steps': 16339, 'loss/train': 2.1611082553863525} +03/04/2022 08:39:25 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 08:39:30 - INFO - codeparrot_training - Step 16340: {'lr': 0.0004885071246481931, 'samples': 8366592, 'steps': 16340, 'loss/train': 1.5361522436141968} +03/04/2022 08:39:34 - INFO - codeparrot_training - Step 16341: {'lr': 0.0004885055340787733, 'samples': 8367104, 'steps': 16341, 'loss/train': 2.841336488723755} +03/04/2022 08:39:34 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 08:39:39 - INFO - codeparrot_training - Step 16342: {'lr': 0.0004885039434018866, 'samples': 8367616, 'steps': 16342, 'loss/train': 2.0472989082336426} +03/04/2022 08:39:42 - INFO - codeparrot_training - Step 16343: {'lr': 0.0004885023526175337, 'samples': 8368128, 'steps': 16343, 'loss/train': 2.8409836292266846} +03/04/2022 08:39:42 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 08:39:47 - INFO - codeparrot_training - Step 16344: {'lr': 0.0004885007617257154, 'samples': 8368640, 'steps': 16344, 'loss/train': 2.11627197265625} +03/04/2022 08:39:51 - INFO - codeparrot_training - Step 16345: {'lr': 0.0004884991707264322, 'samples': 8369152, 'steps': 16345, 'loss/train': 1.9433125257492065} +03/04/2022 08:39:51 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 08:39:56 - INFO - codeparrot_training - Step 16346: {'lr': 0.000488497579619685, 'samples': 8369664, 'steps': 16346, 'loss/train': 1.5647889375686646} +03/04/2022 08:39:59 - INFO - codeparrot_training - Step 16347: {'lr': 0.0004884959884054745, 'samples': 8370176, 'steps': 16347, 'loss/train': 1.5386345386505127} +03/04/2022 08:39:59 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/04/2022 08:40:05 - INFO - codeparrot_training - Step 16348: {'lr': 0.0004884943970838014, 'samples': 8370688, 'steps': 16348, 'loss/train': 2.0312376022338867} +03/04/2022 08:40:08 - INFO - codeparrot_training - Step 16349: {'lr': 0.0004884928056546663, 'samples': 8371200, 'steps': 16349, 'loss/train': 1.971061110496521} +03/04/2022 08:40:08 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 08:40:13 - INFO - codeparrot_training - Step 16350: {'lr': 0.0004884912141180701, 'samples': 8371712, 'steps': 16350, 'loss/train': 2.3363723754882812} +03/04/2022 08:40:16 - INFO - codeparrot_training - Step 16351: {'lr': 0.0004884896224740136, 'samples': 8372224, 'steps': 16351, 'loss/train': 1.6670196056365967} +03/04/2022 08:40:16 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/04/2022 08:40:22 - INFO - codeparrot_training - Step 16352: {'lr': 0.0004884880307224972, 'samples': 8372736, 'steps': 16352, 'loss/train': 1.4225924015045166} +03/04/2022 08:40:25 - INFO - codeparrot_training - Step 16353: {'lr': 0.0004884864388635217, 'samples': 8373248, 'steps': 16353, 'loss/train': 0.1494947224855423} +03/04/2022 08:40:25 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/04/2022 08:40:30 - INFO - codeparrot_training - Step 16354: {'lr': 0.0004884848468970879, 'samples': 8373760, 'steps': 16354, 'loss/train': 1.8595839738845825} +03/04/2022 08:40:33 - INFO - codeparrot_training - Step 16355: {'lr': 0.0004884832548231966, 'samples': 8374272, 'steps': 16355, 'loss/train': 1.6779239177703857} +03/04/2022 08:40:33 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/04/2022 08:40:39 - INFO - codeparrot_training - Step 16356: {'lr': 0.0004884816626418484, 'samples': 8374784, 'steps': 16356, 'loss/train': 1.8837916851043701} +03/04/2022 08:40:42 - INFO - codeparrot_training - Step 16357: {'lr': 0.000488480070353044, 'samples': 8375296, 'steps': 16357, 'loss/train': 1.214759111404419} +03/04/2022 08:40:42 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/04/2022 08:40:47 - INFO - codeparrot_training - Step 16358: {'lr': 0.0004884784779567843, 'samples': 8375808, 'steps': 16358, 'loss/train': 2.0302274227142334} +03/04/2022 08:40:50 - INFO - codeparrot_training - Step 16359: {'lr': 0.0004884768854530696, 'samples': 8376320, 'steps': 16359, 'loss/train': 2.6051042079925537} +03/04/2022 08:40:51 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 08:40:56 - INFO - codeparrot_training - Step 16360: {'lr': 0.0004884752928419012, 'samples': 8376832, 'steps': 16360, 'loss/train': 2.216580867767334} +03/04/2022 08:40:59 - INFO - codeparrot_training - Step 16361: {'lr': 0.0004884737001232793, 'samples': 8377344, 'steps': 16361, 'loss/train': 2.3614847660064697} +03/04/2022 08:41:00 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 08:41:04 - INFO - codeparrot_training - Step 16362: {'lr': 0.000488472107297205, 'samples': 8377856, 'steps': 16362, 'loss/train': 1.8771743774414062} +03/04/2022 08:41:07 - INFO - codeparrot_training - Step 16363: {'lr': 0.0004884705143636788, 'samples': 8378368, 'steps': 16363, 'loss/train': 1.6833462715148926} +03/04/2022 08:41:08 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 08:41:12 - INFO - codeparrot_training - Step 16364: {'lr': 0.0004884689213227013, 'samples': 8378880, 'steps': 16364, 'loss/train': 1.9249987602233887} +03/04/2022 08:41:16 - INFO - codeparrot_training - Step 16365: {'lr': 0.0004884673281742736, 'samples': 8379392, 'steps': 16365, 'loss/train': 1.058294653892517} +03/04/2022 08:41:16 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 08:41:21 - INFO - codeparrot_training - Step 16366: {'lr': 0.0004884657349183961, 'samples': 8379904, 'steps': 16366, 'loss/train': 1.865576148033142} +03/04/2022 08:41:24 - INFO - codeparrot_training - Step 16367: {'lr': 0.0004884641415550696, 'samples': 8380416, 'steps': 16367, 'loss/train': 1.5700007677078247} +03/04/2022 08:41:25 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 08:41:29 - INFO - codeparrot_training - Step 16368: {'lr': 0.0004884625480842949, 'samples': 8380928, 'steps': 16368, 'loss/train': 2.2438154220581055} +03/04/2022 08:41:32 - INFO - codeparrot_training - Step 16369: {'lr': 0.0004884609545060726, 'samples': 8381440, 'steps': 16369, 'loss/train': 2.161803960800171} +03/04/2022 08:41:34 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 08:41:38 - INFO - codeparrot_training - Step 16370: {'lr': 0.0004884593608204035, 'samples': 8381952, 'steps': 16370, 'loss/train': 3.4671592712402344} +03/04/2022 08:41:41 - INFO - codeparrot_training - Step 16371: {'lr': 0.0004884577670272882, 'samples': 8382464, 'steps': 16371, 'loss/train': 2.4168713092803955} +03/04/2022 08:41:43 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 08:41:46 - INFO - codeparrot_training - Step 16372: {'lr': 0.0004884561731267278, 'samples': 8382976, 'steps': 16372, 'loss/train': 1.680180549621582} +03/04/2022 08:41:49 - INFO - codeparrot_training - Step 16373: {'lr': 0.0004884545791187224, 'samples': 8383488, 'steps': 16373, 'loss/train': 2.24761700630188} +03/04/2022 08:41:51 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/04/2022 08:41:55 - INFO - codeparrot_training - Step 16374: {'lr': 0.0004884529850032732, 'samples': 8384000, 'steps': 16374, 'loss/train': 1.6884914636611938} +03/04/2022 08:41:58 - INFO - codeparrot_training - Step 16375: {'lr': 0.0004884513907803808, 'samples': 8384512, 'steps': 16375, 'loss/train': 2.682286024093628} +03/04/2022 08:41:59 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/04/2022 08:42:03 - INFO - codeparrot_training - Step 16376: {'lr': 0.0004884497964500457, 'samples': 8385024, 'steps': 16376, 'loss/train': 1.6001795530319214} +03/04/2022 08:42:07 - INFO - codeparrot_training - Step 16377: {'lr': 0.000488448202012269, 'samples': 8385536, 'steps': 16377, 'loss/train': 2.0706160068511963} +03/04/2022 08:42:08 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 08:42:12 - INFO - codeparrot_training - Step 16378: {'lr': 0.0004884466074670512, 'samples': 8386048, 'steps': 16378, 'loss/train': 1.4148619174957275} +03/04/2022 08:42:15 - INFO - codeparrot_training - Step 16379: {'lr': 0.0004884450128143929, 'samples': 8386560, 'steps': 16379, 'loss/train': 1.5309103727340698} +03/04/2022 08:42:16 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/04/2022 08:42:20 - INFO - codeparrot_training - Step 16380: {'lr': 0.000488443418054295, 'samples': 8387072, 'steps': 16380, 'loss/train': 1.9036521911621094} +03/04/2022 08:42:23 - INFO - codeparrot_training - Step 16381: {'lr': 0.0004884418231867583, 'samples': 8387584, 'steps': 16381, 'loss/train': 1.7319591045379639} +03/04/2022 08:42:25 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 08:42:29 - INFO - codeparrot_training - Step 16382: {'lr': 0.0004884402282117833, 'samples': 8388096, 'steps': 16382, 'loss/train': 1.9059211015701294} +03/04/2022 08:42:32 - INFO - codeparrot_training - Step 16383: {'lr': 0.0004884386331293708, 'samples': 8388608, 'steps': 16383, 'loss/train': 2.3407557010650635} +03/04/2022 08:42:33 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 08:42:37 - INFO - codeparrot_training - Step 16384: {'lr': 0.0004884370379395215, 'samples': 8389120, 'steps': 16384, 'loss/train': 1.7239257097244263} +03/04/2022 08:42:40 - INFO - codeparrot_training - Step 16385: {'lr': 0.0004884354426422363, 'samples': 8389632, 'steps': 16385, 'loss/train': 2.0407824516296387} +03/04/2022 08:42:42 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 08:42:45 - INFO - codeparrot_training - Step 16386: {'lr': 0.0004884338472375156, 'samples': 8390144, 'steps': 16386, 'loss/train': 2.4947617053985596} +03/04/2022 08:42:49 - INFO - codeparrot_training - Step 16387: {'lr': 0.0004884322517253604, 'samples': 8390656, 'steps': 16387, 'loss/train': 2.1813197135925293} +03/04/2022 08:42:50 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 08:42:54 - INFO - codeparrot_training - Step 16388: {'lr': 0.0004884306561057713, 'samples': 8391168, 'steps': 16388, 'loss/train': 6.550569534301758} +03/04/2022 08:42:57 - INFO - codeparrot_training - Step 16389: {'lr': 0.000488429060378749, 'samples': 8391680, 'steps': 16389, 'loss/train': 1.762549877166748} +03/04/2022 08:43:00 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/04/2022 08:43:03 - INFO - codeparrot_training - Step 16390: {'lr': 0.0004884274645442942, 'samples': 8392192, 'steps': 16390, 'loss/train': 2.424325466156006} +03/04/2022 08:43:06 - INFO - codeparrot_training - Step 16391: {'lr': 0.0004884258686024077, 'samples': 8392704, 'steps': 16391, 'loss/train': 0.29353129863739014} +03/04/2022 08:43:08 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 08:43:11 - INFO - codeparrot_training - Step 16392: {'lr': 0.0004884242725530902, 'samples': 8393216, 'steps': 16392, 'loss/train': 1.0659098625183105} +03/04/2022 08:43:14 - INFO - codeparrot_training - Step 16393: {'lr': 0.0004884226763963423, 'samples': 8393728, 'steps': 16393, 'loss/train': 1.8349199295043945} +03/04/2022 08:43:17 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 08:43:20 - INFO - codeparrot_training - Step 16394: {'lr': 0.000488421080132165, 'samples': 8394240, 'steps': 16394, 'loss/train': 2.378765106201172} +03/04/2022 08:43:23 - INFO - codeparrot_training - Step 16395: {'lr': 0.0004884194837605587, 'samples': 8394752, 'steps': 16395, 'loss/train': 1.544532060623169} +03/04/2022 08:43:25 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 08:43:28 - INFO - codeparrot_training - Step 16396: {'lr': 0.0004884178872815243, 'samples': 8395264, 'steps': 16396, 'loss/train': 2.2379274368286133} +03/04/2022 08:43:31 - INFO - codeparrot_training - Step 16397: {'lr': 0.0004884162906950624, 'samples': 8395776, 'steps': 16397, 'loss/train': 2.1827073097229004} +03/04/2022 08:43:33 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 08:43:36 - INFO - codeparrot_training - Step 16398: {'lr': 0.000488414694001174, 'samples': 8396288, 'steps': 16398, 'loss/train': 1.7113568782806396} +03/04/2022 08:43:40 - INFO - codeparrot_training - Step 16399: {'lr': 0.0004884130971998595, 'samples': 8396800, 'steps': 16399, 'loss/train': 0.541576087474823} +03/04/2022 08:43:42 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/04/2022 08:43:45 - INFO - codeparrot_training - Step 16400: {'lr': 0.0004884115002911197, 'samples': 8397312, 'steps': 16400, 'loss/train': 2.4893033504486084} +03/04/2022 08:43:48 - INFO - codeparrot_training - Step 16401: {'lr': 0.0004884099032749554, 'samples': 8397824, 'steps': 16401, 'loss/train': 2.2197635173797607} +03/04/2022 08:43:50 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 08:43:53 - INFO - codeparrot_training - Step 16402: {'lr': 0.0004884083061513672, 'samples': 8398336, 'steps': 16402, 'loss/train': 1.6724162101745605} +03/04/2022 08:43:57 - INFO - codeparrot_training - Step 16403: {'lr': 0.0004884067089203559, 'samples': 8398848, 'steps': 16403, 'loss/train': 0.9342283606529236} +03/04/2022 08:43:59 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 08:44:02 - INFO - codeparrot_training - Step 16404: {'lr': 0.0004884051115819224, 'samples': 8399360, 'steps': 16404, 'loss/train': 1.2800562381744385} +03/04/2022 08:44:05 - INFO - codeparrot_training - Step 16405: {'lr': 0.000488403514136067, 'samples': 8399872, 'steps': 16405, 'loss/train': 2.3662519454956055} +03/04/2022 08:44:08 - INFO - codeparrot_training - Step 16406: {'lr': 0.0004884019165827909, 'samples': 8400384, 'steps': 16406, 'loss/train': 1.8137977123260498} +03/04/2022 08:44:08 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/04/2022 08:44:13 - INFO - codeparrot_training - Step 16407: {'lr': 0.0004884003189220945, 'samples': 8400896, 'steps': 16407, 'loss/train': 1.5275542736053467} +03/04/2022 08:44:16 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 08:44:19 - INFO - codeparrot_training - Step 16408: {'lr': 0.0004883987211539785, 'samples': 8401408, 'steps': 16408, 'loss/train': 2.0113325119018555} +03/04/2022 08:44:22 - INFO - codeparrot_training - Step 16409: {'lr': 0.0004883971232784438, 'samples': 8401920, 'steps': 16409, 'loss/train': 1.3769861459732056} +03/04/2022 08:44:24 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 08:44:27 - INFO - codeparrot_training - Step 16410: {'lr': 0.0004883955252954909, 'samples': 8402432, 'steps': 16410, 'loss/train': 2.0431456565856934} +03/04/2022 08:44:30 - INFO - codeparrot_training - Step 16411: {'lr': 0.0004883939272051208, 'samples': 8402944, 'steps': 16411, 'loss/train': 1.7873300313949585} +03/04/2022 08:44:32 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 08:44:35 - INFO - codeparrot_training - Step 16412: {'lr': 0.000488392329007334, 'samples': 8403456, 'steps': 16412, 'loss/train': 2.0017151832580566} +03/04/2022 08:44:39 - INFO - codeparrot_training - Step 16413: {'lr': 0.0004883907307021314, 'samples': 8403968, 'steps': 16413, 'loss/train': 1.756945013999939} +03/04/2022 08:44:41 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 08:44:44 - INFO - codeparrot_training - Step 16414: {'lr': 0.0004883891322895134, 'samples': 8404480, 'steps': 16414, 'loss/train': 1.899993896484375} +03/04/2022 08:44:47 - INFO - codeparrot_training - Step 16415: {'lr': 0.000488387533769481, 'samples': 8404992, 'steps': 16415, 'loss/train': 1.3593714237213135} +03/04/2022 08:44:50 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 08:44:53 - INFO - codeparrot_training - Step 16416: {'lr': 0.000488385935142035, 'samples': 8405504, 'steps': 16416, 'loss/train': 2.884122133255005} +03/04/2022 08:44:56 - INFO - codeparrot_training - Step 16417: {'lr': 0.0004883843364071759, 'samples': 8406016, 'steps': 16417, 'loss/train': 1.846139907836914} +03/04/2022 08:44:58 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 08:45:01 - INFO - codeparrot_training - Step 16418: {'lr': 0.0004883827375649045, 'samples': 8406528, 'steps': 16418, 'loss/train': 1.3730463981628418} +03/04/2022 08:45:05 - INFO - codeparrot_training - Step 16419: {'lr': 0.0004883811386152216, 'samples': 8407040, 'steps': 16419, 'loss/train': 1.8221787214279175} +03/04/2022 08:45:07 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/04/2022 08:45:10 - INFO - codeparrot_training - Step 16420: {'lr': 0.0004883795395581277, 'samples': 8407552, 'steps': 16420, 'loss/train': 1.4449278116226196} +03/04/2022 08:45:13 - INFO - codeparrot_training - Step 16421: {'lr': 0.0004883779403936237, 'samples': 8408064, 'steps': 16421, 'loss/train': 1.92012357711792} +03/04/2022 08:45:15 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 08:45:18 - INFO - codeparrot_training - Step 16422: {'lr': 0.0004883763411217103, 'samples': 8408576, 'steps': 16422, 'loss/train': 2.222801446914673} +03/04/2022 08:45:21 - INFO - codeparrot_training - Step 16423: {'lr': 0.0004883747417423882, 'samples': 8409088, 'steps': 16423, 'loss/train': 0.47974029183387756} +03/04/2022 08:45:23 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/04/2022 08:45:27 - INFO - codeparrot_training - Step 16424: {'lr': 0.000488373142255658, 'samples': 8409600, 'steps': 16424, 'loss/train': 3.604419708251953} +03/04/2022 08:45:30 - INFO - codeparrot_training - Step 16425: {'lr': 0.0004883715426615207, 'samples': 8410112, 'steps': 16425, 'loss/train': 1.943888545036316} +03/04/2022 08:45:32 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/04/2022 08:45:35 - INFO - codeparrot_training - Step 16426: {'lr': 0.0004883699429599768, 'samples': 8410624, 'steps': 16426, 'loss/train': 1.618390440940857} +03/04/2022 08:45:39 - INFO - codeparrot_training - Step 16427: {'lr': 0.0004883683431510272, 'samples': 8411136, 'steps': 16427, 'loss/train': 2.245054006576538} +03/04/2022 08:45:41 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 08:45:44 - INFO - codeparrot_training - Step 16428: {'lr': 0.0004883667432346723, 'samples': 8411648, 'steps': 16428, 'loss/train': 1.7984651327133179} +03/04/2022 08:45:47 - INFO - codeparrot_training - Step 16429: {'lr': 0.0004883651432109132, 'samples': 8412160, 'steps': 16429, 'loss/train': 1.4769923686981201} +03/04/2022 08:45:49 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 08:45:52 - INFO - codeparrot_training - Step 16430: {'lr': 0.0004883635430797502, 'samples': 8412672, 'steps': 16430, 'loss/train': 1.7652015686035156} +03/04/2022 08:45:55 - INFO - codeparrot_training - Step 16431: {'lr': 0.0004883619428411846, 'samples': 8413184, 'steps': 16431, 'loss/train': 1.9275730848312378} +03/04/2022 08:45:57 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 08:46:01 - INFO - codeparrot_training - Step 16432: {'lr': 0.0004883603424952165, 'samples': 8413696, 'steps': 16432, 'loss/train': 1.6035029888153076} +03/04/2022 08:46:04 - INFO - codeparrot_training - Step 16433: {'lr': 0.0004883587420418471, 'samples': 8414208, 'steps': 16433, 'loss/train': 1.8572572469711304} +03/04/2022 08:46:05 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 08:46:09 - INFO - codeparrot_training - Step 16434: {'lr': 0.0004883571414810769, 'samples': 8414720, 'steps': 16434, 'loss/train': 0.7688239216804504} +03/04/2022 08:46:12 - INFO - codeparrot_training - Step 16435: {'lr': 0.0004883555408129066, 'samples': 8415232, 'steps': 16435, 'loss/train': 1.0418535470962524} +03/04/2022 08:46:14 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 08:46:18 - INFO - codeparrot_training - Step 16436: {'lr': 0.0004883539400373369, 'samples': 8415744, 'steps': 16436, 'loss/train': 2.462855577468872} +03/04/2022 08:46:21 - INFO - codeparrot_training - Step 16437: {'lr': 0.0004883523391543687, 'samples': 8416256, 'steps': 16437, 'loss/train': 2.115964651107788} +03/04/2022 08:46:23 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/04/2022 08:46:26 - INFO - codeparrot_training - Step 16438: {'lr': 0.0004883507381640026, 'samples': 8416768, 'steps': 16438, 'loss/train': 1.6613762378692627} +03/04/2022 08:46:29 - INFO - codeparrot_training - Step 16439: {'lr': 0.0004883491370662393, 'samples': 8417280, 'steps': 16439, 'loss/train': 1.7432026863098145} +03/04/2022 08:46:31 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 08:46:35 - INFO - codeparrot_training - Step 16440: {'lr': 0.0004883475358610794, 'samples': 8417792, 'steps': 16440, 'loss/train': 2.375701904296875} +03/04/2022 08:46:38 - INFO - codeparrot_training - Step 16441: {'lr': 0.000488345934548524, 'samples': 8418304, 'steps': 16441, 'loss/train': 2.737980365753174} +03/04/2022 08:46:41 - INFO - codeparrot_training - Step 16442: {'lr': 0.0004883443331285736, 'samples': 8418816, 'steps': 16442, 'loss/train': 0.6280149817466736} +03/04/2022 08:46:42 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 08:46:46 - INFO - codeparrot_training - Step 16443: {'lr': 0.0004883427316012289, 'samples': 8419328, 'steps': 16443, 'loss/train': 1.4167916774749756} +03/04/2022 08:46:49 - INFO - codeparrot_training - Step 16444: {'lr': 0.0004883411299664906, 'samples': 8419840, 'steps': 16444, 'loss/train': 1.8453447818756104} +03/04/2022 08:46:50 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 08:46:55 - INFO - codeparrot_training - Step 16445: {'lr': 0.0004883395282243595, 'samples': 8420352, 'steps': 16445, 'loss/train': 2.0180883407592773} +03/04/2022 08:46:58 - INFO - codeparrot_training - Step 16446: {'lr': 0.0004883379263748363, 'samples': 8420864, 'steps': 16446, 'loss/train': 1.4337706565856934} +03/04/2022 08:46:58 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 08:47:03 - INFO - codeparrot_training - Step 16447: {'lr': 0.0004883363244179217, 'samples': 8421376, 'steps': 16447, 'loss/train': 2.104165554046631} +03/04/2022 08:47:06 - INFO - codeparrot_training - Step 16448: {'lr': 0.0004883347223536164, 'samples': 8421888, 'steps': 16448, 'loss/train': 2.71697998046875} +03/04/2022 08:47:06 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 08:47:12 - INFO - codeparrot_training - Step 16449: {'lr': 0.0004883331201819211, 'samples': 8422400, 'steps': 16449, 'loss/train': 1.981850028038025} +03/04/2022 08:47:15 - INFO - codeparrot_training - Step 16450: {'lr': 0.0004883315179028366, 'samples': 8422912, 'steps': 16450, 'loss/train': 1.8353919982910156} +03/04/2022 08:47:15 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 08:47:20 - INFO - codeparrot_training - Step 16451: {'lr': 0.0004883299155163636, 'samples': 8423424, 'steps': 16451, 'loss/train': 2.083240032196045} +03/04/2022 08:47:23 - INFO - codeparrot_training - Step 16452: {'lr': 0.0004883283130225029, 'samples': 8423936, 'steps': 16452, 'loss/train': 1.5761168003082275} +03/04/2022 08:47:23 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 08:47:28 - INFO - codeparrot_training - Step 16453: {'lr': 0.0004883267104212551, 'samples': 8424448, 'steps': 16453, 'loss/train': 2.1332712173461914} +03/04/2022 08:47:32 - INFO - codeparrot_training - Step 16454: {'lr': 0.0004883251077126209, 'samples': 8424960, 'steps': 16454, 'loss/train': 2.032949924468994} +03/04/2022 08:47:32 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 08:47:37 - INFO - codeparrot_training - Step 16455: {'lr': 0.0004883235048966011, 'samples': 8425472, 'steps': 16455, 'loss/train': 0.4261553883552551} +03/04/2022 08:47:40 - INFO - codeparrot_training - Step 16456: {'lr': 0.0004883219019731964, 'samples': 8425984, 'steps': 16456, 'loss/train': 1.5995630025863647} +03/04/2022 08:47:41 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/04/2022 08:47:45 - INFO - codeparrot_training - Step 16457: {'lr': 0.0004883202989424076, 'samples': 8426496, 'steps': 16457, 'loss/train': 1.059237003326416} +03/04/2022 08:47:49 - INFO - codeparrot_training - Step 16458: {'lr': 0.0004883186958042354, 'samples': 8427008, 'steps': 16458, 'loss/train': 0.44589293003082275} +03/04/2022 08:47:49 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/04/2022 08:47:54 - INFO - codeparrot_training - Step 16459: {'lr': 0.0004883170925586804, 'samples': 8427520, 'steps': 16459, 'loss/train': 1.8939913511276245} +03/04/2022 08:47:57 - INFO - codeparrot_training - Step 16460: {'lr': 0.0004883154892057433, 'samples': 8428032, 'steps': 16460, 'loss/train': 2.273163080215454} +03/04/2022 08:47:57 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 08:48:02 - INFO - codeparrot_training - Step 16461: {'lr': 0.000488313885745425, 'samples': 8428544, 'steps': 16461, 'loss/train': 1.6056065559387207} +03/04/2022 08:48:05 - INFO - codeparrot_training - Step 16462: {'lr': 0.0004883122821777261, 'samples': 8429056, 'steps': 16462, 'loss/train': 2.0975940227508545} +03/04/2022 08:48:06 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 08:48:11 - INFO - codeparrot_training - Step 16463: {'lr': 0.0004883106785026475, 'samples': 8429568, 'steps': 16463, 'loss/train': 1.7993580102920532} +03/04/2022 08:48:14 - INFO - codeparrot_training - Step 16464: {'lr': 0.0004883090747201897, 'samples': 8430080, 'steps': 16464, 'loss/train': 1.6430062055587769} +03/04/2022 08:48:14 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/04/2022 08:48:19 - INFO - codeparrot_training - Step 16465: {'lr': 0.0004883074708303534, 'samples': 8430592, 'steps': 16465, 'loss/train': 1.3656504154205322} +03/04/2022 08:48:22 - INFO - codeparrot_training - Step 16466: {'lr': 0.0004883058668331396, 'samples': 8431104, 'steps': 16466, 'loss/train': 1.9298224449157715} +03/04/2022 08:48:22 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 08:48:27 - INFO - codeparrot_training - Step 16467: {'lr': 0.0004883042627285488, 'samples': 8431616, 'steps': 16467, 'loss/train': 1.0262864828109741} +03/04/2022 08:48:30 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 08:48:33 - INFO - codeparrot_training - Step 16468: {'lr': 0.0004883026585165817, 'samples': 8432128, 'steps': 16468, 'loss/train': 1.9309629201889038} +03/04/2022 08:48:36 - INFO - codeparrot_training - Step 16469: {'lr': 0.0004883010541972392, 'samples': 8432640, 'steps': 16469, 'loss/train': 1.3404954671859741} +03/04/2022 08:48:39 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 08:48:41 - INFO - codeparrot_training - Step 16470: {'lr': 0.0004882994497705219, 'samples': 8433152, 'steps': 16470, 'loss/train': 1.471269965171814} +03/04/2022 08:48:44 - INFO - codeparrot_training - Step 16471: {'lr': 0.0004882978452364305, 'samples': 8433664, 'steps': 16471, 'loss/train': 1.9538277387619019} +03/04/2022 08:48:47 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 08:48:50 - INFO - codeparrot_training - Step 16472: {'lr': 0.0004882962405949658, 'samples': 8434176, 'steps': 16472, 'loss/train': 0.9498873949050903} +03/04/2022 08:48:53 - INFO - codeparrot_training - Step 16473: {'lr': 0.0004882946358461285, 'samples': 8434688, 'steps': 16473, 'loss/train': 1.5891410112380981} +03/04/2022 08:48:56 - INFO - codeparrot_training - Step 16474: {'lr': 0.0004882930309899192, 'samples': 8435200, 'steps': 16474, 'loss/train': 4.115705966949463} +03/04/2022 08:48:56 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 08:49:01 - INFO - codeparrot_training - Step 16475: {'lr': 0.000488291426026339, 'samples': 8435712, 'steps': 16475, 'loss/train': 2.1988816261291504} +03/04/2022 08:49:04 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 08:49:07 - INFO - codeparrot_training - Step 16476: {'lr': 0.0004882898209553881, 'samples': 8436224, 'steps': 16476, 'loss/train': 2.47982120513916} +03/04/2022 08:49:10 - INFO - codeparrot_training - Step 16477: {'lr': 0.0004882882157770676, 'samples': 8436736, 'steps': 16477, 'loss/train': 1.7204853296279907} +03/04/2022 08:49:13 - INFO - codeparrot_training - Step 16478: {'lr': 0.000488286610491378, 'samples': 8437248, 'steps': 16478, 'loss/train': 1.2006235122680664} +03/04/2022 08:49:14 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 08:49:19 - INFO - codeparrot_training - Step 16479: {'lr': 0.0004882850050983203, 'samples': 8437760, 'steps': 16479, 'loss/train': 2.713766098022461} +03/04/2022 08:49:22 - INFO - codeparrot_training - Step 16480: {'lr': 0.0004882833995978949, 'samples': 8438272, 'steps': 16480, 'loss/train': 1.9634824991226196} +03/04/2022 08:49:22 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 08:49:27 - INFO - codeparrot_training - Step 16481: {'lr': 0.0004882817939901027, 'samples': 8438784, 'steps': 16481, 'loss/train': 1.8109197616577148} +03/04/2022 08:49:30 - INFO - codeparrot_training - Step 16482: {'lr': 0.0004882801882749445, 'samples': 8439296, 'steps': 16482, 'loss/train': 2.3125808238983154} +03/04/2022 08:49:31 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 08:49:36 - INFO - codeparrot_training - Step 16483: {'lr': 0.0004882785824524209, 'samples': 8439808, 'steps': 16483, 'loss/train': 2.422185182571411} +03/04/2022 08:49:39 - INFO - codeparrot_training - Step 16484: {'lr': 0.0004882769765225326, 'samples': 8440320, 'steps': 16484, 'loss/train': 1.7352460622787476} +03/04/2022 08:49:40 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/04/2022 08:49:44 - INFO - codeparrot_training - Step 16485: {'lr': 0.00048827537048528035, 'samples': 8440832, 'steps': 16485, 'loss/train': 2.7356927394866943} +03/04/2022 08:49:47 - INFO - codeparrot_training - Step 16486: {'lr': 0.00048827376434066493, 'samples': 8441344, 'steps': 16486, 'loss/train': 1.5061273574829102} +03/04/2022 08:49:48 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/04/2022 08:49:53 - INFO - codeparrot_training - Step 16487: {'lr': 0.0004882721580886871, 'samples': 8441856, 'steps': 16487, 'loss/train': 1.4718307256698608} +03/04/2022 08:49:56 - INFO - codeparrot_training - Step 16488: {'lr': 0.00048827055172934744, 'samples': 8442368, 'steps': 16488, 'loss/train': 2.4316797256469727} +03/04/2022 08:49:57 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 08:50:01 - INFO - codeparrot_training - Step 16489: {'lr': 0.0004882689452626468, 'samples': 8442880, 'steps': 16489, 'loss/train': 1.7944515943527222} +03/04/2022 08:50:04 - INFO - codeparrot_training - Step 16490: {'lr': 0.00048826733868858577, 'samples': 8443392, 'steps': 16490, 'loss/train': 1.5502034425735474} +03/04/2022 08:50:05 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/04/2022 08:50:10 - INFO - codeparrot_training - Step 16491: {'lr': 0.00048826573200716516, 'samples': 8443904, 'steps': 16491, 'loss/train': 1.7131142616271973} +03/04/2022 08:50:13 - INFO - codeparrot_training - Step 16492: {'lr': 0.0004882641252183857, 'samples': 8444416, 'steps': 16492, 'loss/train': 1.7050395011901855} +03/04/2022 08:50:18 - INFO - codeparrot_training - Step 16493: {'lr': 0.0004882625183222481, 'samples': 8444928, 'steps': 16493, 'loss/train': 2.1461682319641113} +03/04/2022 08:50:21 - INFO - codeparrot_training - Step 16494: {'lr': 0.00048826091131875317, 'samples': 8445440, 'steps': 16494, 'loss/train': 2.159764289855957} +03/04/2022 08:50:22 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 08:50:26 - INFO - codeparrot_training - Step 16495: {'lr': 0.00048825930420790144, 'samples': 8445952, 'steps': 16495, 'loss/train': 1.6271209716796875} +03/04/2022 08:50:29 - INFO - codeparrot_training - Step 16496: {'lr': 0.0004882576969896938, 'samples': 8446464, 'steps': 16496, 'loss/train': 1.6771215200424194} +03/04/2022 08:50:30 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 08:50:35 - INFO - codeparrot_training - Step 16497: {'lr': 0.00048825608966413095, 'samples': 8446976, 'steps': 16497, 'loss/train': 2.129197120666504} +03/04/2022 08:50:38 - INFO - codeparrot_training - Step 16498: {'lr': 0.0004882544822312135, 'samples': 8447488, 'steps': 16498, 'loss/train': 0.7159848809242249} +03/04/2022 08:50:38 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/04/2022 08:50:43 - INFO - codeparrot_training - Step 16499: {'lr': 0.00048825287469094224, 'samples': 8448000, 'steps': 16499, 'loss/train': 0.7264172434806824} +03/04/2022 08:50:46 - INFO - codeparrot_training - Step 16500: {'lr': 0.000488251267043318, 'samples': 8448512, 'steps': 16500, 'loss/train': 1.967427372932434} +03/04/2022 08:50:47 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 08:50:52 - INFO - codeparrot_training - Step 16501: {'lr': 0.00048824965928834143, 'samples': 8449024, 'steps': 16501, 'loss/train': 1.1117445230484009} +03/04/2022 08:50:55 - INFO - codeparrot_training - Step 16502: {'lr': 0.0004882480514260131, 'samples': 8449536, 'steps': 16502, 'loss/train': 1.6806604862213135} +03/04/2022 08:50:55 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 08:51:00 - INFO - codeparrot_training - Step 16503: {'lr': 0.000488246443456334, 'samples': 8450048, 'steps': 16503, 'loss/train': 2.1147561073303223} +03/04/2022 08:51:03 - INFO - codeparrot_training - Step 16504: {'lr': 0.0004882448353793048, 'samples': 8450560, 'steps': 16504, 'loss/train': 2.4821555614471436} +03/04/2022 08:51:03 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 08:51:08 - INFO - codeparrot_training - Step 16505: {'lr': 0.000488243227194926, 'samples': 8451072, 'steps': 16505, 'loss/train': 1.6439998149871826} +03/04/2022 08:51:11 - INFO - codeparrot_training - Step 16506: {'lr': 0.00048824161890319854, 'samples': 8451584, 'steps': 16506, 'loss/train': 1.218070149421692} +03/04/2022 08:51:12 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 08:51:17 - INFO - codeparrot_training - Step 16507: {'lr': 0.00048824001050412304, 'samples': 8452096, 'steps': 16507, 'loss/train': 2.236164093017578} +03/04/2022 08:51:20 - INFO - codeparrot_training - Step 16508: {'lr': 0.0004882384019977003, 'samples': 8452608, 'steps': 16508, 'loss/train': 1.8140965700149536} +03/04/2022 08:51:21 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 08:51:25 - INFO - codeparrot_training - Step 16509: {'lr': 0.000488236793383931, 'samples': 8453120, 'steps': 16509, 'loss/train': 1.58786940574646} +03/04/2022 08:51:28 - INFO - codeparrot_training - Step 16510: {'lr': 0.00048823518466281586, 'samples': 8453632, 'steps': 16510, 'loss/train': 2.582559823989868} +03/04/2022 08:51:29 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 08:51:34 - INFO - codeparrot_training - Step 16511: {'lr': 0.0004882335758343557, 'samples': 8454144, 'steps': 16511, 'loss/train': 1.897495985031128} +03/04/2022 08:51:37 - INFO - codeparrot_training - Step 16512: {'lr': 0.0004882319668985511, 'samples': 8454656, 'steps': 16512, 'loss/train': 1.8930240869522095} +03/04/2022 08:51:42 - INFO - codeparrot_training - Step 16513: {'lr': 0.00048823035785540284, 'samples': 8455168, 'steps': 16513, 'loss/train': 2.1198575496673584} +03/04/2022 08:51:45 - INFO - codeparrot_training - Step 16514: {'lr': 0.0004882287487049117, 'samples': 8455680, 'steps': 16514, 'loss/train': 1.6999025344848633} +03/04/2022 08:51:46 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 08:51:51 - INFO - codeparrot_training - Step 16515: {'lr': 0.00048822713944707833, 'samples': 8456192, 'steps': 16515, 'loss/train': 1.9500010013580322} +03/04/2022 08:51:54 - INFO - codeparrot_training - Step 16516: {'lr': 0.0004882255300819035, 'samples': 8456704, 'steps': 16516, 'loss/train': 1.5312080383300781} +03/04/2022 08:51:55 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 08:51:59 - INFO - codeparrot_training - Step 16517: {'lr': 0.0004882239206093879, 'samples': 8457216, 'steps': 16517, 'loss/train': 0.8108397126197815} +03/04/2022 08:52:02 - INFO - codeparrot_training - Step 16518: {'lr': 0.0004882223110295323, 'samples': 8457728, 'steps': 16518, 'loss/train': 1.5769646167755127} +03/04/2022 08:52:03 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 08:52:08 - INFO - codeparrot_training - Step 16519: {'lr': 0.00048822070134233743, 'samples': 8458240, 'steps': 16519, 'loss/train': 1.5263577699661255} +03/04/2022 08:52:11 - INFO - codeparrot_training - Step 16520: {'lr': 0.000488219091547804, 'samples': 8458752, 'steps': 16520, 'loss/train': 2.1808791160583496} +03/04/2022 08:52:13 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 08:52:16 - INFO - codeparrot_training - Step 16521: {'lr': 0.0004882174816459326, 'samples': 8459264, 'steps': 16521, 'loss/train': 1.4515050649642944} +03/04/2022 08:52:19 - INFO - codeparrot_training - Step 16522: {'lr': 0.0004882158716367242, 'samples': 8459776, 'steps': 16522, 'loss/train': 1.5568034648895264} +03/04/2022 08:52:21 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 08:52:25 - INFO - codeparrot_training - Step 16523: {'lr': 0.0004882142615201793, 'samples': 8460288, 'steps': 16523, 'loss/train': 1.9983128309249878} +03/04/2022 08:52:28 - INFO - codeparrot_training - Step 16524: {'lr': 0.00048821265129629887, 'samples': 8460800, 'steps': 16524, 'loss/train': 1.3830703496932983} +03/04/2022 08:52:30 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 08:52:33 - INFO - codeparrot_training - Step 16525: {'lr': 0.0004882110409650834, 'samples': 8461312, 'steps': 16525, 'loss/train': 2.3797736167907715} +03/04/2022 08:52:36 - INFO - codeparrot_training - Step 16526: {'lr': 0.0004882094305265338, 'samples': 8461824, 'steps': 16526, 'loss/train': 1.6942509412765503} +03/04/2022 08:52:39 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/04/2022 08:52:42 - INFO - codeparrot_training - Step 16527: {'lr': 0.00048820781998065054, 'samples': 8462336, 'steps': 16527, 'loss/train': 1.725527048110962} +03/04/2022 08:52:45 - INFO - codeparrot_training - Step 16528: {'lr': 0.00048820620932743465, 'samples': 8462848, 'steps': 16528, 'loss/train': 1.5236940383911133} +03/04/2022 08:52:47 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 08:52:50 - INFO - codeparrot_training - Step 16529: {'lr': 0.0004882045985668867, 'samples': 8463360, 'steps': 16529, 'loss/train': 1.4523777961730957} +03/04/2022 08:52:53 - INFO - codeparrot_training - Step 16530: {'lr': 0.0004882029876990074, 'samples': 8463872, 'steps': 16530, 'loss/train': 1.6205475330352783} +03/04/2022 08:52:56 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/04/2022 08:52:58 - INFO - codeparrot_training - Step 16531: {'lr': 0.0004882013767237975, 'samples': 8464384, 'steps': 16531, 'loss/train': 2.1257123947143555} +03/04/2022 08:53:02 - INFO - codeparrot_training - Step 16532: {'lr': 0.0004881997656412578, 'samples': 8464896, 'steps': 16532, 'loss/train': 1.6770117282867432} +03/04/2022 08:53:04 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/04/2022 08:53:07 - INFO - codeparrot_training - Step 16533: {'lr': 0.0004881981544513889, 'samples': 8465408, 'steps': 16533, 'loss/train': 1.9519438743591309} +03/04/2022 08:53:10 - INFO - codeparrot_training - Step 16534: {'lr': 0.0004881965431541916, 'samples': 8465920, 'steps': 16534, 'loss/train': 1.8548449277877808} +03/04/2022 08:53:13 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 08:53:16 - INFO - codeparrot_training - Step 16535: {'lr': 0.0004881949317496667, 'samples': 8466432, 'steps': 16535, 'loss/train': 1.4767296314239502} +03/04/2022 08:53:19 - INFO - codeparrot_training - Step 16536: {'lr': 0.0004881933202378147, 'samples': 8466944, 'steps': 16536, 'loss/train': 1.932114601135254} +03/04/2022 08:53:21 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/04/2022 08:53:24 - INFO - codeparrot_training - Step 16537: {'lr': 0.0004881917086186365, 'samples': 8467456, 'steps': 16537, 'loss/train': 2.5938267707824707} +03/04/2022 08:53:27 - INFO - codeparrot_training - Step 16538: {'lr': 0.0004881900968921328, 'samples': 8467968, 'steps': 16538, 'loss/train': 1.5332279205322266} +03/04/2022 08:53:30 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 08:53:32 - INFO - codeparrot_training - Step 16539: {'lr': 0.00048818848505830436, 'samples': 8468480, 'steps': 16539, 'loss/train': 1.6837527751922607} +03/04/2022 08:53:36 - INFO - codeparrot_training - Step 16540: {'lr': 0.0004881868731171518, 'samples': 8468992, 'steps': 16540, 'loss/train': 3.549009323120117} +03/04/2022 08:53:38 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 08:53:41 - INFO - codeparrot_training - Step 16541: {'lr': 0.000488185261068676, 'samples': 8469504, 'steps': 16541, 'loss/train': 1.4544156789779663} +03/04/2022 08:53:44 - INFO - codeparrot_training - Step 16542: {'lr': 0.0004881836489128776, 'samples': 8470016, 'steps': 16542, 'loss/train': 1.245230793952942} +03/04/2022 08:53:46 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/04/2022 08:53:49 - INFO - codeparrot_training - Step 16543: {'lr': 0.00048818203664975727, 'samples': 8470528, 'steps': 16543, 'loss/train': 2.0431759357452393} +03/04/2022 08:53:52 - INFO - codeparrot_training - Step 16544: {'lr': 0.00048818042427931573, 'samples': 8471040, 'steps': 16544, 'loss/train': 1.1005769968032837} +03/04/2022 08:53:55 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 08:53:58 - INFO - codeparrot_training - Step 16545: {'lr': 0.00048817881180155385, 'samples': 8471552, 'steps': 16545, 'loss/train': 2.6497230529785156} +03/04/2022 08:54:01 - INFO - codeparrot_training - Step 16546: {'lr': 0.0004881771992164722, 'samples': 8472064, 'steps': 16546, 'loss/train': 2.312631845474243} +03/04/2022 08:54:03 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 08:54:06 - INFO - codeparrot_training - Step 16547: {'lr': 0.0004881755865240717, 'samples': 8472576, 'steps': 16547, 'loss/train': 1.629122018814087} +03/04/2022 08:54:09 - INFO - codeparrot_training - Step 16548: {'lr': 0.0004881739737243528, 'samples': 8473088, 'steps': 16548, 'loss/train': 4.586852073669434} +03/04/2022 08:54:11 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 08:54:14 - INFO - codeparrot_training - Step 16549: {'lr': 0.00048817236081731655, 'samples': 8473600, 'steps': 16549, 'loss/train': 2.1240968704223633} +03/04/2022 08:54:18 - INFO - codeparrot_training - Step 16550: {'lr': 0.0004881707478029634, 'samples': 8474112, 'steps': 16550, 'loss/train': 1.92020583152771} +03/04/2022 08:54:20 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 08:54:23 - INFO - codeparrot_training - Step 16551: {'lr': 0.0004881691346812942, 'samples': 8474624, 'steps': 16551, 'loss/train': 1.2693634033203125} +03/04/2022 08:54:26 - INFO - codeparrot_training - Step 16552: {'lr': 0.0004881675214523097, 'samples': 8475136, 'steps': 16552, 'loss/train': 0.9423460960388184} +03/04/2022 08:54:28 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/04/2022 08:54:31 - INFO - codeparrot_training - Step 16553: {'lr': 0.00048816590811601054, 'samples': 8475648, 'steps': 16553, 'loss/train': 1.7125828266143799} +03/04/2022 08:54:35 - INFO - codeparrot_training - Step 16554: {'lr': 0.0004881642946723975, 'samples': 8476160, 'steps': 16554, 'loss/train': 1.2231742143630981} +03/04/2022 08:54:36 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 08:54:40 - INFO - codeparrot_training - Step 16555: {'lr': 0.00048816268112147134, 'samples': 8476672, 'steps': 16555, 'loss/train': 2.664358139038086} +03/04/2022 08:54:43 - INFO - codeparrot_training - Step 16556: {'lr': 0.00048816106746323273, 'samples': 8477184, 'steps': 16556, 'loss/train': 0.9690961837768555} +03/04/2022 08:54:45 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 08:54:48 - INFO - codeparrot_training - Step 16557: {'lr': 0.00048815945369768245, 'samples': 8477696, 'steps': 16557, 'loss/train': 1.73697829246521} +03/04/2022 08:54:51 - INFO - codeparrot_training - Step 16558: {'lr': 0.00048815783982482115, 'samples': 8478208, 'steps': 16558, 'loss/train': 1.9005706310272217} +03/04/2022 08:54:54 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/04/2022 08:54:57 - INFO - codeparrot_training - Step 16559: {'lr': 0.0004881562258446496, 'samples': 8478720, 'steps': 16559, 'loss/train': 2.010918140411377} +03/04/2022 08:55:00 - INFO - codeparrot_training - Step 16560: {'lr': 0.00048815461175716855, 'samples': 8479232, 'steps': 16560, 'loss/train': 1.6496511697769165} +03/04/2022 08:55:02 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/04/2022 08:55:05 - INFO - codeparrot_training - Step 16561: {'lr': 0.00048815299756237873, 'samples': 8479744, 'steps': 16561, 'loss/train': 1.4427142143249512} +03/04/2022 08:55:08 - INFO - codeparrot_training - Step 16562: {'lr': 0.0004881513832602808, 'samples': 8480256, 'steps': 16562, 'loss/train': 2.1401188373565674} +03/04/2022 08:55:12 - INFO - codeparrot_training - Step 16563: {'lr': 0.0004881497688508756, 'samples': 8480768, 'steps': 16563, 'loss/train': 1.9297064542770386} +03/04/2022 08:55:12 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 08:55:17 - INFO - codeparrot_training - Step 16564: {'lr': 0.0004881481543341637, 'samples': 8481280, 'steps': 16564, 'loss/train': 2.005859851837158} +03/04/2022 08:55:20 - INFO - codeparrot_training - Step 16565: {'lr': 0.000488146539710146, 'samples': 8481792, 'steps': 16565, 'loss/train': 2.1767263412475586} +03/04/2022 08:55:22 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 08:55:26 - INFO - codeparrot_training - Step 16566: {'lr': 0.00048814492497882306, 'samples': 8482304, 'steps': 16566, 'loss/train': 1.6742664575576782} +03/04/2022 08:55:29 - INFO - codeparrot_training - Step 16567: {'lr': 0.00048814331014019577, 'samples': 8482816, 'steps': 16567, 'loss/train': 2.8731586933135986} +03/04/2022 08:55:30 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 08:55:34 - INFO - codeparrot_training - Step 16568: {'lr': 0.0004881416951942647, 'samples': 8483328, 'steps': 16568, 'loss/train': 5.4098005294799805} +03/04/2022 08:55:37 - INFO - codeparrot_training - Step 16569: {'lr': 0.0004881400801410307, 'samples': 8483840, 'steps': 16569, 'loss/train': 1.516476035118103} +03/04/2022 08:55:40 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 08:55:43 - INFO - codeparrot_training - Step 16570: {'lr': 0.0004881384649804945, 'samples': 8484352, 'steps': 16570, 'loss/train': 1.746759295463562} +03/04/2022 08:55:46 - INFO - codeparrot_training - Step 16571: {'lr': 0.0004881368497126567, 'samples': 8484864, 'steps': 16571, 'loss/train': 1.6414941549301147} +03/04/2022 08:55:48 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 08:55:51 - INFO - codeparrot_training - Step 16572: {'lr': 0.00048813523433751814, 'samples': 8485376, 'steps': 16572, 'loss/train': 1.994202733039856} +03/04/2022 08:55:54 - INFO - codeparrot_training - Step 16573: {'lr': 0.00048813361885507956, 'samples': 8485888, 'steps': 16573, 'loss/train': 1.460244059562683} +03/04/2022 08:55:56 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 08:56:00 - INFO - codeparrot_training - Step 16574: {'lr': 0.00048813200326534156, 'samples': 8486400, 'steps': 16574, 'loss/train': 2.621657133102417} +03/04/2022 08:56:03 - INFO - codeparrot_training - Step 16575: {'lr': 0.00048813038756830506, 'samples': 8486912, 'steps': 16575, 'loss/train': 1.9320424795150757} +03/04/2022 08:56:06 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/04/2022 08:56:08 - INFO - codeparrot_training - Step 16576: {'lr': 0.00048812877176397066, 'samples': 8487424, 'steps': 16576, 'loss/train': 1.4567440748214722} +03/04/2022 08:56:12 - INFO - codeparrot_training - Step 16577: {'lr': 0.00048812715585233905, 'samples': 8487936, 'steps': 16577, 'loss/train': 1.8638925552368164} +03/04/2022 08:56:14 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/04/2022 08:56:17 - INFO - codeparrot_training - Step 16578: {'lr': 0.000488125539833411, 'samples': 8488448, 'steps': 16578, 'loss/train': 1.1370714902877808} +03/04/2022 08:56:20 - INFO - codeparrot_training - Step 16579: {'lr': 0.0004881239237071873, 'samples': 8488960, 'steps': 16579, 'loss/train': 1.7270461320877075} +03/04/2022 08:56:23 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 08:56:25 - INFO - codeparrot_training - Step 16580: {'lr': 0.0004881223074736687, 'samples': 8489472, 'steps': 16580, 'loss/train': 2.0917625427246094} +03/04/2022 08:56:28 - INFO - codeparrot_training - Step 16581: {'lr': 0.00048812069113285573, 'samples': 8489984, 'steps': 16581, 'loss/train': 1.518120527267456} +03/04/2022 08:56:31 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/04/2022 08:56:34 - INFO - codeparrot_training - Step 16582: {'lr': 0.00048811907468474934, 'samples': 8490496, 'steps': 16582, 'loss/train': 0.1303931623697281} +03/04/2022 08:56:37 - INFO - codeparrot_training - Step 16583: {'lr': 0.00048811745812935015, 'samples': 8491008, 'steps': 16583, 'loss/train': 2.194061756134033} +03/04/2022 08:56:39 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/04/2022 08:56:42 - INFO - codeparrot_training - Step 16584: {'lr': 0.00048811584146665895, 'samples': 8491520, 'steps': 16584, 'loss/train': 1.5363976955413818} +03/04/2022 08:56:45 - INFO - codeparrot_training - Step 16585: {'lr': 0.0004881142246966763, 'samples': 8492032, 'steps': 16585, 'loss/train': 2.0598225593566895} +03/04/2022 08:56:48 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/04/2022 08:56:50 - INFO - codeparrot_training - Step 16586: {'lr': 0.00048811260781940317, 'samples': 8492544, 'steps': 16586, 'loss/train': 0.2607193887233734} +03/04/2022 08:56:54 - INFO - codeparrot_training - Step 16587: {'lr': 0.00048811099083484016, 'samples': 8493056, 'steps': 16587, 'loss/train': 1.5231043100357056} +03/04/2022 08:56:56 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 08:56:59 - INFO - codeparrot_training - Step 16588: {'lr': 0.000488109373742988, 'samples': 8493568, 'steps': 16588, 'loss/train': 2.4526803493499756} +03/04/2022 08:57:02 - INFO - codeparrot_training - Step 16589: {'lr': 0.0004881077565438474, 'samples': 8494080, 'steps': 16589, 'loss/train': 2.3064687252044678} +03/04/2022 08:57:04 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/04/2022 08:57:07 - INFO - codeparrot_training - Step 16590: {'lr': 0.0004881061392374192, 'samples': 8494592, 'steps': 16590, 'loss/train': 1.3833872079849243} +03/04/2022 08:57:10 - INFO - codeparrot_training - Step 16591: {'lr': 0.000488104521823704, 'samples': 8495104, 'steps': 16591, 'loss/train': 1.6452972888946533} +03/04/2022 08:57:13 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 08:57:16 - INFO - codeparrot_training - Step 16592: {'lr': 0.00048810290430270257, 'samples': 8495616, 'steps': 16592, 'loss/train': 2.212461233139038} +03/04/2022 08:57:19 - INFO - codeparrot_training - Step 16593: {'lr': 0.0004881012866744156, 'samples': 8496128, 'steps': 16593, 'loss/train': 2.3824303150177} +03/04/2022 08:57:21 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/04/2022 08:57:24 - INFO - codeparrot_training - Step 16594: {'lr': 0.00048809966893884396, 'samples': 8496640, 'steps': 16594, 'loss/train': 1.6391851902008057} +03/04/2022 08:57:27 - INFO - codeparrot_training - Step 16595: {'lr': 0.00048809805109598813, 'samples': 8497152, 'steps': 16595, 'loss/train': 1.965467929840088} +03/04/2022 08:57:29 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/04/2022 08:57:32 - INFO - codeparrot_training - Step 16596: {'lr': 0.0004880964331458492, 'samples': 8497664, 'steps': 16596, 'loss/train': 2.131965398788452} +03/04/2022 08:57:36 - INFO - codeparrot_training - Step 16597: {'lr': 0.0004880948150884276, 'samples': 8498176, 'steps': 16597, 'loss/train': 1.191604495048523} +03/04/2022 08:57:38 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 08:57:41 - INFO - codeparrot_training - Step 16598: {'lr': 0.00048809319692372406, 'samples': 8498688, 'steps': 16598, 'loss/train': 1.635105013847351} +03/04/2022 08:57:44 - INFO - codeparrot_training - Step 16599: {'lr': 0.0004880915786517395, 'samples': 8499200, 'steps': 16599, 'loss/train': 1.7494173049926758} +03/04/2022 08:57:46 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/04/2022 08:57:49 - INFO - codeparrot_training - Step 16600: {'lr': 0.00048808996027247453, 'samples': 8499712, 'steps': 16600, 'loss/train': 1.7936832904815674} +03/04/2022 08:57:52 - INFO - codeparrot_training - Step 16601: {'lr': 0.0004880883417859299, 'samples': 8500224, 'steps': 16601, 'loss/train': 1.9150307178497314} +03/04/2022 08:57:54 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/04/2022 08:57:58 - INFO - codeparrot_training - Step 16602: {'lr': 0.0004880867231921063, 'samples': 8500736, 'steps': 16602, 'loss/train': 2.179809093475342} +03/04/2022 08:58:01 - INFO - codeparrot_training - Step 16603: {'lr': 0.0004880851044910045, 'samples': 8501248, 'steps': 16603, 'loss/train': 1.3965831995010376} +03/04/2022 08:58:03 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 08:58:06 - INFO - codeparrot_training - Step 16604: {'lr': 0.0004880834856826253, 'samples': 8501760, 'steps': 16604, 'loss/train': 1.848528504371643} +03/04/2022 08:58:09 - INFO - codeparrot_training - Step 16605: {'lr': 0.0004880818667669693, 'samples': 8502272, 'steps': 16605, 'loss/train': 0.983249306678772} +03/04/2022 08:58:11 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 08:58:14 - INFO - codeparrot_training - Step 16606: {'lr': 0.00048808024774403726, 'samples': 8502784, 'steps': 16606, 'loss/train': 2.7971549034118652} +03/04/2022 08:58:18 - INFO - codeparrot_training - Step 16607: {'lr': 0.00048807862861382996, 'samples': 8503296, 'steps': 16607, 'loss/train': 2.3572897911071777} +03/04/2022 08:58:19 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 08:58:23 - INFO - codeparrot_training - Step 16608: {'lr': 0.0004880770093763481, 'samples': 8503808, 'steps': 16608, 'loss/train': 1.2899162769317627} +03/04/2022 08:58:26 - INFO - codeparrot_training - Step 16609: {'lr': 0.0004880753900315924, 'samples': 8504320, 'steps': 16609, 'loss/train': 1.4081403017044067} +03/04/2022 08:58:28 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 08:58:31 - INFO - codeparrot_training - Step 16610: {'lr': 0.00048807377057956365, 'samples': 8504832, 'steps': 16610, 'loss/train': 2.1993486881256104} +03/04/2022 08:58:35 - INFO - codeparrot_training - Step 16611: {'lr': 0.00048807215102026247, 'samples': 8505344, 'steps': 16611, 'loss/train': 1.9136772155761719} +03/04/2022 08:58:36 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/04/2022 08:58:40 - INFO - codeparrot_training - Step 16612: {'lr': 0.00048807053135368973, 'samples': 8505856, 'steps': 16612, 'loss/train': 1.2492166757583618} +03/04/2022 08:58:43 - INFO - codeparrot_training - Step 16613: {'lr': 0.00048806891157984604, 'samples': 8506368, 'steps': 16613, 'loss/train': 1.9695073366165161} +03/04/2022 08:58:45 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 08:58:48 - INFO - codeparrot_training - Step 16614: {'lr': 0.0004880672916987322, 'samples': 8506880, 'steps': 16614, 'loss/train': 2.2194902896881104} +03/04/2022 08:58:51 - INFO - codeparrot_training - Step 16615: {'lr': 0.0004880656717103489, 'samples': 8507392, 'steps': 16615, 'loss/train': 1.6845721006393433} +03/04/2022 08:58:57 - INFO - codeparrot_training - Step 16616: {'lr': 0.0004880640516146968, 'samples': 8507904, 'steps': 16616, 'loss/train': 2.425220012664795} +03/04/2022 08:59:00 - INFO - codeparrot_training - Step 16617: {'lr': 0.0004880624314117768, 'samples': 8508416, 'steps': 16617, 'loss/train': 2.359647274017334} +03/04/2022 08:59:02 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 08:59:05 - INFO - codeparrot_training - Step 16618: {'lr': 0.0004880608111015895, 'samples': 8508928, 'steps': 16618, 'loss/train': 1.6987509727478027} +03/04/2022 08:59:08 - INFO - codeparrot_training - Step 16619: {'lr': 0.00048805919068413574, 'samples': 8509440, 'steps': 16619, 'loss/train': 2.0077006816864014} +03/04/2022 08:59:10 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 08:59:13 - INFO - codeparrot_training - Step 16620: {'lr': 0.0004880575701594161, 'samples': 8509952, 'steps': 16620, 'loss/train': 2.5801820755004883} +03/04/2022 08:59:17 - INFO - codeparrot_training - Step 16621: {'lr': 0.0004880559495274315, 'samples': 8510464, 'steps': 16621, 'loss/train': 2.3096323013305664} +03/04/2022 08:59:19 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 08:59:22 - INFO - codeparrot_training - Step 16622: {'lr': 0.00048805432878818247, 'samples': 8510976, 'steps': 16622, 'loss/train': 1.8001234531402588} +03/04/2022 08:59:25 - INFO - codeparrot_training - Step 16623: {'lr': 0.0004880527079416698, 'samples': 8511488, 'steps': 16623, 'loss/train': 1.8345078229904175} +03/04/2022 08:59:27 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/04/2022 08:59:30 - INFO - codeparrot_training - Step 16624: {'lr': 0.00048805108698789435, 'samples': 8512000, 'steps': 16624, 'loss/train': 1.1190029382705688} +03/04/2022 08:59:34 - INFO - codeparrot_training - Step 16625: {'lr': 0.00048804946592685667, 'samples': 8512512, 'steps': 16625, 'loss/train': 1.680300235748291} +03/04/2022 08:59:36 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 08:59:39 - INFO - codeparrot_training - Step 16626: {'lr': 0.0004880478447585576, 'samples': 8513024, 'steps': 16626, 'loss/train': 2.4135780334472656} +03/04/2022 08:59:42 - INFO - codeparrot_training - Step 16627: {'lr': 0.00048804622348299785, 'samples': 8513536, 'steps': 16627, 'loss/train': 1.5653162002563477} +03/04/2022 08:59:45 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/04/2022 08:59:48 - INFO - codeparrot_training - Step 16628: {'lr': 0.0004880446021001782, 'samples': 8514048, 'steps': 16628, 'loss/train': 0.38516438007354736} +03/04/2022 08:59:51 - INFO - codeparrot_training - Step 16629: {'lr': 0.00048804298061009925, 'samples': 8514560, 'steps': 16629, 'loss/train': 2.3354785442352295} +03/04/2022 08:59:54 - INFO - codeparrot_training - Step 16630: {'lr': 0.0004880413590127619, 'samples': 8515072, 'steps': 16630, 'loss/train': 1.5391613245010376} +03/04/2022 08:59:54 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/04/2022 08:59:59 - INFO - codeparrot_training - Step 16631: {'lr': 0.0004880397373081666, 'samples': 8515584, 'steps': 16631, 'loss/train': 2.2083847522735596} +03/04/2022 09:00:02 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 09:00:05 - INFO - codeparrot_training - Step 16632: {'lr': 0.0004880381154963145, 'samples': 8516096, 'steps': 16632, 'loss/train': 1.8506433963775635} +03/04/2022 09:00:08 - INFO - codeparrot_training - Step 16633: {'lr': 0.0004880364935772059, 'samples': 8516608, 'steps': 16633, 'loss/train': 1.2671072483062744} +03/04/2022 09:00:10 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/04/2022 09:00:13 - INFO - codeparrot_training - Step 16634: {'lr': 0.00048803487155084184, 'samples': 8517120, 'steps': 16634, 'loss/train': 1.402799367904663} +03/04/2022 09:00:16 - INFO - codeparrot_training - Step 16635: {'lr': 0.00048803324941722295, 'samples': 8517632, 'steps': 16635, 'loss/train': 1.8695707321166992} +03/04/2022 09:00:18 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 09:00:21 - INFO - codeparrot_training - Step 16636: {'lr': 0.0004880316271763499, 'samples': 8518144, 'steps': 16636, 'loss/train': 1.6234724521636963} +03/04/2022 09:00:24 - INFO - codeparrot_training - Step 16637: {'lr': 0.0004880300048282235, 'samples': 8518656, 'steps': 16637, 'loss/train': 1.39134681224823} +03/04/2022 09:00:27 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/04/2022 09:00:30 - INFO - codeparrot_training - Step 16638: {'lr': 0.00048802838237284443, 'samples': 8519168, 'steps': 16638, 'loss/train': 2.4961256980895996} +03/04/2022 09:00:33 - INFO - codeparrot_training - Step 16639: {'lr': 0.0004880267598102135, 'samples': 8519680, 'steps': 16639, 'loss/train': 2.0822393894195557} +03/04/2022 09:00:35 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/04/2022 09:00:38 - INFO - codeparrot_training - Step 16640: {'lr': 0.0004880251371403313, 'samples': 8520192, 'steps': 16640, 'loss/train': 1.7616218328475952} +03/04/2022 09:00:41 - INFO - codeparrot_training - Step 16641: {'lr': 0.0004880235143631987, 'samples': 8520704, 'steps': 16641, 'loss/train': 2.0404610633850098} +03/04/2022 09:00:43 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/04/2022 09:00:47 - INFO - codeparrot_training - Step 16642: {'lr': 0.0004880218914788164, 'samples': 8521216, 'steps': 16642, 'loss/train': 1.8404872417449951} +03/04/2022 09:00:50 - INFO - codeparrot_training - Step 16643: {'lr': 0.00048802026848718505, 'samples': 8521728, 'steps': 16643, 'loss/train': 2.1120808124542236} +03/04/2022 09:00:52 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 09:00:55 - INFO - codeparrot_training - Step 16644: {'lr': 0.0004880186453883054, 'samples': 8522240, 'steps': 16644, 'loss/train': 1.665408730506897} +03/04/2022 09:00:58 - INFO - codeparrot_training - Step 16645: {'lr': 0.00048801702218217834, 'samples': 8522752, 'steps': 16645, 'loss/train': 1.1136960983276367} +03/04/2022 09:01:00 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 09:01:03 - INFO - codeparrot_training - Step 16646: {'lr': 0.0004880153988688044, 'samples': 8523264, 'steps': 16646, 'loss/train': 2.0179731845855713} +03/04/2022 09:01:07 - INFO - codeparrot_training - Step 16647: {'lr': 0.0004880137754481845, 'samples': 8523776, 'steps': 16647, 'loss/train': 1.9022858142852783} +03/04/2022 09:01:09 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 09:01:12 - INFO - codeparrot_training - Step 16648: {'lr': 0.0004880121519203191, 'samples': 8524288, 'steps': 16648, 'loss/train': 2.039890766143799} +03/04/2022 09:01:15 - INFO - codeparrot_training - Step 16649: {'lr': 0.0004880105282852092, 'samples': 8524800, 'steps': 16649, 'loss/train': 1.739603877067566} +03/04/2022 09:01:17 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 09:01:20 - INFO - codeparrot_training - Step 16650: {'lr': 0.0004880089045428554, 'samples': 8525312, 'steps': 16650, 'loss/train': 2.0189335346221924} +03/04/2022 09:01:23 - INFO - codeparrot_training - Step 16651: {'lr': 0.0004880072806932585, 'samples': 8525824, 'steps': 16651, 'loss/train': 2.2903056144714355} +03/04/2022 09:01:26 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 09:01:29 - INFO - codeparrot_training - Step 16652: {'lr': 0.00048800565673641917, 'samples': 8526336, 'steps': 16652, 'loss/train': 1.9041879177093506} +03/04/2022 09:01:32 - INFO - codeparrot_training - Step 16653: {'lr': 0.0004880040326723382, 'samples': 8526848, 'steps': 16653, 'loss/train': 1.887171983718872} +03/04/2022 09:01:35 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 09:01:37 - INFO - codeparrot_training - Step 16654: {'lr': 0.0004880024085010162, 'samples': 8527360, 'steps': 16654, 'loss/train': 2.095214605331421} +03/04/2022 09:01:40 - INFO - codeparrot_training - Step 16655: {'lr': 0.00048800078422245406, 'samples': 8527872, 'steps': 16655, 'loss/train': 1.6468513011932373} +03/04/2022 09:01:44 - INFO - codeparrot_training - Step 16656: {'lr': 0.0004879991598366524, 'samples': 8528384, 'steps': 16656, 'loss/train': 1.9055964946746826} +03/04/2022 09:01:44 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 09:01:49 - INFO - codeparrot_training - Step 16657: {'lr': 0.000487997535343612, 'samples': 8528896, 'steps': 16657, 'loss/train': 0.5104544758796692} +03/04/2022 09:01:52 - INFO - codeparrot_training - Step 16658: {'lr': 0.0004879959107433336, 'samples': 8529408, 'steps': 16658, 'loss/train': 1.3045573234558105} +03/04/2022 09:01:52 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/04/2022 09:01:57 - INFO - codeparrot_training - Step 16659: {'lr': 0.00048799428603581786, 'samples': 8529920, 'steps': 16659, 'loss/train': 1.5870933532714844} +03/04/2022 09:02:00 - INFO - codeparrot_training - Step 16660: {'lr': 0.0004879926612210656, 'samples': 8530432, 'steps': 16660, 'loss/train': 2.5221951007843018} +03/04/2022 09:02:01 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/04/2022 09:02:06 - INFO - codeparrot_training - Step 16661: {'lr': 0.0004879910362990775, 'samples': 8530944, 'steps': 16661, 'loss/train': 2.5674779415130615} +03/04/2022 09:02:09 - INFO - codeparrot_training - Step 16662: {'lr': 0.0004879894112698544, 'samples': 8531456, 'steps': 16662, 'loss/train': 1.7332818508148193} +03/04/2022 09:02:09 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 09:02:14 - INFO - codeparrot_training - Step 16663: {'lr': 0.0004879877861333969, 'samples': 8531968, 'steps': 16663, 'loss/train': 1.498223066329956} +03/04/2022 09:02:17 - INFO - codeparrot_training - Step 16664: {'lr': 0.00048798616088970573, 'samples': 8532480, 'steps': 16664, 'loss/train': 0.8574267029762268} +03/04/2022 09:02:17 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 09:02:23 - INFO - codeparrot_training - Step 16665: {'lr': 0.0004879845355387817, 'samples': 8532992, 'steps': 16665, 'loss/train': 2.1271824836730957} +03/04/2022 09:02:26 - INFO - codeparrot_training - Step 16666: {'lr': 0.00048798291008062553, 'samples': 8533504, 'steps': 16666, 'loss/train': 1.761151909828186} +03/04/2022 09:02:26 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 09:02:31 - INFO - codeparrot_training - Step 16667: {'lr': 0.0004879812845152379, 'samples': 8534016, 'steps': 16667, 'loss/train': 1.2976549863815308} +03/04/2022 09:02:34 - INFO - codeparrot_training - Step 16668: {'lr': 0.0004879796588426195, 'samples': 8534528, 'steps': 16668, 'loss/train': 2.0821914672851562} +03/04/2022 09:02:35 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 09:02:40 - INFO - codeparrot_training - Step 16669: {'lr': 0.0004879780330627713, 'samples': 8535040, 'steps': 16669, 'loss/train': 1.6671078205108643} +03/04/2022 09:02:43 - INFO - codeparrot_training - Step 16670: {'lr': 0.0004879764071756938, 'samples': 8535552, 'steps': 16670, 'loss/train': 1.6932450532913208} +03/04/2022 09:02:43 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/04/2022 09:02:48 - INFO - codeparrot_training - Step 16671: {'lr': 0.00048797478118138777, 'samples': 8536064, 'steps': 16671, 'loss/train': 1.9135266542434692} +03/04/2022 09:02:51 - INFO - codeparrot_training - Step 16672: {'lr': 0.000487973155079854, 'samples': 8536576, 'steps': 16672, 'loss/train': 2.252218723297119} +03/04/2022 09:02:52 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 09:02:56 - INFO - codeparrot_training - Step 16673: {'lr': 0.0004879715288710932, 'samples': 8537088, 'steps': 16673, 'loss/train': 0.8685274720191956} +03/04/2022 09:03:00 - INFO - codeparrot_training - Step 16674: {'lr': 0.0004879699025551061, 'samples': 8537600, 'steps': 16674, 'loss/train': 1.9682334661483765} +03/04/2022 09:03:00 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 09:03:05 - INFO - codeparrot_training - Step 16675: {'lr': 0.0004879682761318934, 'samples': 8538112, 'steps': 16675, 'loss/train': 2.4010026454925537} +03/04/2022 09:03:08 - INFO - codeparrot_training - Step 16676: {'lr': 0.00048796664960145596, 'samples': 8538624, 'steps': 16676, 'loss/train': 1.5005769729614258} +03/04/2022 09:03:08 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 09:03:14 - INFO - codeparrot_training - Step 16677: {'lr': 0.00048796502296379437, 'samples': 8539136, 'steps': 16677, 'loss/train': 1.351193904876709} +03/04/2022 09:03:17 - INFO - codeparrot_training - Step 16678: {'lr': 0.0004879633962189094, 'samples': 8539648, 'steps': 16678, 'loss/train': 2.5455265045166016} +03/04/2022 09:03:17 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 09:03:22 - INFO - codeparrot_training - Step 16679: {'lr': 0.0004879617693668018, 'samples': 8540160, 'steps': 16679, 'loss/train': 1.5512962341308594} +03/04/2022 09:03:25 - INFO - codeparrot_training - Step 16680: {'lr': 0.00048796014240747227, 'samples': 8540672, 'steps': 16680, 'loss/train': 1.5147302150726318} +03/04/2022 09:03:27 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 09:03:31 - INFO - codeparrot_training - Step 16681: {'lr': 0.0004879585153409216, 'samples': 8541184, 'steps': 16681, 'loss/train': 1.8735524415969849} +03/04/2022 09:03:34 - INFO - codeparrot_training - Step 16682: {'lr': 0.0004879568881671505, 'samples': 8541696, 'steps': 16682, 'loss/train': 2.0253565311431885} +03/04/2022 09:03:35 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 09:03:39 - INFO - codeparrot_training - Step 16683: {'lr': 0.0004879552608861597, 'samples': 8542208, 'steps': 16683, 'loss/train': 2.9551174640655518} +03/04/2022 09:03:42 - INFO - codeparrot_training - Step 16684: {'lr': 0.00048795363349794996, 'samples': 8542720, 'steps': 16684, 'loss/train': 1.4628971815109253} +03/04/2022 09:03:44 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 09:03:48 - INFO - codeparrot_training - Step 16685: {'lr': 0.00048795200600252193, 'samples': 8543232, 'steps': 16685, 'loss/train': 1.9313147068023682} +03/04/2022 09:03:51 - INFO - codeparrot_training - Step 16686: {'lr': 0.00048795037839987644, 'samples': 8543744, 'steps': 16686, 'loss/train': 1.8491053581237793} +03/04/2022 09:03:52 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/04/2022 09:03:56 - INFO - codeparrot_training - Step 16687: {'lr': 0.0004879487506900141, 'samples': 8544256, 'steps': 16687, 'loss/train': 4.9222917556762695} +03/04/2022 09:03:59 - INFO - codeparrot_training - Step 16688: {'lr': 0.0004879471228729358, 'samples': 8544768, 'steps': 16688, 'loss/train': 1.630972146987915} +03/04/2022 09:04:01 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/04/2022 09:04:04 - INFO - codeparrot_training - Step 16689: {'lr': 0.0004879454949486422, 'samples': 8545280, 'steps': 16689, 'loss/train': 1.4569423198699951} +03/04/2022 09:04:08 - INFO - codeparrot_training - Step 16690: {'lr': 0.000487943866917134, 'samples': 8545792, 'steps': 16690, 'loss/train': 2.64225697517395} +03/04/2022 09:04:09 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 09:04:13 - INFO - codeparrot_training - Step 16691: {'lr': 0.00048794223877841197, 'samples': 8546304, 'steps': 16691, 'loss/train': 1.6660428047180176} +03/04/2022 09:04:16 - INFO - codeparrot_training - Step 16692: {'lr': 0.00048794061053247686, 'samples': 8546816, 'steps': 16692, 'loss/train': 1.0559406280517578} +03/04/2022 09:04:19 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/04/2022 09:04:22 - INFO - codeparrot_training - Step 16693: {'lr': 0.0004879389821793294, 'samples': 8547328, 'steps': 16693, 'loss/train': 2.074751377105713} +03/04/2022 09:04:25 - INFO - codeparrot_training - Step 16694: {'lr': 0.00048793735371897027, 'samples': 8547840, 'steps': 16694, 'loss/train': 2.5704195499420166} +03/04/2022 09:04:27 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/04/2022 09:04:30 - INFO - codeparrot_training - Step 16695: {'lr': 0.00048793572515140024, 'samples': 8548352, 'steps': 16695, 'loss/train': 1.7796664237976074} +03/04/2022 09:04:33 - INFO - codeparrot_training - Step 16696: {'lr': 0.00048793409647662, 'samples': 8548864, 'steps': 16696, 'loss/train': 2.454838275909424} +03/04/2022 09:04:36 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 09:04:39 - INFO - codeparrot_training - Step 16697: {'lr': 0.0004879324676946304, 'samples': 8549376, 'steps': 16697, 'loss/train': 2.831484079360962} +03/04/2022 09:04:42 - INFO - codeparrot_training - Step 16698: {'lr': 0.0004879308388054321, 'samples': 8549888, 'steps': 16698, 'loss/train': 1.5918872356414795} +03/04/2022 09:04:44 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 09:04:47 - INFO - codeparrot_training - Step 16699: {'lr': 0.0004879292098090258, 'samples': 8550400, 'steps': 16699, 'loss/train': 2.4023540019989014} +03/04/2022 09:04:50 - INFO - codeparrot_training - Step 16700: {'lr': 0.00048792758070541234, 'samples': 8550912, 'steps': 16700, 'loss/train': 1.765406608581543} +03/04/2022 09:04:53 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 09:04:56 - INFO - codeparrot_training - Step 16701: {'lr': 0.00048792595149459226, 'samples': 8551424, 'steps': 16701, 'loss/train': 0.5077486634254456} +03/04/2022 09:04:59 - INFO - codeparrot_training - Step 16702: {'lr': 0.0004879243221765665, 'samples': 8551936, 'steps': 16702, 'loss/train': 1.7503204345703125} +03/04/2022 09:05:02 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/04/2022 09:05:04 - INFO - codeparrot_training - Step 16703: {'lr': 0.00048792269275133574, 'samples': 8552448, 'steps': 16703, 'loss/train': 2.37258243560791} +03/04/2022 09:05:07 - INFO - codeparrot_training - Step 16704: {'lr': 0.0004879210632189006, 'samples': 8552960, 'steps': 16704, 'loss/train': 2.250486135482788} +03/04/2022 09:05:11 - INFO - codeparrot_training - Step 16705: {'lr': 0.0004879194335792619, 'samples': 8553472, 'steps': 16705, 'loss/train': 0.5273582935333252} +03/04/2022 09:05:11 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 09:05:16 - INFO - codeparrot_training - Step 16706: {'lr': 0.0004879178038324205, 'samples': 8553984, 'steps': 16706, 'loss/train': 2.17018723487854} +03/04/2022 09:05:19 - INFO - codeparrot_training - Step 16707: {'lr': 0.0004879161739783769, 'samples': 8554496, 'steps': 16707, 'loss/train': 2.2166433334350586} +03/04/2022 09:05:20 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/04/2022 09:05:24 - INFO - codeparrot_training - Step 16708: {'lr': 0.00048791454401713195, 'samples': 8555008, 'steps': 16708, 'loss/train': 2.136784553527832} +03/04/2022 09:05:28 - INFO - codeparrot_training - Step 16709: {'lr': 0.00048791291394868644, 'samples': 8555520, 'steps': 16709, 'loss/train': 2.529398202896118} +03/04/2022 09:05:28 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 09:05:33 - INFO - codeparrot_training - Step 16710: {'lr': 0.000487911283773041, 'samples': 8556032, 'steps': 16710, 'loss/train': 2.0423038005828857} +03/04/2022 09:05:36 - INFO - codeparrot_training - Step 16711: {'lr': 0.0004879096534901964, 'samples': 8556544, 'steps': 16711, 'loss/train': 3.4875011444091797} +03/04/2022 09:05:37 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 09:05:41 - INFO - codeparrot_training - Step 16712: {'lr': 0.00048790802310015336, 'samples': 8557056, 'steps': 16712, 'loss/train': 1.6342641115188599} +03/04/2022 09:05:44 - INFO - codeparrot_training - Step 16713: {'lr': 0.0004879063926029127, 'samples': 8557568, 'steps': 16713, 'loss/train': 1.3797892332077026} +03/04/2022 09:05:45 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/04/2022 09:05:50 - INFO - codeparrot_training - Step 16714: {'lr': 0.00048790476199847506, 'samples': 8558080, 'steps': 16714, 'loss/train': 2.3978869915008545} +03/04/2022 09:05:53 - INFO - codeparrot_training - Step 16715: {'lr': 0.0004879031312868412, 'samples': 8558592, 'steps': 16715, 'loss/train': 1.9270598888397217} +03/04/2022 09:05:53 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 09:05:58 - INFO - codeparrot_training - Step 16716: {'lr': 0.00048790150046801187, 'samples': 8559104, 'steps': 16716, 'loss/train': 2.549328327178955} +03/04/2022 09:06:01 - INFO - codeparrot_training - Step 16717: {'lr': 0.0004878998695419877, 'samples': 8559616, 'steps': 16717, 'loss/train': 1.8845922946929932} +03/04/2022 09:06:02 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 09:06:06 - INFO - codeparrot_training - Step 16718: {'lr': 0.0004878982385087697, 'samples': 8560128, 'steps': 16718, 'loss/train': 2.2903623580932617} +03/04/2022 09:06:10 - INFO - codeparrot_training - Step 16719: {'lr': 0.0004878966073683583, 'samples': 8560640, 'steps': 16719, 'loss/train': 0.9374154210090637} +03/04/2022 09:06:10 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/04/2022 09:06:15 - INFO - codeparrot_training - Step 16720: {'lr': 0.0004878949761207544, 'samples': 8561152, 'steps': 16720, 'loss/train': 1.3983957767486572} +03/04/2022 09:06:18 - INFO - codeparrot_training - Step 16721: {'lr': 0.0004878933447659587, 'samples': 8561664, 'steps': 16721, 'loss/train': 2.1902999877929688} +03/04/2022 09:06:19 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 09:06:23 - INFO - codeparrot_training - Step 16722: {'lr': 0.0004878917133039719, 'samples': 8562176, 'steps': 16722, 'loss/train': 2.1337084770202637} +03/04/2022 09:06:26 - INFO - codeparrot_training - Step 16723: {'lr': 0.00048789008173479476, 'samples': 8562688, 'steps': 16723, 'loss/train': 1.9695619344711304} +03/04/2022 09:06:27 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/04/2022 09:06:32 - INFO - codeparrot_training - Step 16724: {'lr': 0.0004878884500584281, 'samples': 8563200, 'steps': 16724, 'loss/train': 1.370527744293213} +03/04/2022 09:06:35 - INFO - codeparrot_training - Step 16725: {'lr': 0.0004878868182748725, 'samples': 8563712, 'steps': 16725, 'loss/train': 0.3641508221626282} +03/04/2022 09:06:36 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 09:06:40 - INFO - codeparrot_training - Step 16726: {'lr': 0.0004878851863841287, 'samples': 8564224, 'steps': 16726, 'loss/train': 1.5229601860046387} +03/04/2022 09:06:43 - INFO - codeparrot_training - Step 16727: {'lr': 0.00048788355438619764, 'samples': 8564736, 'steps': 16727, 'loss/train': 1.4827646017074585} +03/04/2022 09:06:44 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 09:06:49 - INFO - codeparrot_training - Step 16728: {'lr': 0.00048788192228107986, 'samples': 8565248, 'steps': 16728, 'loss/train': 2.850599765777588} +03/04/2022 09:06:52 - INFO - codeparrot_training - Step 16729: {'lr': 0.00048788029006877623, 'samples': 8565760, 'steps': 16729, 'loss/train': 2.25079607963562} +03/04/2022 09:06:52 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 09:06:57 - INFO - codeparrot_training - Step 16730: {'lr': 0.0004878786577492873, 'samples': 8566272, 'steps': 16730, 'loss/train': 1.6387897729873657} +03/04/2022 09:07:00 - INFO - codeparrot_training - Step 16731: {'lr': 0.00048787702532261396, 'samples': 8566784, 'steps': 16731, 'loss/train': 2.0271520614624023} +03/04/2022 09:07:01 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 09:07:05 - INFO - codeparrot_training - Step 16732: {'lr': 0.0004878753927887569, 'samples': 8567296, 'steps': 16732, 'loss/train': 2.5411384105682373} +03/04/2022 09:07:09 - INFO - codeparrot_training - Step 16733: {'lr': 0.0004878737601477169, 'samples': 8567808, 'steps': 16733, 'loss/train': 2.135249376296997} +03/04/2022 09:07:09 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 09:07:14 - INFO - codeparrot_training - Step 16734: {'lr': 0.0004878721273994946, 'samples': 8568320, 'steps': 16734, 'loss/train': 1.8367668390274048} +03/04/2022 09:07:17 - INFO - codeparrot_training - Step 16735: {'lr': 0.00048787049454409085, 'samples': 8568832, 'steps': 16735, 'loss/train': 1.0575274229049683} +03/04/2022 09:07:18 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/04/2022 09:07:22 - INFO - codeparrot_training - Step 16736: {'lr': 0.0004878688615815063, 'samples': 8569344, 'steps': 16736, 'loss/train': 1.688331127166748} +03/04/2022 09:07:25 - INFO - codeparrot_training - Step 16737: {'lr': 0.0004878672285117417, 'samples': 8569856, 'steps': 16737, 'loss/train': 2.2730164527893066} +03/04/2022 09:07:26 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 09:07:31 - INFO - codeparrot_training - Step 16738: {'lr': 0.0004878655953347978, 'samples': 8570368, 'steps': 16738, 'loss/train': 1.7579344511032104} +03/04/2022 09:07:34 - INFO - codeparrot_training - Step 16739: {'lr': 0.0004878639620506753, 'samples': 8570880, 'steps': 16739, 'loss/train': 2.4228334426879883} +03/04/2022 09:07:34 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 09:07:39 - INFO - codeparrot_training - Step 16740: {'lr': 0.00048786232865937504, 'samples': 8571392, 'steps': 16740, 'loss/train': 1.3742014169692993} +03/04/2022 09:07:42 - INFO - codeparrot_training - Step 16741: {'lr': 0.0004878606951608976, 'samples': 8571904, 'steps': 16741, 'loss/train': 1.9895243644714355} +03/04/2022 09:07:43 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/04/2022 09:07:48 - INFO - codeparrot_training - Step 16742: {'lr': 0.00048785906155524386, 'samples': 8572416, 'steps': 16742, 'loss/train': 1.8570005893707275} +03/04/2022 09:07:51 - INFO - codeparrot_training - Step 16743: {'lr': 0.0004878574278424145, 'samples': 8572928, 'steps': 16743, 'loss/train': 2.195761203765869} +03/04/2022 09:07:51 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 09:07:56 - INFO - codeparrot_training - Step 16744: {'lr': 0.0004878557940224102, 'samples': 8573440, 'steps': 16744, 'loss/train': 1.3034437894821167} +03/04/2022 09:07:59 - INFO - codeparrot_training - Step 16745: {'lr': 0.0004878541600952318, 'samples': 8573952, 'steps': 16745, 'loss/train': 2.34202241897583} +03/04/2022 09:08:00 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 09:08:05 - INFO - codeparrot_training - Step 16746: {'lr': 0.00048785252606087996, 'samples': 8574464, 'steps': 16746, 'loss/train': 1.6847962141036987} +03/04/2022 09:08:08 - INFO - codeparrot_training - Step 16747: {'lr': 0.0004878508919193555, 'samples': 8574976, 'steps': 16747, 'loss/train': 1.8350378274917603} +03/04/2022 09:08:09 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 09:08:13 - INFO - codeparrot_training - Step 16748: {'lr': 0.000487849257670659, 'samples': 8575488, 'steps': 16748, 'loss/train': 1.8376457691192627} +03/04/2022 09:08:16 - INFO - codeparrot_training - Step 16749: {'lr': 0.0004878476233147914, 'samples': 8576000, 'steps': 16749, 'loss/train': 2.877941370010376} +03/04/2022 09:08:17 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 09:08:22 - INFO - codeparrot_training - Step 16750: {'lr': 0.00048784598885175324, 'samples': 8576512, 'steps': 16750, 'loss/train': 1.2408077716827393} +03/04/2022 09:08:25 - INFO - codeparrot_training - Step 16751: {'lr': 0.00048784435428154537, 'samples': 8577024, 'steps': 16751, 'loss/train': 1.8337435722351074} +03/04/2022 09:08:26 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 09:08:30 - INFO - codeparrot_training - Step 16752: {'lr': 0.0004878427196041686, 'samples': 8577536, 'steps': 16752, 'loss/train': 2.547670841217041} +03/04/2022 09:08:33 - INFO - codeparrot_training - Step 16753: {'lr': 0.00048784108481962347, 'samples': 8578048, 'steps': 16753, 'loss/train': 6.489173412322998} +03/04/2022 09:08:36 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/04/2022 09:08:39 - INFO - codeparrot_training - Step 16754: {'lr': 0.00048783944992791085, 'samples': 8578560, 'steps': 16754, 'loss/train': 1.5650520324707031} +03/04/2022 09:08:42 - INFO - codeparrot_training - Step 16755: {'lr': 0.00048783781492903145, 'samples': 8579072, 'steps': 16755, 'loss/train': 2.4490368366241455} +03/04/2022 09:08:44 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/04/2022 09:08:47 - INFO - codeparrot_training - Step 16756: {'lr': 0.00048783617982298594, 'samples': 8579584, 'steps': 16756, 'loss/train': 2.366964340209961} +03/04/2022 09:08:50 - INFO - codeparrot_training - Step 16757: {'lr': 0.00048783454460977517, 'samples': 8580096, 'steps': 16757, 'loss/train': 1.4457862377166748} +03/04/2022 09:08:54 - INFO - codeparrot_training - Step 16758: {'lr': 0.00048783290928939985, 'samples': 8580608, 'steps': 16758, 'loss/train': 0.7980867624282837} +03/04/2022 09:08:54 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 09:08:59 - INFO - codeparrot_training - Step 16759: {'lr': 0.00048783127386186064, 'samples': 8581120, 'steps': 16759, 'loss/train': 1.9489564895629883} +03/04/2022 09:09:02 - INFO - codeparrot_training - Step 16760: {'lr': 0.00048782963832715834, 'samples': 8581632, 'steps': 16760, 'loss/train': 1.7456711530685425} +03/04/2022 09:09:02 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/04/2022 09:09:07 - INFO - codeparrot_training - Step 16761: {'lr': 0.0004878280026852937, 'samples': 8582144, 'steps': 16761, 'loss/train': 1.222352385520935} +03/04/2022 09:09:10 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 09:09:12 - INFO - codeparrot_training - Step 16762: {'lr': 0.00048782636693626736, 'samples': 8582656, 'steps': 16762, 'loss/train': 2.1475255489349365} +03/04/2022 09:09:16 - INFO - codeparrot_training - Step 16763: {'lr': 0.0004878247310800802, 'samples': 8583168, 'steps': 16763, 'loss/train': 1.7000850439071655} +03/04/2022 09:09:18 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 09:09:21 - INFO - codeparrot_training - Step 16764: {'lr': 0.0004878230951167328, 'samples': 8583680, 'steps': 16764, 'loss/train': 1.8506168127059937} +03/04/2022 09:09:24 - INFO - codeparrot_training - Step 16765: {'lr': 0.0004878214590462261, 'samples': 8584192, 'steps': 16765, 'loss/train': 2.3854928016662598} +03/04/2022 09:09:26 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 09:09:29 - INFO - codeparrot_training - Step 16766: {'lr': 0.0004878198228685607, 'samples': 8584704, 'steps': 16766, 'loss/train': 2.3422558307647705} +03/04/2022 09:09:32 - INFO - codeparrot_training - Step 16767: {'lr': 0.00048781818658373734, 'samples': 8585216, 'steps': 16767, 'loss/train': 1.6928225755691528} +03/04/2022 09:09:35 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 09:09:38 - INFO - codeparrot_training - Step 16768: {'lr': 0.00048781655019175676, 'samples': 8585728, 'steps': 16768, 'loss/train': 1.8302969932556152} +03/04/2022 09:09:41 - INFO - codeparrot_training - Step 16769: {'lr': 0.00048781491369261965, 'samples': 8586240, 'steps': 16769, 'loss/train': 1.6847326755523682} +03/04/2022 09:09:43 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 09:09:46 - INFO - codeparrot_training - Step 16770: {'lr': 0.00048781327708632695, 'samples': 8586752, 'steps': 16770, 'loss/train': 1.6954201459884644} +03/04/2022 09:09:49 - INFO - codeparrot_training - Step 16771: {'lr': 0.0004878116403728792, 'samples': 8587264, 'steps': 16771, 'loss/train': 2.2453973293304443} +03/04/2022 09:09:52 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 09:09:55 - INFO - codeparrot_training - Step 16772: {'lr': 0.0004878100035522771, 'samples': 8587776, 'steps': 16772, 'loss/train': 1.6055817604064941} +03/04/2022 09:09:58 - INFO - codeparrot_training - Step 16773: {'lr': 0.00048780836662452154, 'samples': 8588288, 'steps': 16773, 'loss/train': 1.3732736110687256} +03/04/2022 09:10:00 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/04/2022 09:10:03 - INFO - codeparrot_training - Step 16774: {'lr': 0.00048780672958961325, 'samples': 8588800, 'steps': 16774, 'loss/train': 1.4095689058303833} +03/04/2022 09:10:06 - INFO - codeparrot_training - Step 16775: {'lr': 0.0004878050924475529, 'samples': 8589312, 'steps': 16775, 'loss/train': 1.9039345979690552} +03/04/2022 09:10:09 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 09:10:11 - INFO - codeparrot_training - Step 16776: {'lr': 0.00048780345519834124, 'samples': 8589824, 'steps': 16776, 'loss/train': 1.191585659980774} +03/04/2022 09:10:15 - INFO - codeparrot_training - Step 16777: {'lr': 0.000487801817841979, 'samples': 8590336, 'steps': 16777, 'loss/train': 0.9453536868095398} +03/04/2022 09:10:17 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/04/2022 09:10:21 - INFO - codeparrot_training - Step 16778: {'lr': 0.0004878001803784669, 'samples': 8590848, 'steps': 16778, 'loss/train': 1.7707719802856445} +03/04/2022 09:10:24 - INFO - codeparrot_training - Step 16779: {'lr': 0.00048779854280780576, 'samples': 8591360, 'steps': 16779, 'loss/train': 1.0595307350158691} +03/04/2022 09:10:27 - INFO - codeparrot_training - Step 16780: {'lr': 0.00048779690512999627, 'samples': 8591872, 'steps': 16780, 'loss/train': 0.5918049812316895} +03/04/2022 09:10:28 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 09:10:32 - INFO - codeparrot_training - Step 16781: {'lr': 0.0004877952673450391, 'samples': 8592384, 'steps': 16781, 'loss/train': 1.9472204446792603} +03/04/2022 09:10:35 - INFO - codeparrot_training - Step 16782: {'lr': 0.0004877936294529351, 'samples': 8592896, 'steps': 16782, 'loss/train': 2.1891071796417236} +03/04/2022 09:10:36 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/04/2022 09:10:41 - INFO - codeparrot_training - Step 16783: {'lr': 0.00048779199145368494, 'samples': 8593408, 'steps': 16783, 'loss/train': 1.0650302171707153} +03/04/2022 09:10:44 - INFO - codeparrot_training - Step 16784: {'lr': 0.0004877903533472894, 'samples': 8593920, 'steps': 16784, 'loss/train': 2.2744765281677246} +03/04/2022 09:10:45 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/04/2022 09:10:49 - INFO - codeparrot_training - Step 16785: {'lr': 0.0004877887151337492, 'samples': 8594432, 'steps': 16785, 'loss/train': 1.9492017030715942} +03/04/2022 09:10:52 - INFO - codeparrot_training - Step 16786: {'lr': 0.0004877870768130651, 'samples': 8594944, 'steps': 16786, 'loss/train': 2.0014824867248535} +03/04/2022 09:10:53 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/04/2022 09:10:57 - INFO - codeparrot_training - Step 16787: {'lr': 0.0004877854383852377, 'samples': 8595456, 'steps': 16787, 'loss/train': 2.024102210998535} +03/04/2022 09:11:01 - INFO - codeparrot_training - Step 16788: {'lr': 0.000487783799850268, 'samples': 8595968, 'steps': 16788, 'loss/train': 1.4918347597122192} +03/04/2022 09:11:01 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 09:11:06 - INFO - codeparrot_training - Step 16789: {'lr': 0.00048778216120815644, 'samples': 8596480, 'steps': 16789, 'loss/train': 2.108762741088867} +03/04/2022 09:11:09 - INFO - codeparrot_training - Step 16790: {'lr': 0.00048778052245890404, 'samples': 8596992, 'steps': 16790, 'loss/train': 2.3487672805786133} +03/04/2022 09:11:10 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 09:11:14 - INFO - codeparrot_training - Step 16791: {'lr': 0.0004877788836025113, 'samples': 8597504, 'steps': 16791, 'loss/train': 1.0121835470199585} +03/04/2022 09:11:17 - INFO - codeparrot_training - Step 16792: {'lr': 0.0004877772446389791, 'samples': 8598016, 'steps': 16792, 'loss/train': 2.469670295715332} +03/04/2022 09:11:18 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 09:11:23 - INFO - codeparrot_training - Step 16793: {'lr': 0.0004877756055683082, 'samples': 8598528, 'steps': 16793, 'loss/train': 0.41385364532470703} +03/04/2022 09:11:26 - INFO - codeparrot_training - Step 16794: {'lr': 0.0004877739663904992, 'samples': 8599040, 'steps': 16794, 'loss/train': 2.033597707748413} +03/04/2022 09:11:26 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/04/2022 09:11:31 - INFO - codeparrot_training - Step 16795: {'lr': 0.00048777232710555296, 'samples': 8599552, 'steps': 16795, 'loss/train': 1.7967733144760132} +03/04/2022 09:11:34 - INFO - codeparrot_training - Step 16796: {'lr': 0.0004877706877134702, 'samples': 8600064, 'steps': 16796, 'loss/train': 1.6678260564804077} +03/04/2022 09:11:35 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 09:11:40 - INFO - codeparrot_training - Step 16797: {'lr': 0.0004877690482142516, 'samples': 8600576, 'steps': 16797, 'loss/train': 1.972100019454956} +03/04/2022 09:11:43 - INFO - codeparrot_training - Step 16798: {'lr': 0.0004877674086078979, 'samples': 8601088, 'steps': 16798, 'loss/train': 2.4862613677978516} +03/04/2022 09:11:44 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/04/2022 09:11:48 - INFO - codeparrot_training - Step 16799: {'lr': 0.0004877657688944099, 'samples': 8601600, 'steps': 16799, 'loss/train': 1.739594578742981} +03/04/2022 09:11:51 - INFO - codeparrot_training - Step 16800: {'lr': 0.0004877641290737884, 'samples': 8602112, 'steps': 16800, 'loss/train': 1.928982138633728} +03/04/2022 09:11:53 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 09:11:56 - INFO - codeparrot_training - Step 16801: {'lr': 0.000487762489146034, 'samples': 8602624, 'steps': 16801, 'loss/train': 1.5922176837921143} +03/04/2022 09:12:00 - INFO - codeparrot_training - Step 16802: {'lr': 0.0004877608491111475, 'samples': 8603136, 'steps': 16802, 'loss/train': 2.1041958332061768} +03/04/2022 09:12:01 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 09:12:05 - INFO - codeparrot_training - Step 16803: {'lr': 0.0004877592089691296, 'samples': 8603648, 'steps': 16803, 'loss/train': 6.40061616897583} +03/04/2022 09:12:08 - INFO - codeparrot_training - Step 16804: {'lr': 0.00048775756871998106, 'samples': 8604160, 'steps': 16804, 'loss/train': 1.7058134078979492} +03/04/2022 09:12:10 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 09:12:13 - INFO - codeparrot_training - Step 16805: {'lr': 0.0004877559283637026, 'samples': 8604672, 'steps': 16805, 'loss/train': 1.8942151069641113} +03/04/2022 09:12:17 - INFO - codeparrot_training - Step 16806: {'lr': 0.0004877542879002951, 'samples': 8605184, 'steps': 16806, 'loss/train': 1.3459999561309814} +03/04/2022 09:12:18 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/04/2022 09:12:22 - INFO - codeparrot_training - Step 16807: {'lr': 0.0004877526473297591, 'samples': 8605696, 'steps': 16807, 'loss/train': 2.0614078044891357} +03/04/2022 09:12:25 - INFO - codeparrot_training - Step 16808: {'lr': 0.0004877510066520954, 'samples': 8606208, 'steps': 16808, 'loss/train': 1.862703561782837} +03/04/2022 09:12:26 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 09:12:30 - INFO - codeparrot_training - Step 16809: {'lr': 0.0004877493658673048, 'samples': 8606720, 'steps': 16809, 'loss/train': 2.9184420108795166} +03/04/2022 09:12:33 - INFO - codeparrot_training - Step 16810: {'lr': 0.00048774772497538806, 'samples': 8607232, 'steps': 16810, 'loss/train': 1.9242480993270874} +03/04/2022 09:12:35 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 09:12:39 - INFO - codeparrot_training - Step 16811: {'lr': 0.0004877460839763458, 'samples': 8607744, 'steps': 16811, 'loss/train': 2.3118088245391846} +03/04/2022 09:12:42 - INFO - codeparrot_training - Step 16812: {'lr': 0.0004877444428701788, 'samples': 8608256, 'steps': 16812, 'loss/train': 2.727816343307495} +03/04/2022 09:12:43 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 09:12:47 - INFO - codeparrot_training - Step 16813: {'lr': 0.0004877428016568879, 'samples': 8608768, 'steps': 16813, 'loss/train': 1.2772276401519775} +03/04/2022 09:12:50 - INFO - codeparrot_training - Step 16814: {'lr': 0.00048774116033647373, 'samples': 8609280, 'steps': 16814, 'loss/train': 2.22672700881958} +03/04/2022 09:12:51 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 09:12:56 - INFO - codeparrot_training - Step 16815: {'lr': 0.0004877395189089371, 'samples': 8609792, 'steps': 16815, 'loss/train': 1.1322365999221802} +03/04/2022 09:12:59 - INFO - codeparrot_training - Step 16816: {'lr': 0.00048773787737427867, 'samples': 8610304, 'steps': 16816, 'loss/train': 1.9681241512298584} +03/04/2022 09:13:00 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 09:13:04 - INFO - codeparrot_training - Step 16817: {'lr': 0.0004877362357324992, 'samples': 8610816, 'steps': 16817, 'loss/train': 2.3060851097106934} +03/04/2022 09:13:07 - INFO - codeparrot_training - Step 16818: {'lr': 0.0004877345939835995, 'samples': 8611328, 'steps': 16818, 'loss/train': 1.7466137409210205} +03/04/2022 09:13:08 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 09:13:12 - INFO - codeparrot_training - Step 16819: {'lr': 0.0004877329521275802, 'samples': 8611840, 'steps': 16819, 'loss/train': 1.6963021755218506} +03/04/2022 09:13:16 - INFO - codeparrot_training - Step 16820: {'lr': 0.0004877313101644422, 'samples': 8612352, 'steps': 16820, 'loss/train': 1.8235007524490356} +03/04/2022 09:13:17 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 09:13:21 - INFO - codeparrot_training - Step 16821: {'lr': 0.000487729668094186, 'samples': 8612864, 'steps': 16821, 'loss/train': 1.168093204498291} +03/04/2022 09:13:24 - INFO - codeparrot_training - Step 16822: {'lr': 0.0004877280259168125, 'samples': 8613376, 'steps': 16822, 'loss/train': 1.7980440855026245} +03/04/2022 09:13:25 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 09:13:30 - INFO - codeparrot_training - Step 16823: {'lr': 0.0004877263836323226, 'samples': 8613888, 'steps': 16823, 'loss/train': 2.24636173248291} +03/04/2022 09:13:33 - INFO - codeparrot_training - Step 16824: {'lr': 0.00048772474124071663, 'samples': 8614400, 'steps': 16824, 'loss/train': 1.5120302438735962} +03/04/2022 09:13:36 - INFO - codeparrot_training - Step 16825: {'lr': 0.0004877230987419957, 'samples': 8614912, 'steps': 16825, 'loss/train': 2.156494617462158} +03/04/2022 09:13:37 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 09:13:42 - INFO - codeparrot_training - Step 16826: {'lr': 0.00048772145613616035, 'samples': 8615424, 'steps': 16826, 'loss/train': 1.7621532678604126} +03/04/2022 09:13:45 - INFO - codeparrot_training - Step 16827: {'lr': 0.00048771981342321145, 'samples': 8615936, 'steps': 16827, 'loss/train': 2.2267062664031982} +03/04/2022 09:13:46 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/04/2022 09:13:50 - INFO - codeparrot_training - Step 16828: {'lr': 0.0004877181706031496, 'samples': 8616448, 'steps': 16828, 'loss/train': 2.7509829998016357} +03/04/2022 09:13:53 - INFO - codeparrot_training - Step 16829: {'lr': 0.00048771652767597563, 'samples': 8616960, 'steps': 16829, 'loss/train': 1.7442864179611206} +03/04/2022 09:13:55 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 09:13:59 - INFO - codeparrot_training - Step 16830: {'lr': 0.0004877148846416903, 'samples': 8617472, 'steps': 16830, 'loss/train': 2.0920181274414062} +03/04/2022 09:14:02 - INFO - codeparrot_training - Step 16831: {'lr': 0.0004877132415002943, 'samples': 8617984, 'steps': 16831, 'loss/train': 2.050269603729248} +03/04/2022 09:14:03 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 09:14:07 - INFO - codeparrot_training - Step 16832: {'lr': 0.00048771159825178827, 'samples': 8618496, 'steps': 16832, 'loss/train': 1.4495407342910767} +03/04/2022 09:14:10 - INFO - codeparrot_training - Step 16833: {'lr': 0.0004877099548961732, 'samples': 8619008, 'steps': 16833, 'loss/train': 1.9929603338241577} +03/04/2022 09:14:12 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 09:14:16 - INFO - codeparrot_training - Step 16834: {'lr': 0.0004877083114334496, 'samples': 8619520, 'steps': 16834, 'loss/train': 1.6012290716171265} +03/04/2022 09:14:19 - INFO - codeparrot_training - Step 16835: {'lr': 0.0004877066678636184, 'samples': 8620032, 'steps': 16835, 'loss/train': 2.109827756881714} +03/04/2022 09:14:20 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 09:14:24 - INFO - codeparrot_training - Step 16836: {'lr': 0.00048770502418668017, 'samples': 8620544, 'steps': 16836, 'loss/train': 0.9779191017150879} +03/04/2022 09:14:27 - INFO - codeparrot_training - Step 16837: {'lr': 0.00048770338040263574, 'samples': 8621056, 'steps': 16837, 'loss/train': 1.9277397394180298} +03/04/2022 09:14:29 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/04/2022 09:14:33 - INFO - codeparrot_training - Step 16838: {'lr': 0.00048770173651148586, 'samples': 8621568, 'steps': 16838, 'loss/train': 1.742400050163269} +03/04/2022 09:14:36 - INFO - codeparrot_training - Step 16839: {'lr': 0.0004877000925132312, 'samples': 8622080, 'steps': 16839, 'loss/train': 2.306121587753296} +03/04/2022 09:14:38 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 09:14:41 - INFO - codeparrot_training - Step 16840: {'lr': 0.0004876984484078726, 'samples': 8622592, 'steps': 16840, 'loss/train': 1.6649868488311768} +03/04/2022 09:14:44 - INFO - codeparrot_training - Step 16841: {'lr': 0.0004876968041954107, 'samples': 8623104, 'steps': 16841, 'loss/train': 1.966260552406311} +03/04/2022 09:14:46 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/04/2022 09:14:49 - INFO - codeparrot_training - Step 16842: {'lr': 0.00048769515987584624, 'samples': 8623616, 'steps': 16842, 'loss/train': 1.2436267137527466} +03/04/2022 09:14:53 - INFO - codeparrot_training - Step 16843: {'lr': 0.0004876935154491801, 'samples': 8624128, 'steps': 16843, 'loss/train': 1.6472392082214355} +03/04/2022 09:14:54 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 09:14:58 - INFO - codeparrot_training - Step 16844: {'lr': 0.00048769187091541287, 'samples': 8624640, 'steps': 16844, 'loss/train': 2.2080459594726562} +03/04/2022 09:15:01 - INFO - codeparrot_training - Step 16845: {'lr': 0.0004876902262745454, 'samples': 8625152, 'steps': 16845, 'loss/train': 2.3519277572631836} +03/04/2022 09:15:03 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 09:15:06 - INFO - codeparrot_training - Step 16846: {'lr': 0.00048768858152657837, 'samples': 8625664, 'steps': 16846, 'loss/train': 1.8650680780410767} +03/04/2022 09:15:09 - INFO - codeparrot_training - Step 16847: {'lr': 0.0004876869366715125, 'samples': 8626176, 'steps': 16847, 'loss/train': 2.2649085521698} +03/04/2022 09:15:11 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/04/2022 09:15:15 - INFO - codeparrot_training - Step 16848: {'lr': 0.0004876852917093486, 'samples': 8626688, 'steps': 16848, 'loss/train': 1.7680608034133911} +03/04/2022 09:15:18 - INFO - codeparrot_training - Step 16849: {'lr': 0.0004876836466400874, 'samples': 8627200, 'steps': 16849, 'loss/train': 2.2456302642822266} +03/04/2022 09:15:20 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/04/2022 09:15:23 - INFO - codeparrot_training - Step 16850: {'lr': 0.00048768200146372955, 'samples': 8627712, 'steps': 16850, 'loss/train': 2.1874747276306152} +03/04/2022 09:15:26 - INFO - codeparrot_training - Step 16851: {'lr': 0.00048768035618027597, 'samples': 8628224, 'steps': 16851, 'loss/train': 1.171908974647522} +03/04/2022 09:15:28 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/04/2022 09:15:32 - INFO - codeparrot_training - Step 16852: {'lr': 0.00048767871078972717, 'samples': 8628736, 'steps': 16852, 'loss/train': 2.3781909942626953} +03/04/2022 09:15:35 - INFO - codeparrot_training - Step 16853: {'lr': 0.000487677065292084, 'samples': 8629248, 'steps': 16853, 'loss/train': 2.421755075454712} +03/04/2022 09:15:37 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/04/2022 09:15:40 - INFO - codeparrot_training - Step 16854: {'lr': 0.0004876754196873473, 'samples': 8629760, 'steps': 16854, 'loss/train': 2.2711586952209473} +03/04/2022 09:15:43 - INFO - codeparrot_training - Step 16855: {'lr': 0.00048767377397551773, 'samples': 8630272, 'steps': 16855, 'loss/train': 2.336109161376953} +03/04/2022 09:15:45 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 09:15:49 - INFO - codeparrot_training - Step 16856: {'lr': 0.00048767212815659593, 'samples': 8630784, 'steps': 16856, 'loss/train': 7.030905723571777} +03/04/2022 09:15:52 - INFO - codeparrot_training - Step 16857: {'lr': 0.0004876704822305828, 'samples': 8631296, 'steps': 16857, 'loss/train': 2.7630112171173096} +03/04/2022 09:15:54 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 09:15:57 - INFO - codeparrot_training - Step 16858: {'lr': 0.00048766883619747906, 'samples': 8631808, 'steps': 16858, 'loss/train': 1.823747158050537} +03/04/2022 09:16:00 - INFO - codeparrot_training - Step 16859: {'lr': 0.00048766719005728534, 'samples': 8632320, 'steps': 16859, 'loss/train': 1.8633544445037842} +03/04/2022 09:16:03 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/04/2022 09:16:05 - INFO - codeparrot_training - Step 16860: {'lr': 0.0004876655438100024, 'samples': 8632832, 'steps': 16860, 'loss/train': 2.5746958255767822} +03/04/2022 09:16:09 - INFO - codeparrot_training - Step 16861: {'lr': 0.00048766389745563113, 'samples': 8633344, 'steps': 16861, 'loss/train': 1.5694084167480469} +03/04/2022 09:16:11 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 09:16:14 - INFO - codeparrot_training - Step 16862: {'lr': 0.00048766225099417215, 'samples': 8633856, 'steps': 16862, 'loss/train': 1.3526335954666138} +03/04/2022 09:16:17 - INFO - codeparrot_training - Step 16863: {'lr': 0.0004876606044256262, 'samples': 8634368, 'steps': 16863, 'loss/train': 2.037963390350342} +03/04/2022 09:16:20 - INFO - codeparrot_training - Step 16864: {'lr': 0.0004876589577499941, 'samples': 8634880, 'steps': 16864, 'loss/train': 1.548314094543457} +03/04/2022 09:16:20 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 09:16:26 - INFO - codeparrot_training - Step 16865: {'lr': 0.0004876573109672765, 'samples': 8635392, 'steps': 16865, 'loss/train': 1.710619568824768} +03/04/2022 09:16:28 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/04/2022 09:16:31 - INFO - codeparrot_training - Step 16866: {'lr': 0.0004876556640774742, 'samples': 8635904, 'steps': 16866, 'loss/train': 2.2892911434173584} +03/04/2022 09:16:34 - INFO - codeparrot_training - Step 16867: {'lr': 0.0004876540170805879, 'samples': 8636416, 'steps': 16867, 'loss/train': 2.0580506324768066} +03/04/2022 09:16:37 - INFO - codeparrot_training - Step 16868: {'lr': 0.00048765236997661845, 'samples': 8636928, 'steps': 16868, 'loss/train': 1.7206205129623413} +03/04/2022 09:16:38 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/04/2022 09:16:43 - INFO - codeparrot_training - Step 16869: {'lr': 0.0004876507227655664, 'samples': 8637440, 'steps': 16869, 'loss/train': 1.4987205266952515} +03/04/2022 09:16:46 - INFO - codeparrot_training - Step 16870: {'lr': 0.00048764907544743264, 'samples': 8637952, 'steps': 16870, 'loss/train': 2.0848255157470703} +03/04/2022 09:16:47 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/04/2022 09:16:51 - INFO - codeparrot_training - Step 16871: {'lr': 0.0004876474280222179, 'samples': 8638464, 'steps': 16871, 'loss/train': 1.2447184324264526} +03/04/2022 09:16:55 - INFO - codeparrot_training - Step 16872: {'lr': 0.00048764578048992284, 'samples': 8638976, 'steps': 16872, 'loss/train': 1.8943469524383545} +03/04/2022 09:16:55 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 09:17:00 - INFO - codeparrot_training - Step 16873: {'lr': 0.0004876441328505483, 'samples': 8639488, 'steps': 16873, 'loss/train': 2.31379771232605} +03/04/2022 09:17:03 - INFO - codeparrot_training - Step 16874: {'lr': 0.000487642485104095, 'samples': 8640000, 'steps': 16874, 'loss/train': 1.1681108474731445} +03/04/2022 09:17:04 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 09:17:08 - INFO - codeparrot_training - Step 16875: {'lr': 0.00048764083725056365, 'samples': 8640512, 'steps': 16875, 'loss/train': 0.10291320830583572} +03/04/2022 09:17:12 - INFO - codeparrot_training - Step 16876: {'lr': 0.00048763918928995496, 'samples': 8641024, 'steps': 16876, 'loss/train': 1.9710620641708374} +03/04/2022 09:17:14 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 09:17:17 - INFO - codeparrot_training - Step 16877: {'lr': 0.00048763754122226977, 'samples': 8641536, 'steps': 16877, 'loss/train': 1.6267890930175781} +03/04/2022 09:17:20 - INFO - codeparrot_training - Step 16878: {'lr': 0.00048763589304750876, 'samples': 8642048, 'steps': 16878, 'loss/train': 2.508193016052246} +03/04/2022 09:17:22 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 09:17:25 - INFO - codeparrot_training - Step 16879: {'lr': 0.0004876342447656727, 'samples': 8642560, 'steps': 16879, 'loss/train': 2.3881168365478516} +03/04/2022 09:17:29 - INFO - codeparrot_training - Step 16880: {'lr': 0.00048763259637676226, 'samples': 8643072, 'steps': 16880, 'loss/train': 1.9023140668869019} +03/04/2022 09:17:31 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 09:17:34 - INFO - codeparrot_training - Step 16881: {'lr': 0.00048763094788077834, 'samples': 8643584, 'steps': 16881, 'loss/train': 1.6261519193649292} +03/04/2022 09:17:37 - INFO - codeparrot_training - Step 16882: {'lr': 0.0004876292992777215, 'samples': 8644096, 'steps': 16882, 'loss/train': 0.24607695639133453} +03/04/2022 09:17:39 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/04/2022 09:17:42 - INFO - codeparrot_training - Step 16883: {'lr': 0.00048762765056759255, 'samples': 8644608, 'steps': 16883, 'loss/train': 2.590109348297119} +03/04/2022 09:17:45 - INFO - codeparrot_training - Step 16884: {'lr': 0.00048762600175039227, 'samples': 8645120, 'steps': 16884, 'loss/train': 2.155954360961914} +03/04/2022 09:17:47 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 09:17:51 - INFO - codeparrot_training - Step 16885: {'lr': 0.0004876243528261214, 'samples': 8645632, 'steps': 16885, 'loss/train': 1.25299870967865} +03/04/2022 09:17:54 - INFO - codeparrot_training - Step 16886: {'lr': 0.0004876227037947807, 'samples': 8646144, 'steps': 16886, 'loss/train': 0.6563835144042969} +03/04/2022 09:17:55 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 09:17:59 - INFO - codeparrot_training - Step 16887: {'lr': 0.0004876210546563707, 'samples': 8646656, 'steps': 16887, 'loss/train': 0.5206893086433411} +03/04/2022 09:18:02 - INFO - codeparrot_training - Step 16888: {'lr': 0.0004876194054108926, 'samples': 8647168, 'steps': 16888, 'loss/train': 1.8378583192825317} +03/04/2022 09:18:04 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 09:18:07 - INFO - codeparrot_training - Step 16889: {'lr': 0.0004876177560583466, 'samples': 8647680, 'steps': 16889, 'loss/train': 1.7045217752456665} +03/04/2022 09:18:11 - INFO - codeparrot_training - Step 16890: {'lr': 0.00048761610659873387, 'samples': 8648192, 'steps': 16890, 'loss/train': 2.1792893409729004} +03/04/2022 09:18:12 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 09:18:16 - INFO - codeparrot_training - Step 16891: {'lr': 0.0004876144570320549, 'samples': 8648704, 'steps': 16891, 'loss/train': 2.275803804397583} +03/04/2022 09:18:19 - INFO - codeparrot_training - Step 16892: {'lr': 0.0004876128073583106, 'samples': 8649216, 'steps': 16892, 'loss/train': 1.317810297012329} +03/04/2022 09:18:20 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 09:18:24 - INFO - codeparrot_training - Step 16893: {'lr': 0.00048761115757750155, 'samples': 8649728, 'steps': 16893, 'loss/train': 1.1555465459823608} +03/04/2022 09:18:28 - INFO - codeparrot_training - Step 16894: {'lr': 0.00048760950768962863, 'samples': 8650240, 'steps': 16894, 'loss/train': 1.5093331336975098} +03/04/2022 09:18:29 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 09:18:33 - INFO - codeparrot_training - Step 16895: {'lr': 0.00048760785769469254, 'samples': 8650752, 'steps': 16895, 'loss/train': 1.4779750108718872} +03/04/2022 09:18:36 - INFO - codeparrot_training - Step 16896: {'lr': 0.00048760620759269403, 'samples': 8651264, 'steps': 16896, 'loss/train': 1.955522894859314} +03/04/2022 09:18:38 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 09:18:41 - INFO - codeparrot_training - Step 16897: {'lr': 0.00048760455738363376, 'samples': 8651776, 'steps': 16897, 'loss/train': 2.022625684738159} +03/04/2022 09:18:44 - INFO - codeparrot_training - Step 16898: {'lr': 0.0004876029070675126, 'samples': 8652288, 'steps': 16898, 'loss/train': 1.8878549337387085} +03/04/2022 09:18:46 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 09:18:50 - INFO - codeparrot_training - Step 16899: {'lr': 0.0004876012566443312, 'samples': 8652800, 'steps': 16899, 'loss/train': 2.2100322246551514} +03/04/2022 09:18:53 - INFO - codeparrot_training - Step 16900: {'lr': 0.00048759960611409036, 'samples': 8653312, 'steps': 16900, 'loss/train': 2.43709659576416} +03/04/2022 09:18:54 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/04/2022 09:18:58 - INFO - codeparrot_training - Step 16901: {'lr': 0.00048759795547679083, 'samples': 8653824, 'steps': 16901, 'loss/train': 2.4187655448913574} +03/04/2022 09:19:01 - INFO - codeparrot_training - Step 16902: {'lr': 0.00048759630473243327, 'samples': 8654336, 'steps': 16902, 'loss/train': 1.537337064743042} +03/04/2022 09:19:03 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 09:19:06 - INFO - codeparrot_training - Step 16903: {'lr': 0.00048759465388101855, 'samples': 8654848, 'steps': 16903, 'loss/train': 1.8529452085494995} +03/04/2022 09:19:10 - INFO - codeparrot_training - Step 16904: {'lr': 0.0004875930029225473, 'samples': 8655360, 'steps': 16904, 'loss/train': 2.9206037521362305} +03/04/2022 09:19:11 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 09:19:15 - INFO - codeparrot_training - Step 16905: {'lr': 0.0004875913518570203, 'samples': 8655872, 'steps': 16905, 'loss/train': 3.0768840312957764} +03/04/2022 09:19:18 - INFO - codeparrot_training - Step 16906: {'lr': 0.0004875897006844383, 'samples': 8656384, 'steps': 16906, 'loss/train': 1.8729430437088013} +03/04/2022 09:19:20 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 09:19:24 - INFO - codeparrot_training - Step 16907: {'lr': 0.00048758804940480203, 'samples': 8656896, 'steps': 16907, 'loss/train': 2.024226665496826} +03/04/2022 09:19:27 - INFO - codeparrot_training - Step 16908: {'lr': 0.0004875863980181123, 'samples': 8657408, 'steps': 16908, 'loss/train': 0.9614630937576294} +03/04/2022 09:19:29 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 09:19:32 - INFO - codeparrot_training - Step 16909: {'lr': 0.0004875847465243698, 'samples': 8657920, 'steps': 16909, 'loss/train': 2.0694754123687744} +03/04/2022 09:19:35 - INFO - codeparrot_training - Step 16910: {'lr': 0.00048758309492357533, 'samples': 8658432, 'steps': 16910, 'loss/train': 2.0329232215881348} +03/04/2022 09:19:38 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 09:19:40 - INFO - codeparrot_training - Step 16911: {'lr': 0.0004875814432157295, 'samples': 8658944, 'steps': 16911, 'loss/train': 2.2260448932647705} +03/04/2022 09:19:44 - INFO - codeparrot_training - Step 16912: {'lr': 0.0004875797914008332, 'samples': 8659456, 'steps': 16912, 'loss/train': 1.645774245262146} +03/04/2022 09:19:46 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/04/2022 09:19:49 - INFO - codeparrot_training - Step 16913: {'lr': 0.00048757813947888706, 'samples': 8659968, 'steps': 16913, 'loss/train': 1.9161906242370605} +03/04/2022 09:19:52 - INFO - codeparrot_training - Step 16914: {'lr': 0.0004875764874498919, 'samples': 8660480, 'steps': 16914, 'loss/train': 2.214254140853882} +03/04/2022 09:19:55 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 09:19:57 - INFO - codeparrot_training - Step 16915: {'lr': 0.00048757483531384837, 'samples': 8660992, 'steps': 16915, 'loss/train': 1.419070839881897} +03/04/2022 09:20:00 - INFO - codeparrot_training - Step 16916: {'lr': 0.0004875731830707574, 'samples': 8661504, 'steps': 16916, 'loss/train': 1.6740669012069702} +03/04/2022 09:20:03 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 09:20:06 - INFO - codeparrot_training - Step 16917: {'lr': 0.00048757153072061954, 'samples': 8662016, 'steps': 16917, 'loss/train': 1.8722896575927734} +03/04/2022 09:20:09 - INFO - codeparrot_training - Step 16918: {'lr': 0.0004875698782634357, 'samples': 8662528, 'steps': 16918, 'loss/train': 1.0943175554275513} +03/04/2022 09:20:11 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/04/2022 09:20:14 - INFO - codeparrot_training - Step 16919: {'lr': 0.00048756822569920647, 'samples': 8663040, 'steps': 16919, 'loss/train': 1.3030593395233154} +03/04/2022 09:20:18 - INFO - codeparrot_training - Step 16920: {'lr': 0.0004875665730279326, 'samples': 8663552, 'steps': 16920, 'loss/train': 1.9468982219696045} +03/04/2022 09:20:21 - INFO - codeparrot_training - Step 16921: {'lr': 0.000487564920249615, 'samples': 8664064, 'steps': 16921, 'loss/train': 0.286955863237381} +03/04/2022 09:20:21 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 09:20:26 - INFO - codeparrot_training - Step 16922: {'lr': 0.00048756326736425427, 'samples': 8664576, 'steps': 16922, 'loss/train': 1.8562086820602417} +03/04/2022 09:20:29 - INFO - codeparrot_training - Step 16923: {'lr': 0.00048756161437185126, 'samples': 8665088, 'steps': 16923, 'loss/train': 1.963325023651123} +03/04/2022 09:20:30 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 09:20:35 - INFO - codeparrot_training - Step 16924: {'lr': 0.0004875599612724066, 'samples': 8665600, 'steps': 16924, 'loss/train': 2.058159351348877} +03/04/2022 09:20:38 - INFO - codeparrot_training - Step 16925: {'lr': 0.00048755830806592105, 'samples': 8666112, 'steps': 16925, 'loss/train': 2.0522348880767822} +03/04/2022 09:20:39 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/04/2022 09:20:43 - INFO - codeparrot_training - Step 16926: {'lr': 0.00048755665475239547, 'samples': 8666624, 'steps': 16926, 'loss/train': 1.7096834182739258} +03/04/2022 09:20:46 - INFO - codeparrot_training - Step 16927: {'lr': 0.0004875550013318305, 'samples': 8667136, 'steps': 16927, 'loss/train': 2.08017635345459} +03/04/2022 09:20:47 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/04/2022 09:20:51 - INFO - codeparrot_training - Step 16928: {'lr': 0.0004875533478042269, 'samples': 8667648, 'steps': 16928, 'loss/train': 1.6763980388641357} +03/04/2022 09:20:55 - INFO - codeparrot_training - Step 16929: {'lr': 0.00048755169416958544, 'samples': 8668160, 'steps': 16929, 'loss/train': 2.043870210647583} +03/04/2022 09:20:56 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 09:21:00 - INFO - codeparrot_training - Step 16930: {'lr': 0.00048755004042790685, 'samples': 8668672, 'steps': 16930, 'loss/train': 2.068103551864624} +03/04/2022 09:21:03 - INFO - codeparrot_training - Step 16931: {'lr': 0.00048754838657919186, 'samples': 8669184, 'steps': 16931, 'loss/train': 2.09295654296875} +03/04/2022 09:21:04 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 09:21:08 - INFO - codeparrot_training - Step 16932: {'lr': 0.00048754673262344124, 'samples': 8669696, 'steps': 16932, 'loss/train': 2.4502410888671875} +03/04/2022 09:21:12 - INFO - codeparrot_training - Step 16933: {'lr': 0.00048754507856065574, 'samples': 8670208, 'steps': 16933, 'loss/train': 2.406803607940674} +03/04/2022 09:21:13 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 09:21:17 - INFO - codeparrot_training - Step 16934: {'lr': 0.0004875434243908361, 'samples': 8670720, 'steps': 16934, 'loss/train': 2.2319862842559814} +03/04/2022 09:21:20 - INFO - codeparrot_training - Step 16935: {'lr': 0.00048754177011398303, 'samples': 8671232, 'steps': 16935, 'loss/train': 1.6219836473464966} +03/04/2022 09:21:21 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 09:21:25 - INFO - codeparrot_training - Step 16936: {'lr': 0.0004875401157300973, 'samples': 8671744, 'steps': 16936, 'loss/train': 1.7857357263565063} +03/04/2022 09:21:28 - INFO - codeparrot_training - Step 16937: {'lr': 0.00048753846123917964, 'samples': 8672256, 'steps': 16937, 'loss/train': 1.4194968938827515} +03/04/2022 09:21:30 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 09:21:34 - INFO - codeparrot_training - Step 16938: {'lr': 0.0004875368066412309, 'samples': 8672768, 'steps': 16938, 'loss/train': 1.8956108093261719} +03/04/2022 09:21:37 - INFO - codeparrot_training - Step 16939: {'lr': 0.00048753515193625165, 'samples': 8673280, 'steps': 16939, 'loss/train': 0.9386816620826721} +03/04/2022 09:21:38 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/04/2022 09:21:42 - INFO - codeparrot_training - Step 16940: {'lr': 0.00048753349712424277, 'samples': 8673792, 'steps': 16940, 'loss/train': 1.05565345287323} +03/04/2022 09:21:45 - INFO - codeparrot_training - Step 16941: {'lr': 0.00048753184220520497, 'samples': 8674304, 'steps': 16941, 'loss/train': 1.794123888015747} +03/04/2022 09:21:46 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/04/2022 09:21:51 - INFO - codeparrot_training - Step 16942: {'lr': 0.000487530187179139, 'samples': 8674816, 'steps': 16942, 'loss/train': 1.8177355527877808} +03/04/2022 09:21:54 - INFO - codeparrot_training - Step 16943: {'lr': 0.00048752853204604555, 'samples': 8675328, 'steps': 16943, 'loss/train': 2.7921884059906006} +03/04/2022 09:21:55 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 09:21:59 - INFO - codeparrot_training - Step 16944: {'lr': 0.00048752687680592545, 'samples': 8675840, 'steps': 16944, 'loss/train': 2.029855251312256} +03/04/2022 09:22:02 - INFO - codeparrot_training - Step 16945: {'lr': 0.00048752522145877937, 'samples': 8676352, 'steps': 16945, 'loss/train': 1.8961869478225708} +03/04/2022 09:22:03 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 09:22:07 - INFO - codeparrot_training - Step 16946: {'lr': 0.0004875235660046081, 'samples': 8676864, 'steps': 16946, 'loss/train': 1.404382586479187} +03/04/2022 09:22:11 - INFO - codeparrot_training - Step 16947: {'lr': 0.0004875219104434124, 'samples': 8677376, 'steps': 16947, 'loss/train': 2.0380818843841553} +03/04/2022 09:22:12 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 09:22:16 - INFO - codeparrot_training - Step 16948: {'lr': 0.0004875202547751929, 'samples': 8677888, 'steps': 16948, 'loss/train': 2.6288721561431885} +03/04/2022 09:22:19 - INFO - codeparrot_training - Step 16949: {'lr': 0.00048751859899995054, 'samples': 8678400, 'steps': 16949, 'loss/train': 1.5813437700271606} +03/04/2022 09:22:20 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/04/2022 09:22:24 - INFO - codeparrot_training - Step 16950: {'lr': 0.0004875169431176859, 'samples': 8678912, 'steps': 16950, 'loss/train': 1.0193238258361816} +03/04/2022 09:22:27 - INFO - codeparrot_training - Step 16951: {'lr': 0.0004875152871283999, 'samples': 8679424, 'steps': 16951, 'loss/train': 1.7038531303405762} +03/04/2022 09:22:29 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 09:22:33 - INFO - codeparrot_training - Step 16952: {'lr': 0.0004875136310320931, 'samples': 8679936, 'steps': 16952, 'loss/train': 1.9126293659210205} +03/04/2022 09:22:36 - INFO - codeparrot_training - Step 16953: {'lr': 0.0004875119748287663, 'samples': 8680448, 'steps': 16953, 'loss/train': 2.454759359359741} +03/04/2022 09:22:37 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/04/2022 09:22:41 - INFO - codeparrot_training - Step 16954: {'lr': 0.0004875103185184203, 'samples': 8680960, 'steps': 16954, 'loss/train': 1.74368417263031} +03/04/2022 09:22:44 - INFO - codeparrot_training - Step 16955: {'lr': 0.00048750866210105583, 'samples': 8681472, 'steps': 16955, 'loss/train': 0.1641991138458252} +03/04/2022 09:22:46 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 09:22:50 - INFO - codeparrot_training - Step 16956: {'lr': 0.0004875070055766736, 'samples': 8681984, 'steps': 16956, 'loss/train': 1.929293155670166} +03/04/2022 09:22:53 - INFO - codeparrot_training - Step 16957: {'lr': 0.0004875053489452743, 'samples': 8682496, 'steps': 16957, 'loss/train': 1.4869800806045532} +03/04/2022 09:22:54 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 09:22:58 - INFO - codeparrot_training - Step 16958: {'lr': 0.00048750369220685886, 'samples': 8683008, 'steps': 16958, 'loss/train': 1.9670077562332153} +03/04/2022 09:23:01 - INFO - codeparrot_training - Step 16959: {'lr': 0.0004875020353614279, 'samples': 8683520, 'steps': 16959, 'loss/train': 2.647514581680298} +03/04/2022 09:23:03 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/04/2022 09:23:06 - INFO - codeparrot_training - Step 16960: {'lr': 0.0004875003784089822, 'samples': 8684032, 'steps': 16960, 'loss/train': 2.0391526222229004} +03/04/2022 09:23:10 - INFO - codeparrot_training - Step 16961: {'lr': 0.00048749872134952243, 'samples': 8684544, 'steps': 16961, 'loss/train': 1.7749903202056885} +03/04/2022 09:23:11 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/04/2022 09:23:15 - INFO - codeparrot_training - Step 16962: {'lr': 0.0004874970641830495, 'samples': 8685056, 'steps': 16962, 'loss/train': 1.1836737394332886} +03/04/2022 09:23:18 - INFO - codeparrot_training - Step 16963: {'lr': 0.000487495406909564, 'samples': 8685568, 'steps': 16963, 'loss/train': 1.72423255443573} +03/04/2022 09:23:20 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 09:23:24 - INFO - codeparrot_training - Step 16964: {'lr': 0.00048749374952906677, 'samples': 8686080, 'steps': 16964, 'loss/train': 2.1110990047454834} +03/04/2022 09:23:27 - INFO - codeparrot_training - Step 16965: {'lr': 0.0004874920920415584, 'samples': 8686592, 'steps': 16965, 'loss/train': 1.8694078922271729} +03/04/2022 09:23:28 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 09:23:32 - INFO - codeparrot_training - Step 16966: {'lr': 0.0004874904344470399, 'samples': 8687104, 'steps': 16966, 'loss/train': 2.0700762271881104} +03/04/2022 09:23:36 - INFO - codeparrot_training - Step 16967: {'lr': 0.00048748877674551183, 'samples': 8687616, 'steps': 16967, 'loss/train': 3.6208784580230713} +03/04/2022 09:23:38 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 09:23:41 - INFO - codeparrot_training - Step 16968: {'lr': 0.00048748711893697495, 'samples': 8688128, 'steps': 16968, 'loss/train': 1.273001790046692} +03/04/2022 09:23:44 - INFO - codeparrot_training - Step 16969: {'lr': 0.0004874854610214301, 'samples': 8688640, 'steps': 16969, 'loss/train': 1.6043765544891357} +03/04/2022 09:23:46 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 09:23:49 - INFO - codeparrot_training - Step 16970: {'lr': 0.00048748380299887793, 'samples': 8689152, 'steps': 16970, 'loss/train': 2.1030914783477783} +03/04/2022 09:23:53 - INFO - codeparrot_training - Step 16971: {'lr': 0.0004874821448693192, 'samples': 8689664, 'steps': 16971, 'loss/train': 1.783687710762024} +03/04/2022 09:23:55 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 09:23:58 - INFO - codeparrot_training - Step 16972: {'lr': 0.00048748048663275475, 'samples': 8690176, 'steps': 16972, 'loss/train': 2.1835052967071533} +03/04/2022 09:24:01 - INFO - codeparrot_training - Step 16973: {'lr': 0.00048747882828918524, 'samples': 8690688, 'steps': 16973, 'loss/train': 2.5105714797973633} +03/04/2022 09:24:04 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 09:24:06 - INFO - codeparrot_training - Step 16974: {'lr': 0.0004874771698386113, 'samples': 8691200, 'steps': 16974, 'loss/train': 1.6340335607528687} +03/04/2022 09:24:10 - INFO - codeparrot_training - Step 16975: {'lr': 0.00048747551128103397, 'samples': 8691712, 'steps': 16975, 'loss/train': 2.034607410430908} +03/04/2022 09:24:13 - INFO - codeparrot_training - Step 16976: {'lr': 0.00048747385261645377, 'samples': 8692224, 'steps': 16976, 'loss/train': 1.6640053987503052} +03/04/2022 09:24:13 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 09:24:18 - INFO - codeparrot_training - Step 16977: {'lr': 0.0004874721938448715, 'samples': 8692736, 'steps': 16977, 'loss/train': 2.047442674636841} +03/04/2022 09:24:21 - INFO - codeparrot_training - Step 16978: {'lr': 0.000487470534966288, 'samples': 8693248, 'steps': 16978, 'loss/train': 2.3193156719207764} +03/04/2022 09:24:22 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/04/2022 09:24:27 - INFO - codeparrot_training - Step 16979: {'lr': 0.0004874688759807039, 'samples': 8693760, 'steps': 16979, 'loss/train': 1.3341710567474365} +03/04/2022 09:24:30 - INFO - codeparrot_training - Step 16980: {'lr': 0.00048746721688812004, 'samples': 8694272, 'steps': 16980, 'loss/train': 1.8903558254241943} +03/04/2022 09:24:30 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 09:24:35 - INFO - codeparrot_training - Step 16981: {'lr': 0.00048746555768853703, 'samples': 8694784, 'steps': 16981, 'loss/train': 1.7109447717666626} +03/04/2022 09:24:38 - INFO - codeparrot_training - Step 16982: {'lr': 0.00048746389838195573, 'samples': 8695296, 'steps': 16982, 'loss/train': 2.277663230895996} +03/04/2022 09:24:38 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 09:24:44 - INFO - codeparrot_training - Step 16983: {'lr': 0.0004874622389683768, 'samples': 8695808, 'steps': 16983, 'loss/train': 2.342374801635742} +03/04/2022 09:24:47 - INFO - codeparrot_training - Step 16984: {'lr': 0.0004874605794478012, 'samples': 8696320, 'steps': 16984, 'loss/train': 2.1430656909942627} +03/04/2022 09:24:47 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 09:24:52 - INFO - codeparrot_training - Step 16985: {'lr': 0.0004874589198202294, 'samples': 8696832, 'steps': 16985, 'loss/train': 2.5442886352539062} +03/04/2022 09:24:55 - INFO - codeparrot_training - Step 16986: {'lr': 0.0004874572600856624, 'samples': 8697344, 'steps': 16986, 'loss/train': 1.8319238424301147} +03/04/2022 09:24:56 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 09:25:01 - INFO - codeparrot_training - Step 16987: {'lr': 0.0004874556002441007, 'samples': 8697856, 'steps': 16987, 'loss/train': 1.7899049520492554} +03/04/2022 09:25:04 - INFO - codeparrot_training - Step 16988: {'lr': 0.0004874539402955452, 'samples': 8698368, 'steps': 16988, 'loss/train': 1.7940919399261475} +03/04/2022 09:25:04 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 09:25:09 - INFO - codeparrot_training - Step 16989: {'lr': 0.00048745228023999666, 'samples': 8698880, 'steps': 16989, 'loss/train': 1.297383427619934} +03/04/2022 09:25:12 - INFO - codeparrot_training - Step 16990: {'lr': 0.0004874506200774557, 'samples': 8699392, 'steps': 16990, 'loss/train': 2.2932510375976562} +03/04/2022 09:25:13 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 09:25:17 - INFO - codeparrot_training - Step 16991: {'lr': 0.00048744895980792327, 'samples': 8699904, 'steps': 16991, 'loss/train': 1.3940424919128418} +03/04/2022 09:25:21 - INFO - codeparrot_training - Step 16992: {'lr': 0.00048744729943139993, 'samples': 8700416, 'steps': 16992, 'loss/train': 2.2109503746032715} +03/04/2022 09:25:22 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 09:25:26 - INFO - codeparrot_training - Step 16993: {'lr': 0.0004874456389478865, 'samples': 8700928, 'steps': 16993, 'loss/train': 1.9773454666137695} +03/04/2022 09:25:29 - INFO - codeparrot_training - Step 16994: {'lr': 0.00048744397835738377, 'samples': 8701440, 'steps': 16994, 'loss/train': 2.3317158222198486} +03/04/2022 09:25:30 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/04/2022 09:25:34 - INFO - codeparrot_training - Step 16995: {'lr': 0.00048744231765989246, 'samples': 8701952, 'steps': 16995, 'loss/train': 2.1656618118286133} +03/04/2022 09:25:38 - INFO - codeparrot_training - Step 16996: {'lr': 0.0004874406568554132, 'samples': 8702464, 'steps': 16996, 'loss/train': 2.202397108078003} +03/04/2022 09:25:39 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/04/2022 09:25:43 - INFO - codeparrot_training - Step 16997: {'lr': 0.0004874389959439469, 'samples': 8702976, 'steps': 16997, 'loss/train': 2.656357765197754} +03/04/2022 09:25:46 - INFO - codeparrot_training - Step 16998: {'lr': 0.0004874373349254943, 'samples': 8703488, 'steps': 16998, 'loss/train': 1.1595306396484375} +03/04/2022 09:25:49 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/04/2022 09:25:52 - INFO - codeparrot_training - Step 16999: {'lr': 0.00048743567380005604, 'samples': 8704000, 'steps': 16999, 'loss/train': 2.014744281768799} +03/04/2022 09:25:55 - INFO - codeparrot_training - Step 17000: {'lr': 0.000487434012567633, 'samples': 8704512, 'steps': 17000, 'loss/train': 2.2198593616485596} +03/04/2022 09:25:57 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 09:26:00 - INFO - codeparrot_training - Step 17001: {'lr': 0.0004874323512282258, 'samples': 8705024, 'steps': 17001, 'loss/train': 1.7270327806472778} +03/04/2022 09:26:03 - INFO - codeparrot_training - Step 17002: {'lr': 0.00048743068978183523, 'samples': 8705536, 'steps': 17002, 'loss/train': 1.6361719369888306} +03/04/2022 09:26:05 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 09:26:08 - INFO - codeparrot_training - Step 17003: {'lr': 0.00048742902822846215, 'samples': 8706048, 'steps': 17003, 'loss/train': 1.610042929649353} +03/04/2022 09:26:12 - INFO - codeparrot_training - Step 17004: {'lr': 0.0004874273665681071, 'samples': 8706560, 'steps': 17004, 'loss/train': 2.1730690002441406} +03/04/2022 09:26:14 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/04/2022 09:26:17 - INFO - codeparrot_training - Step 17005: {'lr': 0.00048742570480077096, 'samples': 8707072, 'steps': 17005, 'loss/train': 1.8673317432403564} +03/04/2022 09:26:20 - INFO - codeparrot_training - Step 17006: {'lr': 0.0004874240429264545, 'samples': 8707584, 'steps': 17006, 'loss/train': 1.839585304260254} +03/04/2022 09:26:23 - INFO - codeparrot_training - Step 17007: {'lr': 0.00048742238094515844, 'samples': 8708096, 'steps': 17007, 'loss/train': 4.15144157409668} +03/04/2022 09:26:24 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 09:26:29 - INFO - codeparrot_training - Step 17008: {'lr': 0.00048742071885688354, 'samples': 8708608, 'steps': 17008, 'loss/train': 2.374964475631714} +03/04/2022 09:26:32 - INFO - codeparrot_training - Step 17009: {'lr': 0.00048741905666163047, 'samples': 8709120, 'steps': 17009, 'loss/train': 6.283572196960449} +03/04/2022 09:26:34 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/04/2022 09:26:37 - INFO - codeparrot_training - Step 17010: {'lr': 0.00048741739435940003, 'samples': 8709632, 'steps': 17010, 'loss/train': 1.635693073272705} +03/04/2022 09:26:41 - INFO - codeparrot_training - Step 17011: {'lr': 0.000487415731950193, 'samples': 8710144, 'steps': 17011, 'loss/train': 2.453800678253174} +03/04/2022 09:26:43 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 09:26:46 - INFO - codeparrot_training - Step 17012: {'lr': 0.0004874140694340101, 'samples': 8710656, 'steps': 17012, 'loss/train': 0.5076583027839661} +03/04/2022 09:26:49 - INFO - codeparrot_training - Step 17013: {'lr': 0.0004874124068108521, 'samples': 8711168, 'steps': 17013, 'loss/train': 2.672123432159424} +03/04/2022 09:26:51 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 09:26:54 - INFO - codeparrot_training - Step 17014: {'lr': 0.00048741074408071975, 'samples': 8711680, 'steps': 17014, 'loss/train': 2.1428098678588867} +03/04/2022 09:26:58 - INFO - codeparrot_training - Step 17015: {'lr': 0.00048740908124361373, 'samples': 8712192, 'steps': 17015, 'loss/train': 1.359662413597107} +03/04/2022 09:27:00 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/04/2022 09:27:03 - INFO - codeparrot_training - Step 17016: {'lr': 0.0004874074182995349, 'samples': 8712704, 'steps': 17016, 'loss/train': 6.8417510986328125} +03/04/2022 09:27:06 - INFO - codeparrot_training - Step 17017: {'lr': 0.0004874057552484839, 'samples': 8713216, 'steps': 17017, 'loss/train': 2.714891195297241} +03/04/2022 09:27:08 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/04/2022 09:27:11 - INFO - codeparrot_training - Step 17018: {'lr': 0.00048740409209046154, 'samples': 8713728, 'steps': 17018, 'loss/train': 1.4954580068588257} +03/04/2022 09:27:14 - INFO - codeparrot_training - Step 17019: {'lr': 0.0004874024288254686, 'samples': 8714240, 'steps': 17019, 'loss/train': 1.4837478399276733} +03/04/2022 09:27:17 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 09:27:20 - INFO - codeparrot_training - Step 17020: {'lr': 0.00048740076545350573, 'samples': 8714752, 'steps': 17020, 'loss/train': 1.0343765020370483} +03/04/2022 09:27:23 - INFO - codeparrot_training - Step 17021: {'lr': 0.00048739910197457376, 'samples': 8715264, 'steps': 17021, 'loss/train': 1.7516624927520752} +03/04/2022 09:27:25 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/04/2022 09:27:28 - INFO - codeparrot_training - Step 17022: {'lr': 0.00048739743838867344, 'samples': 8715776, 'steps': 17022, 'loss/train': 2.433748245239258} +03/04/2022 09:27:31 - INFO - codeparrot_training - Step 17023: {'lr': 0.00048739577469580545, 'samples': 8716288, 'steps': 17023, 'loss/train': 2.1277201175689697} +03/04/2022 09:27:33 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/04/2022 09:27:36 - INFO - codeparrot_training - Step 17024: {'lr': 0.0004873941108959706, 'samples': 8716800, 'steps': 17024, 'loss/train': 1.1943570375442505} +03/04/2022 09:27:40 - INFO - codeparrot_training - Step 17025: {'lr': 0.0004873924469891697, 'samples': 8717312, 'steps': 17025, 'loss/train': 1.1365171670913696} +03/04/2022 09:27:41 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 09:27:45 - INFO - codeparrot_training - Step 17026: {'lr': 0.00048739078297540335, 'samples': 8717824, 'steps': 17026, 'loss/train': 1.7916502952575684} +03/04/2022 09:27:48 - INFO - codeparrot_training - Step 17027: {'lr': 0.00048738911885467243, 'samples': 8718336, 'steps': 17027, 'loss/train': 2.611462354660034} +03/04/2022 09:27:50 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 09:27:53 - INFO - codeparrot_training - Step 17028: {'lr': 0.00048738745462697754, 'samples': 8718848, 'steps': 17028, 'loss/train': 2.2122631072998047} +03/04/2022 09:27:56 - INFO - codeparrot_training - Step 17029: {'lr': 0.0004873857902923196, 'samples': 8719360, 'steps': 17029, 'loss/train': 1.6640962362289429} +03/04/2022 09:27:58 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/04/2022 09:28:02 - INFO - codeparrot_training - Step 17030: {'lr': 0.00048738412585069927, 'samples': 8719872, 'steps': 17030, 'loss/train': 1.687991976737976} +03/04/2022 09:28:05 - INFO - codeparrot_training - Step 17031: {'lr': 0.00048738246130211734, 'samples': 8720384, 'steps': 17031, 'loss/train': 2.4635486602783203} +03/04/2022 09:28:07 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/04/2022 09:28:10 - INFO - codeparrot_training - Step 17032: {'lr': 0.00048738079664657454, 'samples': 8720896, 'steps': 17032, 'loss/train': 6.62474250793457} +03/04/2022 09:28:13 - INFO - codeparrot_training - Step 17033: {'lr': 0.00048737913188407156, 'samples': 8721408, 'steps': 17033, 'loss/train': 2.0110085010528564} +03/04/2022 09:28:16 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 09:28:19 - INFO - codeparrot_training - Step 17034: {'lr': 0.00048737746701460927, 'samples': 8721920, 'steps': 17034, 'loss/train': 1.663604497909546} +03/04/2022 09:28:22 - INFO - codeparrot_training - Step 17035: {'lr': 0.0004873758020381883, 'samples': 8722432, 'steps': 17035, 'loss/train': 2.368196964263916} +03/04/2022 09:28:24 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/04/2022 09:28:27 - INFO - codeparrot_training - Step 17036: {'lr': 0.00048737413695480947, 'samples': 8722944, 'steps': 17036, 'loss/train': 3.2607014179229736} +03/04/2022 09:28:30 - INFO - codeparrot_training - Step 17037: {'lr': 0.00048737247176447354, 'samples': 8723456, 'steps': 17037, 'loss/train': 2.2232487201690674} +03/04/2022 09:28:33 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 09:28:36 - INFO - codeparrot_training - Step 17038: {'lr': 0.0004873708064671812, 'samples': 8723968, 'steps': 17038, 'loss/train': 2.1876864433288574} +03/04/2022 09:28:39 - INFO - codeparrot_training - Step 17039: {'lr': 0.0004873691410629333, 'samples': 8724480, 'steps': 17039, 'loss/train': 2.320192575454712} +03/04/2022 09:28:41 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/04/2022 09:28:44 - INFO - codeparrot_training - Step 17040: {'lr': 0.0004873674755517304, 'samples': 8724992, 'steps': 17040, 'loss/train': 2.677354097366333} +03/04/2022 09:28:47 - INFO - codeparrot_training - Step 17041: {'lr': 0.00048736580993357357, 'samples': 8725504, 'steps': 17041, 'loss/train': 1.1840838193893433} +03/04/2022 09:28:49 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/04/2022 09:28:53 - INFO - codeparrot_training - Step 17042: {'lr': 0.0004873641442084632, 'samples': 8726016, 'steps': 17042, 'loss/train': 1.269323468208313} +03/04/2022 09:28:56 - INFO - codeparrot_training - Step 17043: {'lr': 0.00048736247837640037, 'samples': 8726528, 'steps': 17043, 'loss/train': 1.7999725341796875} +03/04/2022 09:28:58 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/04/2022 09:29:01 - INFO - codeparrot_training - Step 17044: {'lr': 0.0004873608124373855, 'samples': 8727040, 'steps': 17044, 'loss/train': 2.438044309616089} +03/04/2022 09:29:04 - INFO - codeparrot_training - Step 17045: {'lr': 0.00048735914639141964, 'samples': 8727552, 'steps': 17045, 'loss/train': 1.3694199323654175} +03/04/2022 09:29:07 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 09:29:09 - INFO - codeparrot_training - Step 17046: {'lr': 0.00048735748023850337, 'samples': 8728064, 'steps': 17046, 'loss/train': 2.089823007583618} +03/04/2022 09:29:13 - INFO - codeparrot_training - Step 17047: {'lr': 0.00048735581397863745, 'samples': 8728576, 'steps': 17047, 'loss/train': 1.908474326133728} +03/04/2022 09:29:15 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 09:29:18 - INFO - codeparrot_training - Step 17048: {'lr': 0.0004873541476118227, 'samples': 8729088, 'steps': 17048, 'loss/train': 1.8098095655441284} +03/04/2022 09:29:21 - INFO - codeparrot_training - Step 17049: {'lr': 0.00048735248113805976, 'samples': 8729600, 'steps': 17049, 'loss/train': 1.1103909015655518} +03/04/2022 09:29:24 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 09:29:26 - INFO - codeparrot_training - Step 17050: {'lr': 0.0004873508145573495, 'samples': 8730112, 'steps': 17050, 'loss/train': 0.9631534218788147} +03/04/2022 09:29:30 - INFO - codeparrot_training - Step 17051: {'lr': 0.00048734914786969266, 'samples': 8730624, 'steps': 17051, 'loss/train': 2.084745168685913} +03/04/2022 09:29:32 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 09:29:35 - INFO - codeparrot_training - Step 17052: {'lr': 0.00048734748107509, 'samples': 8731136, 'steps': 17052, 'loss/train': 1.2581549882888794} +03/04/2022 09:29:38 - INFO - codeparrot_training - Step 17053: {'lr': 0.0004873458141735421, 'samples': 8731648, 'steps': 17053, 'loss/train': 1.807431936264038} +03/04/2022 09:29:41 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 09:29:43 - INFO - codeparrot_training - Step 17054: {'lr': 0.0004873441471650499, 'samples': 8732160, 'steps': 17054, 'loss/train': 2.3514840602874756} +03/04/2022 09:29:46 - INFO - codeparrot_training - Step 17055: {'lr': 0.00048734248004961414, 'samples': 8732672, 'steps': 17055, 'loss/train': 2.524091958999634} +03/04/2022 09:29:49 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/04/2022 09:29:52 - INFO - codeparrot_training - Step 17056: {'lr': 0.00048734081282723543, 'samples': 8733184, 'steps': 17056, 'loss/train': 1.3448936939239502} +03/04/2022 09:29:55 - INFO - codeparrot_training - Step 17057: {'lr': 0.00048733914549791465, 'samples': 8733696, 'steps': 17057, 'loss/train': 1.5965874195098877} +03/04/2022 09:29:57 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 09:30:00 - INFO - codeparrot_training - Step 17058: {'lr': 0.0004873374780616525, 'samples': 8734208, 'steps': 17058, 'loss/train': 1.9870991706848145} +03/04/2022 09:30:03 - INFO - codeparrot_training - Step 17059: {'lr': 0.00048733581051844976, 'samples': 8734720, 'steps': 17059, 'loss/train': 1.9849728345870972} +03/04/2022 09:30:06 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 09:30:09 - INFO - codeparrot_training - Step 17060: {'lr': 0.00048733414286830716, 'samples': 8735232, 'steps': 17060, 'loss/train': 1.3484708070755005} +03/04/2022 09:30:12 - INFO - codeparrot_training - Step 17061: {'lr': 0.00048733247511122547, 'samples': 8735744, 'steps': 17061, 'loss/train': 2.432173490524292} +03/04/2022 09:30:14 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/04/2022 09:30:17 - INFO - codeparrot_training - Step 17062: {'lr': 0.00048733080724720545, 'samples': 8736256, 'steps': 17062, 'loss/train': 5.5679826736450195} +03/04/2022 09:30:21 - INFO - codeparrot_training - Step 17063: {'lr': 0.00048732913927624776, 'samples': 8736768, 'steps': 17063, 'loss/train': 1.5371040105819702} +03/04/2022 09:30:24 - INFO - codeparrot_training - Step 17064: {'lr': 0.0004873274711983533, 'samples': 8737280, 'steps': 17064, 'loss/train': 2.051386833190918} +03/04/2022 09:30:24 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/04/2022 09:30:29 - INFO - codeparrot_training - Step 17065: {'lr': 0.0004873258030135227, 'samples': 8737792, 'steps': 17065, 'loss/train': 1.4739885330200195} +03/04/2022 09:30:33 - INFO - codeparrot_training - Step 17066: {'lr': 0.0004873241347217567, 'samples': 8738304, 'steps': 17066, 'loss/train': 2.2407567501068115} +03/04/2022 09:30:34 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 09:30:38 - INFO - codeparrot_training - Step 17067: {'lr': 0.0004873224663230562, 'samples': 8738816, 'steps': 17067, 'loss/train': 2.238762140274048} +03/04/2022 09:30:41 - INFO - codeparrot_training - Step 17068: {'lr': 0.0004873207978174219, 'samples': 8739328, 'steps': 17068, 'loss/train': 2.1703383922576904} +03/04/2022 09:30:42 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 09:30:46 - INFO - codeparrot_training - Step 17069: {'lr': 0.00048731912920485444, 'samples': 8739840, 'steps': 17069, 'loss/train': 1.6807875633239746} +03/04/2022 09:30:49 - INFO - codeparrot_training - Step 17070: {'lr': 0.0004873174604853546, 'samples': 8740352, 'steps': 17070, 'loss/train': 1.9073567390441895} +03/04/2022 09:30:51 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 09:30:55 - INFO - codeparrot_training - Step 17071: {'lr': 0.00048731579165892325, 'samples': 8740864, 'steps': 17071, 'loss/train': 0.8900700211524963} +03/04/2022 09:30:58 - INFO - codeparrot_training - Step 17072: {'lr': 0.000487314122725561, 'samples': 8741376, 'steps': 17072, 'loss/train': 1.9029769897460938} +03/04/2022 09:30:59 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 09:31:04 - INFO - codeparrot_training - Step 17073: {'lr': 0.00048731245368526877, 'samples': 8741888, 'steps': 17073, 'loss/train': 1.752681016921997} +03/04/2022 09:31:07 - INFO - codeparrot_training - Step 17074: {'lr': 0.0004873107845380471, 'samples': 8742400, 'steps': 17074, 'loss/train': 3.722543239593506} +03/04/2022 09:31:10 - INFO - codeparrot_training - Step 17075: {'lr': 0.00048730911528389686, 'samples': 8742912, 'steps': 17075, 'loss/train': 1.7301571369171143} +03/04/2022 09:31:10 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 09:31:15 - INFO - codeparrot_training - Step 17076: {'lr': 0.0004873074459228188, 'samples': 8743424, 'steps': 17076, 'loss/train': 1.7591311931610107} +03/04/2022 09:31:18 - INFO - codeparrot_training - Step 17077: {'lr': 0.0004873057764548138, 'samples': 8743936, 'steps': 17077, 'loss/train': 2.2447030544281006} +03/04/2022 09:31:18 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 09:31:24 - INFO - codeparrot_training - Step 17078: {'lr': 0.00048730410687988237, 'samples': 8744448, 'steps': 17078, 'loss/train': 1.5612919330596924} +03/04/2022 09:31:27 - INFO - codeparrot_training - Step 17079: {'lr': 0.00048730243719802535, 'samples': 8744960, 'steps': 17079, 'loss/train': 2.052609920501709} +03/04/2022 09:31:30 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 09:31:33 - INFO - codeparrot_training - Step 17080: {'lr': 0.00048730076740924355, 'samples': 8745472, 'steps': 17080, 'loss/train': 2.001112461090088} +03/04/2022 09:31:36 - INFO - codeparrot_training - Step 17081: {'lr': 0.0004872990975135377, 'samples': 8745984, 'steps': 17081, 'loss/train': 0.40842610597610474} +03/04/2022 09:31:38 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 09:31:41 - INFO - codeparrot_training - Step 17082: {'lr': 0.0004872974275109085, 'samples': 8746496, 'steps': 17082, 'loss/train': 1.1621137857437134} +03/04/2022 09:31:44 - INFO - codeparrot_training - Step 17083: {'lr': 0.00048729575740135675, 'samples': 8747008, 'steps': 17083, 'loss/train': 2.1182966232299805} +03/04/2022 09:31:47 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 09:31:50 - INFO - codeparrot_training - Step 17084: {'lr': 0.0004872940871848832, 'samples': 8747520, 'steps': 17084, 'loss/train': 1.7584813833236694} +03/04/2022 09:31:53 - INFO - codeparrot_training - Step 17085: {'lr': 0.00048729241686148864, 'samples': 8748032, 'steps': 17085, 'loss/train': 1.2249332666397095} +03/04/2022 09:31:55 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 09:31:58 - INFO - codeparrot_training - Step 17086: {'lr': 0.0004872907464311737, 'samples': 8748544, 'steps': 17086, 'loss/train': 2.3586435317993164} +03/04/2022 09:32:01 - INFO - codeparrot_training - Step 17087: {'lr': 0.0004872890758939392, 'samples': 8749056, 'steps': 17087, 'loss/train': 2.992093563079834} +03/04/2022 09:32:04 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 09:32:07 - INFO - codeparrot_training - Step 17088: {'lr': 0.00048728740524978597, 'samples': 8749568, 'steps': 17088, 'loss/train': 2.095993995666504} +03/04/2022 09:32:10 - INFO - codeparrot_training - Step 17089: {'lr': 0.00048728573449871473, 'samples': 8750080, 'steps': 17089, 'loss/train': 2.9900293350219727} +03/04/2022 09:32:13 - INFO - codeparrot_training - Step 17090: {'lr': 0.0004872840636407261, 'samples': 8750592, 'steps': 17090, 'loss/train': 2.517744541168213} +03/04/2022 09:32:13 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 09:32:18 - INFO - codeparrot_training - Step 17091: {'lr': 0.00048728239267582096, 'samples': 8751104, 'steps': 17091, 'loss/train': 1.5301023721694946} +03/04/2022 09:32:21 - INFO - codeparrot_training - Step 17092: {'lr': 0.00048728072160400006, 'samples': 8751616, 'steps': 17092, 'loss/train': 1.8416099548339844} +03/04/2022 09:32:21 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/04/2022 09:32:27 - INFO - codeparrot_training - Step 17093: {'lr': 0.0004872790504252641, 'samples': 8752128, 'steps': 17093, 'loss/train': 0.8109533786773682} +03/04/2022 09:32:30 - INFO - codeparrot_training - Step 17094: {'lr': 0.0004872773791396139, 'samples': 8752640, 'steps': 17094, 'loss/train': 1.6323556900024414} +03/04/2022 09:32:30 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/04/2022 09:32:35 - INFO - codeparrot_training - Step 17095: {'lr': 0.0004872757077470502, 'samples': 8753152, 'steps': 17095, 'loss/train': 2.3780570030212402} +03/04/2022 09:32:38 - INFO - codeparrot_training - Step 17096: {'lr': 0.0004872740362475737, 'samples': 8753664, 'steps': 17096, 'loss/train': 1.4135091304779053} +03/04/2022 09:32:38 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 09:32:44 - INFO - codeparrot_training - Step 17097: {'lr': 0.0004872723646411851, 'samples': 8754176, 'steps': 17097, 'loss/train': 1.5938845872879028} +03/04/2022 09:32:47 - INFO - codeparrot_training - Step 17098: {'lr': 0.0004872706929278853, 'samples': 8754688, 'steps': 17098, 'loss/train': 1.1584092378616333} +03/04/2022 09:32:47 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 09:32:52 - INFO - codeparrot_training - Step 17099: {'lr': 0.000487269021107675, 'samples': 8755200, 'steps': 17099, 'loss/train': 1.1859915256500244} +03/04/2022 09:32:55 - INFO - codeparrot_training - Step 17100: {'lr': 0.0004872673491805549, 'samples': 8755712, 'steps': 17100, 'loss/train': 2.2391159534454346} +03/04/2022 09:32:55 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 09:33:01 - INFO - codeparrot_training - Step 17101: {'lr': 0.0004872656771465259, 'samples': 8756224, 'steps': 17101, 'loss/train': 0.9763694405555725} +03/04/2022 09:33:04 - INFO - codeparrot_training - Step 17102: {'lr': 0.00048726400500558856, 'samples': 8756736, 'steps': 17102, 'loss/train': 1.8844467401504517} +03/04/2022 09:33:04 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 09:33:09 - INFO - codeparrot_training - Step 17103: {'lr': 0.0004872623327577437, 'samples': 8757248, 'steps': 17103, 'loss/train': 2.577543020248413} +03/04/2022 09:33:12 - INFO - codeparrot_training - Step 17104: {'lr': 0.0004872606604029921, 'samples': 8757760, 'steps': 17104, 'loss/train': 1.1057757139205933} +03/04/2022 09:33:12 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 09:33:18 - INFO - codeparrot_training - Step 17105: {'lr': 0.00048725898794133455, 'samples': 8758272, 'steps': 17105, 'loss/train': 1.9573076963424683} +03/04/2022 09:33:21 - INFO - codeparrot_training - Step 17106: {'lr': 0.00048725731537277173, 'samples': 8758784, 'steps': 17106, 'loss/train': 2.4027652740478516} +03/04/2022 09:33:21 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/04/2022 09:33:26 - INFO - codeparrot_training - Step 17107: {'lr': 0.0004872556426973044, 'samples': 8759296, 'steps': 17107, 'loss/train': 2.096590280532837} +03/04/2022 09:33:29 - INFO - codeparrot_training - Step 17108: {'lr': 0.0004872539699149334, 'samples': 8759808, 'steps': 17108, 'loss/train': 3.0715389251708984} +03/04/2022 09:33:29 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 09:33:35 - INFO - codeparrot_training - Step 17109: {'lr': 0.0004872522970256594, 'samples': 8760320, 'steps': 17109, 'loss/train': 1.5525943040847778} +03/04/2022 09:33:38 - INFO - codeparrot_training - Step 17110: {'lr': 0.00048725062402948314, 'samples': 8760832, 'steps': 17110, 'loss/train': 2.497875213623047} +03/04/2022 09:33:38 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 09:33:43 - INFO - codeparrot_training - Step 17111: {'lr': 0.00048724895092640546, 'samples': 8761344, 'steps': 17111, 'loss/train': 2.5655417442321777} +03/04/2022 09:33:46 - INFO - codeparrot_training - Step 17112: {'lr': 0.00048724727771642706, 'samples': 8761856, 'steps': 17112, 'loss/train': 2.0300776958465576} +03/04/2022 09:33:46 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 09:33:52 - INFO - codeparrot_training - Step 17113: {'lr': 0.00048724560439954867, 'samples': 8762368, 'steps': 17113, 'loss/train': 1.3643265962600708} +03/04/2022 09:33:55 - INFO - codeparrot_training - Step 17114: {'lr': 0.00048724393097577113, 'samples': 8762880, 'steps': 17114, 'loss/train': 3.0808236598968506} +03/04/2022 09:33:55 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 09:34:00 - INFO - codeparrot_training - Step 17115: {'lr': 0.0004872422574450951, 'samples': 8763392, 'steps': 17115, 'loss/train': 2.3056414127349854} +03/04/2022 09:34:03 - INFO - codeparrot_training - Step 17116: {'lr': 0.0004872405838075213, 'samples': 8763904, 'steps': 17116, 'loss/train': 1.0287232398986816} +03/04/2022 09:34:03 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 09:34:09 - INFO - codeparrot_training - Step 17117: {'lr': 0.00048723891006305066, 'samples': 8764416, 'steps': 17117, 'loss/train': 1.6343083381652832} +03/04/2022 09:34:11 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 09:34:14 - INFO - codeparrot_training - Step 17118: {'lr': 0.0004872372362116838, 'samples': 8764928, 'steps': 17118, 'loss/train': 1.8367695808410645} +03/04/2022 09:34:17 - INFO - codeparrot_training - Step 17119: {'lr': 0.0004872355622534215, 'samples': 8765440, 'steps': 17119, 'loss/train': 1.476016640663147} +03/04/2022 09:34:20 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 09:34:22 - INFO - codeparrot_training - Step 17120: {'lr': 0.0004872338881882644, 'samples': 8765952, 'steps': 17120, 'loss/train': 3.5359251499176025} +03/04/2022 09:34:25 - INFO - codeparrot_training - Step 17121: {'lr': 0.00048723221401621354, 'samples': 8766464, 'steps': 17121, 'loss/train': 1.3585875034332275} +03/04/2022 09:34:28 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 09:34:31 - INFO - codeparrot_training - Step 17122: {'lr': 0.0004872305397372694, 'samples': 8766976, 'steps': 17122, 'loss/train': 1.6164674758911133} +03/04/2022 09:34:34 - INFO - codeparrot_training - Step 17123: {'lr': 0.0004872288653514329, 'samples': 8767488, 'steps': 17123, 'loss/train': 1.6768649816513062} +03/04/2022 09:34:36 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 09:34:39 - INFO - codeparrot_training - Step 17124: {'lr': 0.0004872271908587047, 'samples': 8768000, 'steps': 17124, 'loss/train': 6.6288299560546875} +03/04/2022 09:34:42 - INFO - codeparrot_training - Step 17125: {'lr': 0.0004872255162590856, 'samples': 8768512, 'steps': 17125, 'loss/train': 1.3283218145370483} +03/04/2022 09:34:46 - INFO - codeparrot_training - Step 17126: {'lr': 0.0004872238415525764, 'samples': 8769024, 'steps': 17126, 'loss/train': 1.7718658447265625} +03/04/2022 09:34:46 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 09:34:51 - INFO - codeparrot_training - Step 17127: {'lr': 0.0004872221667391777, 'samples': 8769536, 'steps': 17127, 'loss/train': 1.925257682800293} +03/04/2022 09:34:54 - INFO - codeparrot_training - Step 17128: {'lr': 0.00048722049181889037, 'samples': 8770048, 'steps': 17128, 'loss/train': 2.0928568840026855} +03/04/2022 09:34:54 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/04/2022 09:34:59 - INFO - codeparrot_training - Step 17129: {'lr': 0.0004872188167917152, 'samples': 8770560, 'steps': 17129, 'loss/train': 2.0784177780151367} +03/04/2022 09:35:03 - INFO - codeparrot_training - Step 17130: {'lr': 0.00048721714165765286, 'samples': 8771072, 'steps': 17130, 'loss/train': 1.6427758932113647} +03/04/2022 09:35:03 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/04/2022 09:35:08 - INFO - codeparrot_training - Step 17131: {'lr': 0.00048721546641670413, 'samples': 8771584, 'steps': 17131, 'loss/train': 1.6314719915390015} +03/04/2022 09:35:11 - INFO - codeparrot_training - Step 17132: {'lr': 0.00048721379106886976, 'samples': 8772096, 'steps': 17132, 'loss/train': 2.1309096813201904} +03/04/2022 09:35:11 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 09:35:16 - INFO - codeparrot_training - Step 17133: {'lr': 0.0004872121156141506, 'samples': 8772608, 'steps': 17133, 'loss/train': 2.6584198474884033} +03/04/2022 09:35:19 - INFO - codeparrot_training - Step 17134: {'lr': 0.0004872104400525472, 'samples': 8773120, 'steps': 17134, 'loss/train': 2.2348036766052246} +03/04/2022 09:35:19 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 09:35:25 - INFO - codeparrot_training - Step 17135: {'lr': 0.0004872087643840605, 'samples': 8773632, 'steps': 17135, 'loss/train': 1.9893642663955688} +03/04/2022 09:35:28 - INFO - codeparrot_training - Step 17136: {'lr': 0.00048720708860869116, 'samples': 8774144, 'steps': 17136, 'loss/train': 2.3746910095214844} +03/04/2022 09:35:28 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 09:35:33 - INFO - codeparrot_training - Step 17137: {'lr': 0.00048720541272644004, 'samples': 8774656, 'steps': 17137, 'loss/train': 2.1710352897644043} +03/04/2022 09:35:36 - INFO - codeparrot_training - Step 17138: {'lr': 0.00048720373673730773, 'samples': 8775168, 'steps': 17138, 'loss/train': 2.093069553375244} +03/04/2022 09:35:36 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 09:35:42 - INFO - codeparrot_training - Step 17139: {'lr': 0.00048720206064129516, 'samples': 8775680, 'steps': 17139, 'loss/train': 1.600235104560852} +03/04/2022 09:35:44 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/04/2022 09:35:47 - INFO - codeparrot_training - Step 17140: {'lr': 0.0004872003844384029, 'samples': 8776192, 'steps': 17140, 'loss/train': 1.9445676803588867} +03/04/2022 09:35:50 - INFO - codeparrot_training - Step 17141: {'lr': 0.0004871987081286319, 'samples': 8776704, 'steps': 17141, 'loss/train': 2.0022990703582764} +03/04/2022 09:35:53 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/04/2022 09:35:55 - INFO - codeparrot_training - Step 17142: {'lr': 0.0004871970317119828, 'samples': 8777216, 'steps': 17142, 'loss/train': 2.2677621841430664} +03/04/2022 09:35:58 - INFO - codeparrot_training - Step 17143: {'lr': 0.00048719535518845634, 'samples': 8777728, 'steps': 17143, 'loss/train': 1.8111815452575684} +03/04/2022 09:36:01 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 09:36:04 - INFO - codeparrot_training - Step 17144: {'lr': 0.0004871936785580533, 'samples': 8778240, 'steps': 17144, 'loss/train': 1.5211622714996338} +03/04/2022 09:36:07 - INFO - codeparrot_training - Step 17145: {'lr': 0.0004871920018207745, 'samples': 8778752, 'steps': 17145, 'loss/train': 2.3356804847717285} +03/04/2022 09:36:09 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 09:36:12 - INFO - codeparrot_training - Step 17146: {'lr': 0.0004871903249766206, 'samples': 8779264, 'steps': 17146, 'loss/train': 0.3230781555175781} +03/04/2022 09:36:15 - INFO - codeparrot_training - Step 17147: {'lr': 0.0004871886480255925, 'samples': 8779776, 'steps': 17147, 'loss/train': 3.221109390258789} +03/04/2022 09:36:19 - INFO - codeparrot_training - Step 17148: {'lr': 0.0004871869709676907, 'samples': 8780288, 'steps': 17148, 'loss/train': 2.2650420665740967} +03/04/2022 09:36:19 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 09:36:24 - INFO - codeparrot_training - Step 17149: {'lr': 0.0004871852938029162, 'samples': 8780800, 'steps': 17149, 'loss/train': 2.366748571395874} +03/04/2022 09:36:27 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 09:36:29 - INFO - codeparrot_training - Step 17150: {'lr': 0.00048718361653126975, 'samples': 8781312, 'steps': 17150, 'loss/train': 2.064260959625244} +03/04/2022 09:36:33 - INFO - codeparrot_training - Step 17151: {'lr': 0.0004871819391527519, 'samples': 8781824, 'steps': 17151, 'loss/train': 1.5360852479934692} +03/04/2022 09:36:35 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 09:36:38 - INFO - codeparrot_training - Step 17152: {'lr': 0.0004871802616673636, 'samples': 8782336, 'steps': 17152, 'loss/train': 1.4840549230575562} +03/04/2022 09:36:41 - INFO - codeparrot_training - Step 17153: {'lr': 0.00048717858407510545, 'samples': 8782848, 'steps': 17153, 'loss/train': 1.605756402015686} +03/04/2022 09:36:44 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 09:36:46 - INFO - codeparrot_training - Step 17154: {'lr': 0.0004871769063759783, 'samples': 8783360, 'steps': 17154, 'loss/train': 2.627948522567749} +03/04/2022 09:36:49 - INFO - codeparrot_training - Step 17155: {'lr': 0.000487175228569983, 'samples': 8783872, 'steps': 17155, 'loss/train': 0.9743406176567078} +03/04/2022 09:36:52 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/04/2022 09:36:55 - INFO - codeparrot_training - Step 17156: {'lr': 0.0004871735506571201, 'samples': 8784384, 'steps': 17156, 'loss/train': 1.6425554752349854} +03/04/2022 09:36:58 - INFO - codeparrot_training - Step 17157: {'lr': 0.00048717187263739046, 'samples': 8784896, 'steps': 17157, 'loss/train': 1.7979509830474854} +03/04/2022 09:37:01 - INFO - codeparrot_training - Step 17158: {'lr': 0.00048717019451079493, 'samples': 8785408, 'steps': 17158, 'loss/train': 1.873268723487854} +03/04/2022 09:37:01 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 09:37:06 - INFO - codeparrot_training - Step 17159: {'lr': 0.00048716851627733404, 'samples': 8785920, 'steps': 17159, 'loss/train': 1.8272831439971924} +03/04/2022 09:37:09 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/04/2022 09:37:12 - INFO - codeparrot_training - Step 17160: {'lr': 0.00048716683793700876, 'samples': 8786432, 'steps': 17160, 'loss/train': 2.3660850524902344} +03/04/2022 09:37:15 - INFO - codeparrot_training - Step 17161: {'lr': 0.00048716515948981975, 'samples': 8786944, 'steps': 17161, 'loss/train': 2.6776652336120605} +03/04/2022 09:37:17 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 09:37:20 - INFO - codeparrot_training - Step 17162: {'lr': 0.0004871634809357678, 'samples': 8787456, 'steps': 17162, 'loss/train': 2.009446620941162} +03/04/2022 09:37:23 - INFO - codeparrot_training - Step 17163: {'lr': 0.00048716180227485365, 'samples': 8787968, 'steps': 17163, 'loss/train': 1.2721511125564575} +03/04/2022 09:37:26 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 09:37:28 - INFO - codeparrot_training - Step 17164: {'lr': 0.000487160123507078, 'samples': 8788480, 'steps': 17164, 'loss/train': 1.7694755792617798} +03/04/2022 09:37:32 - INFO - codeparrot_training - Step 17165: {'lr': 0.00048715844463244166, 'samples': 8788992, 'steps': 17165, 'loss/train': 2.118525981903076} +03/04/2022 09:37:34 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/04/2022 09:37:37 - INFO - codeparrot_training - Step 17166: {'lr': 0.0004871567656509454, 'samples': 8789504, 'steps': 17166, 'loss/train': 0.41063520312309265} +03/04/2022 09:37:40 - INFO - codeparrot_training - Step 17167: {'lr': 0.00048715508656259, 'samples': 8790016, 'steps': 17167, 'loss/train': 1.2649964094161987} +03/04/2022 09:37:43 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 09:37:45 - INFO - codeparrot_training - Step 17168: {'lr': 0.00048715340736737615, 'samples': 8790528, 'steps': 17168, 'loss/train': 2.618745803833008} +03/04/2022 09:37:48 - INFO - codeparrot_training - Step 17169: {'lr': 0.0004871517280653046, 'samples': 8791040, 'steps': 17169, 'loss/train': 2.0385875701904297} +03/04/2022 09:37:51 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/04/2022 09:37:54 - INFO - codeparrot_training - Step 17170: {'lr': 0.0004871500486563761, 'samples': 8791552, 'steps': 17170, 'loss/train': 2.3049304485321045} +03/04/2022 09:37:57 - INFO - codeparrot_training - Step 17171: {'lr': 0.0004871483691405916, 'samples': 8792064, 'steps': 17171, 'loss/train': 2.17268705368042} +03/04/2022 09:37:59 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/04/2022 09:38:02 - INFO - codeparrot_training - Step 17172: {'lr': 0.0004871466895179516, 'samples': 8792576, 'steps': 17172, 'loss/train': 2.057309150695801} +03/04/2022 09:38:05 - INFO - codeparrot_training - Step 17173: {'lr': 0.000487145009788457, 'samples': 8793088, 'steps': 17173, 'loss/train': 4.4460344314575195} +03/04/2022 09:38:08 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 09:38:11 - INFO - codeparrot_training - Step 17174: {'lr': 0.0004871433299521085, 'samples': 8793600, 'steps': 17174, 'loss/train': 2.193152904510498} +03/04/2022 09:38:14 - INFO - codeparrot_training - Step 17175: {'lr': 0.00048714165000890685, 'samples': 8794112, 'steps': 17175, 'loss/train': 1.9647339582443237} +03/04/2022 09:38:16 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/04/2022 09:38:19 - INFO - codeparrot_training - Step 17176: {'lr': 0.00048713996995885286, 'samples': 8794624, 'steps': 17176, 'loss/train': 1.7832444906234741} +03/04/2022 09:38:22 - INFO - codeparrot_training - Step 17177: {'lr': 0.0004871382898019472, 'samples': 8795136, 'steps': 17177, 'loss/train': 1.4419245719909668} +03/04/2022 09:38:25 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 09:38:27 - INFO - codeparrot_training - Step 17178: {'lr': 0.0004871366095381908, 'samples': 8795648, 'steps': 17178, 'loss/train': 1.6321632862091064} +03/04/2022 09:38:31 - INFO - codeparrot_training - Step 17179: {'lr': 0.00048713492916758425, 'samples': 8796160, 'steps': 17179, 'loss/train': 1.8229118585586548} +03/04/2022 09:38:33 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 09:38:36 - INFO - codeparrot_training - Step 17180: {'lr': 0.00048713324869012833, 'samples': 8796672, 'steps': 17180, 'loss/train': 1.8744003772735596} +03/04/2022 09:38:39 - INFO - codeparrot_training - Step 17181: {'lr': 0.0004871315681058238, 'samples': 8797184, 'steps': 17181, 'loss/train': 0.44500213861465454} +03/04/2022 09:38:42 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/04/2022 09:38:44 - INFO - codeparrot_training - Step 17182: {'lr': 0.0004871298874146716, 'samples': 8797696, 'steps': 17182, 'loss/train': 1.674233078956604} +03/04/2022 09:38:48 - INFO - codeparrot_training - Step 17183: {'lr': 0.00048712820661667215, 'samples': 8798208, 'steps': 17183, 'loss/train': 0.7629336714744568} +03/04/2022 09:38:50 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/04/2022 09:38:53 - INFO - codeparrot_training - Step 17184: {'lr': 0.0004871265257118265, 'samples': 8798720, 'steps': 17184, 'loss/train': 1.3591198921203613} +03/04/2022 09:38:56 - INFO - codeparrot_training - Step 17185: {'lr': 0.0004871248447001352, 'samples': 8799232, 'steps': 17185, 'loss/train': 1.744114637374878} +03/04/2022 09:38:59 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/04/2022 09:39:01 - INFO - codeparrot_training - Step 17186: {'lr': 0.0004871231635815992, 'samples': 8799744, 'steps': 17186, 'loss/train': 2.3362245559692383} +03/04/2022 09:39:04 - INFO - codeparrot_training - Step 17187: {'lr': 0.0004871214823562191, 'samples': 8800256, 'steps': 17187, 'loss/train': 2.155303955078125} +03/04/2022 09:39:07 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 09:39:10 - INFO - codeparrot_training - Step 17188: {'lr': 0.0004871198010239958, 'samples': 8800768, 'steps': 17188, 'loss/train': 2.2956764698028564} +03/04/2022 09:39:13 - INFO - codeparrot_training - Step 17189: {'lr': 0.0004871181195849299, 'samples': 8801280, 'steps': 17189, 'loss/train': 1.8212308883666992} +03/04/2022 09:39:16 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 09:39:18 - INFO - codeparrot_training - Step 17190: {'lr': 0.00048711643803902227, 'samples': 8801792, 'steps': 17190, 'loss/train': 1.9037054777145386} +03/04/2022 09:39:21 - INFO - codeparrot_training - Step 17191: {'lr': 0.00048711475638627363, 'samples': 8802304, 'steps': 17191, 'loss/train': 2.2839877605438232} +03/04/2022 09:39:24 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/04/2022 09:39:27 - INFO - codeparrot_training - Step 17192: {'lr': 0.0004871130746266847, 'samples': 8802816, 'steps': 17192, 'loss/train': 2.323458194732666} +03/04/2022 09:39:30 - INFO - codeparrot_training - Step 17193: {'lr': 0.00048711139276025626, 'samples': 8803328, 'steps': 17193, 'loss/train': 1.6542283296585083} +03/04/2022 09:39:32 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 09:39:35 - INFO - codeparrot_training - Step 17194: {'lr': 0.00048710971078698916, 'samples': 8803840, 'steps': 17194, 'loss/train': 1.9304890632629395} +03/04/2022 09:39:38 - INFO - codeparrot_training - Step 17195: {'lr': 0.0004871080287068841, 'samples': 8804352, 'steps': 17195, 'loss/train': 1.6915414333343506} +03/04/2022 09:39:40 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/04/2022 09:39:43 - INFO - codeparrot_training - Step 17196: {'lr': 0.00048710634651994176, 'samples': 8804864, 'steps': 17196, 'loss/train': 1.9733059406280518} +03/04/2022 09:39:47 - INFO - codeparrot_training - Step 17197: {'lr': 0.0004871046642261629, 'samples': 8805376, 'steps': 17197, 'loss/train': 1.8302311897277832} +03/04/2022 09:39:49 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 09:39:52 - INFO - codeparrot_training - Step 17198: {'lr': 0.0004871029818255485, 'samples': 8805888, 'steps': 17198, 'loss/train': 1.5511212348937988} +03/04/2022 09:39:55 - INFO - codeparrot_training - Step 17199: {'lr': 0.0004871012993180991, 'samples': 8806400, 'steps': 17199, 'loss/train': 2.3325717449188232} +03/04/2022 09:39:57 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 09:40:00 - INFO - codeparrot_training - Step 17200: {'lr': 0.0004870996167038154, 'samples': 8806912, 'steps': 17200, 'loss/train': 2.51958966255188} +03/04/2022 09:40:03 - INFO - codeparrot_training - Step 17201: {'lr': 0.0004870979339826984, 'samples': 8807424, 'steps': 17201, 'loss/train': 2.025771141052246} +03/04/2022 09:40:06 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/04/2022 09:40:09 - INFO - codeparrot_training - Step 17202: {'lr': 0.00048709625115474865, 'samples': 8807936, 'steps': 17202, 'loss/train': 1.9421520233154297} +03/04/2022 09:40:12 - INFO - codeparrot_training - Step 17203: {'lr': 0.00048709456821996705, 'samples': 8808448, 'steps': 17203, 'loss/train': 1.8844058513641357} +03/04/2022 09:40:14 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/04/2022 09:40:17 - INFO - codeparrot_training - Step 17204: {'lr': 0.0004870928851783543, 'samples': 8808960, 'steps': 17204, 'loss/train': 1.8601571321487427} +03/04/2022 09:40:20 - INFO - codeparrot_training - Step 17205: {'lr': 0.00048709120202991107, 'samples': 8809472, 'steps': 17205, 'loss/train': 1.4029299020767212} +03/04/2022 09:40:23 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/04/2022 09:40:26 - INFO - codeparrot_training - Step 17206: {'lr': 0.0004870895187746383, 'samples': 8809984, 'steps': 17206, 'loss/train': 1.2994829416275024} +03/04/2022 09:40:29 - INFO - codeparrot_training - Step 17207: {'lr': 0.00048708783541253655, 'samples': 8810496, 'steps': 17207, 'loss/train': 1.9149757623672485} +03/04/2022 09:40:32 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 09:40:34 - INFO - codeparrot_training - Step 17208: {'lr': 0.00048708615194360675, 'samples': 8811008, 'steps': 17208, 'loss/train': 1.8116834163665771} +03/04/2022 09:40:37 - INFO - codeparrot_training - Step 17209: {'lr': 0.0004870844683678496, 'samples': 8811520, 'steps': 17209, 'loss/train': 1.893630027770996} +03/04/2022 09:40:41 - INFO - codeparrot_training - Step 17210: {'lr': 0.0004870827846852658, 'samples': 8812032, 'steps': 17210, 'loss/train': 2.1882166862487793} +03/04/2022 09:40:41 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 09:40:46 - INFO - codeparrot_training - Step 17211: {'lr': 0.00048708110089585617, 'samples': 8812544, 'steps': 17211, 'loss/train': 0.36348676681518555} +03/04/2022 09:40:49 - INFO - codeparrot_training - Step 17212: {'lr': 0.00048707941699962143, 'samples': 8813056, 'steps': 17212, 'loss/train': 1.9120007753372192} +03/04/2022 09:40:49 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 09:40:54 - INFO - codeparrot_training - Step 17213: {'lr': 0.0004870777329965624, 'samples': 8813568, 'steps': 17213, 'loss/train': 1.587878704071045} +03/04/2022 09:40:57 - INFO - codeparrot_training - Step 17214: {'lr': 0.00048707604888667983, 'samples': 8814080, 'steps': 17214, 'loss/train': 1.5667716264724731} +03/04/2022 09:40:57 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 09:41:03 - INFO - codeparrot_training - Step 17215: {'lr': 0.0004870743646699744, 'samples': 8814592, 'steps': 17215, 'loss/train': 2.01271653175354} +03/04/2022 09:41:06 - INFO - codeparrot_training - Step 17216: {'lr': 0.0004870726803464469, 'samples': 8815104, 'steps': 17216, 'loss/train': 1.2849370241165161} +03/04/2022 09:41:07 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 09:41:11 - INFO - codeparrot_training - Step 17217: {'lr': 0.00048707099591609816, 'samples': 8815616, 'steps': 17217, 'loss/train': 1.995800256729126} +03/04/2022 09:41:14 - INFO - codeparrot_training - Step 17218: {'lr': 0.0004870693113789289, 'samples': 8816128, 'steps': 17218, 'loss/train': 2.1396496295928955} +03/04/2022 09:41:15 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 09:41:20 - INFO - codeparrot_training - Step 17219: {'lr': 0.00048706762673493987, 'samples': 8816640, 'steps': 17219, 'loss/train': 1.4164272546768188} +03/04/2022 09:41:23 - INFO - codeparrot_training - Step 17220: {'lr': 0.00048706594198413177, 'samples': 8817152, 'steps': 17220, 'loss/train': 1.4853577613830566} +03/04/2022 09:41:23 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 09:41:28 - INFO - codeparrot_training - Step 17221: {'lr': 0.0004870642571265054, 'samples': 8817664, 'steps': 17221, 'loss/train': 0.30922991037368774} +03/04/2022 09:41:31 - INFO - codeparrot_training - Step 17222: {'lr': 0.0004870625721620616, 'samples': 8818176, 'steps': 17222, 'loss/train': 2.199002981185913} +03/04/2022 09:41:32 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/04/2022 09:41:36 - INFO - codeparrot_training - Step 17223: {'lr': 0.00048706088709080103, 'samples': 8818688, 'steps': 17223, 'loss/train': 3.0391952991485596} +03/04/2022 09:41:40 - INFO - codeparrot_training - Step 17224: {'lr': 0.00048705920191272447, 'samples': 8819200, 'steps': 17224, 'loss/train': 1.3640570640563965} +03/04/2022 09:41:40 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 09:41:45 - INFO - codeparrot_training - Step 17225: {'lr': 0.0004870575166278327, 'samples': 8819712, 'steps': 17225, 'loss/train': 1.945119857788086} +03/04/2022 09:41:48 - INFO - codeparrot_training - Step 17226: {'lr': 0.0004870558312361265, 'samples': 8820224, 'steps': 17226, 'loss/train': 0.1389172524213791} +03/04/2022 09:41:48 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 09:41:53 - INFO - codeparrot_training - Step 17227: {'lr': 0.0004870541457376066, 'samples': 8820736, 'steps': 17227, 'loss/train': 1.1025177240371704} +03/04/2022 09:41:56 - INFO - codeparrot_training - Step 17228: {'lr': 0.0004870524601322737, 'samples': 8821248, 'steps': 17228, 'loss/train': 1.5354255437850952} +03/04/2022 09:41:56 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 09:42:02 - INFO - codeparrot_training - Step 17229: {'lr': 0.00048705077442012866, 'samples': 8821760, 'steps': 17229, 'loss/train': 3.416273593902588} +03/04/2022 09:42:05 - INFO - codeparrot_training - Step 17230: {'lr': 0.0004870490886011723, 'samples': 8822272, 'steps': 17230, 'loss/train': 2.3873677253723145} +03/04/2022 09:42:05 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 09:42:10 - INFO - codeparrot_training - Step 17231: {'lr': 0.0004870474026754051, 'samples': 8822784, 'steps': 17231, 'loss/train': 1.6248579025268555} +03/04/2022 09:42:13 - INFO - codeparrot_training - Step 17232: {'lr': 0.00048704571664282806, 'samples': 8823296, 'steps': 17232, 'loss/train': 1.8388217687606812} +03/04/2022 09:42:13 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 09:42:19 - INFO - codeparrot_training - Step 17233: {'lr': 0.0004870440305034419, 'samples': 8823808, 'steps': 17233, 'loss/train': 1.719441294670105} +03/04/2022 09:42:22 - INFO - codeparrot_training - Step 17234: {'lr': 0.00048704234425724736, 'samples': 8824320, 'steps': 17234, 'loss/train': 2.2794764041900635} +03/04/2022 09:42:22 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 09:42:27 - INFO - codeparrot_training - Step 17235: {'lr': 0.0004870406579042452, 'samples': 8824832, 'steps': 17235, 'loss/train': 2.451948881149292} +03/04/2022 09:42:30 - INFO - codeparrot_training - Step 17236: {'lr': 0.00048703897144443615, 'samples': 8825344, 'steps': 17236, 'loss/train': 2.450913906097412} +03/04/2022 09:42:30 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/04/2022 09:42:35 - INFO - codeparrot_training - Step 17237: {'lr': 0.000487037284877821, 'samples': 8825856, 'steps': 17237, 'loss/train': 1.8403140306472778} +03/04/2022 09:42:38 - INFO - codeparrot_training - Step 17238: {'lr': 0.00048703559820440054, 'samples': 8826368, 'steps': 17238, 'loss/train': 1.515211582183838} +03/04/2022 09:42:39 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 09:42:44 - INFO - codeparrot_training - Step 17239: {'lr': 0.0004870339114241755, 'samples': 8826880, 'steps': 17239, 'loss/train': 1.9624067544937134} +03/04/2022 09:42:47 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 09:42:49 - INFO - codeparrot_training - Step 17240: {'lr': 0.00048703222453714656, 'samples': 8827392, 'steps': 17240, 'loss/train': 1.813470721244812} +03/04/2022 09:42:52 - INFO - codeparrot_training - Step 17241: {'lr': 0.0004870305375433146, 'samples': 8827904, 'steps': 17241, 'loss/train': 1.9446817636489868} +03/04/2022 09:42:55 - INFO - codeparrot_training - Step 17242: {'lr': 0.0004870288504426804, 'samples': 8828416, 'steps': 17242, 'loss/train': 1.0914981365203857} +03/04/2022 09:42:55 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 09:43:01 - INFO - codeparrot_training - Step 17243: {'lr': 0.0004870271632352446, 'samples': 8828928, 'steps': 17243, 'loss/train': 1.5446547269821167} +03/04/2022 09:43:04 - INFO - codeparrot_training - Step 17244: {'lr': 0.000487025475921008, 'samples': 8829440, 'steps': 17244, 'loss/train': 1.75636625289917} +03/04/2022 09:43:04 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 09:43:09 - INFO - codeparrot_training - Step 17245: {'lr': 0.00048702378849997143, 'samples': 8829952, 'steps': 17245, 'loss/train': 2.251702070236206} +03/04/2022 09:43:12 - INFO - codeparrot_training - Step 17246: {'lr': 0.0004870221009721356, 'samples': 8830464, 'steps': 17246, 'loss/train': 2.2966339588165283} +03/04/2022 09:43:13 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/04/2022 09:43:18 - INFO - codeparrot_training - Step 17247: {'lr': 0.00048702041333750117, 'samples': 8830976, 'steps': 17247, 'loss/train': 1.9002872705459595} +03/04/2022 09:43:21 - INFO - codeparrot_training - Step 17248: {'lr': 0.0004870187255960691, 'samples': 8831488, 'steps': 17248, 'loss/train': 1.1937402486801147} +03/04/2022 09:43:21 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 09:43:26 - INFO - codeparrot_training - Step 17249: {'lr': 0.00048701703774784, 'samples': 8832000, 'steps': 17249, 'loss/train': 1.2602213621139526} +03/04/2022 09:43:29 - INFO - codeparrot_training - Step 17250: {'lr': 0.0004870153497928147, 'samples': 8832512, 'steps': 17250, 'loss/train': 1.9372429847717285} +03/04/2022 09:43:29 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 09:43:34 - INFO - codeparrot_training - Step 17251: {'lr': 0.00048701366173099396, 'samples': 8833024, 'steps': 17251, 'loss/train': 1.6361618041992188} +03/04/2022 09:43:38 - INFO - codeparrot_training - Step 17252: {'lr': 0.0004870119735623785, 'samples': 8833536, 'steps': 17252, 'loss/train': 1.045925259590149} +03/04/2022 09:43:38 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 09:43:43 - INFO - codeparrot_training - Step 17253: {'lr': 0.00048701028528696914, 'samples': 8834048, 'steps': 17253, 'loss/train': 0.7971089482307434} +03/04/2022 09:43:46 - INFO - codeparrot_training - Step 17254: {'lr': 0.0004870085969047665, 'samples': 8834560, 'steps': 17254, 'loss/train': 1.316550374031067} +03/04/2022 09:43:46 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/04/2022 09:43:51 - INFO - codeparrot_training - Step 17255: {'lr': 0.00048700690841577154, 'samples': 8835072, 'steps': 17255, 'loss/train': 0.9032773375511169} +03/04/2022 09:43:54 - INFO - codeparrot_training - Step 17256: {'lr': 0.0004870052198199849, 'samples': 8835584, 'steps': 17256, 'loss/train': 1.1718623638153076} +03/04/2022 09:43:54 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 09:44:00 - INFO - codeparrot_training - Step 17257: {'lr': 0.00048700353111740734, 'samples': 8836096, 'steps': 17257, 'loss/train': 1.1684075593948364} +03/04/2022 09:44:03 - INFO - codeparrot_training - Step 17258: {'lr': 0.0004870018423080397, 'samples': 8836608, 'steps': 17258, 'loss/train': 2.087352752685547} +03/04/2022 09:44:03 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/04/2022 09:44:08 - INFO - codeparrot_training - Step 17259: {'lr': 0.00048700015339188266, 'samples': 8837120, 'steps': 17259, 'loss/train': 1.2166283130645752} +03/04/2022 09:44:11 - INFO - codeparrot_training - Step 17260: {'lr': 0.0004869984643689369, 'samples': 8837632, 'steps': 17260, 'loss/train': 1.9575586318969727} +03/04/2022 09:44:11 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 09:44:17 - INFO - codeparrot_training - Step 17261: {'lr': 0.00048699677523920346, 'samples': 8838144, 'steps': 17261, 'loss/train': 1.5496422052383423} +03/04/2022 09:44:20 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/04/2022 09:44:22 - INFO - codeparrot_training - Step 17262: {'lr': 0.00048699508600268284, 'samples': 8838656, 'steps': 17262, 'loss/train': 2.54762864112854} +03/04/2022 09:44:25 - INFO - codeparrot_training - Step 17263: {'lr': 0.00048699339665937594, 'samples': 8839168, 'steps': 17263, 'loss/train': 2.1158645153045654} +03/04/2022 09:44:28 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/04/2022 09:44:30 - INFO - codeparrot_training - Step 17264: {'lr': 0.0004869917072092834, 'samples': 8839680, 'steps': 17264, 'loss/train': 1.9821112155914307} +03/04/2022 09:44:33 - INFO - codeparrot_training - Step 17265: {'lr': 0.00048699001765240615, 'samples': 8840192, 'steps': 17265, 'loss/train': 2.285323143005371} +03/04/2022 09:44:36 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 09:44:39 - INFO - codeparrot_training - Step 17266: {'lr': 0.00048698832798874477, 'samples': 8840704, 'steps': 17266, 'loss/train': 2.0845487117767334} +03/04/2022 09:44:42 - INFO - codeparrot_training - Step 17267: {'lr': 0.0004869866382183001, 'samples': 8841216, 'steps': 17267, 'loss/train': 1.7546137571334839} +03/04/2022 09:44:45 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 09:44:47 - INFO - codeparrot_training - Step 17268: {'lr': 0.00048698494834107297, 'samples': 8841728, 'steps': 17268, 'loss/train': 2.6543989181518555} +03/04/2022 09:44:50 - INFO - codeparrot_training - Step 17269: {'lr': 0.000486983258357064, 'samples': 8842240, 'steps': 17269, 'loss/train': 2.1987643241882324} +03/04/2022 09:44:54 - INFO - codeparrot_training - Step 17270: {'lr': 0.00048698156826627414, 'samples': 8842752, 'steps': 17270, 'loss/train': 1.4019421339035034} +03/04/2022 09:44:54 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/04/2022 09:44:59 - INFO - codeparrot_training - Step 17271: {'lr': 0.00048697987806870397, 'samples': 8843264, 'steps': 17271, 'loss/train': 2.231374502182007} +03/04/2022 09:45:02 - INFO - codeparrot_training - Step 17272: {'lr': 0.0004869781877643543, 'samples': 8843776, 'steps': 17272, 'loss/train': 2.0675179958343506} +03/04/2022 09:45:02 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 09:45:07 - INFO - codeparrot_training - Step 17273: {'lr': 0.000486976497353226, 'samples': 8844288, 'steps': 17273, 'loss/train': 1.9011173248291016} +03/04/2022 09:45:10 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 09:45:13 - INFO - codeparrot_training - Step 17274: {'lr': 0.0004869748068353197, 'samples': 8844800, 'steps': 17274, 'loss/train': 1.6328771114349365} +03/04/2022 09:45:16 - INFO - codeparrot_training - Step 17275: {'lr': 0.00048697311621063625, 'samples': 8845312, 'steps': 17275, 'loss/train': 2.1835434436798096} +03/04/2022 09:45:19 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 09:45:21 - INFO - codeparrot_training - Step 17276: {'lr': 0.0004869714254791763, 'samples': 8845824, 'steps': 17276, 'loss/train': 2.477288246154785} +03/04/2022 09:45:24 - INFO - codeparrot_training - Step 17277: {'lr': 0.00048696973464094076, 'samples': 8846336, 'steps': 17277, 'loss/train': 1.7769064903259277} +03/04/2022 09:45:27 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 09:45:30 - INFO - codeparrot_training - Step 17278: {'lr': 0.00048696804369593023, 'samples': 8846848, 'steps': 17278, 'loss/train': 2.077842950820923} +03/04/2022 09:45:33 - INFO - codeparrot_training - Step 17279: {'lr': 0.0004869663526441456, 'samples': 8847360, 'steps': 17279, 'loss/train': 2.525975465774536} +03/04/2022 09:45:35 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/04/2022 09:45:38 - INFO - codeparrot_training - Step 17280: {'lr': 0.0004869646614855876, 'samples': 8847872, 'steps': 17280, 'loss/train': 1.6253045797348022} +03/04/2022 09:45:41 - INFO - codeparrot_training - Step 17281: {'lr': 0.0004869629702202569, 'samples': 8848384, 'steps': 17281, 'loss/train': 2.010293960571289} +03/04/2022 09:45:43 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 09:45:47 - INFO - codeparrot_training - Step 17282: {'lr': 0.0004869612788481544, 'samples': 8848896, 'steps': 17282, 'loss/train': 0.5218611359596252} +03/04/2022 09:45:50 - INFO - codeparrot_training - Step 17283: {'lr': 0.00048695958736928084, 'samples': 8849408, 'steps': 17283, 'loss/train': 2.7404403686523438} +03/04/2022 09:45:52 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/04/2022 09:45:55 - INFO - codeparrot_training - Step 17284: {'lr': 0.00048695789578363693, 'samples': 8849920, 'steps': 17284, 'loss/train': 2.1637532711029053} +03/04/2022 09:45:58 - INFO - codeparrot_training - Step 17285: {'lr': 0.00048695620409122345, 'samples': 8850432, 'steps': 17285, 'loss/train': 0.47995975613594055} +03/04/2022 09:46:01 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 09:46:03 - INFO - codeparrot_training - Step 17286: {'lr': 0.00048695451229204115, 'samples': 8850944, 'steps': 17286, 'loss/train': 2.5006909370422363} +03/04/2022 09:46:07 - INFO - codeparrot_training - Step 17287: {'lr': 0.0004869528203860908, 'samples': 8851456, 'steps': 17287, 'loss/train': 2.719730854034424} +03/04/2022 09:46:09 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/04/2022 09:46:12 - INFO - codeparrot_training - Step 17288: {'lr': 0.0004869511283733732, 'samples': 8851968, 'steps': 17288, 'loss/train': 1.7425758838653564} +03/04/2022 09:46:15 - INFO - codeparrot_training - Step 17289: {'lr': 0.000486949436253889, 'samples': 8852480, 'steps': 17289, 'loss/train': 1.031009316444397} +03/04/2022 09:46:17 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 09:46:20 - INFO - codeparrot_training - Step 17290: {'lr': 0.0004869477440276391, 'samples': 8852992, 'steps': 17290, 'loss/train': 2.2604832649230957} +03/04/2022 09:46:23 - INFO - codeparrot_training - Step 17291: {'lr': 0.00048694605169462415, 'samples': 8853504, 'steps': 17291, 'loss/train': 3.219592332839966} +03/04/2022 09:46:25 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 09:46:29 - INFO - codeparrot_training - Step 17292: {'lr': 0.00048694435925484506, 'samples': 8854016, 'steps': 17292, 'loss/train': 2.4387688636779785} +03/04/2022 09:46:32 - INFO - codeparrot_training - Step 17293: {'lr': 0.0004869426667083024, 'samples': 8854528, 'steps': 17293, 'loss/train': 1.6752530336380005} +03/04/2022 09:46:34 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 09:46:37 - INFO - codeparrot_training - Step 17294: {'lr': 0.00048694097405499703, 'samples': 8855040, 'steps': 17294, 'loss/train': 2.059356451034546} +03/04/2022 09:46:40 - INFO - codeparrot_training - Step 17295: {'lr': 0.0004869392812949298, 'samples': 8855552, 'steps': 17295, 'loss/train': 2.0992352962493896} +03/04/2022 09:46:43 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 09:46:46 - INFO - codeparrot_training - Step 17296: {'lr': 0.00048693758842810133, 'samples': 8856064, 'steps': 17296, 'loss/train': 2.2586476802825928} +03/04/2022 09:46:49 - INFO - codeparrot_training - Step 17297: {'lr': 0.00048693589545451243, 'samples': 8856576, 'steps': 17297, 'loss/train': 1.8282818794250488} +03/04/2022 09:46:51 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 09:46:54 - INFO - codeparrot_training - Step 17298: {'lr': 0.00048693420237416393, 'samples': 8857088, 'steps': 17298, 'loss/train': 1.5754973888397217} +03/04/2022 09:46:57 - INFO - codeparrot_training - Step 17299: {'lr': 0.00048693250918705643, 'samples': 8857600, 'steps': 17299, 'loss/train': 1.4311652183532715} +03/04/2022 09:47:00 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/04/2022 09:47:02 - INFO - codeparrot_training - Step 17300: {'lr': 0.0004869308158931909, 'samples': 8858112, 'steps': 17300, 'loss/train': 2.1784815788269043} +03/04/2022 09:47:05 - INFO - codeparrot_training - Step 17301: {'lr': 0.00048692912249256794, 'samples': 8858624, 'steps': 17301, 'loss/train': 1.1745154857635498} +03/04/2022 09:47:08 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 09:47:11 - INFO - codeparrot_training - Step 17302: {'lr': 0.00048692742898518836, 'samples': 8859136, 'steps': 17302, 'loss/train': 2.6955294609069824} +03/04/2022 09:47:14 - INFO - codeparrot_training - Step 17303: {'lr': 0.000486925735371053, 'samples': 8859648, 'steps': 17303, 'loss/train': 2.307054281234741} +03/04/2022 09:47:16 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/04/2022 09:47:19 - INFO - codeparrot_training - Step 17304: {'lr': 0.00048692404165016256, 'samples': 8860160, 'steps': 17304, 'loss/train': 1.6755462884902954} +03/04/2022 09:47:22 - INFO - codeparrot_training - Step 17305: {'lr': 0.0004869223478225178, 'samples': 8860672, 'steps': 17305, 'loss/train': 1.9983936548233032} +03/04/2022 09:47:25 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/04/2022 09:47:28 - INFO - codeparrot_training - Step 17306: {'lr': 0.00048692065388811944, 'samples': 8861184, 'steps': 17306, 'loss/train': 1.57350492477417} +03/04/2022 09:47:31 - INFO - codeparrot_training - Step 17307: {'lr': 0.0004869189598469683, 'samples': 8861696, 'steps': 17307, 'loss/train': 1.6303462982177734} +03/04/2022 09:47:33 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 09:47:36 - INFO - codeparrot_training - Step 17308: {'lr': 0.00048691726569906514, 'samples': 8862208, 'steps': 17308, 'loss/train': 2.31445050239563} +03/04/2022 09:47:39 - INFO - codeparrot_training - Step 17309: {'lr': 0.0004869155714444107, 'samples': 8862720, 'steps': 17309, 'loss/train': 2.349963665008545} +03/04/2022 09:47:41 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 09:47:44 - INFO - codeparrot_training - Step 17310: {'lr': 0.00048691387708300584, 'samples': 8863232, 'steps': 17310, 'loss/train': 2.0869531631469727} +03/04/2022 09:47:48 - INFO - codeparrot_training - Step 17311: {'lr': 0.00048691218261485113, 'samples': 8863744, 'steps': 17311, 'loss/train': 1.0911774635314941} +03/04/2022 09:47:50 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/04/2022 09:47:53 - INFO - codeparrot_training - Step 17312: {'lr': 0.00048691048803994755, 'samples': 8864256, 'steps': 17312, 'loss/train': 2.1496052742004395} +03/04/2022 09:47:56 - INFO - codeparrot_training - Step 17313: {'lr': 0.00048690879335829565, 'samples': 8864768, 'steps': 17313, 'loss/train': 2.471639394760132} +03/04/2022 09:47:58 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/04/2022 09:48:01 - INFO - codeparrot_training - Step 17314: {'lr': 0.00048690709856989635, 'samples': 8865280, 'steps': 17314, 'loss/train': 2.02889084815979} +03/04/2022 09:48:05 - INFO - codeparrot_training - Step 17315: {'lr': 0.00048690540367475046, 'samples': 8865792, 'steps': 17315, 'loss/train': 2.2712082862854004} +03/04/2022 09:48:06 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 09:48:10 - INFO - codeparrot_training - Step 17316: {'lr': 0.00048690370867285847, 'samples': 8866304, 'steps': 17316, 'loss/train': 2.235522747039795} +03/04/2022 09:48:13 - INFO - codeparrot_training - Step 17317: {'lr': 0.00048690201356422146, 'samples': 8866816, 'steps': 17317, 'loss/train': 1.9494624137878418} +03/04/2022 09:48:15 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 09:48:19 - INFO - codeparrot_training - Step 17318: {'lr': 0.00048690031834884004, 'samples': 8867328, 'steps': 17318, 'loss/train': 1.9272725582122803} +03/04/2022 09:48:22 - INFO - codeparrot_training - Step 17319: {'lr': 0.00048689862302671495, 'samples': 8867840, 'steps': 17319, 'loss/train': 1.976380705833435} +03/04/2022 09:48:25 - INFO - codeparrot_training - Step 17320: {'lr': 0.000486896927597847, 'samples': 8868352, 'steps': 17320, 'loss/train': 4.037226676940918} +03/04/2022 09:48:25 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 09:48:30 - INFO - codeparrot_training - Step 17321: {'lr': 0.00048689523206223693, 'samples': 8868864, 'steps': 17321, 'loss/train': 2.167853832244873} +03/04/2022 09:48:33 - INFO - codeparrot_training - Step 17322: {'lr': 0.00048689353641988563, 'samples': 8869376, 'steps': 17322, 'loss/train': 2.148064374923706} +03/04/2022 09:48:34 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/04/2022 09:48:39 - INFO - codeparrot_training - Step 17323: {'lr': 0.0004868918406707937, 'samples': 8869888, 'steps': 17323, 'loss/train': 1.8597944974899292} +03/04/2022 09:48:42 - INFO - codeparrot_training - Step 17324: {'lr': 0.00048689014481496197, 'samples': 8870400, 'steps': 17324, 'loss/train': 2.038318157196045} +03/04/2022 09:48:43 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/04/2022 09:48:47 - INFO - codeparrot_training - Step 17325: {'lr': 0.0004868884488523911, 'samples': 8870912, 'steps': 17325, 'loss/train': 1.7909826040267944} +03/04/2022 09:48:51 - INFO - codeparrot_training - Step 17326: {'lr': 0.0004868867527830821, 'samples': 8871424, 'steps': 17326, 'loss/train': 2.2037529945373535} +03/04/2022 09:48:51 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/04/2022 09:48:56 - INFO - codeparrot_training - Step 17327: {'lr': 0.0004868850566070355, 'samples': 8871936, 'steps': 17327, 'loss/train': 1.774157166481018} +03/04/2022 09:48:59 - INFO - codeparrot_training - Step 17328: {'lr': 0.00048688336032425217, 'samples': 8872448, 'steps': 17328, 'loss/train': 1.6554423570632935} +03/04/2022 09:49:01 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 09:49:05 - INFO - codeparrot_training - Step 17329: {'lr': 0.0004868816639347328, 'samples': 8872960, 'steps': 17329, 'loss/train': 1.4885557889938354} +03/04/2022 09:49:08 - INFO - codeparrot_training - Step 17330: {'lr': 0.0004868799674384783, 'samples': 8873472, 'steps': 17330, 'loss/train': 1.5337415933609009} +03/04/2022 09:49:10 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/04/2022 09:49:13 - INFO - codeparrot_training - Step 17331: {'lr': 0.0004868782708354893, 'samples': 8873984, 'steps': 17331, 'loss/train': 2.437392473220825} +03/04/2022 09:49:16 - INFO - codeparrot_training - Step 17332: {'lr': 0.0004868765741257666, 'samples': 8874496, 'steps': 17332, 'loss/train': 2.2315688133239746} +03/04/2022 09:49:18 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 09:49:21 - INFO - codeparrot_training - Step 17333: {'lr': 0.00048687487730931096, 'samples': 8875008, 'steps': 17333, 'loss/train': 2.9484143257141113} +03/04/2022 09:49:25 - INFO - codeparrot_training - Step 17334: {'lr': 0.00048687318038612317, 'samples': 8875520, 'steps': 17334, 'loss/train': 2.6707797050476074} +03/04/2022 09:49:27 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/04/2022 09:49:30 - INFO - codeparrot_training - Step 17335: {'lr': 0.000486871483356204, 'samples': 8876032, 'steps': 17335, 'loss/train': 1.4475077390670776} +03/04/2022 09:49:33 - INFO - codeparrot_training - Step 17336: {'lr': 0.00048686978621955416, 'samples': 8876544, 'steps': 17336, 'loss/train': 2.0893421173095703} +03/04/2022 09:49:35 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 09:49:38 - INFO - codeparrot_training - Step 17337: {'lr': 0.00048686808897617447, 'samples': 8877056, 'steps': 17337, 'loss/train': 2.1351988315582275} +03/04/2022 09:49:41 - INFO - codeparrot_training - Step 17338: {'lr': 0.00048686639162606564, 'samples': 8877568, 'steps': 17338, 'loss/train': 1.9153072834014893} +03/04/2022 09:49:43 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 09:49:47 - INFO - codeparrot_training - Step 17339: {'lr': 0.0004868646941692285, 'samples': 8878080, 'steps': 17339, 'loss/train': 2.2189078330993652} +03/04/2022 09:49:50 - INFO - codeparrot_training - Step 17340: {'lr': 0.0004868629966056638, 'samples': 8878592, 'steps': 17340, 'loss/train': 1.9150062799453735} +03/04/2022 09:49:52 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 09:49:55 - INFO - codeparrot_training - Step 17341: {'lr': 0.0004868612989353722, 'samples': 8879104, 'steps': 17341, 'loss/train': 1.5314157009124756} +03/04/2022 09:49:58 - INFO - codeparrot_training - Step 17342: {'lr': 0.0004868596011583547, 'samples': 8879616, 'steps': 17342, 'loss/train': 2.3834354877471924} +03/04/2022 09:50:00 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 09:50:04 - INFO - codeparrot_training - Step 17343: {'lr': 0.00048685790327461184, 'samples': 8880128, 'steps': 17343, 'loss/train': 2.534484386444092} +03/04/2022 09:50:07 - INFO - codeparrot_training - Step 17344: {'lr': 0.0004868562052841444, 'samples': 8880640, 'steps': 17344, 'loss/train': 1.5348937511444092} +03/04/2022 09:50:08 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 09:50:12 - INFO - codeparrot_training - Step 17345: {'lr': 0.00048685450718695335, 'samples': 8881152, 'steps': 17345, 'loss/train': 2.5259578227996826} +03/04/2022 09:50:15 - INFO - codeparrot_training - Step 17346: {'lr': 0.00048685280898303916, 'samples': 8881664, 'steps': 17346, 'loss/train': 1.5742652416229248} +03/04/2022 09:50:17 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 09:50:20 - INFO - codeparrot_training - Step 17347: {'lr': 0.00048685111067240283, 'samples': 8882176, 'steps': 17347, 'loss/train': 1.5508952140808105} +03/04/2022 09:50:24 - INFO - codeparrot_training - Step 17348: {'lr': 0.00048684941225504507, 'samples': 8882688, 'steps': 17348, 'loss/train': 2.2930684089660645} +03/04/2022 09:50:25 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/04/2022 09:50:29 - INFO - codeparrot_training - Step 17349: {'lr': 0.0004868477137309666, 'samples': 8883200, 'steps': 17349, 'loss/train': 1.9486385583877563} +03/04/2022 09:50:32 - INFO - codeparrot_training - Step 17350: {'lr': 0.00048684601510016817, 'samples': 8883712, 'steps': 17350, 'loss/train': 1.42340886592865} +03/04/2022 09:50:33 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 09:50:37 - INFO - codeparrot_training - Step 17351: {'lr': 0.00048684431636265065, 'samples': 8884224, 'steps': 17351, 'loss/train': 3.863535165786743} +03/04/2022 09:50:41 - INFO - codeparrot_training - Step 17352: {'lr': 0.00048684261751841463, 'samples': 8884736, 'steps': 17352, 'loss/train': 2.362060308456421} +03/04/2022 09:50:42 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/04/2022 09:50:46 - INFO - codeparrot_training - Step 17353: {'lr': 0.000486840918567461, 'samples': 8885248, 'steps': 17353, 'loss/train': 2.6487672328948975} +03/04/2022 09:50:49 - INFO - codeparrot_training - Step 17354: {'lr': 0.0004868392195097906, 'samples': 8885760, 'steps': 17354, 'loss/train': 2.5347976684570312} +03/04/2022 09:50:51 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 09:50:54 - INFO - codeparrot_training - Step 17355: {'lr': 0.0004868375203454041, 'samples': 8886272, 'steps': 17355, 'loss/train': 1.7541838884353638} +03/04/2022 09:50:58 - INFO - codeparrot_training - Step 17356: {'lr': 0.00048683582107430227, 'samples': 8886784, 'steps': 17356, 'loss/train': 2.014817714691162} +03/04/2022 09:50:59 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/04/2022 09:51:03 - INFO - codeparrot_training - Step 17357: {'lr': 0.0004868341216964858, 'samples': 8887296, 'steps': 17357, 'loss/train': 1.5625805854797363} +03/04/2022 09:51:06 - INFO - codeparrot_training - Step 17358: {'lr': 0.00048683242221195553, 'samples': 8887808, 'steps': 17358, 'loss/train': 1.4986333847045898} +03/04/2022 09:51:08 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/04/2022 09:51:11 - INFO - codeparrot_training - Step 17359: {'lr': 0.00048683072262071224, 'samples': 8888320, 'steps': 17359, 'loss/train': 4.085904598236084} +03/04/2022 09:51:14 - INFO - codeparrot_training - Step 17360: {'lr': 0.00048682902292275667, 'samples': 8888832, 'steps': 17360, 'loss/train': 2.0843968391418457} +03/04/2022 09:51:16 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/04/2022 09:51:20 - INFO - codeparrot_training - Step 17361: {'lr': 0.00048682732311808964, 'samples': 8889344, 'steps': 17361, 'loss/train': 1.0921473503112793} +03/04/2022 09:51:23 - INFO - codeparrot_training - Step 17362: {'lr': 0.00048682562320671185, 'samples': 8889856, 'steps': 17362, 'loss/train': 1.8331644535064697} +03/04/2022 09:51:25 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 09:51:28 - INFO - codeparrot_training - Step 17363: {'lr': 0.00048682392318862407, 'samples': 8890368, 'steps': 17363, 'loss/train': 2.178312301635742} +03/04/2022 09:51:31 - INFO - codeparrot_training - Step 17364: {'lr': 0.00048682222306382705, 'samples': 8890880, 'steps': 17364, 'loss/train': 1.2663837671279907} +03/04/2022 09:51:33 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/04/2022 09:51:37 - INFO - codeparrot_training - Step 17365: {'lr': 0.0004868205228323217, 'samples': 8891392, 'steps': 17365, 'loss/train': 1.4743783473968506} +03/04/2022 09:51:40 - INFO - codeparrot_training - Step 17366: {'lr': 0.0004868188224941086, 'samples': 8891904, 'steps': 17366, 'loss/train': 1.944361925125122} +03/04/2022 09:51:42 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/04/2022 09:51:45 - INFO - codeparrot_training - Step 17367: {'lr': 0.0004868171220491886, 'samples': 8892416, 'steps': 17367, 'loss/train': 1.1691508293151855} +03/04/2022 09:51:48 - INFO - codeparrot_training - Step 17368: {'lr': 0.00048681542149756253, 'samples': 8892928, 'steps': 17368, 'loss/train': 4.596549034118652} +03/04/2022 09:51:50 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 09:51:54 - INFO - codeparrot_training - Step 17369: {'lr': 0.00048681372083923103, 'samples': 8893440, 'steps': 17369, 'loss/train': 2.343165159225464} +03/04/2022 09:51:57 - INFO - codeparrot_training - Step 17370: {'lr': 0.0004868120200741949, 'samples': 8893952, 'steps': 17370, 'loss/train': 1.1830004453659058} +03/04/2022 09:51:59 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 09:52:02 - INFO - codeparrot_training - Step 17371: {'lr': 0.0004868103192024549, 'samples': 8894464, 'steps': 17371, 'loss/train': 1.66206693649292} +03/04/2022 09:52:06 - INFO - codeparrot_training - Step 17372: {'lr': 0.0004868086182240119, 'samples': 8894976, 'steps': 17372, 'loss/train': 3.5952112674713135} +03/04/2022 09:52:08 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 09:52:11 - INFO - codeparrot_training - Step 17373: {'lr': 0.00048680691713886653, 'samples': 8895488, 'steps': 17373, 'loss/train': 2.071876049041748} +03/04/2022 09:52:14 - INFO - codeparrot_training - Step 17374: {'lr': 0.00048680521594701964, 'samples': 8896000, 'steps': 17374, 'loss/train': 1.9146305322647095} +03/04/2022 09:52:17 - INFO - codeparrot_training - Step 17375: {'lr': 0.00048680351464847207, 'samples': 8896512, 'steps': 17375, 'loss/train': 2.272498607635498} +03/04/2022 09:52:18 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/04/2022 09:52:22 - INFO - codeparrot_training - Step 17376: {'lr': 0.00048680181324322437, 'samples': 8897024, 'steps': 17376, 'loss/train': 1.9188755750656128} +03/04/2022 09:52:26 - INFO - codeparrot_training - Step 17377: {'lr': 0.00048680011173127746, 'samples': 8897536, 'steps': 17377, 'loss/train': 1.5273181200027466} +03/04/2022 09:52:26 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 09:52:31 - INFO - codeparrot_training - Step 17378: {'lr': 0.00048679841011263204, 'samples': 8898048, 'steps': 17378, 'loss/train': 2.3655288219451904} +03/04/2022 09:52:34 - INFO - codeparrot_training - Step 17379: {'lr': 0.00048679670838728894, 'samples': 8898560, 'steps': 17379, 'loss/train': 2.498816728591919} +03/04/2022 09:52:34 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 09:52:39 - INFO - codeparrot_training - Step 17380: {'lr': 0.0004867950065552489, 'samples': 8899072, 'steps': 17380, 'loss/train': 2.1316235065460205} +03/04/2022 09:52:43 - INFO - codeparrot_training - Step 17381: {'lr': 0.00048679330461651275, 'samples': 8899584, 'steps': 17381, 'loss/train': 2.5681545734405518} +03/04/2022 09:52:43 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 09:52:48 - INFO - codeparrot_training - Step 17382: {'lr': 0.00048679160257108107, 'samples': 8900096, 'steps': 17382, 'loss/train': 1.466504693031311} +03/04/2022 09:52:51 - INFO - codeparrot_training - Step 17383: {'lr': 0.00048678990041895484, 'samples': 8900608, 'steps': 17383, 'loss/train': 1.8894259929656982} +03/04/2022 09:52:51 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 09:52:56 - INFO - codeparrot_training - Step 17384: {'lr': 0.00048678819816013467, 'samples': 8901120, 'steps': 17384, 'loss/train': 1.6218822002410889} +03/04/2022 09:52:59 - INFO - codeparrot_training - Step 17385: {'lr': 0.0004867864957946214, 'samples': 8901632, 'steps': 17385, 'loss/train': 1.5856868028640747} +03/04/2022 09:52:59 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/04/2022 09:53:05 - INFO - codeparrot_training - Step 17386: {'lr': 0.0004867847933224158, 'samples': 8902144, 'steps': 17386, 'loss/train': 1.6890685558319092} +03/04/2022 09:53:08 - INFO - codeparrot_training - Step 17387: {'lr': 0.0004867830907435187, 'samples': 8902656, 'steps': 17387, 'loss/train': 1.729661226272583} +03/04/2022 09:53:08 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/04/2022 09:53:13 - INFO - codeparrot_training - Step 17388: {'lr': 0.0004867813880579307, 'samples': 8903168, 'steps': 17388, 'loss/train': 0.9801804423332214} +03/04/2022 09:53:17 - INFO - codeparrot_training - Step 17389: {'lr': 0.0004867796852656527, 'samples': 8903680, 'steps': 17389, 'loss/train': 1.689299464225769} +03/04/2022 09:53:17 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 09:53:22 - INFO - codeparrot_training - Step 17390: {'lr': 0.00048677798236668537, 'samples': 8904192, 'steps': 17390, 'loss/train': 1.9638237953186035} +03/04/2022 09:53:25 - INFO - codeparrot_training - Step 17391: {'lr': 0.00048677627936102966, 'samples': 8904704, 'steps': 17391, 'loss/train': 0.7934051156044006} +03/04/2022 09:53:26 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 09:53:30 - INFO - codeparrot_training - Step 17392: {'lr': 0.0004867745762486861, 'samples': 8905216, 'steps': 17392, 'loss/train': 2.198030471801758} +03/04/2022 09:53:33 - INFO - codeparrot_training - Step 17393: {'lr': 0.0004867728730296556, 'samples': 8905728, 'steps': 17393, 'loss/train': 2.381946086883545} +03/04/2022 09:53:34 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/04/2022 09:53:39 - INFO - codeparrot_training - Step 17394: {'lr': 0.0004867711697039389, 'samples': 8906240, 'steps': 17394, 'loss/train': 1.8666225671768188} +03/04/2022 09:53:42 - INFO - codeparrot_training - Step 17395: {'lr': 0.00048676946627153675, 'samples': 8906752, 'steps': 17395, 'loss/train': 1.9783819913864136} +03/04/2022 09:53:43 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 09:53:47 - INFO - codeparrot_training - Step 17396: {'lr': 0.00048676776273244994, 'samples': 8907264, 'steps': 17396, 'loss/train': 1.210360050201416} +03/04/2022 09:53:51 - INFO - codeparrot_training - Step 17397: {'lr': 0.00048676605908667926, 'samples': 8907776, 'steps': 17397, 'loss/train': 2.0759546756744385} +03/04/2022 09:53:52 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 09:53:56 - INFO - codeparrot_training - Step 17398: {'lr': 0.00048676435533422536, 'samples': 8908288, 'steps': 17398, 'loss/train': 1.6193052530288696} +03/04/2022 09:53:59 - INFO - codeparrot_training - Step 17399: {'lr': 0.00048676265147508917, 'samples': 8908800, 'steps': 17399, 'loss/train': 0.7337481379508972} +03/04/2022 09:54:00 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 09:54:04 - INFO - codeparrot_training - Step 17400: {'lr': 0.00048676094750927144, 'samples': 8909312, 'steps': 17400, 'loss/train': 0.6842808127403259} +03/04/2022 09:54:07 - INFO - codeparrot_training - Step 17401: {'lr': 0.0004867592434367728, 'samples': 8909824, 'steps': 17401, 'loss/train': 2.0560038089752197} +03/04/2022 09:54:08 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 09:54:13 - INFO - codeparrot_training - Step 17402: {'lr': 0.0004867575392575941, 'samples': 8910336, 'steps': 17402, 'loss/train': 2.546003818511963} +03/04/2022 09:54:16 - INFO - codeparrot_training - Step 17403: {'lr': 0.0004867558349717361, 'samples': 8910848, 'steps': 17403, 'loss/train': 1.4340087175369263} +03/04/2022 09:54:17 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/04/2022 09:54:21 - INFO - codeparrot_training - Step 17404: {'lr': 0.0004867541305791996, 'samples': 8911360, 'steps': 17404, 'loss/train': 2.2930328845977783} +03/04/2022 09:54:24 - INFO - codeparrot_training - Step 17405: {'lr': 0.00048675242607998533, 'samples': 8911872, 'steps': 17405, 'loss/train': 1.115888237953186} +03/04/2022 09:54:25 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 09:54:29 - INFO - codeparrot_training - Step 17406: {'lr': 0.00048675072147409405, 'samples': 8912384, 'steps': 17406, 'loss/train': 1.584164023399353} +03/04/2022 09:54:33 - INFO - codeparrot_training - Step 17407: {'lr': 0.0004867490167615266, 'samples': 8912896, 'steps': 17407, 'loss/train': 1.4932974576950073} +03/04/2022 09:54:33 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/04/2022 09:54:38 - INFO - codeparrot_training - Step 17408: {'lr': 0.0004867473119422837, 'samples': 8913408, 'steps': 17408, 'loss/train': 2.0521583557128906} +03/04/2022 09:54:41 - INFO - codeparrot_training - Step 17409: {'lr': 0.00048674560701636606, 'samples': 8913920, 'steps': 17409, 'loss/train': 2.3083689212799072} +03/04/2022 09:54:42 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 09:54:46 - INFO - codeparrot_training - Step 17410: {'lr': 0.0004867439019837745, 'samples': 8914432, 'steps': 17410, 'loss/train': 1.5680906772613525} +03/04/2022 09:54:50 - INFO - codeparrot_training - Step 17411: {'lr': 0.00048674219684450985, 'samples': 8914944, 'steps': 17411, 'loss/train': 2.1664350032806396} +03/04/2022 09:54:50 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 09:54:55 - INFO - codeparrot_training - Step 17412: {'lr': 0.00048674049159857277, 'samples': 8915456, 'steps': 17412, 'loss/train': 0.27272841334342957} +03/04/2022 09:54:58 - INFO - codeparrot_training - Step 17413: {'lr': 0.0004867387862459641, 'samples': 8915968, 'steps': 17413, 'loss/train': 0.3583604097366333} +03/04/2022 09:55:00 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/04/2022 09:55:04 - INFO - codeparrot_training - Step 17414: {'lr': 0.0004867370807866845, 'samples': 8916480, 'steps': 17414, 'loss/train': 2.5279293060302734} +03/04/2022 09:55:07 - INFO - codeparrot_training - Step 17415: {'lr': 0.000486735375220735, 'samples': 8916992, 'steps': 17415, 'loss/train': 1.5873537063598633} +03/04/2022 09:55:08 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/04/2022 09:55:12 - INFO - codeparrot_training - Step 17416: {'lr': 0.00048673366954811605, 'samples': 8917504, 'steps': 17416, 'loss/train': 1.963753581047058} +03/04/2022 09:55:15 - INFO - codeparrot_training - Step 17417: {'lr': 0.0004867319637688286, 'samples': 8918016, 'steps': 17417, 'loss/train': 2.0981223583221436} +03/04/2022 09:55:16 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/04/2022 09:55:20 - INFO - codeparrot_training - Step 17418: {'lr': 0.0004867302578828734, 'samples': 8918528, 'steps': 17418, 'loss/train': 1.9392707347869873} +03/04/2022 09:55:23 - INFO - codeparrot_training - Step 17419: {'lr': 0.0004867285518902512, 'samples': 8919040, 'steps': 17419, 'loss/train': 1.7900147438049316} +03/04/2022 09:55:25 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/04/2022 09:55:29 - INFO - codeparrot_training - Step 17420: {'lr': 0.0004867268457909627, 'samples': 8919552, 'steps': 17420, 'loss/train': 2.244166612625122} +03/04/2022 09:55:32 - INFO - codeparrot_training - Step 17421: {'lr': 0.0004867251395850088, 'samples': 8920064, 'steps': 17421, 'loss/train': 2.1061275005340576} +03/04/2022 09:55:34 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 09:55:37 - INFO - codeparrot_training - Step 17422: {'lr': 0.00048672343327239024, 'samples': 8920576, 'steps': 17422, 'loss/train': 2.272671699523926} +03/04/2022 09:55:40 - INFO - codeparrot_training - Step 17423: {'lr': 0.00048672172685310767, 'samples': 8921088, 'steps': 17423, 'loss/train': 2.0651159286499023} +03/04/2022 09:55:42 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 09:55:46 - INFO - codeparrot_training - Step 17424: {'lr': 0.000486720020327162, 'samples': 8921600, 'steps': 17424, 'loss/train': 2.221189260482788} +03/04/2022 09:55:49 - INFO - codeparrot_training - Step 17425: {'lr': 0.00048671831369455386, 'samples': 8922112, 'steps': 17425, 'loss/train': 2.040565013885498} +03/04/2022 09:55:50 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 09:55:54 - INFO - codeparrot_training - Step 17426: {'lr': 0.0004867166069552842, 'samples': 8922624, 'steps': 17426, 'loss/train': 2.3854594230651855} +03/04/2022 09:55:57 - INFO - codeparrot_training - Step 17427: {'lr': 0.00048671490010935366, 'samples': 8923136, 'steps': 17427, 'loss/train': 1.0068728923797607} +03/04/2022 09:55:59 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 09:56:03 - INFO - codeparrot_training - Step 17428: {'lr': 0.00048671319315676305, 'samples': 8923648, 'steps': 17428, 'loss/train': 0.772129237651825} +03/04/2022 09:56:06 - INFO - codeparrot_training - Step 17429: {'lr': 0.00048671148609751307, 'samples': 8924160, 'steps': 17429, 'loss/train': 1.6925119161605835} +03/04/2022 09:56:07 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 09:56:11 - INFO - codeparrot_training - Step 17430: {'lr': 0.0004867097789316046, 'samples': 8924672, 'steps': 17430, 'loss/train': 0.9923723936080933} +03/04/2022 09:56:14 - INFO - codeparrot_training - Step 17431: {'lr': 0.0004867080716590384, 'samples': 8925184, 'steps': 17431, 'loss/train': 2.7427384853363037} +03/04/2022 09:56:16 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/04/2022 09:56:20 - INFO - codeparrot_training - Step 17432: {'lr': 0.0004867063642798151, 'samples': 8925696, 'steps': 17432, 'loss/train': 1.735987663269043} +03/04/2022 09:56:23 - INFO - codeparrot_training - Step 17433: {'lr': 0.0004867046567939356, 'samples': 8926208, 'steps': 17433, 'loss/train': 1.1434357166290283} +03/04/2022 09:56:24 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 09:56:28 - INFO - codeparrot_training - Step 17434: {'lr': 0.00048670294920140063, 'samples': 8926720, 'steps': 17434, 'loss/train': 2.0311763286590576} +03/04/2022 09:56:31 - INFO - codeparrot_training - Step 17435: {'lr': 0.00048670124150221094, 'samples': 8927232, 'steps': 17435, 'loss/train': 1.5631150007247925} +03/04/2022 09:56:33 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 09:56:36 - INFO - codeparrot_training - Step 17436: {'lr': 0.00048669953369636737, 'samples': 8927744, 'steps': 17436, 'loss/train': 3.770456075668335} +03/04/2022 09:56:39 - INFO - codeparrot_training - Step 17437: {'lr': 0.00048669782578387067, 'samples': 8928256, 'steps': 17437, 'loss/train': 1.7463977336883545} +03/04/2022 09:56:41 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 09:56:45 - INFO - codeparrot_training - Step 17438: {'lr': 0.00048669611776472153, 'samples': 8928768, 'steps': 17438, 'loss/train': 1.8469116687774658} +03/04/2022 09:56:48 - INFO - codeparrot_training - Step 17439: {'lr': 0.00048669440963892074, 'samples': 8929280, 'steps': 17439, 'loss/train': 0.9974958300590515} +03/04/2022 09:56:50 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/04/2022 09:56:53 - INFO - codeparrot_training - Step 17440: {'lr': 0.00048669270140646914, 'samples': 8929792, 'steps': 17440, 'loss/train': 1.4441546201705933} +03/04/2022 09:56:56 - INFO - codeparrot_training - Step 17441: {'lr': 0.0004866909930673675, 'samples': 8930304, 'steps': 17441, 'loss/train': 4.304600238800049} +03/04/2022 09:56:58 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 09:57:02 - INFO - codeparrot_training - Step 17442: {'lr': 0.00048668928462161653, 'samples': 8930816, 'steps': 17442, 'loss/train': 1.6706762313842773} +03/04/2022 09:57:05 - INFO - codeparrot_training - Step 17443: {'lr': 0.000486687576069217, 'samples': 8931328, 'steps': 17443, 'loss/train': 2.1230764389038086} +03/04/2022 09:57:07 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/04/2022 09:57:10 - INFO - codeparrot_training - Step 17444: {'lr': 0.00048668586741016967, 'samples': 8931840, 'steps': 17444, 'loss/train': 2.54822039604187} +03/04/2022 09:57:13 - INFO - codeparrot_training - Step 17445: {'lr': 0.0004866841586444754, 'samples': 8932352, 'steps': 17445, 'loss/train': 1.943244457244873} +03/04/2022 09:57:15 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/04/2022 09:57:18 - INFO - codeparrot_training - Step 17446: {'lr': 0.0004866824497721349, 'samples': 8932864, 'steps': 17446, 'loss/train': 1.5995303392410278} +03/04/2022 09:57:22 - INFO - codeparrot_training - Step 17447: {'lr': 0.0004866807407931489, 'samples': 8933376, 'steps': 17447, 'loss/train': 1.1105982065200806} +03/04/2022 09:57:24 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 09:57:27 - INFO - codeparrot_training - Step 17448: {'lr': 0.0004866790317075182, 'samples': 8933888, 'steps': 17448, 'loss/train': 2.2593681812286377} +03/04/2022 09:57:30 - INFO - codeparrot_training - Step 17449: {'lr': 0.00048667732251524365, 'samples': 8934400, 'steps': 17449, 'loss/train': 1.4902570247650146} +03/04/2022 09:57:32 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/04/2022 09:57:35 - INFO - codeparrot_training - Step 17450: {'lr': 0.0004866756132163259, 'samples': 8934912, 'steps': 17450, 'loss/train': 1.6365641355514526} +03/04/2022 09:57:39 - INFO - codeparrot_training - Step 17451: {'lr': 0.0004866739038107658, 'samples': 8935424, 'steps': 17451, 'loss/train': 1.4022942781448364} +03/04/2022 09:57:41 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/04/2022 09:57:44 - INFO - codeparrot_training - Step 17452: {'lr': 0.000486672194298564, 'samples': 8935936, 'steps': 17452, 'loss/train': 1.8087025880813599} +03/04/2022 09:57:47 - INFO - codeparrot_training - Step 17453: {'lr': 0.00048667048467972146, 'samples': 8936448, 'steps': 17453, 'loss/train': 1.5490342378616333} +03/04/2022 09:57:49 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/04/2022 09:57:52 - INFO - codeparrot_training - Step 17454: {'lr': 0.00048666877495423885, 'samples': 8936960, 'steps': 17454, 'loss/train': 1.5633742809295654} +03/04/2022 09:57:56 - INFO - codeparrot_training - Step 17455: {'lr': 0.0004866670651221169, 'samples': 8937472, 'steps': 17455, 'loss/train': 1.9414530992507935} +03/04/2022 09:57:58 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 09:58:01 - INFO - codeparrot_training - Step 17456: {'lr': 0.0004866653551833564, 'samples': 8937984, 'steps': 17456, 'loss/train': 1.933963656425476} +03/04/2022 09:58:04 - INFO - codeparrot_training - Step 17457: {'lr': 0.00048666364513795816, 'samples': 8938496, 'steps': 17457, 'loss/train': 2.837127447128296} +03/04/2022 09:58:06 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 09:58:09 - INFO - codeparrot_training - Step 17458: {'lr': 0.00048666193498592304, 'samples': 8939008, 'steps': 17458, 'loss/train': 2.07375431060791} +03/04/2022 09:58:13 - INFO - codeparrot_training - Step 17459: {'lr': 0.0004866602247272516, 'samples': 8939520, 'steps': 17459, 'loss/train': 1.8138986825942993} +03/04/2022 09:58:15 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 09:58:18 - INFO - codeparrot_training - Step 17460: {'lr': 0.0004866585143619447, 'samples': 8940032, 'steps': 17460, 'loss/train': 2.1539480686187744} +03/04/2022 09:58:21 - INFO - codeparrot_training - Step 17461: {'lr': 0.00048665680389000315, 'samples': 8940544, 'steps': 17461, 'loss/train': 2.3927083015441895} +03/04/2022 09:58:23 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 09:58:26 - INFO - codeparrot_training - Step 17462: {'lr': 0.0004866550933114277, 'samples': 8941056, 'steps': 17462, 'loss/train': 1.6544108390808105} +03/04/2022 09:58:29 - INFO - codeparrot_training - Step 17463: {'lr': 0.00048665338262621915, 'samples': 8941568, 'steps': 17463, 'loss/train': 1.9657609462738037} +03/04/2022 09:58:31 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 09:58:35 - INFO - codeparrot_training - Step 17464: {'lr': 0.00048665167183437817, 'samples': 8942080, 'steps': 17464, 'loss/train': 1.7576611042022705} +03/04/2022 09:58:38 - INFO - codeparrot_training - Step 17465: {'lr': 0.00048664996093590563, 'samples': 8942592, 'steps': 17465, 'loss/train': 1.4785993099212646} +03/04/2022 09:58:40 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 09:58:43 - INFO - codeparrot_training - Step 17466: {'lr': 0.0004866482499308023, 'samples': 8943104, 'steps': 17466, 'loss/train': 2.5017590522766113} +03/04/2022 09:58:46 - INFO - codeparrot_training - Step 17467: {'lr': 0.0004866465388190689, 'samples': 8943616, 'steps': 17467, 'loss/train': 2.458921194076538} +03/04/2022 09:58:48 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/04/2022 09:58:51 - INFO - codeparrot_training - Step 17468: {'lr': 0.0004866448276007062, 'samples': 8944128, 'steps': 17468, 'loss/train': 1.8777260780334473} +03/04/2022 09:58:55 - INFO - codeparrot_training - Step 17469: {'lr': 0.000486643116275715, 'samples': 8944640, 'steps': 17469, 'loss/train': 0.9366203546524048} +03/04/2022 09:58:57 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 09:59:00 - INFO - codeparrot_training - Step 17470: {'lr': 0.00048664140484409613, 'samples': 8945152, 'steps': 17470, 'loss/train': 2.4246394634246826} +03/04/2022 09:59:03 - INFO - codeparrot_training - Step 17471: {'lr': 0.0004866396933058502, 'samples': 8945664, 'steps': 17471, 'loss/train': 1.7949024438858032} +03/04/2022 09:59:05 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 09:59:08 - INFO - codeparrot_training - Step 17472: {'lr': 0.00048663798166097814, 'samples': 8946176, 'steps': 17472, 'loss/train': 2.146641254425049} +03/04/2022 09:59:12 - INFO - codeparrot_training - Step 17473: {'lr': 0.0004866362699094806, 'samples': 8946688, 'steps': 17473, 'loss/train': 1.968339443206787} +03/04/2022 09:59:13 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/04/2022 09:59:17 - INFO - codeparrot_training - Step 17474: {'lr': 0.0004866345580513585, 'samples': 8947200, 'steps': 17474, 'loss/train': 1.3272043466567993} +03/04/2022 09:59:20 - INFO - codeparrot_training - Step 17475: {'lr': 0.0004866328460866124, 'samples': 8947712, 'steps': 17475, 'loss/train': 2.153667688369751} +03/04/2022 09:59:22 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 09:59:25 - INFO - codeparrot_training - Step 17476: {'lr': 0.0004866311340152433, 'samples': 8948224, 'steps': 17476, 'loss/train': 1.8605622053146362} +03/04/2022 09:59:28 - INFO - codeparrot_training - Step 17477: {'lr': 0.0004866294218372518, 'samples': 8948736, 'steps': 17477, 'loss/train': 2.1031110286712646} +03/04/2022 09:59:30 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 09:59:34 - INFO - codeparrot_training - Step 17478: {'lr': 0.0004866277095526387, 'samples': 8949248, 'steps': 17478, 'loss/train': 2.0577657222747803} +03/04/2022 09:59:37 - INFO - codeparrot_training - Step 17479: {'lr': 0.00048662599716140485, 'samples': 8949760, 'steps': 17479, 'loss/train': 1.7718837261199951} +03/04/2022 09:59:40 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 09:59:43 - INFO - codeparrot_training - Step 17480: {'lr': 0.00048662428466355104, 'samples': 8950272, 'steps': 17480, 'loss/train': 0.2787880003452301} +03/04/2022 09:59:46 - INFO - codeparrot_training - Step 17481: {'lr': 0.0004866225720590779, 'samples': 8950784, 'steps': 17481, 'loss/train': 1.958360195159912} +03/04/2022 09:59:49 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 09:59:51 - INFO - codeparrot_training - Step 17482: {'lr': 0.00048662085934798627, 'samples': 8951296, 'steps': 17482, 'loss/train': 2.183255195617676} +03/04/2022 09:59:54 - INFO - codeparrot_training - Step 17483: {'lr': 0.00048661914653027694, 'samples': 8951808, 'steps': 17483, 'loss/train': 1.2469645738601685} +03/04/2022 09:59:57 - INFO - codeparrot_training - Step 17484: {'lr': 0.0004866174336059507, 'samples': 8952320, 'steps': 17484, 'loss/train': 1.2980523109436035} +03/04/2022 09:59:57 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 10:00:03 - INFO - codeparrot_training - Step 17485: {'lr': 0.00048661572057500833, 'samples': 8952832, 'steps': 17485, 'loss/train': 1.6882472038269043} +03/04/2022 10:00:05 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 10:00:08 - INFO - codeparrot_training - Step 17486: {'lr': 0.00048661400743745057, 'samples': 8953344, 'steps': 17486, 'loss/train': 2.026535987854004} +03/04/2022 10:00:11 - INFO - codeparrot_training - Step 17487: {'lr': 0.00048661229419327806, 'samples': 8953856, 'steps': 17487, 'loss/train': 1.7439520359039307} +03/04/2022 10:00:14 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 10:00:16 - INFO - codeparrot_training - Step 17488: {'lr': 0.0004866105808424918, 'samples': 8954368, 'steps': 17488, 'loss/train': 1.232743740081787} +03/04/2022 10:00:20 - INFO - codeparrot_training - Step 17489: {'lr': 0.0004866088673850925, 'samples': 8954880, 'steps': 17489, 'loss/train': 0.8887256383895874} +03/04/2022 10:00:22 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 10:00:25 - INFO - codeparrot_training - Step 17490: {'lr': 0.0004866071538210808, 'samples': 8955392, 'steps': 17490, 'loss/train': 2.049025058746338} +03/04/2022 10:00:28 - INFO - codeparrot_training - Step 17491: {'lr': 0.0004866054401504576, 'samples': 8955904, 'steps': 17491, 'loss/train': 2.1216042041778564} +03/04/2022 10:00:30 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 10:00:33 - INFO - codeparrot_training - Step 17492: {'lr': 0.0004866037263732237, 'samples': 8956416, 'steps': 17492, 'loss/train': 1.8742773532867432} +03/04/2022 10:00:36 - INFO - codeparrot_training - Step 17493: {'lr': 0.00048660201248937974, 'samples': 8956928, 'steps': 17493, 'loss/train': 2.2393858432769775} +03/04/2022 10:00:39 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/04/2022 10:00:42 - INFO - codeparrot_training - Step 17494: {'lr': 0.0004866002984989266, 'samples': 8957440, 'steps': 17494, 'loss/train': 2.285616159439087} +03/04/2022 10:00:45 - INFO - codeparrot_training - Step 17495: {'lr': 0.000486598584401865, 'samples': 8957952, 'steps': 17495, 'loss/train': 1.475893259048462} +03/04/2022 10:00:47 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/04/2022 10:00:50 - INFO - codeparrot_training - Step 17496: {'lr': 0.0004865968701981958, 'samples': 8958464, 'steps': 17496, 'loss/train': 1.0781662464141846} +03/04/2022 10:00:53 - INFO - codeparrot_training - Step 17497: {'lr': 0.0004865951558879196, 'samples': 8958976, 'steps': 17497, 'loss/train': 1.1213489770889282} +03/04/2022 10:00:55 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/04/2022 10:00:59 - INFO - codeparrot_training - Step 17498: {'lr': 0.00048659344147103725, 'samples': 8959488, 'steps': 17498, 'loss/train': 1.6617635488510132} +03/04/2022 10:01:02 - INFO - codeparrot_training - Step 17499: {'lr': 0.0004865917269475496, 'samples': 8960000, 'steps': 17499, 'loss/train': 1.0772353410720825} +03/04/2022 10:01:06 - INFO - codeparrot_training - Step 17500: {'lr': 0.00048659001231745734, 'samples': 8960512, 'steps': 17500, 'loss/train': 2.22719669342041} +03/04/2022 10:01:07 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 10:01:11 - INFO - codeparrot_training - Step 17501: {'lr': 0.0004865882975807614, 'samples': 8961024, 'steps': 17501, 'loss/train': 1.9549181461334229} +03/04/2022 10:01:14 - INFO - codeparrot_training - Step 17502: {'lr': 0.00048658658273746224, 'samples': 8961536, 'steps': 17502, 'loss/train': 0.5291174650192261} +03/04/2022 10:01:15 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 10:01:19 - INFO - codeparrot_training - Step 17503: {'lr': 0.00048658486778756097, 'samples': 8962048, 'steps': 17503, 'loss/train': 0.6541529297828674} +03/04/2022 10:01:22 - INFO - codeparrot_training - Step 17504: {'lr': 0.0004865831527310581, 'samples': 8962560, 'steps': 17504, 'loss/train': 2.266359329223633} +03/04/2022 10:01:24 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/04/2022 10:01:28 - INFO - codeparrot_training - Step 17505: {'lr': 0.00048658143756795456, 'samples': 8963072, 'steps': 17505, 'loss/train': 2.1067585945129395} +03/04/2022 10:01:31 - INFO - codeparrot_training - Step 17506: {'lr': 0.0004865797222982511, 'samples': 8963584, 'steps': 17506, 'loss/train': 1.5077117681503296} +03/04/2022 10:01:32 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 10:01:36 - INFO - codeparrot_training - Step 17507: {'lr': 0.0004865780069219484, 'samples': 8964096, 'steps': 17507, 'loss/train': 2.258826494216919} +03/04/2022 10:01:39 - INFO - codeparrot_training - Step 17508: {'lr': 0.00048657629143904733, 'samples': 8964608, 'steps': 17508, 'loss/train': 1.718372106552124} +03/04/2022 10:01:41 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/04/2022 10:01:45 - INFO - codeparrot_training - Step 17509: {'lr': 0.0004865745758495487, 'samples': 8965120, 'steps': 17509, 'loss/train': 1.9021540880203247} +03/04/2022 10:01:48 - INFO - codeparrot_training - Step 17510: {'lr': 0.00048657286015345313, 'samples': 8965632, 'steps': 17510, 'loss/train': 1.8333172798156738} +03/04/2022 10:01:49 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/04/2022 10:01:53 - INFO - codeparrot_training - Step 17511: {'lr': 0.00048657114435076153, 'samples': 8966144, 'steps': 17511, 'loss/train': 1.5887185335159302} +03/04/2022 10:01:56 - INFO - codeparrot_training - Step 17512: {'lr': 0.00048656942844147464, 'samples': 8966656, 'steps': 17512, 'loss/train': 1.8052523136138916} +03/04/2022 10:01:57 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/04/2022 10:02:01 - INFO - codeparrot_training - Step 17513: {'lr': 0.00048656771242559316, 'samples': 8967168, 'steps': 17513, 'loss/train': 1.012425422668457} +03/04/2022 10:02:04 - INFO - codeparrot_training - Step 17514: {'lr': 0.0004865659963031179, 'samples': 8967680, 'steps': 17514, 'loss/train': 1.6891604661941528} +03/04/2022 10:02:06 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/04/2022 10:02:10 - INFO - codeparrot_training - Step 17515: {'lr': 0.0004865642800740497, 'samples': 8968192, 'steps': 17515, 'loss/train': 0.5532246232032776} +03/04/2022 10:02:13 - INFO - codeparrot_training - Step 17516: {'lr': 0.0004865625637383893, 'samples': 8968704, 'steps': 17516, 'loss/train': 1.4154000282287598} +03/04/2022 10:02:14 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/04/2022 10:02:18 - INFO - codeparrot_training - Step 17517: {'lr': 0.00048656084729613747, 'samples': 8969216, 'steps': 17517, 'loss/train': 1.7067205905914307} +03/04/2022 10:02:21 - INFO - codeparrot_training - Step 17518: {'lr': 0.0004865591307472949, 'samples': 8969728, 'steps': 17518, 'loss/train': 1.6207877397537231} +03/04/2022 10:02:22 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/04/2022 10:02:27 - INFO - codeparrot_training - Step 17519: {'lr': 0.0004865574140918625, 'samples': 8970240, 'steps': 17519, 'loss/train': 1.9701826572418213} +03/04/2022 10:02:30 - INFO - codeparrot_training - Step 17520: {'lr': 0.00048655569732984096, 'samples': 8970752, 'steps': 17520, 'loss/train': 2.5090763568878174} +03/04/2022 10:02:31 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 10:02:35 - INFO - codeparrot_training - Step 17521: {'lr': 0.000486553980461231, 'samples': 8971264, 'steps': 17521, 'loss/train': 1.961938738822937} +03/04/2022 10:02:38 - INFO - codeparrot_training - Step 17522: {'lr': 0.0004865522634860335, 'samples': 8971776, 'steps': 17522, 'loss/train': 1.928957223892212} +03/04/2022 10:02:39 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/04/2022 10:02:43 - INFO - codeparrot_training - Step 17523: {'lr': 0.00048655054640424936, 'samples': 8972288, 'steps': 17523, 'loss/train': 0.6531922817230225} +03/04/2022 10:02:47 - INFO - codeparrot_training - Step 17524: {'lr': 0.00048654882921587907, 'samples': 8972800, 'steps': 17524, 'loss/train': 1.671759843826294} +03/04/2022 10:02:47 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 10:02:52 - INFO - codeparrot_training - Step 17525: {'lr': 0.00048654711192092347, 'samples': 8973312, 'steps': 17525, 'loss/train': 2.2500996589660645} +03/04/2022 10:02:55 - INFO - codeparrot_training - Step 17526: {'lr': 0.0004865453945193835, 'samples': 8973824, 'steps': 17526, 'loss/train': 1.526464819908142} +03/04/2022 10:02:56 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 10:03:00 - INFO - codeparrot_training - Step 17527: {'lr': 0.00048654367701125975, 'samples': 8974336, 'steps': 17527, 'loss/train': 1.4553059339523315} +03/04/2022 10:03:03 - INFO - codeparrot_training - Step 17528: {'lr': 0.0004865419593965531, 'samples': 8974848, 'steps': 17528, 'loss/train': 1.3860597610473633} +03/04/2022 10:03:04 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 10:03:09 - INFO - codeparrot_training - Step 17529: {'lr': 0.0004865402416752642, 'samples': 8975360, 'steps': 17529, 'loss/train': 1.6394612789154053} +03/04/2022 10:03:12 - INFO - codeparrot_training - Step 17530: {'lr': 0.0004865385238473941, 'samples': 8975872, 'steps': 17530, 'loss/train': 1.9682866334915161} +03/04/2022 10:03:12 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 10:03:17 - INFO - codeparrot_training - Step 17531: {'lr': 0.00048653680591294324, 'samples': 8976384, 'steps': 17531, 'loss/train': 1.5194083452224731} +03/04/2022 10:03:20 - INFO - codeparrot_training - Step 17532: {'lr': 0.00048653508787191256, 'samples': 8976896, 'steps': 17532, 'loss/train': 0.8864246606826782} +03/04/2022 10:03:22 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 10:03:26 - INFO - codeparrot_training - Step 17533: {'lr': 0.00048653336972430297, 'samples': 8977408, 'steps': 17533, 'loss/train': 1.4005341529846191} +03/04/2022 10:03:29 - INFO - codeparrot_training - Step 17534: {'lr': 0.0004865316514701149, 'samples': 8977920, 'steps': 17534, 'loss/train': 2.2071056365966797} +03/04/2022 10:03:30 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/04/2022 10:03:34 - INFO - codeparrot_training - Step 17535: {'lr': 0.0004865299331093495, 'samples': 8978432, 'steps': 17535, 'loss/train': 2.486935615539551} +03/04/2022 10:03:37 - INFO - codeparrot_training - Step 17536: {'lr': 0.0004865282146420072, 'samples': 8978944, 'steps': 17536, 'loss/train': 1.3634134531021118} +03/04/2022 10:03:38 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 10:03:42 - INFO - codeparrot_training - Step 17537: {'lr': 0.000486526496068089, 'samples': 8979456, 'steps': 17537, 'loss/train': 1.4799875020980835} +03/04/2022 10:03:46 - INFO - codeparrot_training - Step 17538: {'lr': 0.0004865247773875956, 'samples': 8979968, 'steps': 17538, 'loss/train': 1.841787576675415} +03/04/2022 10:03:47 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 10:03:51 - INFO - codeparrot_training - Step 17539: {'lr': 0.0004865230586005278, 'samples': 8980480, 'steps': 17539, 'loss/train': 1.7419323921203613} +03/04/2022 10:03:54 - INFO - codeparrot_training - Step 17540: {'lr': 0.00048652133970688633, 'samples': 8980992, 'steps': 17540, 'loss/train': 2.4380974769592285} +03/04/2022 10:03:55 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 10:03:59 - INFO - codeparrot_training - Step 17541: {'lr': 0.00048651962070667197, 'samples': 8981504, 'steps': 17541, 'loss/train': 2.268869638442993} +03/04/2022 10:04:02 - INFO - codeparrot_training - Step 17542: {'lr': 0.00048651790159988563, 'samples': 8982016, 'steps': 17542, 'loss/train': 1.991766333580017} +03/04/2022 10:04:03 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 10:04:08 - INFO - codeparrot_training - Step 17543: {'lr': 0.0004865161823865279, 'samples': 8982528, 'steps': 17543, 'loss/train': 1.8155313730239868} +03/04/2022 10:04:11 - INFO - codeparrot_training - Step 17544: {'lr': 0.0004865144630665996, 'samples': 8983040, 'steps': 17544, 'loss/train': 2.5009231567382812} +03/04/2022 10:04:12 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/04/2022 10:04:16 - INFO - codeparrot_training - Step 17545: {'lr': 0.0004865127436401016, 'samples': 8983552, 'steps': 17545, 'loss/train': 1.4555943012237549} +03/04/2022 10:04:20 - INFO - codeparrot_training - Step 17546: {'lr': 0.00048651102410703464, 'samples': 8984064, 'steps': 17546, 'loss/train': 1.0512042045593262} +03/04/2022 10:04:20 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/04/2022 10:04:25 - INFO - codeparrot_training - Step 17547: {'lr': 0.00048650930446739936, 'samples': 8984576, 'steps': 17547, 'loss/train': 1.6190294027328491} +03/04/2022 10:04:28 - INFO - codeparrot_training - Step 17548: {'lr': 0.00048650758472119666, 'samples': 8985088, 'steps': 17548, 'loss/train': 2.3988797664642334} +03/04/2022 10:04:29 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 10:04:33 - INFO - codeparrot_training - Step 17549: {'lr': 0.0004865058648684273, 'samples': 8985600, 'steps': 17549, 'loss/train': 2.135314702987671} +03/04/2022 10:04:37 - INFO - codeparrot_training - Step 17550: {'lr': 0.00048650414490909207, 'samples': 8986112, 'steps': 17550, 'loss/train': 2.772081136703491} +03/04/2022 10:04:37 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 10:04:42 - INFO - codeparrot_training - Step 17551: {'lr': 0.00048650242484319175, 'samples': 8986624, 'steps': 17551, 'loss/train': 1.734859585762024} +03/04/2022 10:04:45 - INFO - codeparrot_training - Step 17552: {'lr': 0.000486500704670727, 'samples': 8987136, 'steps': 17552, 'loss/train': 1.4752333164215088} +03/04/2022 10:04:46 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 10:04:50 - INFO - codeparrot_training - Step 17553: {'lr': 0.0004864989843916987, 'samples': 8987648, 'steps': 17553, 'loss/train': 2.2745983600616455} +03/04/2022 10:04:53 - INFO - codeparrot_training - Step 17554: {'lr': 0.0004864972640061077, 'samples': 8988160, 'steps': 17554, 'loss/train': 1.5310611724853516} +03/04/2022 10:04:54 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/04/2022 10:04:59 - INFO - codeparrot_training - Step 17555: {'lr': 0.00048649554351395453, 'samples': 8988672, 'steps': 17555, 'loss/train': 1.629929780960083} +03/04/2022 10:05:02 - INFO - codeparrot_training - Step 17556: {'lr': 0.00048649382291524024, 'samples': 8989184, 'steps': 17556, 'loss/train': 2.7335784435272217} +03/04/2022 10:05:03 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 10:05:07 - INFO - codeparrot_training - Step 17557: {'lr': 0.0004864921022099654, 'samples': 8989696, 'steps': 17557, 'loss/train': 1.7072685956954956} +03/04/2022 10:05:10 - INFO - codeparrot_training - Step 17558: {'lr': 0.00048649038139813097, 'samples': 8990208, 'steps': 17558, 'loss/train': 1.6765410900115967} +03/04/2022 10:05:12 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 10:05:16 - INFO - codeparrot_training - Step 17559: {'lr': 0.00048648866047973756, 'samples': 8990720, 'steps': 17559, 'loss/train': 2.1487529277801514} +03/04/2022 10:05:19 - INFO - codeparrot_training - Step 17560: {'lr': 0.000486486939454786, 'samples': 8991232, 'steps': 17560, 'loss/train': 1.8863013982772827} +03/04/2022 10:05:21 - INFO - codeparrot_training - Skipping example with length 983 (seq_length=1024) +03/04/2022 10:05:24 - INFO - codeparrot_training - Step 17561: {'lr': 0.0004864852183232771, 'samples': 8991744, 'steps': 17561, 'loss/train': 2.576676368713379} +03/04/2022 10:05:28 - INFO - codeparrot_training - Step 17562: {'lr': 0.0004864834970852116, 'samples': 8992256, 'steps': 17562, 'loss/train': 0.7206814885139465} +03/04/2022 10:05:30 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 10:05:33 - INFO - codeparrot_training - Step 17563: {'lr': 0.0004864817757405903, 'samples': 8992768, 'steps': 17563, 'loss/train': 3.5090718269348145} +03/04/2022 10:05:36 - INFO - codeparrot_training - Step 17564: {'lr': 0.0004864800542894139, 'samples': 8993280, 'steps': 17564, 'loss/train': 2.3452277183532715} +03/04/2022 10:05:38 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 10:05:41 - INFO - codeparrot_training - Step 17565: {'lr': 0.0004864783327316833, 'samples': 8993792, 'steps': 17565, 'loss/train': 2.127779960632324} +03/04/2022 10:05:45 - INFO - codeparrot_training - Step 17566: {'lr': 0.0004864766110673992, 'samples': 8994304, 'steps': 17566, 'loss/train': 2.0877671241760254} +03/04/2022 10:05:47 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/04/2022 10:05:50 - INFO - codeparrot_training - Step 17567: {'lr': 0.00048647488929656237, 'samples': 8994816, 'steps': 17567, 'loss/train': 2.0738120079040527} +03/04/2022 10:05:53 - INFO - codeparrot_training - Step 17568: {'lr': 0.00048647316741917365, 'samples': 8995328, 'steps': 17568, 'loss/train': 2.059305429458618} +03/04/2022 10:05:55 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 10:05:58 - INFO - codeparrot_training - Step 17569: {'lr': 0.0004864714454352337, 'samples': 8995840, 'steps': 17569, 'loss/train': 0.8107307553291321} +03/04/2022 10:06:01 - INFO - codeparrot_training - Step 17570: {'lr': 0.00048646972334474343, 'samples': 8996352, 'steps': 17570, 'loss/train': 2.313713788986206} +03/04/2022 10:06:03 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 10:06:07 - INFO - codeparrot_training - Step 17571: {'lr': 0.0004864680011477035, 'samples': 8996864, 'steps': 17571, 'loss/train': 2.336162567138672} +03/04/2022 10:06:10 - INFO - codeparrot_training - Step 17572: {'lr': 0.00048646627884411475, 'samples': 8997376, 'steps': 17572, 'loss/train': 1.567208170890808} +03/04/2022 10:06:11 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 10:06:15 - INFO - codeparrot_training - Step 17573: {'lr': 0.00048646455643397803, 'samples': 8997888, 'steps': 17573, 'loss/train': 2.1745076179504395} +03/04/2022 10:06:18 - INFO - codeparrot_training - Step 17574: {'lr': 0.0004864628339172939, 'samples': 8998400, 'steps': 17574, 'loss/train': 1.6507943868637085} +03/04/2022 10:06:20 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/04/2022 10:06:24 - INFO - codeparrot_training - Step 17575: {'lr': 0.00048646111129406336, 'samples': 8998912, 'steps': 17575, 'loss/train': 1.9911279678344727} +03/04/2022 10:06:27 - INFO - codeparrot_training - Step 17576: {'lr': 0.00048645938856428704, 'samples': 8999424, 'steps': 17576, 'loss/train': 2.317091941833496} +03/04/2022 10:06:31 - INFO - codeparrot_training - Step 17577: {'lr': 0.0004864576657279658, 'samples': 8999936, 'steps': 17577, 'loss/train': 1.3761736154556274} +03/04/2022 10:06:32 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 10:06:36 - INFO - codeparrot_training - Step 17578: {'lr': 0.0004864559427851003, 'samples': 9000448, 'steps': 17578, 'loss/train': 2.0744898319244385} +03/04/2022 10:06:39 - INFO - codeparrot_training - Step 17579: {'lr': 0.0004864542197356915, 'samples': 9000960, 'steps': 17579, 'loss/train': 1.7768676280975342} +03/04/2022 10:06:40 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 10:06:44 - INFO - codeparrot_training - Step 17580: {'lr': 0.00048645249657974007, 'samples': 9001472, 'steps': 17580, 'loss/train': 2.198215961456299} +03/04/2022 10:06:47 - INFO - codeparrot_training - Step 17581: {'lr': 0.00048645077331724675, 'samples': 9001984, 'steps': 17581, 'loss/train': 2.1666393280029297} +03/04/2022 10:06:49 - INFO - codeparrot_training - Skipping example with length 996 (seq_length=1024) +03/04/2022 10:06:53 - INFO - codeparrot_training - Step 17582: {'lr': 0.00048644904994821236, 'samples': 9002496, 'steps': 17582, 'loss/train': 1.941099762916565} +03/04/2022 10:06:56 - INFO - codeparrot_training - Step 17583: {'lr': 0.0004864473264726377, 'samples': 9003008, 'steps': 17583, 'loss/train': 1.1259727478027344} +03/04/2022 10:06:57 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/04/2022 10:07:01 - INFO - codeparrot_training - Step 17584: {'lr': 0.00048644560289052354, 'samples': 9003520, 'steps': 17584, 'loss/train': 1.7475858926773071} +03/04/2022 10:07:04 - INFO - codeparrot_training - Step 17585: {'lr': 0.0004864438792018706, 'samples': 9004032, 'steps': 17585, 'loss/train': 2.318572521209717} +03/04/2022 10:07:06 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 10:07:10 - INFO - codeparrot_training - Step 17586: {'lr': 0.0004864421554066797, 'samples': 9004544, 'steps': 17586, 'loss/train': 2.3089892864227295} +03/04/2022 10:07:13 - INFO - codeparrot_training - Step 17587: {'lr': 0.00048644043150495165, 'samples': 9005056, 'steps': 17587, 'loss/train': 0.2032512128353119} +03/04/2022 10:07:15 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/04/2022 10:07:18 - INFO - codeparrot_training - Step 17588: {'lr': 0.00048643870749668717, 'samples': 9005568, 'steps': 17588, 'loss/train': 1.7974562644958496} +03/04/2022 10:07:21 - INFO - codeparrot_training - Step 17589: {'lr': 0.000486436983381887, 'samples': 9006080, 'steps': 17589, 'loss/train': 2.8676092624664307} +03/04/2022 10:07:23 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/04/2022 10:07:26 - INFO - codeparrot_training - Step 17590: {'lr': 0.0004864352591605521, 'samples': 9006592, 'steps': 17590, 'loss/train': 0.5876865386962891} +03/04/2022 10:07:30 - INFO - codeparrot_training - Step 17591: {'lr': 0.00048643353483268306, 'samples': 9007104, 'steps': 17591, 'loss/train': 1.7547401189804077} +03/04/2022 10:07:32 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/04/2022 10:07:35 - INFO - codeparrot_training - Step 17592: {'lr': 0.00048643181039828066, 'samples': 9007616, 'steps': 17592, 'loss/train': 2.2119452953338623} +03/04/2022 10:07:38 - INFO - codeparrot_training - Step 17593: {'lr': 0.00048643008585734575, 'samples': 9008128, 'steps': 17593, 'loss/train': 1.88961660861969} +03/04/2022 10:07:40 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 10:07:43 - INFO - codeparrot_training - Step 17594: {'lr': 0.00048642836120987913, 'samples': 9008640, 'steps': 17594, 'loss/train': 1.90827214717865} +03/04/2022 10:07:46 - INFO - codeparrot_training - Step 17595: {'lr': 0.0004864266364558816, 'samples': 9009152, 'steps': 17595, 'loss/train': 2.2824532985687256} +03/04/2022 10:07:49 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 10:07:52 - INFO - codeparrot_training - Step 17596: {'lr': 0.00048642491159535373, 'samples': 9009664, 'steps': 17596, 'loss/train': 1.8547747135162354} +03/04/2022 10:07:55 - INFO - codeparrot_training - Step 17597: {'lr': 0.0004864231866282965, 'samples': 9010176, 'steps': 17597, 'loss/train': 1.8802217245101929} +03/04/2022 10:07:57 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/04/2022 10:08:00 - INFO - codeparrot_training - Step 17598: {'lr': 0.0004864214615547107, 'samples': 9010688, 'steps': 17598, 'loss/train': 2.2182347774505615} +03/04/2022 10:08:04 - INFO - codeparrot_training - Step 17599: {'lr': 0.000486419736374597, 'samples': 9011200, 'steps': 17599, 'loss/train': 0.8598599433898926} +03/04/2022 10:08:06 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 10:08:09 - INFO - codeparrot_training - Step 17600: {'lr': 0.0004864180110879562, 'samples': 9011712, 'steps': 17600, 'loss/train': 1.7239487171173096} +03/04/2022 10:08:12 - INFO - codeparrot_training - Step 17601: {'lr': 0.00048641628569478916, 'samples': 9012224, 'steps': 17601, 'loss/train': 1.9089040756225586} +03/04/2022 10:08:14 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 10:08:17 - INFO - codeparrot_training - Step 17602: {'lr': 0.00048641456019509643, 'samples': 9012736, 'steps': 17602, 'loss/train': 1.6981314420700073} +03/04/2022 10:08:20 - INFO - codeparrot_training - Step 17603: {'lr': 0.0004864128345888791, 'samples': 9013248, 'steps': 17603, 'loss/train': 1.4950491189956665} +03/04/2022 10:08:22 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/04/2022 10:08:26 - INFO - codeparrot_training - Step 17604: {'lr': 0.0004864111088761377, 'samples': 9013760, 'steps': 17604, 'loss/train': 1.935075044631958} +03/04/2022 10:08:29 - INFO - codeparrot_training - Step 17605: {'lr': 0.00048640938305687315, 'samples': 9014272, 'steps': 17605, 'loss/train': 1.6077208518981934} +03/04/2022 10:08:31 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/04/2022 10:08:34 - INFO - codeparrot_training - Step 17606: {'lr': 0.00048640765713108615, 'samples': 9014784, 'steps': 17606, 'loss/train': 1.9503464698791504} +03/04/2022 10:08:37 - INFO - codeparrot_training - Step 17607: {'lr': 0.00048640593109877754, 'samples': 9015296, 'steps': 17607, 'loss/train': 1.9272223711013794} +03/04/2022 10:08:39 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 10:08:42 - INFO - codeparrot_training - Step 17608: {'lr': 0.00048640420495994806, 'samples': 9015808, 'steps': 17608, 'loss/train': 1.0286378860473633} +03/04/2022 10:08:46 - INFO - codeparrot_training - Step 17609: {'lr': 0.0004864024787145985, 'samples': 9016320, 'steps': 17609, 'loss/train': 1.7398898601531982} +03/04/2022 10:08:47 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 10:08:51 - INFO - codeparrot_training - Step 17610: {'lr': 0.00048640075236272963, 'samples': 9016832, 'steps': 17610, 'loss/train': 1.24214506149292} +03/04/2022 10:08:54 - INFO - codeparrot_training - Step 17611: {'lr': 0.00048639902590434214, 'samples': 9017344, 'steps': 17611, 'loss/train': 1.920548677444458} +03/04/2022 10:08:56 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 10:08:59 - INFO - codeparrot_training - Step 17612: {'lr': 0.000486397299339437, 'samples': 9017856, 'steps': 17612, 'loss/train': 2.1269848346710205} +03/04/2022 10:09:02 - INFO - codeparrot_training - Step 17613: {'lr': 0.0004863955726680149, 'samples': 9018368, 'steps': 17613, 'loss/train': 1.3759475946426392} +03/04/2022 10:09:04 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 10:09:08 - INFO - codeparrot_training - Step 17614: {'lr': 0.0004863938458900765, 'samples': 9018880, 'steps': 17614, 'loss/train': 2.0317418575286865} +03/04/2022 10:09:11 - INFO - codeparrot_training - Step 17615: {'lr': 0.0004863921190056227, 'samples': 9019392, 'steps': 17615, 'loss/train': 2.0633790493011475} +03/04/2022 10:09:12 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 10:09:16 - INFO - codeparrot_training - Step 17616: {'lr': 0.0004863903920146544, 'samples': 9019904, 'steps': 17616, 'loss/train': 1.8916194438934326} +03/04/2022 10:09:19 - INFO - codeparrot_training - Step 17617: {'lr': 0.00048638866491717214, 'samples': 9020416, 'steps': 17617, 'loss/train': 1.8547922372817993} +03/04/2022 10:09:20 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 10:09:25 - INFO - codeparrot_training - Step 17618: {'lr': 0.00048638693771317675, 'samples': 9020928, 'steps': 17618, 'loss/train': 1.225846290588379} +03/04/2022 10:09:28 - INFO - codeparrot_training - Step 17619: {'lr': 0.0004863852104026691, 'samples': 9021440, 'steps': 17619, 'loss/train': 1.4563539028167725} +03/04/2022 10:09:29 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 10:09:33 - INFO - codeparrot_training - Step 17620: {'lr': 0.00048638348298564996, 'samples': 9021952, 'steps': 17620, 'loss/train': 1.7040396928787231} +03/04/2022 10:09:36 - INFO - codeparrot_training - Step 17621: {'lr': 0.00048638175546212, 'samples': 9022464, 'steps': 17621, 'loss/train': 1.949562668800354} +03/04/2022 10:09:37 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 10:09:41 - INFO - codeparrot_training - Step 17622: {'lr': 0.00048638002783208013, 'samples': 9022976, 'steps': 17622, 'loss/train': 2.0607011318206787} +03/04/2022 10:09:45 - INFO - codeparrot_training - Step 17623: {'lr': 0.000486378300095531, 'samples': 9023488, 'steps': 17623, 'loss/train': 1.7003434896469116} +03/04/2022 10:09:46 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/04/2022 10:09:50 - INFO - codeparrot_training - Step 17624: {'lr': 0.0004863765722524735, 'samples': 9024000, 'steps': 17624, 'loss/train': 1.6860395669937134} +03/04/2022 10:09:53 - INFO - codeparrot_training - Step 17625: {'lr': 0.0004863748443029083, 'samples': 9024512, 'steps': 17625, 'loss/train': 0.8460400700569153} +03/04/2022 10:09:54 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 10:09:58 - INFO - codeparrot_training - Step 17626: {'lr': 0.00048637311624683634, 'samples': 9025024, 'steps': 17626, 'loss/train': 1.7587765455245972} +03/04/2022 10:10:02 - INFO - codeparrot_training - Step 17627: {'lr': 0.0004863713880842583, 'samples': 9025536, 'steps': 17627, 'loss/train': 1.8418492078781128} +03/04/2022 10:10:03 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 10:10:07 - INFO - codeparrot_training - Step 17628: {'lr': 0.0004863696598151749, 'samples': 9026048, 'steps': 17628, 'loss/train': 1.4723191261291504} +03/04/2022 10:10:10 - INFO - codeparrot_training - Step 17629: {'lr': 0.00048636793143958695, 'samples': 9026560, 'steps': 17629, 'loss/train': 1.772235631942749} +03/04/2022 10:10:11 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 10:10:15 - INFO - codeparrot_training - Step 17630: {'lr': 0.00048636620295749533, 'samples': 9027072, 'steps': 17630, 'loss/train': 1.7997725009918213} +03/04/2022 10:10:18 - INFO - codeparrot_training - Step 17631: {'lr': 0.00048636447436890075, 'samples': 9027584, 'steps': 17631, 'loss/train': 0.6357917785644531} +03/04/2022 10:10:19 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/04/2022 10:10:24 - INFO - codeparrot_training - Step 17632: {'lr': 0.0004863627456738039, 'samples': 9028096, 'steps': 17632, 'loss/train': 2.239332437515259} +03/04/2022 10:10:27 - INFO - codeparrot_training - Step 17633: {'lr': 0.00048636101687220566, 'samples': 9028608, 'steps': 17633, 'loss/train': 2.904937744140625} +03/04/2022 10:10:28 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 10:10:32 - INFO - codeparrot_training - Step 17634: {'lr': 0.0004863592879641069, 'samples': 9029120, 'steps': 17634, 'loss/train': 3.5956308841705322} +03/04/2022 10:10:35 - INFO - codeparrot_training - Step 17635: {'lr': 0.0004863575589495082, 'samples': 9029632, 'steps': 17635, 'loss/train': 2.3567843437194824} +03/04/2022 10:10:36 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/04/2022 10:10:41 - INFO - codeparrot_training - Step 17636: {'lr': 0.00048635582982841047, 'samples': 9030144, 'steps': 17636, 'loss/train': 0.4492986798286438} +03/04/2022 10:10:44 - INFO - codeparrot_training - Step 17637: {'lr': 0.0004863541006008144, 'samples': 9030656, 'steps': 17637, 'loss/train': 2.123424768447876} +03/04/2022 10:10:45 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 10:10:49 - INFO - codeparrot_training - Step 17638: {'lr': 0.0004863523712667209, 'samples': 9031168, 'steps': 17638, 'loss/train': 2.20582914352417} +03/04/2022 10:10:52 - INFO - codeparrot_training - Step 17639: {'lr': 0.00048635064182613063, 'samples': 9031680, 'steps': 17639, 'loss/train': 1.7743374109268188} +03/04/2022 10:10:53 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/04/2022 10:10:58 - INFO - codeparrot_training - Step 17640: {'lr': 0.00048634891227904435, 'samples': 9032192, 'steps': 17640, 'loss/train': 0.7972428202629089} +03/04/2022 10:11:01 - INFO - codeparrot_training - Step 17641: {'lr': 0.00048634718262546297, 'samples': 9032704, 'steps': 17641, 'loss/train': 2.2068734169006348} +03/04/2022 10:11:02 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 10:11:06 - INFO - codeparrot_training - Step 17642: {'lr': 0.0004863454528653872, 'samples': 9033216, 'steps': 17642, 'loss/train': 2.4259376525878906} +03/04/2022 10:11:09 - INFO - codeparrot_training - Step 17643: {'lr': 0.0004863437229988178, 'samples': 9033728, 'steps': 17643, 'loss/train': 2.207216739654541} +03/04/2022 10:11:10 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 10:11:15 - INFO - codeparrot_training - Step 17644: {'lr': 0.00048634199302575554, 'samples': 9034240, 'steps': 17644, 'loss/train': 1.207603931427002} +03/04/2022 10:11:18 - INFO - codeparrot_training - Step 17645: {'lr': 0.00048634026294620125, 'samples': 9034752, 'steps': 17645, 'loss/train': 2.7826144695281982} +03/04/2022 10:11:18 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 10:11:23 - INFO - codeparrot_training - Step 17646: {'lr': 0.00048633853276015566, 'samples': 9035264, 'steps': 17646, 'loss/train': 1.972805380821228} +03/04/2022 10:11:26 - INFO - codeparrot_training - Step 17647: {'lr': 0.00048633680246761956, 'samples': 9035776, 'steps': 17647, 'loss/train': 1.7668378353118896} +03/04/2022 10:11:27 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 10:11:32 - INFO - codeparrot_training - Step 17648: {'lr': 0.00048633507206859383, 'samples': 9036288, 'steps': 17648, 'loss/train': 2.037001609802246} +03/04/2022 10:11:35 - INFO - codeparrot_training - Step 17649: {'lr': 0.00048633334156307907, 'samples': 9036800, 'steps': 17649, 'loss/train': 2.074406623840332} +03/04/2022 10:11:35 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 10:11:40 - INFO - codeparrot_training - Step 17650: {'lr': 0.0004863316109510762, 'samples': 9037312, 'steps': 17650, 'loss/train': 2.3729002475738525} +03/04/2022 10:11:43 - INFO - codeparrot_training - Step 17651: {'lr': 0.00048632988023258596, 'samples': 9037824, 'steps': 17651, 'loss/train': 1.5393352508544922} +03/04/2022 10:11:44 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/04/2022 10:11:48 - INFO - codeparrot_training - Step 17652: {'lr': 0.00048632814940760907, 'samples': 9038336, 'steps': 17652, 'loss/train': 1.5689913034439087} +03/04/2022 10:11:52 - INFO - codeparrot_training - Step 17653: {'lr': 0.00048632641847614645, 'samples': 9038848, 'steps': 17653, 'loss/train': 1.1076264381408691} +03/04/2022 10:11:52 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 10:11:57 - INFO - codeparrot_training - Step 17654: {'lr': 0.0004863246874381987, 'samples': 9039360, 'steps': 17654, 'loss/train': 1.8635759353637695} +03/04/2022 10:12:00 - INFO - codeparrot_training - Step 17655: {'lr': 0.00048632295629376675, 'samples': 9039872, 'steps': 17655, 'loss/train': 6.607613563537598} +03/04/2022 10:12:01 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 10:12:05 - INFO - codeparrot_training - Step 17656: {'lr': 0.00048632122504285133, 'samples': 9040384, 'steps': 17656, 'loss/train': 1.8852834701538086} +03/04/2022 10:12:08 - INFO - codeparrot_training - Step 17657: {'lr': 0.0004863194936854531, 'samples': 9040896, 'steps': 17657, 'loss/train': 0.7700000405311584} +03/04/2022 10:12:09 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 10:12:14 - INFO - codeparrot_training - Step 17658: {'lr': 0.0004863177622215731, 'samples': 9041408, 'steps': 17658, 'loss/train': 1.6644924879074097} +03/04/2022 10:12:17 - INFO - codeparrot_training - Step 17659: {'lr': 0.00048631603065121186, 'samples': 9041920, 'steps': 17659, 'loss/train': 1.8249307870864868} +03/04/2022 10:12:18 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/04/2022 10:12:22 - INFO - codeparrot_training - Step 17660: {'lr': 0.00048631429897437033, 'samples': 9042432, 'steps': 17660, 'loss/train': 2.348249673843384} +03/04/2022 10:12:25 - INFO - codeparrot_training - Step 17661: {'lr': 0.0004863125671910492, 'samples': 9042944, 'steps': 17661, 'loss/train': 1.6972764730453491} +03/04/2022 10:12:26 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 10:12:31 - INFO - codeparrot_training - Step 17662: {'lr': 0.00048631083530124934, 'samples': 9043456, 'steps': 17662, 'loss/train': 3.2287490367889404} +03/04/2022 10:12:34 - INFO - codeparrot_training - Step 17663: {'lr': 0.00048630910330497133, 'samples': 9043968, 'steps': 17663, 'loss/train': 1.681514024734497} +03/04/2022 10:12:34 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 10:12:39 - INFO - codeparrot_training - Step 17664: {'lr': 0.0004863073712022162, 'samples': 9044480, 'steps': 17664, 'loss/train': 1.3225152492523193} +03/04/2022 10:12:42 - INFO - codeparrot_training - Step 17665: {'lr': 0.00048630563899298453, 'samples': 9044992, 'steps': 17665, 'loss/train': 1.9729799032211304} +03/04/2022 10:12:43 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 10:12:47 - INFO - codeparrot_training - Step 17666: {'lr': 0.00048630390667727725, 'samples': 9045504, 'steps': 17666, 'loss/train': 1.533424973487854} +03/04/2022 10:12:51 - INFO - codeparrot_training - Step 17667: {'lr': 0.00048630217425509503, 'samples': 9046016, 'steps': 17667, 'loss/train': 1.9706906080245972} +03/04/2022 10:12:51 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 10:12:56 - INFO - codeparrot_training - Step 17668: {'lr': 0.00048630044172643874, 'samples': 9046528, 'steps': 17668, 'loss/train': 1.8294565677642822} +03/04/2022 10:12:59 - INFO - codeparrot_training - Step 17669: {'lr': 0.0004862987090913091, 'samples': 9047040, 'steps': 17669, 'loss/train': 1.4363197088241577} +03/04/2022 10:13:00 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/04/2022 10:13:04 - INFO - codeparrot_training - Step 17670: {'lr': 0.0004862969763497069, 'samples': 9047552, 'steps': 17670, 'loss/train': 1.3284187316894531} +03/04/2022 10:13:08 - INFO - codeparrot_training - Step 17671: {'lr': 0.0004862952435016329, 'samples': 9048064, 'steps': 17671, 'loss/train': 2.295074462890625} +03/04/2022 10:13:08 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 10:13:13 - INFO - codeparrot_training - Step 17672: {'lr': 0.00048629351054708795, 'samples': 9048576, 'steps': 17672, 'loss/train': 2.168184518814087} +03/04/2022 10:13:16 - INFO - codeparrot_training - Step 17673: {'lr': 0.0004862917774860728, 'samples': 9049088, 'steps': 17673, 'loss/train': 0.5962517857551575} +03/04/2022 10:13:17 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/04/2022 10:13:21 - INFO - codeparrot_training - Step 17674: {'lr': 0.0004862900443185882, 'samples': 9049600, 'steps': 17674, 'loss/train': 1.9558539390563965} +03/04/2022 10:13:25 - INFO - codeparrot_training - Step 17675: {'lr': 0.00048628831104463496, 'samples': 9050112, 'steps': 17675, 'loss/train': 1.6678858995437622} +03/04/2022 10:13:25 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 10:13:30 - INFO - codeparrot_training - Step 17676: {'lr': 0.0004862865776642138, 'samples': 9050624, 'steps': 17676, 'loss/train': 1.8748124837875366} +03/04/2022 10:13:33 - INFO - codeparrot_training - Step 17677: {'lr': 0.00048628484417732567, 'samples': 9051136, 'steps': 17677, 'loss/train': 2.0056073665618896} +03/04/2022 10:13:34 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/04/2022 10:13:38 - INFO - codeparrot_training - Step 17678: {'lr': 0.00048628311058397113, 'samples': 9051648, 'steps': 17678, 'loss/train': 0.38666510581970215} +03/04/2022 10:13:41 - INFO - codeparrot_training - Step 17679: {'lr': 0.0004862813768841511, 'samples': 9052160, 'steps': 17679, 'loss/train': 2.337892532348633} +03/04/2022 10:13:42 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/04/2022 10:13:47 - INFO - codeparrot_training - Step 17680: {'lr': 0.0004862796430778663, 'samples': 9052672, 'steps': 17680, 'loss/train': 1.7537779808044434} +03/04/2022 10:13:50 - INFO - codeparrot_training - Step 17681: {'lr': 0.0004862779091651176, 'samples': 9053184, 'steps': 17681, 'loss/train': 1.5733609199523926} +03/04/2022 10:13:51 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/04/2022 10:13:55 - INFO - codeparrot_training - Step 17682: {'lr': 0.0004862761751459057, 'samples': 9053696, 'steps': 17682, 'loss/train': 1.7090234756469727} +03/04/2022 10:13:58 - INFO - codeparrot_training - Step 17683: {'lr': 0.0004862744410202314, 'samples': 9054208, 'steps': 17683, 'loss/train': 2.0518720149993896} +03/04/2022 10:13:59 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 10:14:04 - INFO - codeparrot_training - Step 17684: {'lr': 0.00048627270678809544, 'samples': 9054720, 'steps': 17684, 'loss/train': 1.5172041654586792} +03/04/2022 10:14:07 - INFO - codeparrot_training - Step 17685: {'lr': 0.0004862709724494987, 'samples': 9055232, 'steps': 17685, 'loss/train': 3.033221960067749} +03/04/2022 10:14:09 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 10:14:12 - INFO - codeparrot_training - Step 17686: {'lr': 0.0004862692380044419, 'samples': 9055744, 'steps': 17686, 'loss/train': 1.462652564048767} +03/04/2022 10:14:16 - INFO - codeparrot_training - Step 17687: {'lr': 0.0004862675034529258, 'samples': 9056256, 'steps': 17687, 'loss/train': 1.5332857370376587} +03/04/2022 10:14:17 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 10:14:21 - INFO - codeparrot_training - Step 17688: {'lr': 0.0004862657687949512, 'samples': 9056768, 'steps': 17688, 'loss/train': 2.1807737350463867} +03/04/2022 10:14:24 - INFO - codeparrot_training - Step 17689: {'lr': 0.00048626403403051894, 'samples': 9057280, 'steps': 17689, 'loss/train': 2.2318689823150635} +03/04/2022 10:14:26 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 10:14:29 - INFO - codeparrot_training - Step 17690: {'lr': 0.00048626229915962974, 'samples': 9057792, 'steps': 17690, 'loss/train': 2.1156489849090576} +03/04/2022 10:14:32 - INFO - codeparrot_training - Step 17691: {'lr': 0.00048626056418228436, 'samples': 9058304, 'steps': 17691, 'loss/train': 2.3834731578826904} +03/04/2022 10:14:34 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 10:14:38 - INFO - codeparrot_training - Step 17692: {'lr': 0.0004862588290984836, 'samples': 9058816, 'steps': 17692, 'loss/train': 1.3038816452026367} +03/04/2022 10:14:41 - INFO - codeparrot_training - Step 17693: {'lr': 0.0004862570939082283, 'samples': 9059328, 'steps': 17693, 'loss/train': 1.7594530582427979} +03/04/2022 10:14:43 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/04/2022 10:14:46 - INFO - codeparrot_training - Step 17694: {'lr': 0.0004862553586115192, 'samples': 9059840, 'steps': 17694, 'loss/train': 1.9955484867095947} +03/04/2022 10:14:49 - INFO - codeparrot_training - Step 17695: {'lr': 0.00048625362320835707, 'samples': 9060352, 'steps': 17695, 'loss/train': 1.9345263242721558} +03/04/2022 10:14:51 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 10:14:54 - INFO - codeparrot_training - Step 17696: {'lr': 0.00048625188769874274, 'samples': 9060864, 'steps': 17696, 'loss/train': 1.8068077564239502} +03/04/2022 10:14:58 - INFO - codeparrot_training - Step 17697: {'lr': 0.0004862501520826769, 'samples': 9061376, 'steps': 17697, 'loss/train': 2.245685577392578} +03/04/2022 10:14:59 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/04/2022 10:15:03 - INFO - codeparrot_training - Step 17698: {'lr': 0.0004862484163601604, 'samples': 9061888, 'steps': 17698, 'loss/train': 0.6369031071662903} +03/04/2022 10:15:06 - INFO - codeparrot_training - Step 17699: {'lr': 0.000486246680531194, 'samples': 9062400, 'steps': 17699, 'loss/train': 1.768449068069458} +03/04/2022 10:15:08 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 10:15:11 - INFO - codeparrot_training - Step 17700: {'lr': 0.0004862449445957785, 'samples': 9062912, 'steps': 17700, 'loss/train': 2.1338040828704834} +03/04/2022 10:15:14 - INFO - codeparrot_training - Step 17701: {'lr': 0.00048624320855391467, 'samples': 9063424, 'steps': 17701, 'loss/train': 2.0154802799224854} +03/04/2022 10:15:16 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 10:15:20 - INFO - codeparrot_training - Step 17702: {'lr': 0.00048624147240560335, 'samples': 9063936, 'steps': 17702, 'loss/train': 1.2814348936080933} +03/04/2022 10:15:23 - INFO - codeparrot_training - Step 17703: {'lr': 0.00048623973615084516, 'samples': 9064448, 'steps': 17703, 'loss/train': 1.9243766069412231} +03/04/2022 10:15:25 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/04/2022 10:15:28 - INFO - codeparrot_training - Step 17704: {'lr': 0.0004862379997896411, 'samples': 9064960, 'steps': 17704, 'loss/train': 2.4178271293640137} +03/04/2022 10:15:31 - INFO - codeparrot_training - Step 17705: {'lr': 0.0004862362633219918, 'samples': 9065472, 'steps': 17705, 'loss/train': 1.716621994972229} +03/04/2022 10:15:33 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 10:15:37 - INFO - codeparrot_training - Step 17706: {'lr': 0.000486234526747898, 'samples': 9065984, 'steps': 17706, 'loss/train': 1.9617663621902466} +03/04/2022 10:15:40 - INFO - codeparrot_training - Step 17707: {'lr': 0.0004862327900673607, 'samples': 9066496, 'steps': 17707, 'loss/train': 1.2907534837722778} +03/04/2022 10:15:42 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 10:15:45 - INFO - codeparrot_training - Step 17708: {'lr': 0.00048623105328038054, 'samples': 9067008, 'steps': 17708, 'loss/train': 2.301517963409424} +03/04/2022 10:15:48 - INFO - codeparrot_training - Step 17709: {'lr': 0.0004862293163869582, 'samples': 9067520, 'steps': 17709, 'loss/train': 6.103832721710205} +03/04/2022 10:15:50 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 10:15:54 - INFO - codeparrot_training - Step 17710: {'lr': 0.00048622757938709466, 'samples': 9068032, 'steps': 17710, 'loss/train': 1.9855128526687622} +03/04/2022 10:15:57 - INFO - codeparrot_training - Step 17711: {'lr': 0.0004862258422807906, 'samples': 9068544, 'steps': 17711, 'loss/train': 2.271008014678955} +03/04/2022 10:15:59 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/04/2022 10:16:02 - INFO - codeparrot_training - Step 17712: {'lr': 0.0004862241050680468, 'samples': 9069056, 'steps': 17712, 'loss/train': 1.8510534763336182} +03/04/2022 10:16:05 - INFO - codeparrot_training - Step 17713: {'lr': 0.00048622236774886415, 'samples': 9069568, 'steps': 17713, 'loss/train': 1.4446097612380981} +03/04/2022 10:16:07 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/04/2022 10:16:10 - INFO - codeparrot_training - Step 17714: {'lr': 0.00048622063032324324, 'samples': 9070080, 'steps': 17714, 'loss/train': 1.821443796157837} +03/04/2022 10:16:14 - INFO - codeparrot_training - Step 17715: {'lr': 0.000486218892791185, 'samples': 9070592, 'steps': 17715, 'loss/train': 2.290062427520752} +03/04/2022 10:16:16 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/04/2022 10:16:19 - INFO - codeparrot_training - Step 17716: {'lr': 0.00048621715515269017, 'samples': 9071104, 'steps': 17716, 'loss/train': 1.4744815826416016} +03/04/2022 10:16:22 - INFO - codeparrot_training - Step 17717: {'lr': 0.0004862154174077595, 'samples': 9071616, 'steps': 17717, 'loss/train': 1.8535871505737305} +03/04/2022 10:16:24 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/04/2022 10:16:27 - INFO - codeparrot_training - Step 17718: {'lr': 0.00048621367955639395, 'samples': 9072128, 'steps': 17718, 'loss/train': 1.9828473329544067} +03/04/2022 10:16:31 - INFO - codeparrot_training - Step 17719: {'lr': 0.00048621194159859403, 'samples': 9072640, 'steps': 17719, 'loss/train': 2.549678087234497} +03/04/2022 10:16:34 - INFO - codeparrot_training - Step 17720: {'lr': 0.0004862102035343607, 'samples': 9073152, 'steps': 17720, 'loss/train': 1.8110220432281494} +03/04/2022 10:16:34 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 10:16:39 - INFO - codeparrot_training - Step 17721: {'lr': 0.0004862084653636947, 'samples': 9073664, 'steps': 17721, 'loss/train': 2.418520927429199} +03/04/2022 10:16:42 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 10:16:44 - INFO - codeparrot_training - Step 17722: {'lr': 0.00048620672708659675, 'samples': 9074176, 'steps': 17722, 'loss/train': 2.3744192123413086} +03/04/2022 10:16:48 - INFO - codeparrot_training - Step 17723: {'lr': 0.0004862049887030677, 'samples': 9074688, 'steps': 17723, 'loss/train': 2.035863161087036} +03/04/2022 10:16:50 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/04/2022 10:16:53 - INFO - codeparrot_training - Step 17724: {'lr': 0.0004862032502131084, 'samples': 9075200, 'steps': 17724, 'loss/train': 1.3808050155639648} +03/04/2022 10:16:56 - INFO - codeparrot_training - Step 17725: {'lr': 0.00048620151161671955, 'samples': 9075712, 'steps': 17725, 'loss/train': 2.359888792037964} +03/04/2022 10:16:59 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 10:17:01 - INFO - codeparrot_training - Step 17726: {'lr': 0.00048619977291390186, 'samples': 9076224, 'steps': 17726, 'loss/train': 2.1780660152435303} +03/04/2022 10:17:04 - INFO - codeparrot_training - Step 17727: {'lr': 0.00048619803410465624, 'samples': 9076736, 'steps': 17727, 'loss/train': 2.087703227996826} +03/04/2022 10:17:07 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 10:17:10 - INFO - codeparrot_training - Step 17728: {'lr': 0.00048619629518898344, 'samples': 9077248, 'steps': 17728, 'loss/train': 2.3904354572296143} +03/04/2022 10:17:13 - INFO - codeparrot_training - Step 17729: {'lr': 0.00048619455616688426, 'samples': 9077760, 'steps': 17729, 'loss/train': 1.4124566316604614} +03/04/2022 10:17:15 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 10:17:18 - INFO - codeparrot_training - Step 17730: {'lr': 0.0004861928170383594, 'samples': 9078272, 'steps': 17730, 'loss/train': 2.0840649604797363} +03/04/2022 10:17:21 - INFO - codeparrot_training - Step 17731: {'lr': 0.0004861910778034098, 'samples': 9078784, 'steps': 17731, 'loss/train': 2.2153894901275635} +03/04/2022 10:17:24 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 10:17:26 - INFO - codeparrot_training - Step 17732: {'lr': 0.00048618933846203606, 'samples': 9079296, 'steps': 17732, 'loss/train': 2.1978988647460938} +03/04/2022 10:17:30 - INFO - codeparrot_training - Step 17733: {'lr': 0.00048618759901423905, 'samples': 9079808, 'steps': 17733, 'loss/train': 2.311605215072632} +03/04/2022 10:17:32 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 10:17:35 - INFO - codeparrot_training - Step 17734: {'lr': 0.0004861858594600196, 'samples': 9080320, 'steps': 17734, 'loss/train': 2.317366600036621} +03/04/2022 10:17:38 - INFO - codeparrot_training - Step 17735: {'lr': 0.0004861841197993784, 'samples': 9080832, 'steps': 17735, 'loss/train': 1.759225845336914} +03/04/2022 10:17:40 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 10:17:43 - INFO - codeparrot_training - Step 17736: {'lr': 0.0004861823800323163, 'samples': 9081344, 'steps': 17736, 'loss/train': 2.191605806350708} +03/04/2022 10:17:46 - INFO - codeparrot_training - Step 17737: {'lr': 0.00048618064015883405, 'samples': 9081856, 'steps': 17737, 'loss/train': 1.8051154613494873} +03/04/2022 10:17:49 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 10:17:52 - INFO - codeparrot_training - Step 17738: {'lr': 0.0004861789001789325, 'samples': 9082368, 'steps': 17738, 'loss/train': 1.6233527660369873} +03/04/2022 10:17:55 - INFO - codeparrot_training - Step 17739: {'lr': 0.00048617716009261236, 'samples': 9082880, 'steps': 17739, 'loss/train': 0.7418481707572937} +03/04/2022 10:17:58 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/04/2022 10:18:00 - INFO - codeparrot_training - Step 17740: {'lr': 0.00048617541989987435, 'samples': 9083392, 'steps': 17740, 'loss/train': 0.9539466500282288} +03/04/2022 10:18:03 - INFO - codeparrot_training - Step 17741: {'lr': 0.00048617367960071946, 'samples': 9083904, 'steps': 17741, 'loss/train': 2.478332042694092} +03/04/2022 10:18:06 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 10:18:09 - INFO - codeparrot_training - Step 17742: {'lr': 0.0004861719391951483, 'samples': 9084416, 'steps': 17742, 'loss/train': 2.326171636581421} +03/04/2022 10:18:12 - INFO - codeparrot_training - Step 17743: {'lr': 0.0004861701986831617, 'samples': 9084928, 'steps': 17743, 'loss/train': 0.6232874989509583} +03/04/2022 10:18:15 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 10:18:17 - INFO - codeparrot_training - Step 17744: {'lr': 0.0004861684580647605, 'samples': 9085440, 'steps': 17744, 'loss/train': 2.100576162338257} +03/04/2022 10:18:20 - INFO - codeparrot_training - Step 17745: {'lr': 0.0004861667173399453, 'samples': 9085952, 'steps': 17745, 'loss/train': 1.5485637187957764} +03/04/2022 10:18:23 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 10:18:26 - INFO - codeparrot_training - Step 17746: {'lr': 0.0004861649765087172, 'samples': 9086464, 'steps': 17746, 'loss/train': 2.3322460651397705} +03/04/2022 10:18:29 - INFO - codeparrot_training - Step 17747: {'lr': 0.0004861632355710767, 'samples': 9086976, 'steps': 17747, 'loss/train': 1.530109167098999} +03/04/2022 10:18:31 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 10:18:34 - INFO - codeparrot_training - Step 17748: {'lr': 0.00048616149452702473, 'samples': 9087488, 'steps': 17748, 'loss/train': 1.5493450164794922} +03/04/2022 10:18:37 - INFO - codeparrot_training - Step 17749: {'lr': 0.00048615975337656204, 'samples': 9088000, 'steps': 17749, 'loss/train': 1.6963845491409302} +03/04/2022 10:18:40 - INFO - codeparrot_training - Step 17750: {'lr': 0.00048615801211968936, 'samples': 9088512, 'steps': 17750, 'loss/train': 1.4918327331542969} +03/04/2022 10:18:40 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 10:18:46 - INFO - codeparrot_training - Step 17751: {'lr': 0.00048615627075640754, 'samples': 9089024, 'steps': 17751, 'loss/train': 1.7240005731582642} +03/04/2022 10:18:49 - INFO - codeparrot_training - Step 17752: {'lr': 0.00048615452928671746, 'samples': 9089536, 'steps': 17752, 'loss/train': 1.6236759424209595} +03/04/2022 10:18:49 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 10:18:54 - INFO - codeparrot_training - Step 17753: {'lr': 0.00048615278771061966, 'samples': 9090048, 'steps': 17753, 'loss/train': 1.8908346891403198} +03/04/2022 10:18:57 - INFO - codeparrot_training - Step 17754: {'lr': 0.0004861510460281151, 'samples': 9090560, 'steps': 17754, 'loss/train': 2.601339340209961} +03/04/2022 10:18:58 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 10:19:03 - INFO - codeparrot_training - Step 17755: {'lr': 0.0004861493042392045, 'samples': 9091072, 'steps': 17755, 'loss/train': 2.139415740966797} +03/04/2022 10:19:06 - INFO - codeparrot_training - Step 17756: {'lr': 0.00048614756234388866, 'samples': 9091584, 'steps': 17756, 'loss/train': 1.5492589473724365} +03/04/2022 10:19:06 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/04/2022 10:19:12 - INFO - codeparrot_training - Step 17757: {'lr': 0.00048614582034216844, 'samples': 9092096, 'steps': 17757, 'loss/train': 1.579012155532837} +03/04/2022 10:19:15 - INFO - codeparrot_training - Step 17758: {'lr': 0.0004861440782340445, 'samples': 9092608, 'steps': 17758, 'loss/train': 1.7648820877075195} +03/04/2022 10:19:16 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 10:19:20 - INFO - codeparrot_training - Step 17759: {'lr': 0.0004861423360195177, 'samples': 9093120, 'steps': 17759, 'loss/train': 1.7838704586029053} +03/04/2022 10:19:23 - INFO - codeparrot_training - Step 17760: {'lr': 0.0004861405936985888, 'samples': 9093632, 'steps': 17760, 'loss/train': 2.6226906776428223} +03/04/2022 10:19:24 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 10:19:28 - INFO - codeparrot_training - Step 17761: {'lr': 0.0004861388512712586, 'samples': 9094144, 'steps': 17761, 'loss/train': 1.6749874353408813} +03/04/2022 10:19:31 - INFO - codeparrot_training - Step 17762: {'lr': 0.0004861371087375279, 'samples': 9094656, 'steps': 17762, 'loss/train': 1.7222553491592407} +03/04/2022 10:19:32 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 10:19:37 - INFO - codeparrot_training - Step 17763: {'lr': 0.0004861353660973974, 'samples': 9095168, 'steps': 17763, 'loss/train': 1.785827398300171} +03/04/2022 10:19:40 - INFO - codeparrot_training - Step 17764: {'lr': 0.00048613362335086797, 'samples': 9095680, 'steps': 17764, 'loss/train': 1.5541337728500366} +03/04/2022 10:19:41 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 10:19:45 - INFO - codeparrot_training - Step 17765: {'lr': 0.00048613188049794045, 'samples': 9096192, 'steps': 17765, 'loss/train': 1.2781540155410767} +03/04/2022 10:19:48 - INFO - codeparrot_training - Step 17766: {'lr': 0.00048613013753861546, 'samples': 9096704, 'steps': 17766, 'loss/train': 2.3325955867767334} +03/04/2022 10:19:50 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/04/2022 10:19:54 - INFO - codeparrot_training - Step 17767: {'lr': 0.0004861283944728939, 'samples': 9097216, 'steps': 17767, 'loss/train': 0.5109339356422424} +03/04/2022 10:19:57 - INFO - codeparrot_training - Step 17768: {'lr': 0.0004861266513007765, 'samples': 9097728, 'steps': 17768, 'loss/train': 1.574790596961975} +03/04/2022 10:19:58 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 10:20:02 - INFO - codeparrot_training - Step 17769: {'lr': 0.00048612490802226415, 'samples': 9098240, 'steps': 17769, 'loss/train': 2.0797767639160156} +03/04/2022 10:20:05 - INFO - codeparrot_training - Step 17770: {'lr': 0.0004861231646373575, 'samples': 9098752, 'steps': 17770, 'loss/train': 1.0968800783157349} +03/04/2022 10:20:06 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/04/2022 10:20:10 - INFO - codeparrot_training - Step 17771: {'lr': 0.0004861214211460574, 'samples': 9099264, 'steps': 17771, 'loss/train': 2.443734884262085} +03/04/2022 10:20:13 - INFO - codeparrot_training - Step 17772: {'lr': 0.00048611967754836466, 'samples': 9099776, 'steps': 17772, 'loss/train': 1.7949525117874146} +03/04/2022 10:20:15 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 10:20:19 - INFO - codeparrot_training - Step 17773: {'lr': 0.00048611793384428006, 'samples': 9100288, 'steps': 17773, 'loss/train': 1.2950407266616821} +03/04/2022 10:20:22 - INFO - codeparrot_training - Step 17774: {'lr': 0.00048611619003380426, 'samples': 9100800, 'steps': 17774, 'loss/train': 1.9074108600616455} +03/04/2022 10:20:23 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/04/2022 10:20:27 - INFO - codeparrot_training - Step 17775: {'lr': 0.0004861144461169382, 'samples': 9101312, 'steps': 17775, 'loss/train': 1.7672216892242432} +03/04/2022 10:20:30 - INFO - codeparrot_training - Step 17776: {'lr': 0.00048611270209368264, 'samples': 9101824, 'steps': 17776, 'loss/train': 2.340533971786499} +03/04/2022 10:20:31 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 10:20:36 - INFO - codeparrot_training - Step 17777: {'lr': 0.0004861109579640384, 'samples': 9102336, 'steps': 17777, 'loss/train': 2.144207239151001} +03/04/2022 10:20:39 - INFO - codeparrot_training - Step 17778: {'lr': 0.0004861092137280061, 'samples': 9102848, 'steps': 17778, 'loss/train': 2.109093427658081} +03/04/2022 10:20:40 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 10:20:44 - INFO - codeparrot_training - Step 17779: {'lr': 0.00048610746938558666, 'samples': 9103360, 'steps': 17779, 'loss/train': 2.0275611877441406} +03/04/2022 10:20:47 - INFO - codeparrot_training - Step 17780: {'lr': 0.0004861057249367808, 'samples': 9103872, 'steps': 17780, 'loss/train': 1.4868932962417603} +03/04/2022 10:20:48 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 10:20:52 - INFO - codeparrot_training - Step 17781: {'lr': 0.00048610398038158943, 'samples': 9104384, 'steps': 17781, 'loss/train': 2.6400856971740723} +03/04/2022 10:20:56 - INFO - codeparrot_training - Step 17782: {'lr': 0.00048610223572001315, 'samples': 9104896, 'steps': 17782, 'loss/train': 1.562225103378296} +03/04/2022 10:20:57 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/04/2022 10:21:01 - INFO - codeparrot_training - Step 17783: {'lr': 0.0004861004909520529, 'samples': 9105408, 'steps': 17783, 'loss/train': 2.1309163570404053} +03/04/2022 10:21:04 - INFO - codeparrot_training - Step 17784: {'lr': 0.00048609874607770945, 'samples': 9105920, 'steps': 17784, 'loss/train': 2.0285756587982178} +03/04/2022 10:21:05 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/04/2022 10:21:09 - INFO - codeparrot_training - Step 17785: {'lr': 0.0004860970010969835, 'samples': 9106432, 'steps': 17785, 'loss/train': 2.120077133178711} +03/04/2022 10:21:13 - INFO - codeparrot_training - Step 17786: {'lr': 0.0004860952560098759, 'samples': 9106944, 'steps': 17786, 'loss/train': 6.571303367614746} +03/04/2022 10:21:14 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 10:21:18 - INFO - codeparrot_training - Step 17787: {'lr': 0.0004860935108163874, 'samples': 9107456, 'steps': 17787, 'loss/train': 2.4582624435424805} +03/04/2022 10:21:21 - INFO - codeparrot_training - Step 17788: {'lr': 0.0004860917655165188, 'samples': 9107968, 'steps': 17788, 'loss/train': 1.878982663154602} +03/04/2022 10:21:23 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 10:21:26 - INFO - codeparrot_training - Step 17789: {'lr': 0.00048609002011027093, 'samples': 9108480, 'steps': 17789, 'loss/train': 2.6355230808258057} +03/04/2022 10:21:30 - INFO - codeparrot_training - Step 17790: {'lr': 0.0004860882745976445, 'samples': 9108992, 'steps': 17790, 'loss/train': 1.7478280067443848} +03/04/2022 10:21:31 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 10:21:35 - INFO - codeparrot_training - Step 17791: {'lr': 0.00048608652897864034, 'samples': 9109504, 'steps': 17791, 'loss/train': 1.9377306699752808} +03/04/2022 10:21:38 - INFO - codeparrot_training - Step 17792: {'lr': 0.0004860847832532593, 'samples': 9110016, 'steps': 17792, 'loss/train': 6.481588840484619} +03/04/2022 10:21:40 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 10:21:43 - INFO - codeparrot_training - Step 17793: {'lr': 0.00048608303742150204, 'samples': 9110528, 'steps': 17793, 'loss/train': 1.701877474784851} +03/04/2022 10:21:46 - INFO - codeparrot_training - Step 17794: {'lr': 0.0004860812914833694, 'samples': 9111040, 'steps': 17794, 'loss/train': 1.6577447652816772} +03/04/2022 10:21:48 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 10:21:52 - INFO - codeparrot_training - Step 17795: {'lr': 0.00048607954543886225, 'samples': 9111552, 'steps': 17795, 'loss/train': 2.0152506828308105} +03/04/2022 10:21:55 - INFO - codeparrot_training - Step 17796: {'lr': 0.00048607779928798125, 'samples': 9112064, 'steps': 17796, 'loss/train': 2.206902027130127} +03/04/2022 10:21:58 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 10:22:00 - INFO - codeparrot_training - Step 17797: {'lr': 0.0004860760530307272, 'samples': 9112576, 'steps': 17797, 'loss/train': 2.1188292503356934} +03/04/2022 10:22:04 - INFO - codeparrot_training - Step 17798: {'lr': 0.00048607430666710097, 'samples': 9113088, 'steps': 17798, 'loss/train': 1.6576124429702759} +03/04/2022 10:22:07 - INFO - codeparrot_training - Step 17799: {'lr': 0.00048607256019710327, 'samples': 9113600, 'steps': 17799, 'loss/train': 0.6621900796890259} +03/04/2022 10:22:07 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/04/2022 10:22:12 - INFO - codeparrot_training - Step 17800: {'lr': 0.0004860708136207349, 'samples': 9114112, 'steps': 17800, 'loss/train': 1.7865641117095947} +03/04/2022 10:22:15 - INFO - codeparrot_training - Step 17801: {'lr': 0.0004860690669379967, 'samples': 9114624, 'steps': 17801, 'loss/train': 2.306563377380371} +03/04/2022 10:22:15 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/04/2022 10:22:21 - INFO - codeparrot_training - Step 17802: {'lr': 0.00048606732014888946, 'samples': 9115136, 'steps': 17802, 'loss/train': 1.876184344291687} +03/04/2022 10:22:24 - INFO - codeparrot_training - Step 17803: {'lr': 0.0004860655732534138, 'samples': 9115648, 'steps': 17803, 'loss/train': 2.920649766921997} +03/04/2022 10:22:24 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/04/2022 10:22:29 - INFO - codeparrot_training - Step 17804: {'lr': 0.00048606382625157075, 'samples': 9116160, 'steps': 17804, 'loss/train': 1.4683581590652466} +03/04/2022 10:22:32 - INFO - codeparrot_training - Step 17805: {'lr': 0.00048606207914336097, 'samples': 9116672, 'steps': 17805, 'loss/train': 1.7028052806854248} +03/04/2022 10:22:34 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 10:22:38 - INFO - codeparrot_training - Step 17806: {'lr': 0.0004860603319287853, 'samples': 9117184, 'steps': 17806, 'loss/train': 1.2631663084030151} +03/04/2022 10:22:41 - INFO - codeparrot_training - Step 17807: {'lr': 0.0004860585846078444, 'samples': 9117696, 'steps': 17807, 'loss/train': 2.9094741344451904} +03/04/2022 10:22:42 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/04/2022 10:22:46 - INFO - codeparrot_training - Step 17808: {'lr': 0.00048605683718053915, 'samples': 9118208, 'steps': 17808, 'loss/train': 1.3682935237884521} +03/04/2022 10:22:50 - INFO - codeparrot_training - Step 17809: {'lr': 0.0004860550896468704, 'samples': 9118720, 'steps': 17809, 'loss/train': 2.239513397216797} +03/04/2022 10:22:53 - INFO - codeparrot_training - Step 17810: {'lr': 0.00048605334200683883, 'samples': 9119232, 'steps': 17810, 'loss/train': 1.5332149267196655} +03/04/2022 10:22:53 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 10:22:58 - INFO - codeparrot_training - Step 17811: {'lr': 0.0004860515942604452, 'samples': 9119744, 'steps': 17811, 'loss/train': 0.28796666860580444} +03/04/2022 10:23:01 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 10:23:03 - INFO - codeparrot_training - Step 17812: {'lr': 0.00048604984640769047, 'samples': 9120256, 'steps': 17812, 'loss/train': 1.5087194442749023} +03/04/2022 10:23:06 - INFO - codeparrot_training - Step 17813: {'lr': 0.00048604809844857524, 'samples': 9120768, 'steps': 17813, 'loss/train': 1.7067015171051025} +03/04/2022 10:23:09 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/04/2022 10:23:12 - INFO - codeparrot_training - Step 17814: {'lr': 0.0004860463503831004, 'samples': 9121280, 'steps': 17814, 'loss/train': 2.264066219329834} +03/04/2022 10:23:15 - INFO - codeparrot_training - Step 17815: {'lr': 0.0004860446022112668, 'samples': 9121792, 'steps': 17815, 'loss/train': 1.8676426410675049} +03/04/2022 10:23:17 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/04/2022 10:23:20 - INFO - codeparrot_training - Step 17816: {'lr': 0.00048604285393307503, 'samples': 9122304, 'steps': 17816, 'loss/train': 1.574478268623352} +03/04/2022 10:23:23 - INFO - codeparrot_training - Step 17817: {'lr': 0.000486041105548526, 'samples': 9122816, 'steps': 17817, 'loss/train': 1.9657503366470337} +03/04/2022 10:23:26 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 10:23:28 - INFO - codeparrot_training - Step 17818: {'lr': 0.00048603935705762057, 'samples': 9123328, 'steps': 17818, 'loss/train': 1.4719229936599731} +03/04/2022 10:23:32 - INFO - codeparrot_training - Step 17819: {'lr': 0.0004860376084603594, 'samples': 9123840, 'steps': 17819, 'loss/train': 1.640405535697937} +03/04/2022 10:23:34 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/04/2022 10:23:37 - INFO - codeparrot_training - Step 17820: {'lr': 0.00048603585975674334, 'samples': 9124352, 'steps': 17820, 'loss/train': 2.375603675842285} +03/04/2022 10:23:40 - INFO - codeparrot_training - Step 17821: {'lr': 0.0004860341109467732, 'samples': 9124864, 'steps': 17821, 'loss/train': 1.5005080699920654} +03/04/2022 10:23:42 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 10:23:45 - INFO - codeparrot_training - Step 17822: {'lr': 0.00048603236203044963, 'samples': 9125376, 'steps': 17822, 'loss/train': 2.234281539916992} +03/04/2022 10:23:48 - INFO - codeparrot_training - Step 17823: {'lr': 0.00048603061300777365, 'samples': 9125888, 'steps': 17823, 'loss/train': 1.6722205877304077} +03/04/2022 10:23:51 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 10:23:54 - INFO - codeparrot_training - Step 17824: {'lr': 0.0004860288638787458, 'samples': 9126400, 'steps': 17824, 'loss/train': 2.1265439987182617} +03/04/2022 10:23:57 - INFO - codeparrot_training - Step 17825: {'lr': 0.000486027114643367, 'samples': 9126912, 'steps': 17825, 'loss/train': 1.9362610578536987} +03/04/2022 10:23:59 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 10:24:02 - INFO - codeparrot_training - Step 17826: {'lr': 0.0004860253653016381, 'samples': 9127424, 'steps': 17826, 'loss/train': 1.7321217060089111} +03/04/2022 10:24:06 - INFO - codeparrot_training - Step 17827: {'lr': 0.00048602361585355975, 'samples': 9127936, 'steps': 17827, 'loss/train': 1.7324565649032593} +03/04/2022 10:24:09 - INFO - codeparrot_training - Step 17828: {'lr': 0.0004860218662991328, 'samples': 9128448, 'steps': 17828, 'loss/train': 3.582538604736328} +03/04/2022 10:24:09 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 10:24:14 - INFO - codeparrot_training - Step 17829: {'lr': 0.0004860201166383581, 'samples': 9128960, 'steps': 17829, 'loss/train': 1.9982986450195312} +03/04/2022 10:24:18 - INFO - codeparrot_training - Step 17830: {'lr': 0.00048601836687123636, 'samples': 9129472, 'steps': 17830, 'loss/train': 2.0823776721954346} +03/04/2022 10:24:19 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 10:24:23 - INFO - codeparrot_training - Step 17831: {'lr': 0.00048601661699776834, 'samples': 9129984, 'steps': 17831, 'loss/train': 2.225951671600342} +03/04/2022 10:24:26 - INFO - codeparrot_training - Step 17832: {'lr': 0.0004860148670179549, 'samples': 9130496, 'steps': 17832, 'loss/train': 1.7576751708984375} +03/04/2022 10:24:28 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 10:24:31 - INFO - codeparrot_training - Step 17833: {'lr': 0.0004860131169317968, 'samples': 9131008, 'steps': 17833, 'loss/train': 1.6903122663497925} +03/04/2022 10:24:35 - INFO - codeparrot_training - Step 17834: {'lr': 0.0004860113667392948, 'samples': 9131520, 'steps': 17834, 'loss/train': 1.425572156906128} +03/04/2022 10:24:36 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 10:24:40 - INFO - codeparrot_training - Step 17835: {'lr': 0.00048600961644044977, 'samples': 9132032, 'steps': 17835, 'loss/train': 1.1547815799713135} +03/04/2022 10:24:43 - INFO - codeparrot_training - Step 17836: {'lr': 0.0004860078660352625, 'samples': 9132544, 'steps': 17836, 'loss/train': 1.5370306968688965} +03/04/2022 10:24:44 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 10:24:48 - INFO - codeparrot_training - Step 17837: {'lr': 0.0004860061155237336, 'samples': 9133056, 'steps': 17837, 'loss/train': 2.139240264892578} +03/04/2022 10:24:52 - INFO - codeparrot_training - Step 17838: {'lr': 0.0004860043649058641, 'samples': 9133568, 'steps': 17838, 'loss/train': 2.436844825744629} +03/04/2022 10:24:53 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 10:24:57 - INFO - codeparrot_training - Step 17839: {'lr': 0.00048600261418165456, 'samples': 9134080, 'steps': 17839, 'loss/train': 1.9371930360794067} +03/04/2022 10:25:00 - INFO - codeparrot_training - Step 17840: {'lr': 0.00048600086335110593, 'samples': 9134592, 'steps': 17840, 'loss/train': 2.185899257659912} +03/04/2022 10:25:01 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/04/2022 10:25:05 - INFO - codeparrot_training - Step 17841: {'lr': 0.000485999112414219, 'samples': 9135104, 'steps': 17841, 'loss/train': 1.8543041944503784} +03/04/2022 10:25:09 - INFO - codeparrot_training - Step 17842: {'lr': 0.0004859973613709945, 'samples': 9135616, 'steps': 17842, 'loss/train': 2.198478937149048} +03/04/2022 10:25:10 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/04/2022 10:25:14 - INFO - codeparrot_training - Step 17843: {'lr': 0.0004859956102214332, 'samples': 9136128, 'steps': 17843, 'loss/train': 0.8056599497795105} +03/04/2022 10:25:17 - INFO - codeparrot_training - Step 17844: {'lr': 0.00048599385896553595, 'samples': 9136640, 'steps': 17844, 'loss/train': 2.4066812992095947} +03/04/2022 10:25:19 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/04/2022 10:25:22 - INFO - codeparrot_training - Step 17845: {'lr': 0.0004859921076033034, 'samples': 9137152, 'steps': 17845, 'loss/train': 1.611732840538025} +03/04/2022 10:25:25 - INFO - codeparrot_training - Step 17846: {'lr': 0.00048599035613473656, 'samples': 9137664, 'steps': 17846, 'loss/train': 0.21718797087669373} +03/04/2022 10:25:27 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/04/2022 10:25:31 - INFO - codeparrot_training - Step 17847: {'lr': 0.0004859886045598361, 'samples': 9138176, 'steps': 17847, 'loss/train': 2.153369903564453} +03/04/2022 10:25:34 - INFO - codeparrot_training - Step 17848: {'lr': 0.0004859868528786028, 'samples': 9138688, 'steps': 17848, 'loss/train': 2.4223315715789795} +03/04/2022 10:25:36 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/04/2022 10:25:39 - INFO - codeparrot_training - Step 17849: {'lr': 0.0004859851010910374, 'samples': 9139200, 'steps': 17849, 'loss/train': 2.064225673675537} +03/04/2022 10:25:42 - INFO - codeparrot_training - Step 17850: {'lr': 0.0004859833491971409, 'samples': 9139712, 'steps': 17850, 'loss/train': 2.4244697093963623} +03/04/2022 10:25:44 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/04/2022 10:25:48 - INFO - codeparrot_training - Step 17851: {'lr': 0.0004859815971969138, 'samples': 9140224, 'steps': 17851, 'loss/train': 1.4374090433120728} +03/04/2022 10:25:51 - INFO - codeparrot_training - Step 17852: {'lr': 0.0004859798450903571, 'samples': 9140736, 'steps': 17852, 'loss/train': 1.2511670589447021} +03/04/2022 10:25:53 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 10:25:56 - INFO - codeparrot_training - Step 17853: {'lr': 0.00048597809287747153, 'samples': 9141248, 'steps': 17853, 'loss/train': 2.2726194858551025} +03/04/2022 10:25:59 - INFO - codeparrot_training - Step 17854: {'lr': 0.0004859763405582579, 'samples': 9141760, 'steps': 17854, 'loss/train': 2.140408515930176} +03/04/2022 10:26:01 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 10:26:05 - INFO - codeparrot_training - Step 17855: {'lr': 0.00048597458813271686, 'samples': 9142272, 'steps': 17855, 'loss/train': 1.5727490186691284} +03/04/2022 10:26:08 - INFO - codeparrot_training - Step 17856: {'lr': 0.0004859728356008494, 'samples': 9142784, 'steps': 17856, 'loss/train': 1.2421655654907227} +03/04/2022 10:26:10 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 10:26:13 - INFO - codeparrot_training - Step 17857: {'lr': 0.00048597108296265625, 'samples': 9143296, 'steps': 17857, 'loss/train': 2.2662882804870605} +03/04/2022 10:26:16 - INFO - codeparrot_training - Step 17858: {'lr': 0.00048596933021813815, 'samples': 9143808, 'steps': 17858, 'loss/train': 3.875774621963501} +03/04/2022 10:26:18 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 10:26:21 - INFO - codeparrot_training - Step 17859: {'lr': 0.0004859675773672959, 'samples': 9144320, 'steps': 17859, 'loss/train': 2.0801782608032227} +03/04/2022 10:26:24 - INFO - codeparrot_training - Step 17860: {'lr': 0.00048596582441013026, 'samples': 9144832, 'steps': 17860, 'loss/train': 1.5048261880874634} +03/04/2022 10:26:26 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/04/2022 10:26:30 - INFO - codeparrot_training - Step 17861: {'lr': 0.0004859640713466421, 'samples': 9145344, 'steps': 17861, 'loss/train': 1.3153377771377563} +03/04/2022 10:26:33 - INFO - codeparrot_training - Step 17862: {'lr': 0.0004859623181768321, 'samples': 9145856, 'steps': 17862, 'loss/train': 2.3679609298706055} +03/04/2022 10:26:35 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 10:26:38 - INFO - codeparrot_training - Step 17863: {'lr': 0.0004859605649007012, 'samples': 9146368, 'steps': 17863, 'loss/train': 1.0770646333694458} +03/04/2022 10:26:42 - INFO - codeparrot_training - Step 17864: {'lr': 0.00048595881151825015, 'samples': 9146880, 'steps': 17864, 'loss/train': 1.9120949506759644} +03/04/2022 10:26:44 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 10:26:47 - INFO - codeparrot_training - Step 17865: {'lr': 0.00048595705802947963, 'samples': 9147392, 'steps': 17865, 'loss/train': 2.716195583343506} +03/04/2022 10:26:50 - INFO - codeparrot_training - Step 17866: {'lr': 0.0004859553044343905, 'samples': 9147904, 'steps': 17866, 'loss/train': 1.989767074584961} +03/04/2022 10:26:53 - INFO - codeparrot_training - Step 17867: {'lr': 0.0004859535507329836, 'samples': 9148416, 'steps': 17867, 'loss/train': 2.102576971054077} +03/04/2022 10:26:53 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/04/2022 10:26:59 - INFO - codeparrot_training - Step 17868: {'lr': 0.0004859517969252596, 'samples': 9148928, 'steps': 17868, 'loss/train': 2.435636281967163} +03/04/2022 10:27:02 - INFO - codeparrot_training - Step 17869: {'lr': 0.0004859500430112194, 'samples': 9149440, 'steps': 17869, 'loss/train': 2.0064215660095215} +03/04/2022 10:27:02 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 10:27:07 - INFO - codeparrot_training - Step 17870: {'lr': 0.0004859482889908637, 'samples': 9149952, 'steps': 17870, 'loss/train': 2.305504322052002} +03/04/2022 10:27:10 - INFO - codeparrot_training - Step 17871: {'lr': 0.0004859465348641934, 'samples': 9150464, 'steps': 17871, 'loss/train': 2.0494394302368164} +03/04/2022 10:27:10 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/04/2022 10:27:16 - INFO - codeparrot_training - Step 17872: {'lr': 0.0004859447806312093, 'samples': 9150976, 'steps': 17872, 'loss/train': 1.590112328529358} +03/04/2022 10:27:19 - INFO - codeparrot_training - Step 17873: {'lr': 0.000485943026291912, 'samples': 9151488, 'steps': 17873, 'loss/train': 2.2903757095336914} +03/04/2022 10:27:19 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/04/2022 10:27:24 - INFO - codeparrot_training - Step 17874: {'lr': 0.0004859412718463025, 'samples': 9152000, 'steps': 17874, 'loss/train': 1.7905049324035645} +03/04/2022 10:27:27 - INFO - codeparrot_training - Step 17875: {'lr': 0.00048593951729438144, 'samples': 9152512, 'steps': 17875, 'loss/train': 2.9008686542510986} +03/04/2022 10:27:27 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/04/2022 10:27:33 - INFO - codeparrot_training - Step 17876: {'lr': 0.0004859377626361497, 'samples': 9153024, 'steps': 17876, 'loss/train': 2.267151117324829} +03/04/2022 10:27:36 - INFO - codeparrot_training - Step 17877: {'lr': 0.00048593600787160806, 'samples': 9153536, 'steps': 17877, 'loss/train': 2.1864523887634277} +03/04/2022 10:27:36 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 10:27:41 - INFO - codeparrot_training - Step 17878: {'lr': 0.0004859342530007572, 'samples': 9154048, 'steps': 17878, 'loss/train': 2.10567045211792} +03/04/2022 10:27:44 - INFO - codeparrot_training - Step 17879: {'lr': 0.0004859324980235982, 'samples': 9154560, 'steps': 17879, 'loss/train': 1.776522159576416} +03/04/2022 10:27:44 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 10:27:50 - INFO - codeparrot_training - Step 17880: {'lr': 0.0004859307429401315, 'samples': 9155072, 'steps': 17880, 'loss/train': 0.3131960332393646} +03/04/2022 10:27:53 - INFO - codeparrot_training - Step 17881: {'lr': 0.0004859289877503581, 'samples': 9155584, 'steps': 17881, 'loss/train': 1.7575178146362305} +03/04/2022 10:27:53 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 10:27:58 - INFO - codeparrot_training - Step 17882: {'lr': 0.00048592723245427874, 'samples': 9156096, 'steps': 17882, 'loss/train': 1.8007426261901855} +03/04/2022 10:28:01 - INFO - codeparrot_training - Step 17883: {'lr': 0.00048592547705189414, 'samples': 9156608, 'steps': 17883, 'loss/train': 1.7338191270828247} +03/04/2022 10:28:01 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 10:28:07 - INFO - codeparrot_training - Step 17884: {'lr': 0.00048592372154320526, 'samples': 9157120, 'steps': 17884, 'loss/train': 1.6480505466461182} +03/04/2022 10:28:10 - INFO - codeparrot_training - Step 17885: {'lr': 0.0004859219659282127, 'samples': 9157632, 'steps': 17885, 'loss/train': 1.8349205255508423} +03/04/2022 10:28:10 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/04/2022 10:28:15 - INFO - codeparrot_training - Step 17886: {'lr': 0.00048592021020691745, 'samples': 9158144, 'steps': 17886, 'loss/train': 1.9793702363967896} +03/04/2022 10:28:18 - INFO - codeparrot_training - Step 17887: {'lr': 0.00048591845437932014, 'samples': 9158656, 'steps': 17887, 'loss/train': 2.4264745712280273} +03/04/2022 10:28:18 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 10:28:23 - INFO - codeparrot_training - Step 17888: {'lr': 0.0004859166984454216, 'samples': 9159168, 'steps': 17888, 'loss/train': 1.9110783338546753} +03/04/2022 10:28:27 - INFO - codeparrot_training - Step 17889: {'lr': 0.0004859149424052226, 'samples': 9159680, 'steps': 17889, 'loss/train': 1.779573917388916} +03/04/2022 10:28:27 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/04/2022 10:28:32 - INFO - codeparrot_training - Step 17890: {'lr': 0.00048591318625872403, 'samples': 9160192, 'steps': 17890, 'loss/train': 2.6852073669433594} +03/04/2022 10:28:35 - INFO - codeparrot_training - Step 17891: {'lr': 0.00048591143000592665, 'samples': 9160704, 'steps': 17891, 'loss/train': 2.311511278152466} +03/04/2022 10:28:35 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 10:28:40 - INFO - codeparrot_training - Step 17892: {'lr': 0.00048590967364683116, 'samples': 9161216, 'steps': 17892, 'loss/train': 1.5691888332366943} +03/04/2022 10:28:43 - INFO - codeparrot_training - Step 17893: {'lr': 0.0004859079171814384, 'samples': 9161728, 'steps': 17893, 'loss/train': 1.765015721321106} +03/04/2022 10:28:44 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/04/2022 10:28:49 - INFO - codeparrot_training - Step 17894: {'lr': 0.00048590616060974917, 'samples': 9162240, 'steps': 17894, 'loss/train': 2.658764362335205} +03/04/2022 10:28:52 - INFO - codeparrot_training - Step 17895: {'lr': 0.00048590440393176434, 'samples': 9162752, 'steps': 17895, 'loss/train': 1.4545196294784546} +03/04/2022 10:28:52 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/04/2022 10:28:58 - INFO - codeparrot_training - Step 17896: {'lr': 0.00048590264714748455, 'samples': 9163264, 'steps': 17896, 'loss/train': 1.000144362449646} +03/04/2022 10:29:01 - INFO - codeparrot_training - Step 17897: {'lr': 0.0004859008902569107, 'samples': 9163776, 'steps': 17897, 'loss/train': 2.6297059059143066} +03/04/2022 10:29:04 - INFO - codeparrot_training - Step 17898: {'lr': 0.00048589913326004355, 'samples': 9164288, 'steps': 17898, 'loss/train': 3.405886173248291} +03/04/2022 10:29:04 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 10:29:10 - INFO - codeparrot_training - Step 17899: {'lr': 0.0004858973761568839, 'samples': 9164800, 'steps': 17899, 'loss/train': 2.0175230503082275} +03/04/2022 10:29:13 - INFO - codeparrot_training - Step 17900: {'lr': 0.0004858956189474325, 'samples': 9165312, 'steps': 17900, 'loss/train': 1.7527515888214111} +03/04/2022 10:29:14 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 10:29:18 - INFO - codeparrot_training - Step 17901: {'lr': 0.0004858938616316902, 'samples': 9165824, 'steps': 17901, 'loss/train': 1.518686294555664} +03/04/2022 10:29:21 - INFO - codeparrot_training - Step 17902: {'lr': 0.00048589210420965775, 'samples': 9166336, 'steps': 17902, 'loss/train': 1.7290287017822266} +03/04/2022 10:29:22 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 10:29:27 - INFO - codeparrot_training - Step 17903: {'lr': 0.0004858903466813359, 'samples': 9166848, 'steps': 17903, 'loss/train': 1.7932206392288208} +03/04/2022 10:29:30 - INFO - codeparrot_training - Step 17904: {'lr': 0.0004858885890467256, 'samples': 9167360, 'steps': 17904, 'loss/train': 2.01130747795105} +03/04/2022 10:29:30 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 10:29:35 - INFO - codeparrot_training - Step 17905: {'lr': 0.00048588683130582755, 'samples': 9167872, 'steps': 17905, 'loss/train': 2.3892242908477783} +03/04/2022 10:29:38 - INFO - codeparrot_training - Step 17906: {'lr': 0.00048588507345864246, 'samples': 9168384, 'steps': 17906, 'loss/train': 1.3342276811599731} +03/04/2022 10:29:39 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 10:29:43 - INFO - codeparrot_training - Step 17907: {'lr': 0.00048588331550517125, 'samples': 9168896, 'steps': 17907, 'loss/train': 1.867754578590393} +03/04/2022 10:29:47 - INFO - codeparrot_training - Step 17908: {'lr': 0.0004858815574454146, 'samples': 9169408, 'steps': 17908, 'loss/train': 2.381133556365967} +03/04/2022 10:29:47 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 10:29:52 - INFO - codeparrot_training - Step 17909: {'lr': 0.0004858797992793734, 'samples': 9169920, 'steps': 17909, 'loss/train': 1.1802715063095093} +03/04/2022 10:29:55 - INFO - codeparrot_training - Step 17910: {'lr': 0.0004858780410070484, 'samples': 9170432, 'steps': 17910, 'loss/train': 1.8835339546203613} +03/04/2022 10:29:56 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 10:30:00 - INFO - codeparrot_training - Step 17911: {'lr': 0.0004858762826284404, 'samples': 9170944, 'steps': 17911, 'loss/train': 1.9693881273269653} +03/04/2022 10:30:03 - INFO - codeparrot_training - Step 17912: {'lr': 0.00048587452414355014, 'samples': 9171456, 'steps': 17912, 'loss/train': 1.8358622789382935} +03/04/2022 10:30:04 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/04/2022 10:30:09 - INFO - codeparrot_training - Step 17913: {'lr': 0.00048587276555237853, 'samples': 9171968, 'steps': 17913, 'loss/train': 1.396765947341919} +03/04/2022 10:30:12 - INFO - codeparrot_training - Step 17914: {'lr': 0.00048587100685492626, 'samples': 9172480, 'steps': 17914, 'loss/train': 1.5540350675582886} +03/04/2022 10:30:13 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 10:30:17 - INFO - codeparrot_training - Step 17915: {'lr': 0.00048586924805119416, 'samples': 9172992, 'steps': 17915, 'loss/train': 1.8883739709854126} +03/04/2022 10:30:20 - INFO - codeparrot_training - Step 17916: {'lr': 0.00048586748914118303, 'samples': 9173504, 'steps': 17916, 'loss/train': 1.954619288444519} +03/04/2022 10:30:21 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/04/2022 10:30:26 - INFO - codeparrot_training - Step 17917: {'lr': 0.0004858657301248936, 'samples': 9174016, 'steps': 17917, 'loss/train': 1.6975774765014648} +03/04/2022 10:30:29 - INFO - codeparrot_training - Step 17918: {'lr': 0.00048586397100232673, 'samples': 9174528, 'steps': 17918, 'loss/train': 1.643062949180603} +03/04/2022 10:30:29 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 10:30:34 - INFO - codeparrot_training - Step 17919: {'lr': 0.00048586221177348323, 'samples': 9175040, 'steps': 17919, 'loss/train': 1.676998496055603} +03/04/2022 10:30:37 - INFO - codeparrot_training - Step 17920: {'lr': 0.00048586045243836386, 'samples': 9175552, 'steps': 17920, 'loss/train': 0.6074520945549011} +03/04/2022 10:30:38 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 10:30:42 - INFO - codeparrot_training - Step 17921: {'lr': 0.0004858586929969693, 'samples': 9176064, 'steps': 17921, 'loss/train': 1.6989924907684326} +03/04/2022 10:30:46 - INFO - codeparrot_training - Step 17922: {'lr': 0.0004858569334493006, 'samples': 9176576, 'steps': 17922, 'loss/train': 1.5533548593521118} +03/04/2022 10:30:46 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 10:30:51 - INFO - codeparrot_training - Step 17923: {'lr': 0.0004858551737953583, 'samples': 9177088, 'steps': 17923, 'loss/train': 1.2561545372009277} +03/04/2022 10:30:54 - INFO - codeparrot_training - Step 17924: {'lr': 0.00048585341403514337, 'samples': 9177600, 'steps': 17924, 'loss/train': 1.9216408729553223} +03/04/2022 10:30:54 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/04/2022 10:30:59 - INFO - codeparrot_training - Step 17925: {'lr': 0.0004858516541686565, 'samples': 9178112, 'steps': 17925, 'loss/train': 1.966538667678833} +03/04/2022 10:31:03 - INFO - codeparrot_training - Step 17926: {'lr': 0.0004858498941958985, 'samples': 9178624, 'steps': 17926, 'loss/train': 1.8347549438476562} +03/04/2022 10:31:03 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 10:31:08 - INFO - codeparrot_training - Step 17927: {'lr': 0.00048584813411687016, 'samples': 9179136, 'steps': 17927, 'loss/train': 2.569918394088745} +03/04/2022 10:31:11 - INFO - codeparrot_training - Step 17928: {'lr': 0.00048584637393157235, 'samples': 9179648, 'steps': 17928, 'loss/train': 2.0229415893554688} +03/04/2022 10:31:12 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/04/2022 10:31:16 - INFO - codeparrot_training - Step 17929: {'lr': 0.00048584461364000576, 'samples': 9180160, 'steps': 17929, 'loss/train': 1.9846856594085693} +03/04/2022 10:31:20 - INFO - codeparrot_training - Step 17930: {'lr': 0.00048584285324217125, 'samples': 9180672, 'steps': 17930, 'loss/train': 1.317876935005188} +03/04/2022 10:31:20 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 10:31:25 - INFO - codeparrot_training - Step 17931: {'lr': 0.00048584109273806954, 'samples': 9181184, 'steps': 17931, 'loss/train': 0.9953736662864685} +03/04/2022 10:31:28 - INFO - codeparrot_training - Step 17932: {'lr': 0.00048583933212770154, 'samples': 9181696, 'steps': 17932, 'loss/train': 1.6938326358795166} +03/04/2022 10:31:29 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 10:31:33 - INFO - codeparrot_training - Step 17933: {'lr': 0.00048583757141106796, 'samples': 9182208, 'steps': 17933, 'loss/train': 1.0503252744674683} +03/04/2022 10:31:37 - INFO - codeparrot_training - Step 17934: {'lr': 0.00048583581058816956, 'samples': 9182720, 'steps': 17934, 'loss/train': 2.849365234375} +03/04/2022 10:31:37 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/04/2022 10:31:42 - INFO - codeparrot_training - Step 17935: {'lr': 0.00048583404965900725, 'samples': 9183232, 'steps': 17935, 'loss/train': 2.1897635459899902} +03/04/2022 10:31:45 - INFO - codeparrot_training - Step 17936: {'lr': 0.0004858322886235817, 'samples': 9183744, 'steps': 17936, 'loss/train': 1.4693580865859985} +03/04/2022 10:31:46 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 10:31:50 - INFO - codeparrot_training - Step 17937: {'lr': 0.0004858305274818938, 'samples': 9184256, 'steps': 17937, 'loss/train': 2.256385326385498} +03/04/2022 10:31:54 - INFO - codeparrot_training - Step 17938: {'lr': 0.0004858287662339443, 'samples': 9184768, 'steps': 17938, 'loss/train': 2.1504509449005127} +03/04/2022 10:31:55 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/04/2022 10:31:59 - INFO - codeparrot_training - Step 17939: {'lr': 0.00048582700487973397, 'samples': 9185280, 'steps': 17939, 'loss/train': 0.9490213394165039} +03/04/2022 10:32:02 - INFO - codeparrot_training - Step 17940: {'lr': 0.00048582524341926365, 'samples': 9185792, 'steps': 17940, 'loss/train': 2.726505994796753} +03/04/2022 10:32:05 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 10:32:07 - INFO - codeparrot_training - Step 17941: {'lr': 0.0004858234818525341, 'samples': 9186304, 'steps': 17941, 'loss/train': 2.010714530944824} +03/04/2022 10:32:11 - INFO - codeparrot_training - Step 17942: {'lr': 0.0004858217201795462, 'samples': 9186816, 'steps': 17942, 'loss/train': 1.4964302778244019} +03/04/2022 10:32:13 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/04/2022 10:32:16 - INFO - codeparrot_training - Step 17943: {'lr': 0.0004858199584003006, 'samples': 9187328, 'steps': 17943, 'loss/train': 2.02614426612854} +03/04/2022 10:32:19 - INFO - codeparrot_training - Step 17944: {'lr': 0.00048581819651479814, 'samples': 9187840, 'steps': 17944, 'loss/train': 2.0124404430389404} +03/04/2022 10:32:21 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/04/2022 10:32:25 - INFO - codeparrot_training - Step 17945: {'lr': 0.0004858164345230397, 'samples': 9188352, 'steps': 17945, 'loss/train': 2.463244676589966} +03/04/2022 10:32:28 - INFO - codeparrot_training - Step 17946: {'lr': 0.000485814672425026, 'samples': 9188864, 'steps': 17946, 'loss/train': 2.408086061477661} +03/04/2022 10:32:31 - INFO - codeparrot_training - Step 17947: {'lr': 0.0004858129102207578, 'samples': 9189376, 'steps': 17947, 'loss/train': 2.2369468212127686} +03/04/2022 10:32:31 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 10:32:36 - INFO - codeparrot_training - Step 17948: {'lr': 0.0004858111479102359, 'samples': 9189888, 'steps': 17948, 'loss/train': 2.375378370285034} +03/04/2022 10:32:39 - INFO - codeparrot_training - Step 17949: {'lr': 0.00048580938549346134, 'samples': 9190400, 'steps': 17949, 'loss/train': 2.475522994995117} +03/04/2022 10:32:40 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/04/2022 10:32:45 - INFO - codeparrot_training - Step 17950: {'lr': 0.00048580762297043456, 'samples': 9190912, 'steps': 17950, 'loss/train': 2.236032009124756} +03/04/2022 10:32:48 - INFO - codeparrot_training - Step 17951: {'lr': 0.00048580586034115646, 'samples': 9191424, 'steps': 17951, 'loss/train': 2.2231626510620117} +03/04/2022 10:32:48 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 10:32:53 - INFO - codeparrot_training - Step 17952: {'lr': 0.000485804097605628, 'samples': 9191936, 'steps': 17952, 'loss/train': 1.9672455787658691} +03/04/2022 10:32:56 - INFO - codeparrot_training - Step 17953: {'lr': 0.00048580233476384975, 'samples': 9192448, 'steps': 17953, 'loss/train': 2.1051361560821533} +03/04/2022 10:32:56 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 10:33:02 - INFO - codeparrot_training - Step 17954: {'lr': 0.0004858005718158227, 'samples': 9192960, 'steps': 17954, 'loss/train': 2.143631935119629} +03/04/2022 10:33:05 - INFO - codeparrot_training - Step 17955: {'lr': 0.0004857988087615475, 'samples': 9193472, 'steps': 17955, 'loss/train': 1.2157316207885742} +03/04/2022 10:33:06 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 10:33:10 - INFO - codeparrot_training - Step 17956: {'lr': 0.000485797045601025, 'samples': 9193984, 'steps': 17956, 'loss/train': 2.2033121585845947} +03/04/2022 10:33:13 - INFO - codeparrot_training - Step 17957: {'lr': 0.000485795282334256, 'samples': 9194496, 'steps': 17957, 'loss/train': 2.0269510746002197} +03/04/2022 10:33:14 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 10:33:19 - INFO - codeparrot_training - Step 17958: {'lr': 0.00048579351896124127, 'samples': 9195008, 'steps': 17958, 'loss/train': 1.4344686269760132} +03/04/2022 10:33:22 - INFO - codeparrot_training - Step 17959: {'lr': 0.0004857917554819816, 'samples': 9195520, 'steps': 17959, 'loss/train': 1.6970462799072266} +03/04/2022 10:33:23 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 10:33:27 - INFO - codeparrot_training - Step 17960: {'lr': 0.00048578999189647786, 'samples': 9196032, 'steps': 17960, 'loss/train': 2.0903196334838867} +03/04/2022 10:33:30 - INFO - codeparrot_training - Step 17961: {'lr': 0.00048578822820473074, 'samples': 9196544, 'steps': 17961, 'loss/train': 2.6818742752075195} +03/04/2022 10:33:32 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 10:33:36 - INFO - codeparrot_training - Step 17962: {'lr': 0.00048578646440674113, 'samples': 9197056, 'steps': 17962, 'loss/train': 2.7154769897460938} +03/04/2022 10:33:39 - INFO - codeparrot_training - Step 17963: {'lr': 0.0004857847005025097, 'samples': 9197568, 'steps': 17963, 'loss/train': 0.820645272731781} +03/04/2022 10:33:40 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 10:33:44 - INFO - codeparrot_training - Step 17964: {'lr': 0.0004857829364920374, 'samples': 9198080, 'steps': 17964, 'loss/train': 1.5918126106262207} +03/04/2022 10:33:47 - INFO - codeparrot_training - Step 17965: {'lr': 0.0004857811723753249, 'samples': 9198592, 'steps': 17965, 'loss/train': 2.334970235824585} +03/04/2022 10:33:49 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 10:33:52 - INFO - codeparrot_training - Step 17966: {'lr': 0.00048577940815237305, 'samples': 9199104, 'steps': 17966, 'loss/train': 2.3832833766937256} +03/04/2022 10:33:55 - INFO - codeparrot_training - Step 17967: {'lr': 0.00048577764382318265, 'samples': 9199616, 'steps': 17967, 'loss/train': 2.2226688861846924} +03/04/2022 10:33:57 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 10:34:01 - INFO - codeparrot_training - Step 17968: {'lr': 0.0004857758793877545, 'samples': 9200128, 'steps': 17968, 'loss/train': 1.931725025177002} +03/04/2022 10:34:04 - INFO - codeparrot_training - Step 17969: {'lr': 0.00048577411484608936, 'samples': 9200640, 'steps': 17969, 'loss/train': 1.3119089603424072} +03/04/2022 10:34:05 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/04/2022 10:34:09 - INFO - codeparrot_training - Step 17970: {'lr': 0.000485772350198188, 'samples': 9201152, 'steps': 17970, 'loss/train': 2.582390069961548} +03/04/2022 10:34:12 - INFO - codeparrot_training - Step 17971: {'lr': 0.00048577058544405126, 'samples': 9201664, 'steps': 17971, 'loss/train': 3.350034236907959} +03/04/2022 10:34:14 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 10:34:18 - INFO - codeparrot_training - Step 17972: {'lr': 0.00048576882058368, 'samples': 9202176, 'steps': 17972, 'loss/train': 1.2379509210586548} +03/04/2022 10:34:21 - INFO - codeparrot_training - Step 17973: {'lr': 0.0004857670556170749, 'samples': 9202688, 'steps': 17973, 'loss/train': 1.0760782957077026} +03/04/2022 10:34:22 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 10:34:26 - INFO - codeparrot_training - Step 17974: {'lr': 0.0004857652905442368, 'samples': 9203200, 'steps': 17974, 'loss/train': 2.2804627418518066} +03/04/2022 10:34:29 - INFO - codeparrot_training - Step 17975: {'lr': 0.0004857635253651665, 'samples': 9203712, 'steps': 17975, 'loss/train': 2.1901297569274902} +03/04/2022 10:34:31 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/04/2022 10:34:34 - INFO - codeparrot_training - Step 17976: {'lr': 0.00048576176007986485, 'samples': 9204224, 'steps': 17976, 'loss/train': 1.8554326295852661} +03/04/2022 10:34:38 - INFO - codeparrot_training - Step 17977: {'lr': 0.00048575999468833256, 'samples': 9204736, 'steps': 17977, 'loss/train': 2.3351311683654785} +03/04/2022 10:34:39 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 10:34:43 - INFO - codeparrot_training - Step 17978: {'lr': 0.0004857582291905704, 'samples': 9205248, 'steps': 17978, 'loss/train': 0.15328112244606018} +03/04/2022 10:34:46 - INFO - codeparrot_training - Step 17979: {'lr': 0.00048575646358657934, 'samples': 9205760, 'steps': 17979, 'loss/train': 2.0097973346710205} +03/04/2022 10:34:48 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/04/2022 10:34:51 - INFO - codeparrot_training - Step 17980: {'lr': 0.00048575469787635997, 'samples': 9206272, 'steps': 17980, 'loss/train': 1.606844186782837} +03/04/2022 10:34:55 - INFO - codeparrot_training - Step 17981: {'lr': 0.00048575293205991313, 'samples': 9206784, 'steps': 17981, 'loss/train': 1.4724386930465698} +03/04/2022 10:34:56 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/04/2022 10:35:00 - INFO - codeparrot_training - Step 17982: {'lr': 0.0004857511661372397, 'samples': 9207296, 'steps': 17982, 'loss/train': 1.8826807737350464} +03/04/2022 10:35:03 - INFO - codeparrot_training - Step 17983: {'lr': 0.00048574940010834045, 'samples': 9207808, 'steps': 17983, 'loss/train': 1.4941933155059814} +03/04/2022 10:35:04 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 10:35:08 - INFO - codeparrot_training - Step 17984: {'lr': 0.0004857476339732161, 'samples': 9208320, 'steps': 17984, 'loss/train': 1.4574716091156006} +03/04/2022 10:35:11 - INFO - codeparrot_training - Step 17985: {'lr': 0.0004857458677318676, 'samples': 9208832, 'steps': 17985, 'loss/train': 0.6438436508178711} +03/04/2022 10:35:13 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/04/2022 10:35:16 - INFO - codeparrot_training - Step 17986: {'lr': 0.0004857441013842956, 'samples': 9209344, 'steps': 17986, 'loss/train': 1.3890058994293213} +03/04/2022 10:35:20 - INFO - codeparrot_training - Step 17987: {'lr': 0.0004857423349305009, 'samples': 9209856, 'steps': 17987, 'loss/train': 1.3494640588760376} +03/04/2022 10:35:21 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/04/2022 10:35:25 - INFO - codeparrot_training - Step 17988: {'lr': 0.00048574056837048443, 'samples': 9210368, 'steps': 17988, 'loss/train': 1.2988080978393555} +03/04/2022 10:35:28 - INFO - codeparrot_training - Step 17989: {'lr': 0.0004857388017042468, 'samples': 9210880, 'steps': 17989, 'loss/train': 2.346762180328369} +03/04/2022 10:35:29 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/04/2022 10:35:34 - INFO - codeparrot_training - Step 17990: {'lr': 0.000485737034931789, 'samples': 9211392, 'steps': 17990, 'loss/train': 0.5437436103820801} +03/04/2022 10:35:37 - INFO - codeparrot_training - Step 17991: {'lr': 0.00048573526805311166, 'samples': 9211904, 'steps': 17991, 'loss/train': 1.441810131072998} +03/04/2022 10:35:39 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/04/2022 10:35:42 - INFO - codeparrot_training - Step 17992: {'lr': 0.0004857335010682157, 'samples': 9212416, 'steps': 17992, 'loss/train': 1.4207707643508911} +03/04/2022 10:35:45 - INFO - codeparrot_training - Step 17993: {'lr': 0.0004857317339771018, 'samples': 9212928, 'steps': 17993, 'loss/train': 2.026493549346924} +03/04/2022 10:35:47 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 10:35:50 - INFO - codeparrot_training - Step 17994: {'lr': 0.0004857299667797709, 'samples': 9213440, 'steps': 17994, 'loss/train': 1.2757443189620972} +03/04/2022 10:35:54 - INFO - codeparrot_training - Step 17995: {'lr': 0.0004857281994762236, 'samples': 9213952, 'steps': 17995, 'loss/train': 2.365861415863037} +03/04/2022 10:35:55 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 10:35:59 - INFO - codeparrot_training - Step 17996: {'lr': 0.00048572643206646097, 'samples': 9214464, 'steps': 17996, 'loss/train': 2.232513189315796} +03/04/2022 10:36:02 - INFO - codeparrot_training - Step 17997: {'lr': 0.0004857246645504835, 'samples': 9214976, 'steps': 17997, 'loss/train': 1.9699103832244873} +03/04/2022 10:36:04 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/04/2022 10:36:07 - INFO - codeparrot_training - Step 17998: {'lr': 0.00048572289692829217, 'samples': 9215488, 'steps': 17998, 'loss/train': 0.9075934290885925} +03/04/2022 10:36:10 - INFO - codeparrot_training - Step 17999: {'lr': 0.00048572112919988776, 'samples': 9216000, 'steps': 17999, 'loss/train': 1.6704392433166504} +03/04/2022 10:36:12 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/04/2022 10:36:16 - INFO - codeparrot_training - Step 18000: {'lr': 0.00048571936136527106, 'samples': 9216512, 'steps': 18000, 'loss/train': 1.5186116695404053} +03/04/2022 10:36:19 - INFO - codeparrot_training - Step 18001: {'lr': 0.0004857175934244428, 'samples': 9217024, 'steps': 18001, 'loss/train': 1.5295902490615845} +03/04/2022 10:36:21 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/04/2022 10:36:24 - INFO - codeparrot_training - Step 18002: {'lr': 0.0004857158253774039, 'samples': 9217536, 'steps': 18002, 'loss/train': 2.2725470066070557} +03/04/2022 10:36:27 - INFO - codeparrot_training - Step 18003: {'lr': 0.0004857140572241551, 'samples': 9218048, 'steps': 18003, 'loss/train': 2.6969449520111084} +03/04/2022 10:36:29 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 10:36:33 - INFO - codeparrot_training - Step 18004: {'lr': 0.00048571228896469713, 'samples': 9218560, 'steps': 18004, 'loss/train': 3.7287542819976807} +03/04/2022 10:36:36 - INFO - codeparrot_training - Step 18005: {'lr': 0.0004857105205990308, 'samples': 9219072, 'steps': 18005, 'loss/train': 2.7973008155822754} +03/04/2022 10:36:38 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/04/2022 10:36:41 - INFO - codeparrot_training - Step 18006: {'lr': 0.00048570875212715706, 'samples': 9219584, 'steps': 18006, 'loss/train': 1.6999956369400024} +03/04/2022 10:36:44 - INFO - codeparrot_training - Step 18007: {'lr': 0.0004857069835490765, 'samples': 9220096, 'steps': 18007, 'loss/train': 1.8991800546646118} +03/04/2022 10:36:47 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 10:36:50 - INFO - codeparrot_training - Step 18008: {'lr': 0.00048570521486479004, 'samples': 9220608, 'steps': 18008, 'loss/train': 2.0468904972076416} +03/04/2022 10:36:53 - INFO - codeparrot_training - Step 18009: {'lr': 0.0004857034460742984, 'samples': 9221120, 'steps': 18009, 'loss/train': 1.946960210800171} +03/04/2022 10:36:55 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/04/2022 10:36:58 - INFO - codeparrot_training - Step 18010: {'lr': 0.0004857016771776025, 'samples': 9221632, 'steps': 18010, 'loss/train': 1.429425597190857} +03/04/2022 10:37:01 - INFO - codeparrot_training - Step 18011: {'lr': 0.000485699908174703, 'samples': 9222144, 'steps': 18011, 'loss/train': 1.0200977325439453} +03/04/2022 10:37:04 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/04/2022 10:37:07 - INFO - codeparrot_training - Step 18012: {'lr': 0.0004856981390656008, 'samples': 9222656, 'steps': 18012, 'loss/train': 1.9465402364730835} +03/04/2022 10:37:10 - INFO - codeparrot_training - Step 18013: {'lr': 0.00048569636985029664, 'samples': 9223168, 'steps': 18013, 'loss/train': 1.9815924167633057} +03/04/2022 10:37:13 - INFO - codeparrot_training - Step 18014: {'lr': 0.00048569460052879136, 'samples': 9223680, 'steps': 18014, 'loss/train': 1.3275963068008423} +03/04/2022 10:37:13 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/04/2022 10:37:18 - INFO - codeparrot_training - Step 18015: {'lr': 0.0004856928311010857, 'samples': 9224192, 'steps': 18015, 'loss/train': 0.8757449984550476} +03/04/2022 10:37:21 - INFO - codeparrot_training - Step 18016: {'lr': 0.00048569106156718045, 'samples': 9224704, 'steps': 18016, 'loss/train': 1.8529599905014038} +03/04/2022 10:37:22 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/04/2022 10:37:27 - INFO - codeparrot_training - Step 18017: {'lr': 0.00048568929192707657, 'samples': 9225216, 'steps': 18017, 'loss/train': 1.8402068614959717} +03/04/2022 10:37:30 - INFO - codeparrot_training - Step 18018: {'lr': 0.0004856875221807746, 'samples': 9225728, 'steps': 18018, 'loss/train': 1.3871861696243286} +03/04/2022 10:37:30 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/04/2022 10:37:35 - INFO - codeparrot_training - Step 18019: {'lr': 0.0004856857523282755, 'samples': 9226240, 'steps': 18019, 'loss/train': 1.9113489389419556} +03/04/2022 10:37:38 - INFO - codeparrot_training - Step 18020: {'lr': 0.0004856839823695801, 'samples': 9226752, 'steps': 18020, 'loss/train': 1.095318078994751} +03/04/2022 10:37:39 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/04/2022 10:37:44 - INFO - codeparrot_training - Step 18021: {'lr': 0.00048568221230468905, 'samples': 9227264, 'steps': 18021, 'loss/train': 2.558804988861084} +03/04/2022 10:37:47 - INFO - codeparrot_training - Step 18022: {'lr': 0.0004856804421336033, 'samples': 9227776, 'steps': 18022, 'loss/train': 2.1312220096588135} +03/04/2022 10:37:48 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/04/2022 10:37:52 - INFO - codeparrot_training - Step 18023: {'lr': 0.0004856786718563235, 'samples': 9228288, 'steps': 18023, 'loss/train': 2.475565195083618} +03/04/2022 10:37:55 - INFO - codeparrot_training - Step 18024: {'lr': 0.0004856769014728506, 'samples': 9228800, 'steps': 18024, 'loss/train': 1.9089443683624268} +03/04/2022 10:37:56 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 10:38:01 - INFO - codeparrot_training - Step 18025: {'lr': 0.0004856751309831853, 'samples': 9229312, 'steps': 18025, 'loss/train': 1.5559073686599731} +03/04/2022 10:38:04 - INFO - codeparrot_training - Step 18026: {'lr': 0.00048567336038732843, 'samples': 9229824, 'steps': 18026, 'loss/train': 1.7330362796783447} +03/04/2022 10:38:04 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 10:38:09 - INFO - codeparrot_training - Step 18027: {'lr': 0.0004856715896852808, 'samples': 9230336, 'steps': 18027, 'loss/train': 1.6141101121902466} +03/04/2022 10:38:12 - INFO - codeparrot_training - Step 18028: {'lr': 0.0004856698188770432, 'samples': 9230848, 'steps': 18028, 'loss/train': 1.4546313285827637} +03/04/2022 10:38:13 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 10:38:18 - INFO - codeparrot_training - Step 18029: {'lr': 0.0004856680479626163, 'samples': 9231360, 'steps': 18029, 'loss/train': 2.1798646450042725} +03/04/2022 10:38:21 - INFO - codeparrot_training - Step 18030: {'lr': 0.0004856662769420012, 'samples': 9231872, 'steps': 18030, 'loss/train': 1.4729083776474} +03/04/2022 10:38:21 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/04/2022 10:38:26 - INFO - codeparrot_training - Step 18031: {'lr': 0.0004856645058151984, 'samples': 9232384, 'steps': 18031, 'loss/train': 2.651660442352295} +03/04/2022 10:38:29 - INFO - codeparrot_training - Step 18032: {'lr': 0.0004856627345822088, 'samples': 9232896, 'steps': 18032, 'loss/train': 1.392628788948059} +03/04/2022 10:38:30 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 10:38:34 - INFO - codeparrot_training - Step 18033: {'lr': 0.0004856609632430332, 'samples': 9233408, 'steps': 18033, 'loss/train': 1.0114233493804932} +03/04/2022 10:38:38 - INFO - codeparrot_training - Step 18034: {'lr': 0.00048565919179767246, 'samples': 9233920, 'steps': 18034, 'loss/train': 1.9368866682052612} +03/04/2022 10:38:38 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 10:38:43 - INFO - codeparrot_training - Step 18035: {'lr': 0.0004856574202461273, 'samples': 9234432, 'steps': 18035, 'loss/train': 2.0502843856811523} +03/04/2022 10:38:46 - INFO - codeparrot_training - Step 18036: {'lr': 0.0004856556485883985, 'samples': 9234944, 'steps': 18036, 'loss/train': 2.0591673851013184} +03/04/2022 10:38:47 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 10:38:51 - INFO - codeparrot_training - Step 18037: {'lr': 0.000485653876824487, 'samples': 9235456, 'steps': 18037, 'loss/train': 2.247814178466797} +03/04/2022 10:38:55 - INFO - codeparrot_training - Step 18038: {'lr': 0.00048565210495439337, 'samples': 9235968, 'steps': 18038, 'loss/train': 1.8806456327438354} +03/04/2022 10:38:55 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/04/2022 10:39:00 - INFO - codeparrot_training - Step 18039: {'lr': 0.00048565033297811867, 'samples': 9236480, 'steps': 18039, 'loss/train': 1.737932801246643} +03/04/2022 10:39:03 - INFO - codeparrot_training - Step 18040: {'lr': 0.0004856485608956635, 'samples': 9236992, 'steps': 18040, 'loss/train': 2.3661177158355713} +03/04/2022 10:39:04 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/04/2022 10:39:08 - INFO - codeparrot_training - Step 18041: {'lr': 0.00048564678870702873, 'samples': 9237504, 'steps': 18041, 'loss/train': 1.7484675645828247} +03/04/2022 10:39:12 - INFO - codeparrot_training - Step 18042: {'lr': 0.00048564501641221516, 'samples': 9238016, 'steps': 18042, 'loss/train': 3.8200252056121826} +03/04/2022 10:39:13 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/04/2022 10:39:17 - INFO - codeparrot_training - Step 18043: {'lr': 0.00048564324401122357, 'samples': 9238528, 'steps': 18043, 'loss/train': 1.715099811553955} +03/04/2022 10:39:20 - INFO - codeparrot_training - Step 18044: {'lr': 0.0004856414715040548, 'samples': 9239040, 'steps': 18044, 'loss/train': 2.0503923892974854} +03/04/2022 10:39:21 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/04/2022 10:39:25 - INFO - codeparrot_training - Step 18045: {'lr': 0.0004856396988907096, 'samples': 9239552, 'steps': 18045, 'loss/train': 2.0969982147216797} +03/04/2022 10:39:28 - INFO - codeparrot_training - Step 18046: {'lr': 0.00048563792617118876, 'samples': 9240064, 'steps': 18046, 'loss/train': 2.093975782394409} +03/04/2022 10:39:29 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/04/2022 10:39:34 - INFO - codeparrot_training - Step 18047: {'lr': 0.00048563615334549316, 'samples': 9240576, 'steps': 18047, 'loss/train': 1.804709792137146} +03/04/2022 10:39:37 - INFO - codeparrot_training - Step 18048: {'lr': 0.0004856343804136235, 'samples': 9241088, 'steps': 18048, 'loss/train': 1.3839179277420044} +03/04/2022 10:39:37 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 10:39:42 - INFO - codeparrot_training - Step 18049: {'lr': 0.0004856326073755806, 'samples': 9241600, 'steps': 18049, 'loss/train': 1.8178420066833496} +03/04/2022 10:39:45 - INFO - codeparrot_training - Step 18050: {'lr': 0.0004856308342313653, 'samples': 9242112, 'steps': 18050, 'loss/train': 1.7415199279785156} +03/04/2022 10:39:46 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/04/2022 10:39:51 - INFO - codeparrot_training - Step 18051: {'lr': 0.00048562906098097847, 'samples': 9242624, 'steps': 18051, 'loss/train': 1.1081905364990234} +03/04/2022 10:39:54 - INFO - codeparrot_training - Step 18052: {'lr': 0.0004856272876244208, 'samples': 9243136, 'steps': 18052, 'loss/train': 1.2996468544006348} +03/04/2022 10:39:55 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/04/2022 10:39:59 - INFO - codeparrot_training - Step 18053: {'lr': 0.000485625514161693, 'samples': 9243648, 'steps': 18053, 'loss/train': 1.8077263832092285} +03/04/2022 10:40:02 - INFO - codeparrot_training - Step 18054: {'lr': 0.00048562374059279604, 'samples': 9244160, 'steps': 18054, 'loss/train': 1.6736817359924316} +03/04/2022 10:40:03 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/04/2022 10:40:07 - INFO - codeparrot_training - Step 18055: {'lr': 0.00048562196691773066, 'samples': 9244672, 'steps': 18055, 'loss/train': 2.287008762359619} +03/04/2022 10:40:11 - INFO - codeparrot_training - Step 18056: {'lr': 0.00048562019313649766, 'samples': 9245184, 'steps': 18056, 'loss/train': 1.9728847742080688} +03/04/2022 10:40:12 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/04/2022 10:40:16 - INFO - codeparrot_training - Step 18057: {'lr': 0.0004856184192490979, 'samples': 9245696, 'steps': 18057, 'loss/train': 2.219099998474121} +03/04/2022 10:40:19 - INFO - codeparrot_training - Step 18058: {'lr': 0.000485616645255532, 'samples': 9246208, 'steps': 18058, 'loss/train': 1.670425295829773} +03/04/2022 10:40:21 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 10:40:24 - INFO - codeparrot_training - Step 18059: {'lr': 0.0004856148711558009, 'samples': 9246720, 'steps': 18059, 'loss/train': 1.6569184064865112} +03/04/2022 10:40:27 - INFO - codeparrot_training - Step 18060: {'lr': 0.00048561309694990543, 'samples': 9247232, 'steps': 18060, 'loss/train': 2.1069703102111816} +03/04/2022 10:40:29 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 10:40:33 - INFO - codeparrot_training - Step 18061: {'lr': 0.00048561132263784634, 'samples': 9247744, 'steps': 18061, 'loss/train': 2.3802969455718994} +03/04/2022 10:40:36 - INFO - codeparrot_training - Step 18062: {'lr': 0.00048560954821962434, 'samples': 9248256, 'steps': 18062, 'loss/train': 1.9015684127807617} +03/04/2022 10:40:37 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/04/2022 10:40:41 - INFO - codeparrot_training - Step 18063: {'lr': 0.0004856077736952404, 'samples': 9248768, 'steps': 18063, 'loss/train': 1.143176794052124} +03/04/2022 10:40:44 - INFO - codeparrot_training - Step 18064: {'lr': 0.00048560599906469513, 'samples': 9249280, 'steps': 18064, 'loss/train': 1.6036642789840698} +03/04/2022 10:40:45 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/04/2022 10:40:50 - INFO - codeparrot_training - Step 18065: {'lr': 0.00048560422432798956, 'samples': 9249792, 'steps': 18065, 'loss/train': 1.9430943727493286} +03/04/2022 10:40:53 - INFO - codeparrot_training - Step 18066: {'lr': 0.0004856024494851243, 'samples': 9250304, 'steps': 18066, 'loss/train': 2.0861451625823975} +03/04/2022 10:40:54 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 10:40:58 - INFO - codeparrot_training - Step 18067: {'lr': 0.00048560067453610025, 'samples': 9250816, 'steps': 18067, 'loss/train': 2.1829288005828857} +03/04/2022 10:41:01 - INFO - codeparrot_training - Step 18068: {'lr': 0.00048559889948091814, 'samples': 9251328, 'steps': 18068, 'loss/train': 2.066016674041748} +03/04/2022 10:41:02 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 10:41:06 - INFO - codeparrot_training - Step 18069: {'lr': 0.0004855971243195788, 'samples': 9251840, 'steps': 18069, 'loss/train': 2.2173357009887695} +03/04/2022 10:41:10 - INFO - codeparrot_training - Step 18070: {'lr': 0.00048559534905208304, 'samples': 9252352, 'steps': 18070, 'loss/train': 1.6213802099227905} +03/04/2022 10:41:11 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 10:41:15 - INFO - codeparrot_training - Step 18071: {'lr': 0.0004855935736784316, 'samples': 9252864, 'steps': 18071, 'loss/train': 1.671156883239746} +03/04/2022 10:41:18 - INFO - codeparrot_training - Step 18072: {'lr': 0.00048559179819862537, 'samples': 9253376, 'steps': 18072, 'loss/train': 1.8741987943649292} +03/04/2022 10:41:19 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/04/2022 10:41:23 - INFO - codeparrot_training - Step 18073: {'lr': 0.0004855900226126651, 'samples': 9253888, 'steps': 18073, 'loss/train': 2.0565757751464844} +03/04/2022 10:41:26 - INFO - codeparrot_training - Step 18074: {'lr': 0.00048558824692055156, 'samples': 9254400, 'steps': 18074, 'loss/train': 1.8783035278320312} +03/04/2022 10:41:27 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 10:41:32 - INFO - codeparrot_training - Step 18075: {'lr': 0.0004855864711222857, 'samples': 9254912, 'steps': 18075, 'loss/train': 1.6303091049194336} +03/04/2022 10:41:35 - INFO - codeparrot_training - Step 18076: {'lr': 0.0004855846952178682, 'samples': 9255424, 'steps': 18076, 'loss/train': 2.446316719055176} +03/04/2022 10:41:36 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 10:41:40 - INFO - codeparrot_training - Step 18077: {'lr': 0.0004855829192072998, 'samples': 9255936, 'steps': 18077, 'loss/train': 2.20450496673584} +03/04/2022 10:41:43 - INFO - codeparrot_training - Step 18078: {'lr': 0.00048558114309058144, 'samples': 9256448, 'steps': 18078, 'loss/train': 2.0531225204467773} +03/04/2022 10:41:44 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/04/2022 10:41:49 - INFO - codeparrot_training - Step 18079: {'lr': 0.00048557936686771376, 'samples': 9256960, 'steps': 18079, 'loss/train': 1.5814470052719116} +03/04/2022 10:41:52 - INFO - codeparrot_training - Step 18080: {'lr': 0.0004855775905386977, 'samples': 9257472, 'steps': 18080, 'loss/train': 1.6231176853179932} +03/04/2022 10:41:53 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 10:41:57 - INFO - codeparrot_training - Step 18081: {'lr': 0.000485575814103534, 'samples': 9257984, 'steps': 18081, 'loss/train': 2.071795701980591} +03/04/2022 10:42:00 - INFO - codeparrot_training - Step 18082: {'lr': 0.0004855740375622235, 'samples': 9258496, 'steps': 18082, 'loss/train': 1.9343911409378052} +03/04/2022 10:42:02 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 10:42:06 - INFO - codeparrot_training - Step 18083: {'lr': 0.00048557226091476704, 'samples': 9259008, 'steps': 18083, 'loss/train': 2.3774161338806152} +03/04/2022 10:42:09 - INFO - codeparrot_training - Step 18084: {'lr': 0.0004855704841611652, 'samples': 9259520, 'steps': 18084, 'loss/train': 2.3986387252807617} +03/04/2022 10:42:10 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/04/2022 10:42:14 - INFO - codeparrot_training - Step 18085: {'lr': 0.00048556870730141906, 'samples': 9260032, 'steps': 18085, 'loss/train': 2.759908676147461} +03/04/2022 10:42:17 - INFO - codeparrot_training - Step 18086: {'lr': 0.00048556693033552926, 'samples': 9260544, 'steps': 18086, 'loss/train': 1.035249948501587} +03/04/2022 10:42:19 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 10:42:23 - INFO - codeparrot_training - Step 18087: {'lr': 0.0004855651532634966, 'samples': 9261056, 'steps': 18087, 'loss/train': 0.7822667956352234} +03/04/2022 10:42:26 - INFO - codeparrot_training - Step 18088: {'lr': 0.00048556337608532196, 'samples': 9261568, 'steps': 18088, 'loss/train': 1.3302208185195923} +03/04/2022 10:42:27 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 10:42:31 - INFO - codeparrot_training - Step 18089: {'lr': 0.00048556159880100604, 'samples': 9262080, 'steps': 18089, 'loss/train': 2.014796018600464} +03/04/2022 10:42:34 - INFO - codeparrot_training - Step 18090: {'lr': 0.00048555982141054976, 'samples': 9262592, 'steps': 18090, 'loss/train': 2.652547836303711} +03/04/2022 10:42:36 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 10:42:39 - INFO - codeparrot_training - Step 18091: {'lr': 0.0004855580439139539, 'samples': 9263104, 'steps': 18091, 'loss/train': 2.0466766357421875} +03/04/2022 10:42:43 - INFO - codeparrot_training - Step 18092: {'lr': 0.00048555626631121906, 'samples': 9263616, 'steps': 18092, 'loss/train': 2.089583396911621} +03/04/2022 10:42:44 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 10:42:48 - INFO - codeparrot_training - Step 18093: {'lr': 0.0004855544886023463, 'samples': 9264128, 'steps': 18093, 'loss/train': 0.16876201331615448} +03/04/2022 10:42:51 - INFO - codeparrot_training - Step 18094: {'lr': 0.00048555271078733637, 'samples': 9264640, 'steps': 18094, 'loss/train': 1.6249490976333618} +03/04/2022 10:42:53 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 10:42:56 - INFO - codeparrot_training - Step 18095: {'lr': 0.00048555093286618996, 'samples': 9265152, 'steps': 18095, 'loss/train': 2.4962408542633057} +03/04/2022 10:42:59 - INFO - codeparrot_training - Step 18096: {'lr': 0.0004855491548389079, 'samples': 9265664, 'steps': 18096, 'loss/train': 2.375613212585449} +03/04/2022 10:43:01 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 10:43:05 - INFO - codeparrot_training - Step 18097: {'lr': 0.0004855473767054911, 'samples': 9266176, 'steps': 18097, 'loss/train': 2.2110486030578613} +03/04/2022 10:43:08 - INFO - codeparrot_training - Step 18098: {'lr': 0.00048554559846594026, 'samples': 9266688, 'steps': 18098, 'loss/train': 2.25071120262146} +03/04/2022 10:43:09 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/04/2022 10:43:13 - INFO - codeparrot_training - Step 18099: {'lr': 0.0004855438201202562, 'samples': 9267200, 'steps': 18099, 'loss/train': 1.436682939529419} +03/04/2022 10:43:16 - INFO - codeparrot_training - Step 18100: {'lr': 0.0004855420416684398, 'samples': 9267712, 'steps': 18100, 'loss/train': 1.9390878677368164} +03/04/2022 10:43:18 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/04/2022 10:43:22 - INFO - codeparrot_training - Step 18101: {'lr': 0.0004855402631104917, 'samples': 9268224, 'steps': 18101, 'loss/train': 1.920385479927063} +03/04/2022 10:43:25 - INFO - codeparrot_training - Step 18102: {'lr': 0.0004855384844464128, 'samples': 9268736, 'steps': 18102, 'loss/train': 2.405860662460327} +03/04/2022 10:43:26 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/04/2022 10:43:30 - INFO - codeparrot_training - Step 18103: {'lr': 0.00048553670567620395, 'samples': 9269248, 'steps': 18103, 'loss/train': 0.23816826939582825} +03/04/2022 10:43:33 - INFO - codeparrot_training - Step 18104: {'lr': 0.0004855349267998659, 'samples': 9269760, 'steps': 18104, 'loss/train': 1.858398675918579} +03/04/2022 10:43:34 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/04/2022 10:43:39 - INFO - codeparrot_training - Step 18105: {'lr': 0.0004855331478173994, 'samples': 9270272, 'steps': 18105, 'loss/train': 1.6501152515411377} +03/04/2022 10:43:42 - INFO - codeparrot_training - Step 18106: {'lr': 0.0004855313687288053, 'samples': 9270784, 'steps': 18106, 'loss/train': 1.6377369165420532} +03/04/2022 10:43:44 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 10:43:47 - INFO - codeparrot_training - Step 18107: {'lr': 0.00048552958953408437, 'samples': 9271296, 'steps': 18107, 'loss/train': 2.0241332054138184} +03/04/2022 10:43:50 - INFO - codeparrot_training - Step 18108: {'lr': 0.0004855278102332375, 'samples': 9271808, 'steps': 18108, 'loss/train': 1.9219976663589478} +03/04/2022 10:43:52 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 10:43:56 - INFO - codeparrot_training - Step 18109: {'lr': 0.0004855260308262654, 'samples': 9272320, 'steps': 18109, 'loss/train': 1.2442052364349365} +03/04/2022 10:43:59 - INFO - codeparrot_training - Step 18110: {'lr': 0.00048552425131316893, 'samples': 9272832, 'steps': 18110, 'loss/train': 1.189245581626892} +03/04/2022 10:44:01 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/04/2022 10:44:04 - INFO - codeparrot_training - Step 18111: {'lr': 0.0004855224716939488, 'samples': 9273344, 'steps': 18111, 'loss/train': 0.2728780210018158} +03/04/2022 10:44:07 - INFO - codeparrot_training - Step 18112: {'lr': 0.0004855206919686059, 'samples': 9273856, 'steps': 18112, 'loss/train': 1.5287086963653564} +03/04/2022 10:44:10 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/04/2022 10:44:13 - INFO - codeparrot_training - Step 18113: {'lr': 0.0004855189121371411, 'samples': 9274368, 'steps': 18113, 'loss/train': 1.8683959245681763} +03/04/2022 10:44:16 - INFO - codeparrot_training - Step 18114: {'lr': 0.00048551713219955505, 'samples': 9274880, 'steps': 18114, 'loss/train': 2.177990436553955} +03/04/2022 10:44:18 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 10:44:21 - INFO - codeparrot_training - Step 18115: {'lr': 0.00048551535215584865, 'samples': 9275392, 'steps': 18115, 'loss/train': 1.667421817779541} +03/04/2022 10:44:24 - INFO - codeparrot_training - Step 18116: {'lr': 0.00048551357200602265, 'samples': 9275904, 'steps': 18116, 'loss/train': 1.9115840196609497} +03/04/2022 10:44:27 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 10:44:30 - INFO - codeparrot_training - Step 18117: {'lr': 0.0004855117917500778, 'samples': 9276416, 'steps': 18117, 'loss/train': 4.902676582336426} +03/04/2022 10:44:33 - INFO - codeparrot_training - Step 18118: {'lr': 0.000485510011388015, 'samples': 9276928, 'steps': 18118, 'loss/train': 1.980588674545288} +03/04/2022 10:44:35 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 10:44:38 - INFO - codeparrot_training - Step 18119: {'lr': 0.00048550823091983507, 'samples': 9277440, 'steps': 18119, 'loss/train': 1.7924127578735352} +03/04/2022 10:44:41 - INFO - codeparrot_training - Step 18120: {'lr': 0.00048550645034553877, 'samples': 9277952, 'steps': 18120, 'loss/train': 2.468632936477661} +03/04/2022 10:44:43 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/04/2022 10:44:47 - INFO - codeparrot_training - Step 18121: {'lr': 0.00048550466966512684, 'samples': 9278464, 'steps': 18121, 'loss/train': 2.55251145362854} +03/04/2022 10:44:50 - INFO - codeparrot_training - Step 18122: {'lr': 0.0004855028888786002, 'samples': 9278976, 'steps': 18122, 'loss/train': 2.2567946910858154} +03/04/2022 10:44:53 - INFO - codeparrot_training - Step 18123: {'lr': 0.00048550110798595953, 'samples': 9279488, 'steps': 18123, 'loss/train': 2.068143367767334} +03/04/2022 10:44:53 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/04/2022 10:44:59 - INFO - codeparrot_training - Step 18124: {'lr': 0.0004854993269872057, 'samples': 9280000, 'steps': 18124, 'loss/train': 1.589461088180542} +03/04/2022 10:45:02 - INFO - codeparrot_training - Step 18125: {'lr': 0.0004854975458823396, 'samples': 9280512, 'steps': 18125, 'loss/train': 1.9462170600891113} +03/04/2022 10:45:02 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/04/2022 10:45:07 - INFO - codeparrot_training - Step 18126: {'lr': 0.0004854957646713618, 'samples': 9281024, 'steps': 18126, 'loss/train': 2.1950531005859375} +03/04/2022 10:45:10 - INFO - codeparrot_training - Step 18127: {'lr': 0.00048549398335427337, 'samples': 9281536, 'steps': 18127, 'loss/train': 1.8298622369766235} +03/04/2022 10:45:10 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/04/2022 10:45:16 - INFO - codeparrot_training - Step 18128: {'lr': 0.0004854922019310749, 'samples': 9282048, 'steps': 18128, 'loss/train': 1.6145155429840088} +03/04/2022 10:45:19 - INFO - codeparrot_training - Step 18129: {'lr': 0.0004854904204017673, 'samples': 9282560, 'steps': 18129, 'loss/train': 3.039752244949341} +03/04/2022 10:45:19 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 10:45:24 - INFO - codeparrot_training - Step 18130: {'lr': 0.0004854886387663514, 'samples': 9283072, 'steps': 18130, 'loss/train': 2.218212366104126} +03/04/2022 10:45:27 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 10:45:29 - INFO - codeparrot_training - Step 18131: {'lr': 0.0004854868570248279, 'samples': 9283584, 'steps': 18131, 'loss/train': 2.1105751991271973} +03/04/2022 10:45:32 - INFO - codeparrot_training - Step 18132: {'lr': 0.00048548507517719766, 'samples': 9284096, 'steps': 18132, 'loss/train': 2.0062012672424316} +03/04/2022 10:45:35 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 10:45:38 - INFO - codeparrot_training - Step 18133: {'lr': 0.0004854832932234615, 'samples': 9284608, 'steps': 18133, 'loss/train': 2.2786216735839844} +03/04/2022 10:45:41 - INFO - codeparrot_training - Step 18134: {'lr': 0.0004854815111636202, 'samples': 9285120, 'steps': 18134, 'loss/train': 1.3398759365081787} +03/04/2022 10:45:44 - INFO - codeparrot_training - Step 18135: {'lr': 0.00048547972899767454, 'samples': 9285632, 'steps': 18135, 'loss/train': 1.7908903360366821} +03/04/2022 10:45:44 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 10:45:50 - INFO - codeparrot_training - Step 18136: {'lr': 0.0004854779467256254, 'samples': 9286144, 'steps': 18136, 'loss/train': 2.049302101135254} +03/04/2022 10:45:53 - INFO - codeparrot_training - Step 18137: {'lr': 0.00048547616434747344, 'samples': 9286656, 'steps': 18137, 'loss/train': 1.7474358081817627} +03/04/2022 10:45:57 - INFO - codeparrot_training - Step 18138: {'lr': 0.0004854743818632196, 'samples': 9287168, 'steps': 18138, 'loss/train': 2.1214473247528076} +03/04/2022 10:45:57 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 10:46:02 - INFO - codeparrot_training - Step 18139: {'lr': 0.0004854725992728647, 'samples': 9287680, 'steps': 18139, 'loss/train': 1.5066131353378296} +03/04/2022 10:46:05 - INFO - codeparrot_training - Step 18140: {'lr': 0.00048547081657640935, 'samples': 9288192, 'steps': 18140, 'loss/train': 1.8441829681396484} +03/04/2022 10:46:05 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/04/2022 10:46:10 - INFO - codeparrot_training - Step 18141: {'lr': 0.00048546903377385457, 'samples': 9288704, 'steps': 18141, 'loss/train': 2.590217113494873} +03/04/2022 10:46:14 - INFO - codeparrot_training - Step 18142: {'lr': 0.00048546725086520107, 'samples': 9289216, 'steps': 18142, 'loss/train': 1.0912885665893555} +03/04/2022 10:46:14 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/04/2022 10:46:19 - INFO - codeparrot_training - Step 18143: {'lr': 0.00048546546785044965, 'samples': 9289728, 'steps': 18143, 'loss/train': 1.736397385597229} +03/04/2022 10:46:22 - INFO - codeparrot_training - Step 18144: {'lr': 0.00048546368472960114, 'samples': 9290240, 'steps': 18144, 'loss/train': 2.0963330268859863} +03/04/2022 10:46:22 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 10:46:27 - INFO - codeparrot_training - Step 18145: {'lr': 0.00048546190150265634, 'samples': 9290752, 'steps': 18145, 'loss/train': 1.2742969989776611} +03/04/2022 10:46:30 - INFO - codeparrot_training - Step 18146: {'lr': 0.00048546011816961597, 'samples': 9291264, 'steps': 18146, 'loss/train': 1.5042163133621216} +03/04/2022 10:46:31 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/04/2022 10:46:36 - INFO - codeparrot_training - Step 18147: {'lr': 0.00048545833473048094, 'samples': 9291776, 'steps': 18147, 'loss/train': 1.5104914903640747} +03/04/2022 10:46:39 - INFO - codeparrot_training - Step 18148: {'lr': 0.00048545655118525206, 'samples': 9292288, 'steps': 18148, 'loss/train': 1.9205474853515625} +03/04/2022 10:46:39 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 10:46:44 - INFO - codeparrot_training - Step 18149: {'lr': 0.00048545476753393004, 'samples': 9292800, 'steps': 18149, 'loss/train': 2.573028564453125} +03/04/2022 10:46:47 - INFO - codeparrot_training - Step 18150: {'lr': 0.0004854529837765158, 'samples': 9293312, 'steps': 18150, 'loss/train': 2.023547887802124} +03/04/2022 10:46:48 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/04/2022 10:46:53 - INFO - codeparrot_training - Step 18151: {'lr': 0.00048545119991301, 'samples': 9293824, 'steps': 18151, 'loss/train': 2.0021164417266846} +03/04/2022 10:46:56 - INFO - codeparrot_training - Step 18152: {'lr': 0.0004854494159434135, 'samples': 9294336, 'steps': 18152, 'loss/train': 0.919468879699707} +03/04/2022 10:46:57 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 10:47:02 - INFO - codeparrot_training - Step 18153: {'lr': 0.0004854476318677272, 'samples': 9294848, 'steps': 18153, 'loss/train': 1.643168330192566} +03/04/2022 10:47:05 - INFO - codeparrot_training - Step 18154: {'lr': 0.00048544584768595185, 'samples': 9295360, 'steps': 18154, 'loss/train': 1.4010717868804932} +03/04/2022 10:47:06 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 10:47:10 - INFO - codeparrot_training - Step 18155: {'lr': 0.00048544406339808823, 'samples': 9295872, 'steps': 18155, 'loss/train': 1.7568873167037964} +03/04/2022 10:47:13 - INFO - codeparrot_training - Step 18156: {'lr': 0.00048544227900413706, 'samples': 9296384, 'steps': 18156, 'loss/train': 2.4814627170562744} +03/04/2022 10:47:15 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/04/2022 10:47:18 - INFO - codeparrot_training - Step 18157: {'lr': 0.0004854404945040993, 'samples': 9296896, 'steps': 18157, 'loss/train': 2.220127582550049} +03/04/2022 10:47:21 - INFO - codeparrot_training - Step 18158: {'lr': 0.0004854387098979757, 'samples': 9297408, 'steps': 18158, 'loss/train': 2.0150656700134277} +03/04/2022 10:47:23 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 10:47:27 - INFO - codeparrot_training - Step 18159: {'lr': 0.000485436925185767, 'samples': 9297920, 'steps': 18159, 'loss/train': 2.4550116062164307} +03/04/2022 10:47:30 - INFO - codeparrot_training - Step 18160: {'lr': 0.00048543514036747404, 'samples': 9298432, 'steps': 18160, 'loss/train': 1.6928722858428955} +03/04/2022 10:47:32 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 10:47:35 - INFO - codeparrot_training - Step 18161: {'lr': 0.00048543335544309776, 'samples': 9298944, 'steps': 18161, 'loss/train': 1.3122540712356567} +03/04/2022 10:47:38 - INFO - codeparrot_training - Step 18162: {'lr': 0.00048543157041263876, 'samples': 9299456, 'steps': 18162, 'loss/train': 2.782217264175415} +03/04/2022 10:47:40 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/04/2022 10:47:44 - INFO - codeparrot_training - Step 18163: {'lr': 0.0004854297852760979, 'samples': 9299968, 'steps': 18163, 'loss/train': 1.5713459253311157} +03/04/2022 10:47:47 - INFO - codeparrot_training - Step 18164: {'lr': 0.000485428000033476, 'samples': 9300480, 'steps': 18164, 'loss/train': 1.82054603099823} +03/04/2022 10:47:48 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 10:47:52 - INFO - codeparrot_training - Step 18165: {'lr': 0.00048542621468477393, 'samples': 9300992, 'steps': 18165, 'loss/train': 2.9185140132904053} +03/04/2022 10:47:55 - INFO - codeparrot_training - Step 18166: {'lr': 0.0004854244292299924, 'samples': 9301504, 'steps': 18166, 'loss/train': 1.570572853088379} +03/04/2022 10:47:57 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/04/2022 10:48:01 - INFO - codeparrot_training - Step 18167: {'lr': 0.0004854226436691323, 'samples': 9302016, 'steps': 18167, 'loss/train': 2.3989386558532715} +03/04/2022 10:48:04 - INFO - codeparrot_training - Step 18168: {'lr': 0.0004854208580021944, 'samples': 9302528, 'steps': 18168, 'loss/train': 2.2819504737854004} +03/04/2022 10:48:05 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 10:48:09 - INFO - codeparrot_training - Step 18169: {'lr': 0.00048541907222917946, 'samples': 9303040, 'steps': 18169, 'loss/train': 2.2821881771087646} +03/04/2022 10:48:12 - INFO - codeparrot_training - Step 18170: {'lr': 0.0004854172863500883, 'samples': 9303552, 'steps': 18170, 'loss/train': 1.4971988201141357} +03/04/2022 10:48:14 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/04/2022 10:48:17 - INFO - codeparrot_training - Step 18171: {'lr': 0.00048541550036492175, 'samples': 9304064, 'steps': 18171, 'loss/train': 1.3860183954238892} +03/04/2022 10:48:21 - INFO - codeparrot_training - Step 18172: {'lr': 0.00048541371427368064, 'samples': 9304576, 'steps': 18172, 'loss/train': 2.343593120574951} +03/04/2022 10:48:22 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 10:48:26 - INFO - codeparrot_training - Step 18173: {'lr': 0.0004854119280763657, 'samples': 9305088, 'steps': 18173, 'loss/train': 1.435987114906311} +03/04/2022 10:48:29 - INFO - codeparrot_training - Step 18174: {'lr': 0.00048541014177297783, 'samples': 9305600, 'steps': 18174, 'loss/train': 2.1377627849578857} +03/04/2022 10:48:30 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 10:48:34 - INFO - codeparrot_training - Step 18175: {'lr': 0.0004854083553635178, 'samples': 9306112, 'steps': 18175, 'loss/train': 0.17470578849315643} +03/04/2022 10:48:37 - INFO - codeparrot_training - Step 18176: {'lr': 0.00048540656884798626, 'samples': 9306624, 'steps': 18176, 'loss/train': 1.5009955167770386} +03/04/2022 10:48:39 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 10:48:43 - INFO - codeparrot_training - Step 18177: {'lr': 0.0004854047822263843, 'samples': 9307136, 'steps': 18177, 'loss/train': 2.6751229763031006} +03/04/2022 10:48:46 - INFO - codeparrot_training - Step 18178: {'lr': 0.00048540299549871256, 'samples': 9307648, 'steps': 18178, 'loss/train': 1.7673426866531372} +03/04/2022 10:48:47 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/04/2022 10:48:51 - INFO - codeparrot_training - Step 18179: {'lr': 0.0004854012086649718, 'samples': 9308160, 'steps': 18179, 'loss/train': 1.6755092144012451} +03/04/2022 10:48:54 - INFO - codeparrot_training - Step 18180: {'lr': 0.00048539942172516295, 'samples': 9308672, 'steps': 18180, 'loss/train': 1.1478462219238281} +03/04/2022 10:48:55 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 10:48:59 - INFO - codeparrot_training - Step 18181: {'lr': 0.00048539763467928665, 'samples': 9309184, 'steps': 18181, 'loss/train': 1.398455023765564} +03/04/2022 10:49:03 - INFO - codeparrot_training - Step 18182: {'lr': 0.0004853958475273439, 'samples': 9309696, 'steps': 18182, 'loss/train': 2.507258176803589} +03/04/2022 10:49:04 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/04/2022 10:49:08 - INFO - codeparrot_training - Step 18183: {'lr': 0.0004853940602693354, 'samples': 9310208, 'steps': 18183, 'loss/train': 2.286450147628784} +03/04/2022 10:49:11 - INFO - codeparrot_training - Step 18184: {'lr': 0.00048539227290526194, 'samples': 9310720, 'steps': 18184, 'loss/train': 2.0574443340301514} +03/04/2022 10:49:12 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/04/2022 10:49:16 - INFO - codeparrot_training - Step 18185: {'lr': 0.00048539048543512443, 'samples': 9311232, 'steps': 18185, 'loss/train': 2.2459826469421387} +03/04/2022 10:49:19 - INFO - codeparrot_training - Step 18186: {'lr': 0.0004853886978589235, 'samples': 9311744, 'steps': 18186, 'loss/train': 2.054753065109253} +03/04/2022 10:49:20 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/04/2022 10:49:25 - INFO - codeparrot_training - Step 18187: {'lr': 0.0004853869101766601, 'samples': 9312256, 'steps': 18187, 'loss/train': 2.14632248878479} +03/04/2022 10:49:28 - INFO - codeparrot_training - Step 18188: {'lr': 0.000485385122388335, 'samples': 9312768, 'steps': 18188, 'loss/train': 1.4769386053085327} +03/04/2022 10:49:29 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/04/2022 10:49:33 - INFO - codeparrot_training - Step 18189: {'lr': 0.000485383334493949, 'samples': 9313280, 'steps': 18189, 'loss/train': 2.614997625350952} +03/04/2022 10:49:36 - INFO - codeparrot_training - Step 18190: {'lr': 0.00048538154649350286, 'samples': 9313792, 'steps': 18190, 'loss/train': 2.3883473873138428} +03/04/2022 10:49:37 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 10:49:42 - INFO - codeparrot_training - Step 18191: {'lr': 0.00048537975838699744, 'samples': 9314304, 'steps': 18191, 'loss/train': 1.9786314964294434} +03/04/2022 10:49:45 - INFO - codeparrot_training - Step 18192: {'lr': 0.0004853779701744335, 'samples': 9314816, 'steps': 18192, 'loss/train': 3.294447660446167} +03/04/2022 10:49:46 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/04/2022 10:49:50 - INFO - codeparrot_training - Step 18193: {'lr': 0.000485376181855812, 'samples': 9315328, 'steps': 18193, 'loss/train': 1.499484896659851} +03/04/2022 10:49:53 - INFO - codeparrot_training - Step 18194: {'lr': 0.00048537439343113354, 'samples': 9315840, 'steps': 18194, 'loss/train': 1.5188711881637573} +03/04/2022 10:49:54 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 10:49:58 - INFO - codeparrot_training - Step 18195: {'lr': 0.000485372604900399, 'samples': 9316352, 'steps': 18195, 'loss/train': 1.8748955726623535} +03/04/2022 10:50:01 - INFO - codeparrot_training - Step 18196: {'lr': 0.0004853708162636092, 'samples': 9316864, 'steps': 18196, 'loss/train': 1.8775769472122192} +03/04/2022 10:50:02 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/04/2022 10:50:07 - INFO - codeparrot_training - Step 18197: {'lr': 0.00048536902752076494, 'samples': 9317376, 'steps': 18197, 'loss/train': 1.6920803785324097} +03/04/2022 10:50:10 - INFO - codeparrot_training - Step 18198: {'lr': 0.00048536723867186705, 'samples': 9317888, 'steps': 18198, 'loss/train': 1.82218337059021} +03/04/2022 10:50:11 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/04/2022 10:50:15 - INFO - codeparrot_training - Step 18199: {'lr': 0.0004853654497169163, 'samples': 9318400, 'steps': 18199, 'loss/train': 1.3713641166687012} +03/04/2022 10:50:18 - INFO - codeparrot_training - Step 18200: {'lr': 0.00048536366065591354, 'samples': 9318912, 'steps': 18200, 'loss/train': 2.632209062576294} +03/04/2022 10:50:19 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 10:50:24 - INFO - codeparrot_training - Step 18201: {'lr': 0.00048536187148885956, 'samples': 9319424, 'steps': 18201, 'loss/train': 1.1285573244094849} +03/04/2022 10:50:27 - INFO - codeparrot_training - Step 18202: {'lr': 0.0004853600822157551, 'samples': 9319936, 'steps': 18202, 'loss/train': 1.556025743484497} +03/04/2022 10:50:28 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 10:50:32 - INFO - codeparrot_training - Step 18203: {'lr': 0.000485358292836601, 'samples': 9320448, 'steps': 18203, 'loss/train': 1.3551188707351685} +03/04/2022 10:50:35 - INFO - codeparrot_training - Step 18204: {'lr': 0.0004853565033513982, 'samples': 9320960, 'steps': 18204, 'loss/train': 1.5533242225646973} +03/04/2022 10:50:36 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 10:50:40 - INFO - codeparrot_training - Step 18205: {'lr': 0.0004853547137601473, 'samples': 9321472, 'steps': 18205, 'loss/train': 2.388479709625244} +03/04/2022 10:50:44 - INFO - codeparrot_training - Step 18206: {'lr': 0.0004853529240628493, 'samples': 9321984, 'steps': 18206, 'loss/train': 0.753523051738739} +03/04/2022 10:50:44 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/04/2022 10:50:49 - INFO - codeparrot_training - Step 18207: {'lr': 0.00048535113425950474, 'samples': 9322496, 'steps': 18207, 'loss/train': 2.4384591579437256} +03/04/2022 10:50:52 - INFO - codeparrot_training - Step 18208: {'lr': 0.0004853493443501147, 'samples': 9323008, 'steps': 18208, 'loss/train': 1.854074478149414} +03/04/2022 10:50:52 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/04/2022 10:50:57 - INFO - codeparrot_training - Step 18209: {'lr': 0.0004853475543346798, 'samples': 9323520, 'steps': 18209, 'loss/train': 2.5852653980255127} +03/04/2022 10:51:00 - INFO - codeparrot_training - Step 18210: {'lr': 0.000485345764213201, 'samples': 9324032, 'steps': 18210, 'loss/train': 1.6585125923156738} +03/04/2022 10:51:01 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 10:51:06 - INFO - codeparrot_training - Step 18211: {'lr': 0.00048534397398567895, 'samples': 9324544, 'steps': 18211, 'loss/train': 1.8183873891830444} +03/04/2022 10:51:09 - INFO - codeparrot_training - Step 18212: {'lr': 0.00048534218365211456, 'samples': 9325056, 'steps': 18212, 'loss/train': 1.7596406936645508} +03/04/2022 10:51:09 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 10:51:14 - INFO - codeparrot_training - Step 18213: {'lr': 0.0004853403932125087, 'samples': 9325568, 'steps': 18213, 'loss/train': 1.411900281906128} +03/04/2022 10:51:17 - INFO - codeparrot_training - Step 18214: {'lr': 0.00048533860266686203, 'samples': 9326080, 'steps': 18214, 'loss/train': 1.0179402828216553} +03/04/2022 10:51:18 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 10:51:23 - INFO - codeparrot_training - Step 18215: {'lr': 0.0004853368120151754, 'samples': 9326592, 'steps': 18215, 'loss/train': 2.0289433002471924} +03/04/2022 10:51:26 - INFO - codeparrot_training - Step 18216: {'lr': 0.00048533502125744967, 'samples': 9327104, 'steps': 18216, 'loss/train': 1.7340096235275269} +03/04/2022 10:51:27 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 10:51:31 - INFO - codeparrot_training - Step 18217: {'lr': 0.0004853332303936856, 'samples': 9327616, 'steps': 18217, 'loss/train': 1.9724284410476685} +03/04/2022 10:51:34 - INFO - codeparrot_training - Step 18218: {'lr': 0.000485331439423884, 'samples': 9328128, 'steps': 18218, 'loss/train': 0.6834306120872498} +03/04/2022 10:51:34 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/04/2022 10:51:39 - INFO - codeparrot_training - Step 18219: {'lr': 0.00048532964834804566, 'samples': 9328640, 'steps': 18219, 'loss/train': 1.5619028806686401} +03/04/2022 10:51:43 - INFO - codeparrot_training - Step 18220: {'lr': 0.00048532785716617145, 'samples': 9329152, 'steps': 18220, 'loss/train': 2.102036714553833} +03/04/2022 10:51:43 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 10:51:48 - INFO - codeparrot_training - Step 18221: {'lr': 0.0004853260658782621, 'samples': 9329664, 'steps': 18221, 'loss/train': 1.6399744749069214} +03/04/2022 10:51:51 - INFO - codeparrot_training - Step 18222: {'lr': 0.0004853242744843185, 'samples': 9330176, 'steps': 18222, 'loss/train': 1.8537662029266357} +03/04/2022 10:51:51 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 10:51:56 - INFO - codeparrot_training - Step 18223: {'lr': 0.0004853224829843414, 'samples': 9330688, 'steps': 18223, 'loss/train': 1.5931265354156494} +03/04/2022 10:52:00 - INFO - codeparrot_training - Step 18224: {'lr': 0.00048532069137833156, 'samples': 9331200, 'steps': 18224, 'loss/train': 2.2442336082458496} +03/04/2022 10:52:00 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/04/2022 10:52:05 - INFO - codeparrot_training - Step 18225: {'lr': 0.00048531889966628997, 'samples': 9331712, 'steps': 18225, 'loss/train': 1.6392074823379517} +03/04/2022 10:52:08 - INFO - codeparrot_training - Step 18226: {'lr': 0.00048531710784821726, 'samples': 9332224, 'steps': 18226, 'loss/train': 1.8301458358764648} +03/04/2022 10:52:09 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 10:52:14 - INFO - codeparrot_training - Step 18227: {'lr': 0.0004853153159241143, 'samples': 9332736, 'steps': 18227, 'loss/train': 2.495155096054077} +03/04/2022 10:52:17 - INFO - codeparrot_training - Step 18228: {'lr': 0.0004853135238939818, 'samples': 9333248, 'steps': 18228, 'loss/train': 2.1819264888763428} +03/04/2022 10:52:18 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/04/2022 10:52:23 - INFO - codeparrot_training - Step 18229: {'lr': 0.0004853117317578207, 'samples': 9333760, 'steps': 18229, 'loss/train': 1.3795888423919678} +03/04/2022 10:52:26 - INFO - codeparrot_training - Step 18230: {'lr': 0.00048530993951563186, 'samples': 9334272, 'steps': 18230, 'loss/train': 2.9523425102233887} +03/04/2022 10:52:27 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 10:52:31 - INFO - codeparrot_training - Step 18231: {'lr': 0.0004853081471674159, 'samples': 9334784, 'steps': 18231, 'loss/train': 2.0674586296081543} +03/04/2022 10:52:34 - INFO - codeparrot_training - Step 18232: {'lr': 0.00048530635471317373, 'samples': 9335296, 'steps': 18232, 'loss/train': 2.2321128845214844} +03/04/2022 10:52:35 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 10:52:39 - INFO - codeparrot_training - Step 18233: {'lr': 0.0004853045621529062, 'samples': 9335808, 'steps': 18233, 'loss/train': 2.2057061195373535} +03/04/2022 10:52:42 - INFO - codeparrot_training - Step 18234: {'lr': 0.000485302769486614, 'samples': 9336320, 'steps': 18234, 'loss/train': 1.205553412437439} +03/04/2022 10:52:43 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 10:52:48 - INFO - codeparrot_training - Step 18235: {'lr': 0.000485300976714298, 'samples': 9336832, 'steps': 18235, 'loss/train': 2.614043951034546} +03/04/2022 10:52:51 - INFO - codeparrot_training - Step 18236: {'lr': 0.00048529918383595906, 'samples': 9337344, 'steps': 18236, 'loss/train': 2.301786422729492} +03/04/2022 10:52:52 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 10:52:56 - INFO - codeparrot_training - Step 18237: {'lr': 0.0004852973908515979, 'samples': 9337856, 'steps': 18237, 'loss/train': 2.196226119995117} +03/04/2022 10:52:59 - INFO - codeparrot_training - Step 18238: {'lr': 0.0004852955977612154, 'samples': 9338368, 'steps': 18238, 'loss/train': 1.836904525756836} +03/04/2022 10:53:00 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/04/2022 10:53:05 - INFO - codeparrot_training - Step 18239: {'lr': 0.0004852938045648123, 'samples': 9338880, 'steps': 18239, 'loss/train': 2.1827328205108643} +03/04/2022 10:53:08 - INFO - codeparrot_training - Step 18240: {'lr': 0.0004852920112623895, 'samples': 9339392, 'steps': 18240, 'loss/train': 1.7151637077331543} +03/04/2022 10:53:09 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/04/2022 10:53:13 - INFO - codeparrot_training - Step 18241: {'lr': 0.00048529021785394765, 'samples': 9339904, 'steps': 18241, 'loss/train': 2.133742570877075} +03/04/2022 10:53:16 - INFO - codeparrot_training - Step 18242: {'lr': 0.00048528842433948776, 'samples': 9340416, 'steps': 18242, 'loss/train': 2.045778512954712} +03/04/2022 10:53:17 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/04/2022 10:53:21 - INFO - codeparrot_training - Step 18243: {'lr': 0.00048528663071901047, 'samples': 9340928, 'steps': 18243, 'loss/train': 1.5479552745819092} +03/04/2022 10:53:24 - INFO - codeparrot_training - Step 18244: {'lr': 0.0004852848369925167, 'samples': 9341440, 'steps': 18244, 'loss/train': 2.0801334381103516} +03/04/2022 10:53:25 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 10:53:30 - INFO - codeparrot_training - Step 18245: {'lr': 0.00048528304316000723, 'samples': 9341952, 'steps': 18245, 'loss/train': 2.0605545043945312} +03/04/2022 10:53:33 - INFO - codeparrot_training - Step 18246: {'lr': 0.0004852812492214828, 'samples': 9342464, 'steps': 18246, 'loss/train': 2.2233240604400635} +03/04/2022 10:53:34 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 10:53:38 - INFO - codeparrot_training - Step 18247: {'lr': 0.0004852794551769443, 'samples': 9342976, 'steps': 18247, 'loss/train': 1.9782811403274536} +03/04/2022 10:53:41 - INFO - codeparrot_training - Step 18248: {'lr': 0.0004852776610263925, 'samples': 9343488, 'steps': 18248, 'loss/train': 2.9322197437286377} +03/04/2022 10:53:42 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 10:53:47 - INFO - codeparrot_training - Step 18249: {'lr': 0.0004852758667698282, 'samples': 9344000, 'steps': 18249, 'loss/train': 2.0607337951660156} +03/04/2022 10:53:50 - INFO - codeparrot_training - Step 18250: {'lr': 0.00048527407240725223, 'samples': 9344512, 'steps': 18250, 'loss/train': 1.9452424049377441} +03/04/2022 10:53:50 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/04/2022 10:53:55 - INFO - codeparrot_training - Step 18251: {'lr': 0.0004852722779386654, 'samples': 9345024, 'steps': 18251, 'loss/train': 1.723856806755066} +03/04/2022 10:53:58 - INFO - codeparrot_training - Step 18252: {'lr': 0.00048527048336406855, 'samples': 9345536, 'steps': 18252, 'loss/train': 2.221266984939575} +03/04/2022 10:53:59 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/04/2022 10:54:03 - INFO - codeparrot_training - Step 18253: {'lr': 0.00048526868868346243, 'samples': 9346048, 'steps': 18253, 'loss/train': 1.8505656719207764} +03/04/2022 10:54:07 - INFO - codeparrot_training - Step 18254: {'lr': 0.0004852668938968478, 'samples': 9346560, 'steps': 18254, 'loss/train': 1.4746960401535034} +03/04/2022 10:54:07 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 10:54:12 - INFO - codeparrot_training - Step 18255: {'lr': 0.0004852650990042256, 'samples': 9347072, 'steps': 18255, 'loss/train': 1.4613163471221924} +03/04/2022 10:54:15 - INFO - codeparrot_training - Step 18256: {'lr': 0.0004852633040055966, 'samples': 9347584, 'steps': 18256, 'loss/train': 0.9769079089164734} +03/04/2022 10:54:15 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/04/2022 10:54:20 - INFO - codeparrot_training - Step 18257: {'lr': 0.00048526150890096153, 'samples': 9348096, 'steps': 18257, 'loss/train': 1.9854415655136108} +03/04/2022 10:54:23 - INFO - codeparrot_training - Step 18258: {'lr': 0.0004852597136903213, 'samples': 9348608, 'steps': 18258, 'loss/train': 1.7607790231704712} +03/04/2022 10:54:24 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/04/2022 10:54:29 - INFO - codeparrot_training - Step 18259: {'lr': 0.0004852579183736766, 'samples': 9349120, 'steps': 18259, 'loss/train': 2.0394575595855713} +03/04/2022 10:54:32 - INFO - codeparrot_training - Step 18260: {'lr': 0.00048525612295102836, 'samples': 9349632, 'steps': 18260, 'loss/train': 2.3096461296081543} +03/04/2022 10:54:32 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/04/2022 10:54:37 - INFO - codeparrot_training - Step 18261: {'lr': 0.00048525432742237736, 'samples': 9350144, 'steps': 18261, 'loss/train': 2.093048334121704} +03/04/2022 10:54:41 - INFO - codeparrot_training - Step 18262: {'lr': 0.00048525253178772435, 'samples': 9350656, 'steps': 18262, 'loss/train': 1.3561371564865112} +03/04/2022 10:54:41 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/04/2022 10:54:46 - INFO - codeparrot_training - Step 18263: {'lr': 0.0004852507360470702, 'samples': 9351168, 'steps': 18263, 'loss/train': 2.1919522285461426} +03/04/2022 10:54:49 - INFO - codeparrot_training - Step 18264: {'lr': 0.0004852489402004157, 'samples': 9351680, 'steps': 18264, 'loss/train': 2.740966796875} +03/04/2022 10:54:49 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 10:54:55 - INFO - codeparrot_training - Step 18265: {'lr': 0.0004852471442477617, 'samples': 9352192, 'steps': 18265, 'loss/train': 2.069730758666992} +03/04/2022 10:54:58 - INFO - codeparrot_training - Step 18266: {'lr': 0.0004852453481891089, 'samples': 9352704, 'steps': 18266, 'loss/train': 2.016751527786255} +03/04/2022 10:54:58 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/04/2022 10:55:03 - INFO - codeparrot_training - Step 18267: {'lr': 0.00048524355202445827, 'samples': 9353216, 'steps': 18267, 'loss/train': 1.914576530456543} +03/04/2022 10:55:06 - INFO - codeparrot_training - Step 18268: {'lr': 0.0004852417557538104, 'samples': 9353728, 'steps': 18268, 'loss/train': 1.8839232921600342} +03/04/2022 10:55:07 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/04/2022 10:55:12 - INFO - codeparrot_training - Step 18269: {'lr': 0.00048523995937716625, 'samples': 9354240, 'steps': 18269, 'loss/train': 1.947262167930603} +03/04/2022 10:55:15 - INFO - codeparrot_training - Step 18270: {'lr': 0.0004852381628945267, 'samples': 9354752, 'steps': 18270, 'loss/train': 1.3263734579086304} +03/04/2022 10:55:15 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 10:55:20 - INFO - codeparrot_training - Step 18271: {'lr': 0.0004852363663058924, 'samples': 9355264, 'steps': 18271, 'loss/train': 1.7802315950393677} +03/04/2022 10:55:23 - INFO - codeparrot_training - Step 18272: {'lr': 0.0004852345696112642, 'samples': 9355776, 'steps': 18272, 'loss/train': 2.2371175289154053} +03/04/2022 10:55:23 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/04/2022 10:55:28 - INFO - codeparrot_training - Step 18273: {'lr': 0.00048523277281064295, 'samples': 9356288, 'steps': 18273, 'loss/train': 1.3925259113311768} +03/04/2022 10:55:32 - INFO - codeparrot_training - Step 18274: {'lr': 0.0004852309759040294, 'samples': 9356800, 'steps': 18274, 'loss/train': 2.0915920734405518} +03/04/2022 10:55:32 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/04/2022 10:55:37 - INFO - codeparrot_training - Step 18275: {'lr': 0.00048522917889142446, 'samples': 9357312, 'steps': 18275, 'loss/train': 0.8720462918281555} +03/04/2022 10:55:40 - INFO - codeparrot_training - Step 18276: {'lr': 0.00048522738177282887, 'samples': 9357824, 'steps': 18276, 'loss/train': 1.1411646604537964} +03/04/2022 10:55:40 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/04/2022 10:55:45 - INFO - codeparrot_training - Step 18277: {'lr': 0.0004852255845482435, 'samples': 9358336, 'steps': 18277, 'loss/train': 1.9095391035079956} +03/04/2022 10:55:48 - INFO - codeparrot_training - Step 18278: {'lr': 0.0004852237872176691, 'samples': 9358848, 'steps': 18278, 'loss/train': 2.213942766189575} +03/04/2022 10:55:48 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/04/2022 10:55:54 - INFO - codeparrot_training - Step 18279: {'lr': 0.00048522198978110645, 'samples': 9359360, 'steps': 18279, 'loss/train': 1.8118561506271362} +03/04/2022 10:55:57 - INFO - codeparrot_training - Step 18280: {'lr': 0.0004852201922385564, 'samples': 9359872, 'steps': 18280, 'loss/train': 1.664801836013794} +03/04/2022 10:55:57 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 10:56:02 - INFO - codeparrot_training - Step 18281: {'lr': 0.00048521839459001977, 'samples': 9360384, 'steps': 18281, 'loss/train': 1.5844284296035767} +03/04/2022 10:56:06 - INFO - codeparrot_training - Step 18282: {'lr': 0.0004852165968354973, 'samples': 9360896, 'steps': 18282, 'loss/train': 1.729973316192627} +03/04/2022 10:56:06 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 10:56:11 - INFO - codeparrot_training - Step 18283: {'lr': 0.00048521479897499, 'samples': 9361408, 'steps': 18283, 'loss/train': 1.558517575263977} +03/04/2022 10:56:14 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/04/2022 10:56:16 - INFO - codeparrot_training - Step 18284: {'lr': 0.0004852130010084984, 'samples': 9361920, 'steps': 18284, 'loss/train': 1.6576042175292969} +03/04/2022 10:56:19 - INFO - codeparrot_training - Step 18285: {'lr': 0.0004852112029360235, 'samples': 9362432, 'steps': 18285, 'loss/train': 1.002662181854248} +03/04/2022 10:56:22 - INFO - codeparrot_training - Step 18286: {'lr': 0.0004852094047575661, 'samples': 9362944, 'steps': 18286, 'loss/train': 1.1213048696517944} +03/04/2022 10:56:22 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 10:56:28 - INFO - codeparrot_training - Step 18287: {'lr': 0.00048520760647312696, 'samples': 9363456, 'steps': 18287, 'loss/train': 2.043750524520874} +03/04/2022 10:56:31 - INFO - codeparrot_training - Step 18288: {'lr': 0.00048520580808270687, 'samples': 9363968, 'steps': 18288, 'loss/train': 1.9394551515579224} +03/04/2022 10:56:31 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/04/2022 10:56:36 - INFO - codeparrot_training - Step 18289: {'lr': 0.0004852040095863067, 'samples': 9364480, 'steps': 18289, 'loss/train': 1.3327577114105225} +03/04/2022 10:56:39 - INFO - codeparrot_training - Step 18290: {'lr': 0.0004852022109839273, 'samples': 9364992, 'steps': 18290, 'loss/train': 0.5110849142074585} +03/04/2022 10:56:39 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 10:56:45 - INFO - codeparrot_training - Step 18291: {'lr': 0.0004852004122755693, 'samples': 9365504, 'steps': 18291, 'loss/train': 1.9804211854934692} +03/04/2022 10:56:48 - INFO - codeparrot_training - Step 18292: {'lr': 0.00048519861346123363, 'samples': 9366016, 'steps': 18292, 'loss/train': 2.014268398284912} +03/04/2022 10:56:48 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 10:56:53 - INFO - codeparrot_training - Step 18293: {'lr': 0.0004851968145409211, 'samples': 9366528, 'steps': 18293, 'loss/train': 1.0858033895492554} +03/04/2022 10:56:56 - INFO - codeparrot_training - Step 18294: {'lr': 0.00048519501551463255, 'samples': 9367040, 'steps': 18294, 'loss/train': 1.7102940082550049} +03/04/2022 10:56:57 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/04/2022 10:57:02 - INFO - codeparrot_training - Step 18295: {'lr': 0.0004851932163823688, 'samples': 9367552, 'steps': 18295, 'loss/train': 1.9881410598754883} +03/04/2022 10:57:05 - INFO - codeparrot_training - Step 18296: {'lr': 0.0004851914171441305, 'samples': 9368064, 'steps': 18296, 'loss/train': 2.1276955604553223} +03/04/2022 10:57:05 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/04/2022 10:57:10 - INFO - codeparrot_training - Step 18297: {'lr': 0.00048518961779991866, 'samples': 9368576, 'steps': 18297, 'loss/train': 1.192862868309021} +03/04/2022 10:57:13 - INFO - codeparrot_training - Step 18298: {'lr': 0.00048518781834973405, 'samples': 9369088, 'steps': 18298, 'loss/train': 2.3407680988311768} +03/04/2022 10:57:14 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/04/2022 10:57:18 - INFO - codeparrot_training - Step 18299: {'lr': 0.0004851860187935773, 'samples': 9369600, 'steps': 18299, 'loss/train': 1.3622851371765137} +03/04/2022 10:57:22 - INFO - codeparrot_training - Step 18300: {'lr': 0.0004851842191314494, 'samples': 9370112, 'steps': 18300, 'loss/train': 2.1296513080596924} +03/04/2022 10:57:22 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 10:57:27 - INFO - codeparrot_training - Step 18301: {'lr': 0.0004851824193633512, 'samples': 9370624, 'steps': 18301, 'loss/train': 3.5405852794647217} +03/04/2022 10:57:30 - INFO - codeparrot_training - Step 18302: {'lr': 0.00048518061948928337, 'samples': 9371136, 'steps': 18302, 'loss/train': 1.771193027496338} +03/04/2022 10:57:30 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 10:57:35 - INFO - codeparrot_training - Step 18303: {'lr': 0.0004851788195092468, 'samples': 9371648, 'steps': 18303, 'loss/train': 1.4726768732070923} +03/04/2022 10:57:38 - INFO - codeparrot_training - Step 18304: {'lr': 0.00048517701942324225, 'samples': 9372160, 'steps': 18304, 'loss/train': 1.878865122795105} +03/04/2022 10:57:39 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 10:57:44 - INFO - codeparrot_training - Step 18305: {'lr': 0.00048517521923127063, 'samples': 9372672, 'steps': 18305, 'loss/train': 1.5493512153625488} +03/04/2022 10:57:47 - INFO - codeparrot_training - Step 18306: {'lr': 0.00048517341893333267, 'samples': 9373184, 'steps': 18306, 'loss/train': 2.729825496673584} +03/04/2022 10:57:47 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 10:57:52 - INFO - codeparrot_training - Step 18307: {'lr': 0.0004851716185294291, 'samples': 9373696, 'steps': 18307, 'loss/train': 1.8430007696151733} +03/04/2022 10:57:55 - INFO - codeparrot_training - Step 18308: {'lr': 0.00048516981801956097, 'samples': 9374208, 'steps': 18308, 'loss/train': 2.2501707077026367} +03/04/2022 10:57:56 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/04/2022 10:58:00 - INFO - codeparrot_training - Step 18309: {'lr': 0.00048516801740372886, 'samples': 9374720, 'steps': 18309, 'loss/train': 2.4119529724121094} +03/04/2022 10:58:04 - INFO - codeparrot_training - Step 18310: {'lr': 0.0004851662166819337, 'samples': 9375232, 'steps': 18310, 'loss/train': 1.8136041164398193} +03/04/2022 10:58:04 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 10:58:09 - INFO - codeparrot_training - Step 18311: {'lr': 0.00048516441585417624, 'samples': 9375744, 'steps': 18311, 'loss/train': 1.4453084468841553} +03/04/2022 10:58:12 - INFO - codeparrot_training - Step 18312: {'lr': 0.0004851626149204573, 'samples': 9376256, 'steps': 18312, 'loss/train': 2.648723840713501} +03/04/2022 10:58:13 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/04/2022 10:58:17 - INFO - codeparrot_training - Step 18313: {'lr': 0.0004851608138807778, 'samples': 9376768, 'steps': 18313, 'loss/train': 1.7889841794967651} +03/04/2022 10:58:21 - INFO - codeparrot_training - Step 18314: {'lr': 0.0004851590127351384, 'samples': 9377280, 'steps': 18314, 'loss/train': 2.186408281326294} +03/04/2022 10:58:21 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/04/2022 10:58:26 - INFO - codeparrot_training - Step 18315: {'lr': 0.0004851572114835401, 'samples': 9377792, 'steps': 18315, 'loss/train': 1.7851719856262207} +03/04/2022 10:58:29 - INFO - codeparrot_training - Step 18316: {'lr': 0.0004851554101259834, 'samples': 9378304, 'steps': 18316, 'loss/train': 1.9697951078414917} +03/04/2022 10:58:30 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 10:58:34 - INFO - codeparrot_training - Step 18317: {'lr': 0.00048515360866246943, 'samples': 9378816, 'steps': 18317, 'loss/train': 1.8904918432235718} +03/04/2022 10:58:37 - INFO - codeparrot_training - Step 18318: {'lr': 0.00048515180709299884, 'samples': 9379328, 'steps': 18318, 'loss/train': 1.726677656173706} +03/04/2022 10:58:38 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 10:58:43 - INFO - codeparrot_training - Step 18319: {'lr': 0.0004851500054175725, 'samples': 9379840, 'steps': 18319, 'loss/train': 1.6175761222839355} +03/04/2022 10:58:46 - INFO - codeparrot_training - Step 18320: {'lr': 0.00048514820363619116, 'samples': 9380352, 'steps': 18320, 'loss/train': 1.8298180103302002} +03/04/2022 10:58:46 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 10:58:51 - INFO - codeparrot_training - Step 18321: {'lr': 0.0004851464017488556, 'samples': 9380864, 'steps': 18321, 'loss/train': 1.946031093597412} +03/04/2022 10:58:55 - INFO - codeparrot_training - Step 18322: {'lr': 0.0004851445997555668, 'samples': 9381376, 'steps': 18322, 'loss/train': 1.415920615196228} +03/04/2022 10:58:55 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 10:59:00 - INFO - codeparrot_training - Step 18323: {'lr': 0.00048514279765632547, 'samples': 9381888, 'steps': 18323, 'loss/train': 1.362214207649231} +03/04/2022 10:59:03 - INFO - codeparrot_training - Step 18324: {'lr': 0.0004851409954511324, 'samples': 9382400, 'steps': 18324, 'loss/train': 1.3780734539031982} +03/04/2022 10:59:03 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/04/2022 10:59:08 - INFO - codeparrot_training - Step 18325: {'lr': 0.0004851391931399884, 'samples': 9382912, 'steps': 18325, 'loss/train': 2.1429405212402344} +03/04/2022 10:59:11 - INFO - codeparrot_training - Step 18326: {'lr': 0.0004851373907228943, 'samples': 9383424, 'steps': 18326, 'loss/train': 0.9744027853012085} +03/04/2022 10:59:12 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/04/2022 10:59:17 - INFO - codeparrot_training - Step 18327: {'lr': 0.00048513558819985106, 'samples': 9383936, 'steps': 18327, 'loss/train': 2.1866867542266846} +03/04/2022 10:59:20 - INFO - codeparrot_training - Step 18328: {'lr': 0.0004851337855708592, 'samples': 9384448, 'steps': 18328, 'loss/train': 1.52213716506958} +03/04/2022 10:59:21 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/04/2022 10:59:25 - INFO - codeparrot_training - Step 18329: {'lr': 0.0004851319828359198, 'samples': 9384960, 'steps': 18329, 'loss/train': 2.0765464305877686} +03/04/2022 10:59:29 - INFO - codeparrot_training - Step 18330: {'lr': 0.0004851301799950334, 'samples': 9385472, 'steps': 18330, 'loss/train': 1.6254172325134277} +03/04/2022 10:59:30 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/04/2022 10:59:34 - INFO - codeparrot_training - Step 18331: {'lr': 0.00048512837704820107, 'samples': 9385984, 'steps': 18331, 'loss/train': 0.7720240354537964} +03/04/2022 10:59:37 - INFO - codeparrot_training - Step 18332: {'lr': 0.00048512657399542346, 'samples': 9386496, 'steps': 18332, 'loss/train': 1.239613652229309} +03/04/2022 10:59:38 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/04/2022 10:59:42 - INFO - codeparrot_training - Step 18333: {'lr': 0.0004851247708367015, 'samples': 9387008, 'steps': 18333, 'loss/train': 1.4806938171386719} +03/04/2022 10:59:46 - INFO - codeparrot_training - Step 18334: {'lr': 0.000485122967572036, 'samples': 9387520, 'steps': 18334, 'loss/train': 2.7859201431274414} +03/04/2022 10:59:46 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 10:59:51 - INFO - codeparrot_training - Step 18335: {'lr': 0.0004851211642014276, 'samples': 9388032, 'steps': 18335, 'loss/train': 1.7991938591003418} +03/04/2022 10:59:54 - INFO - codeparrot_training - Step 18336: {'lr': 0.0004851193607248773, 'samples': 9388544, 'steps': 18336, 'loss/train': 1.8204762935638428} +03/04/2022 10:59:54 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 10:59:59 - INFO - codeparrot_training - Step 18337: {'lr': 0.00048511755714238585, 'samples': 9389056, 'steps': 18337, 'loss/train': 1.6833757162094116} +03/04/2022 11:00:02 - INFO - codeparrot_training - Step 18338: {'lr': 0.0004851157534539541, 'samples': 9389568, 'steps': 18338, 'loss/train': 0.8949117660522461} +03/04/2022 11:00:03 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 11:00:08 - INFO - codeparrot_training - Step 18339: {'lr': 0.0004851139496595827, 'samples': 9390080, 'steps': 18339, 'loss/train': 2.561525344848633} +03/04/2022 11:00:11 - INFO - codeparrot_training - Step 18340: {'lr': 0.00048511214575927265, 'samples': 9390592, 'steps': 18340, 'loss/train': 1.8069894313812256} +03/04/2022 11:00:11 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 11:00:16 - INFO - codeparrot_training - Step 18341: {'lr': 0.0004851103417530247, 'samples': 9391104, 'steps': 18341, 'loss/train': 1.712579607963562} +03/04/2022 11:00:19 - INFO - codeparrot_training - Step 18342: {'lr': 0.0004851085376408396, 'samples': 9391616, 'steps': 18342, 'loss/train': 2.138662576675415} +03/04/2022 11:00:20 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 11:00:24 - INFO - codeparrot_training - Step 18343: {'lr': 0.0004851067334227183, 'samples': 9392128, 'steps': 18343, 'loss/train': 1.3638745546340942} +03/04/2022 11:00:27 - INFO - codeparrot_training - Step 18344: {'lr': 0.0004851049290986615, 'samples': 9392640, 'steps': 18344, 'loss/train': 1.8900386095046997} +03/04/2022 11:00:28 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/04/2022 11:00:33 - INFO - codeparrot_training - Step 18345: {'lr': 0.00048510312466867, 'samples': 9393152, 'steps': 18345, 'loss/train': 2.219289541244507} +03/04/2022 11:00:36 - INFO - codeparrot_training - Step 18346: {'lr': 0.0004851013201327448, 'samples': 9393664, 'steps': 18346, 'loss/train': 0.6594041585922241} +03/04/2022 11:00:36 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 11:00:41 - INFO - codeparrot_training - Step 18347: {'lr': 0.0004850995154908864, 'samples': 9394176, 'steps': 18347, 'loss/train': 1.8689993619918823} +03/04/2022 11:00:44 - INFO - codeparrot_training - Step 18348: {'lr': 0.0004850977107430959, 'samples': 9394688, 'steps': 18348, 'loss/train': 2.0344364643096924} +03/04/2022 11:00:45 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 11:00:50 - INFO - codeparrot_training - Step 18349: {'lr': 0.000485095905889374, 'samples': 9395200, 'steps': 18349, 'loss/train': 2.2893478870391846} +03/04/2022 11:00:53 - INFO - codeparrot_training - Step 18350: {'lr': 0.00048509410092972144, 'samples': 9395712, 'steps': 18350, 'loss/train': 2.0365936756134033} +03/04/2022 11:00:53 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/04/2022 11:00:58 - INFO - codeparrot_training - Step 18351: {'lr': 0.0004850922958641392, 'samples': 9396224, 'steps': 18351, 'loss/train': 1.7240798473358154} +03/04/2022 11:01:01 - INFO - codeparrot_training - Step 18352: {'lr': 0.0004850904906926279, 'samples': 9396736, 'steps': 18352, 'loss/train': 2.2109782695770264} +03/04/2022 11:01:01 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/04/2022 11:01:06 - INFO - codeparrot_training - Step 18353: {'lr': 0.0004850886854151885, 'samples': 9397248, 'steps': 18353, 'loss/train': 1.5442615747451782} +03/04/2022 11:01:10 - INFO - codeparrot_training - Step 18354: {'lr': 0.0004850868800318218, 'samples': 9397760, 'steps': 18354, 'loss/train': 1.3746986389160156} +03/04/2022 11:01:10 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/04/2022 11:01:15 - INFO - codeparrot_training - Step 18355: {'lr': 0.00048508507454252846, 'samples': 9398272, 'steps': 18355, 'loss/train': 1.8537578582763672} +03/04/2022 11:01:18 - INFO - codeparrot_training - Step 18356: {'lr': 0.00048508326894730955, 'samples': 9398784, 'steps': 18356, 'loss/train': 1.3993995189666748} +03/04/2022 11:01:19 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 11:01:23 - INFO - codeparrot_training - Step 18357: {'lr': 0.00048508146324616566, 'samples': 9399296, 'steps': 18357, 'loss/train': 2.464998483657837} +03/04/2022 11:01:27 - INFO - codeparrot_training - Step 18358: {'lr': 0.0004850796574390977, 'samples': 9399808, 'steps': 18358, 'loss/train': 1.473577857017517} +03/04/2022 11:01:27 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 11:01:32 - INFO - codeparrot_training - Step 18359: {'lr': 0.0004850778515261065, 'samples': 9400320, 'steps': 18359, 'loss/train': 1.4236046075820923} +03/04/2022 11:01:35 - INFO - codeparrot_training - Step 18360: {'lr': 0.0004850760455071929, 'samples': 9400832, 'steps': 18360, 'loss/train': 1.2305594682693481} +03/04/2022 11:01:36 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 11:01:40 - INFO - codeparrot_training - Step 18361: {'lr': 0.0004850742393823576, 'samples': 9401344, 'steps': 18361, 'loss/train': 2.2984797954559326} +03/04/2022 11:01:43 - INFO - codeparrot_training - Step 18362: {'lr': 0.0004850724331516014, 'samples': 9401856, 'steps': 18362, 'loss/train': 0.4427529573440552} +03/04/2022 11:01:44 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/04/2022 11:01:49 - INFO - codeparrot_training - Step 18363: {'lr': 0.0004850706268149253, 'samples': 9402368, 'steps': 18363, 'loss/train': 2.2801313400268555} +03/04/2022 11:01:52 - INFO - codeparrot_training - Step 18364: {'lr': 0.00048506882037233, 'samples': 9402880, 'steps': 18364, 'loss/train': 1.8894996643066406} +03/04/2022 11:01:57 - INFO - codeparrot_training - Step 18365: {'lr': 0.0004850670138238162, 'samples': 9403392, 'steps': 18365, 'loss/train': 2.2794344425201416} +03/04/2022 11:02:00 - INFO - codeparrot_training - Step 18366: {'lr': 0.00048506520716938496, 'samples': 9403904, 'steps': 18366, 'loss/train': 1.8961193561553955} +03/04/2022 11:02:01 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/04/2022 11:02:06 - INFO - codeparrot_training - Step 18367: {'lr': 0.00048506340040903697, 'samples': 9404416, 'steps': 18367, 'loss/train': 0.9867935180664062} +03/04/2022 11:02:09 - INFO - codeparrot_training - Step 18368: {'lr': 0.00048506159354277294, 'samples': 9404928, 'steps': 18368, 'loss/train': 1.255003809928894} +03/04/2022 11:02:10 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/04/2022 11:02:14 - INFO - codeparrot_training - Step 18369: {'lr': 0.00048505978657059385, 'samples': 9405440, 'steps': 18369, 'loss/train': 1.041564702987671} +03/04/2022 11:02:17 - INFO - codeparrot_training - Step 18370: {'lr': 0.0004850579794925004, 'samples': 9405952, 'steps': 18370, 'loss/train': 0.7095953822135925} +03/04/2022 11:02:18 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/04/2022 11:02:23 - INFO - codeparrot_training - Step 18371: {'lr': 0.0004850561723084935, 'samples': 9406464, 'steps': 18371, 'loss/train': 2.619865894317627} +03/04/2022 11:02:26 - INFO - codeparrot_training - Step 18372: {'lr': 0.0004850543650185739, 'samples': 9406976, 'steps': 18372, 'loss/train': 2.142362117767334} +03/04/2022 11:02:27 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 11:02:31 - INFO - codeparrot_training - Step 18373: {'lr': 0.0004850525576227425, 'samples': 9407488, 'steps': 18373, 'loss/train': 3.424837589263916} +03/04/2022 11:02:34 - INFO - codeparrot_training - Step 18374: {'lr': 0.000485050750121, 'samples': 9408000, 'steps': 18374, 'loss/train': 1.4887712001800537} +03/04/2022 11:02:35 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/04/2022 11:02:39 - INFO - codeparrot_training - Step 18375: {'lr': 0.0004850489425133472, 'samples': 9408512, 'steps': 18375, 'loss/train': 1.8042188882827759} +03/04/2022 11:02:43 - INFO - codeparrot_training - Step 18376: {'lr': 0.000485047134799785, 'samples': 9409024, 'steps': 18376, 'loss/train': 1.0195910930633545} +03/04/2022 11:02:44 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/04/2022 11:02:48 - INFO - codeparrot_training - Step 18377: {'lr': 0.00048504532698031416, 'samples': 9409536, 'steps': 18377, 'loss/train': 2.890273094177246} +03/04/2022 11:02:51 - INFO - codeparrot_training - Step 18378: {'lr': 0.0004850435190549356, 'samples': 9410048, 'steps': 18378, 'loss/train': 1.7224996089935303} +03/04/2022 11:02:52 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 11:02:56 - INFO - codeparrot_training - Step 18379: {'lr': 0.00048504171102365, 'samples': 9410560, 'steps': 18379, 'loss/train': 2.611513376235962} +03/04/2022 11:02:59 - INFO - codeparrot_training - Step 18380: {'lr': 0.0004850399028864583, 'samples': 9411072, 'steps': 18380, 'loss/train': 2.106030225753784} +03/04/2022 11:03:01 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 11:03:05 - INFO - codeparrot_training - Step 18381: {'lr': 0.0004850380946433611, 'samples': 9411584, 'steps': 18381, 'loss/train': 2.270951986312866} +03/04/2022 11:03:08 - INFO - codeparrot_training - Step 18382: {'lr': 0.00048503628629435947, 'samples': 9412096, 'steps': 18382, 'loss/train': 1.838679313659668} +03/04/2022 11:03:09 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 11:03:13 - INFO - codeparrot_training - Step 18383: {'lr': 0.0004850344778394541, 'samples': 9412608, 'steps': 18383, 'loss/train': 1.0550923347473145} +03/04/2022 11:03:16 - INFO - codeparrot_training - Step 18384: {'lr': 0.0004850326692786459, 'samples': 9413120, 'steps': 18384, 'loss/train': 1.6507469415664673} +03/04/2022 11:03:18 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/04/2022 11:03:22 - INFO - codeparrot_training - Step 18385: {'lr': 0.00048503086061193546, 'samples': 9413632, 'steps': 18385, 'loss/train': 1.7794790267944336} +03/04/2022 11:03:25 - INFO - codeparrot_training - Step 18386: {'lr': 0.0004850290518393238, 'samples': 9414144, 'steps': 18386, 'loss/train': 1.6199897527694702} +03/04/2022 11:03:26 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 11:03:30 - INFO - codeparrot_training - Step 18387: {'lr': 0.0004850272429608117, 'samples': 9414656, 'steps': 18387, 'loss/train': 2.7739269733428955} +03/04/2022 11:03:33 - INFO - codeparrot_training - Step 18388: {'lr': 0.0004850254339764, 'samples': 9415168, 'steps': 18388, 'loss/train': 2.6320977210998535} +03/04/2022 11:03:34 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 11:03:38 - INFO - codeparrot_training - Step 18389: {'lr': 0.00048502362488608933, 'samples': 9415680, 'steps': 18389, 'loss/train': 1.6955331563949585} +03/04/2022 11:03:42 - INFO - codeparrot_training - Step 18390: {'lr': 0.0004850218156898807, 'samples': 9416192, 'steps': 18390, 'loss/train': 2.1118922233581543} +03/04/2022 11:03:42 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 11:03:47 - INFO - codeparrot_training - Step 18391: {'lr': 0.00048502000638777487, 'samples': 9416704, 'steps': 18391, 'loss/train': 2.3155555725097656} +03/04/2022 11:03:50 - INFO - codeparrot_training - Step 18392: {'lr': 0.0004850181969797727, 'samples': 9417216, 'steps': 18392, 'loss/train': 2.1229631900787354} +03/04/2022 11:03:51 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/04/2022 11:03:55 - INFO - codeparrot_training - Step 18393: {'lr': 0.00048501638746587493, 'samples': 9417728, 'steps': 18393, 'loss/train': 0.9226692914962769} +03/04/2022 11:03:58 - INFO - codeparrot_training - Step 18394: {'lr': 0.0004850145778460824, 'samples': 9418240, 'steps': 18394, 'loss/train': 2.2738592624664307} +03/04/2022 11:04:00 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/04/2022 11:04:04 - INFO - codeparrot_training - Step 18395: {'lr': 0.00048501276812039585, 'samples': 9418752, 'steps': 18395, 'loss/train': 2.171638250350952} +03/04/2022 11:04:07 - INFO - codeparrot_training - Step 18396: {'lr': 0.00048501095828881627, 'samples': 9419264, 'steps': 18396, 'loss/train': 2.60176157951355} +03/04/2022 11:04:08 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 11:04:12 - INFO - codeparrot_training - Step 18397: {'lr': 0.00048500914835134434, 'samples': 9419776, 'steps': 18397, 'loss/train': 2.1515607833862305} +03/04/2022 11:04:15 - INFO - codeparrot_training - Step 18398: {'lr': 0.00048500733830798094, 'samples': 9420288, 'steps': 18398, 'loss/train': 2.244356870651245} +03/04/2022 11:04:17 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/04/2022 11:04:21 - INFO - codeparrot_training - Step 18399: {'lr': 0.00048500552815872687, 'samples': 9420800, 'steps': 18399, 'loss/train': 1.870226263999939} +03/04/2022 11:04:24 - INFO - codeparrot_training - Step 18400: {'lr': 0.0004850037179035829, 'samples': 9421312, 'steps': 18400, 'loss/train': 0.8520978689193726} +03/04/2022 11:04:26 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/04/2022 11:04:29 - INFO - codeparrot_training - Step 18401: {'lr': 0.00048500190754254994, 'samples': 9421824, 'steps': 18401, 'loss/train': 2.917323350906372} +03/04/2022 11:04:32 - INFO - codeparrot_training - Step 18402: {'lr': 0.00048500009707562865, 'samples': 9422336, 'steps': 18402, 'loss/train': 2.520568370819092} +03/04/2022 11:04:34 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 11:04:38 - INFO - codeparrot_training - Step 18403: {'lr': 0.00048499828650281994, 'samples': 9422848, 'steps': 18403, 'loss/train': 1.4298882484436035} +03/04/2022 11:04:41 - INFO - codeparrot_training - Step 18404: {'lr': 0.00048499647582412475, 'samples': 9423360, 'steps': 18404, 'loss/train': 1.807638168334961} +03/04/2022 11:04:43 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 11:04:46 - INFO - codeparrot_training - Step 18405: {'lr': 0.0004849946650395437, 'samples': 9423872, 'steps': 18405, 'loss/train': 2.1311452388763428} +03/04/2022 11:04:49 - INFO - codeparrot_training - Step 18406: {'lr': 0.0004849928541490777, 'samples': 9424384, 'steps': 18406, 'loss/train': 2.3526368141174316} +03/04/2022 11:04:51 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 11:04:54 - INFO - codeparrot_training - Step 18407: {'lr': 0.0004849910431527275, 'samples': 9424896, 'steps': 18407, 'loss/train': 4.080538749694824} +03/04/2022 11:04:58 - INFO - codeparrot_training - Step 18408: {'lr': 0.000484989232050494, 'samples': 9425408, 'steps': 18408, 'loss/train': 2.182673454284668} +03/04/2022 11:04:59 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 11:05:03 - INFO - codeparrot_training - Step 18409: {'lr': 0.00048498742084237796, 'samples': 9425920, 'steps': 18409, 'loss/train': 1.7661281824111938} +03/04/2022 11:05:06 - INFO - codeparrot_training - Step 18410: {'lr': 0.00048498560952838025, 'samples': 9426432, 'steps': 18410, 'loss/train': 1.5193949937820435} +03/04/2022 11:05:08 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/04/2022 11:05:11 - INFO - codeparrot_training - Step 18411: {'lr': 0.00048498379810850157, 'samples': 9426944, 'steps': 18411, 'loss/train': 1.077130675315857} +03/04/2022 11:05:14 - INFO - codeparrot_training - Step 18412: {'lr': 0.0004849819865827429, 'samples': 9427456, 'steps': 18412, 'loss/train': 1.9507781267166138} +03/04/2022 11:05:16 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 11:05:20 - INFO - codeparrot_training - Step 18413: {'lr': 0.0004849801749511049, 'samples': 9427968, 'steps': 18413, 'loss/train': 1.5407238006591797} +03/04/2022 11:05:23 - INFO - codeparrot_training - Step 18414: {'lr': 0.00048497836321358855, 'samples': 9428480, 'steps': 18414, 'loss/train': 1.251877784729004} +03/04/2022 11:05:24 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/04/2022 11:05:28 - INFO - codeparrot_training - Step 18415: {'lr': 0.00048497655137019454, 'samples': 9428992, 'steps': 18415, 'loss/train': 2.5805718898773193} +03/04/2022 11:05:31 - INFO - codeparrot_training - Step 18416: {'lr': 0.0004849747394209237, 'samples': 9429504, 'steps': 18416, 'loss/train': 1.6916395425796509} +03/04/2022 11:05:33 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/04/2022 11:05:37 - INFO - codeparrot_training - Step 18417: {'lr': 0.00048497292736577685, 'samples': 9430016, 'steps': 18417, 'loss/train': 1.8639475107192993} +03/04/2022 11:05:40 - INFO - codeparrot_training - Step 18418: {'lr': 0.0004849711152047549, 'samples': 9430528, 'steps': 18418, 'loss/train': 1.8788647651672363} +03/04/2022 11:05:41 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 11:05:45 - INFO - codeparrot_training - Step 18419: {'lr': 0.0004849693029378585, 'samples': 9431040, 'steps': 18419, 'loss/train': 1.8164551258087158} +03/04/2022 11:05:48 - INFO - codeparrot_training - Step 18420: {'lr': 0.0004849674905650886, 'samples': 9431552, 'steps': 18420, 'loss/train': 2.2755930423736572} +03/04/2022 11:05:50 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 11:05:54 - INFO - codeparrot_training - Step 18421: {'lr': 0.000484965678086446, 'samples': 9432064, 'steps': 18421, 'loss/train': 2.435889959335327} +03/04/2022 11:05:57 - INFO - codeparrot_training - Step 18422: {'lr': 0.0004849638655019315, 'samples': 9432576, 'steps': 18422, 'loss/train': 2.2917630672454834} +03/04/2022 11:05:58 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/04/2022 11:06:02 - INFO - codeparrot_training - Step 18423: {'lr': 0.0004849620528115458, 'samples': 9433088, 'steps': 18423, 'loss/train': 1.7281819581985474} +03/04/2022 11:06:05 - INFO - codeparrot_training - Step 18424: {'lr': 0.0004849602400152899, 'samples': 9433600, 'steps': 18424, 'loss/train': 1.268783450126648} +03/04/2022 11:06:07 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/04/2022 11:06:11 - INFO - codeparrot_training - Step 18425: {'lr': 0.0004849584271131646, 'samples': 9434112, 'steps': 18425, 'loss/train': 1.215120792388916} +03/04/2022 11:06:14 - INFO - codeparrot_training - Step 18426: {'lr': 0.00048495661410517056, 'samples': 9434624, 'steps': 18426, 'loss/train': 1.0621538162231445} +03/04/2022 11:06:15 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/04/2022 11:06:19 - INFO - codeparrot_training - Step 18427: {'lr': 0.0004849548009913087, 'samples': 9435136, 'steps': 18427, 'loss/train': 1.845723032951355} +03/04/2022 11:06:22 - INFO - codeparrot_training - Step 18428: {'lr': 0.00048495298777157994, 'samples': 9435648, 'steps': 18428, 'loss/train': 1.743212103843689} +03/04/2022 11:06:24 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/04/2022 11:06:28 - INFO - codeparrot_training - Step 18429: {'lr': 0.0004849511744459849, 'samples': 9436160, 'steps': 18429, 'loss/train': 1.881903886795044} +03/04/2022 11:06:31 - INFO - codeparrot_training - Step 18430: {'lr': 0.00048494936101452446, 'samples': 9436672, 'steps': 18430, 'loss/train': 2.280258893966675} +03/04/2022 11:06:33 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 11:06:36 - INFO - codeparrot_training - Step 18431: {'lr': 0.00048494754747719954, 'samples': 9437184, 'steps': 18431, 'loss/train': 2.0006377696990967} +03/04/2022 11:06:39 - INFO - codeparrot_training - Step 18432: {'lr': 0.00048494573383401084, 'samples': 9437696, 'steps': 18432, 'loss/train': 2.024935007095337} +03/04/2022 11:06:42 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/04/2022 11:06:45 - INFO - codeparrot_training - Step 18433: {'lr': 0.0004849439200849592, 'samples': 9438208, 'steps': 18433, 'loss/train': 1.4845633506774902} +03/04/2022 11:06:48 - INFO - codeparrot_training - Step 18434: {'lr': 0.0004849421062300455, 'samples': 9438720, 'steps': 18434, 'loss/train': 2.6267776489257812} +03/04/2022 11:06:50 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 11:06:53 - INFO - codeparrot_training - Step 18435: {'lr': 0.0004849402922692705, 'samples': 9439232, 'steps': 18435, 'loss/train': 1.8070685863494873} +03/04/2022 11:06:56 - INFO - codeparrot_training - Step 18436: {'lr': 0.000484938478202635, 'samples': 9439744, 'steps': 18436, 'loss/train': 1.5213689804077148} +03/04/2022 11:06:58 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/04/2022 11:07:02 - INFO - codeparrot_training - Step 18437: {'lr': 0.0004849366640301399, 'samples': 9440256, 'steps': 18437, 'loss/train': 1.969919204711914} +03/04/2022 11:07:05 - INFO - codeparrot_training - Step 18438: {'lr': 0.00048493484975178593, 'samples': 9440768, 'steps': 18438, 'loss/train': 1.0668004751205444} +03/04/2022 11:07:07 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/04/2022 11:07:10 - INFO - codeparrot_training - Step 18439: {'lr': 0.00048493303536757394, 'samples': 9441280, 'steps': 18439, 'loss/train': 2.2972185611724854} +03/04/2022 11:07:13 - INFO - codeparrot_training - Step 18440: {'lr': 0.00048493122087750473, 'samples': 9441792, 'steps': 18440, 'loss/train': 0.6279820799827576} +03/04/2022 11:07:15 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/04/2022 11:07:19 - INFO - codeparrot_training - Step 18441: {'lr': 0.0004849294062815792, 'samples': 9442304, 'steps': 18441, 'loss/train': 1.1119056940078735} +03/04/2022 11:07:22 - INFO - codeparrot_training - Step 18442: {'lr': 0.000484927591579798, 'samples': 9442816, 'steps': 18442, 'loss/train': 1.4312788248062134} +03/04/2022 11:07:24 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 11:07:27 - INFO - codeparrot_training - Step 18443: {'lr': 0.0004849257767721622, 'samples': 9443328, 'steps': 18443, 'loss/train': 1.7428877353668213} +03/04/2022 11:07:30 - INFO - codeparrot_training - Step 18444: {'lr': 0.00048492396185867236, 'samples': 9443840, 'steps': 18444, 'loss/train': 1.4280723333358765} +03/04/2022 11:07:32 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/04/2022 11:07:35 - INFO - codeparrot_training - Step 18445: {'lr': 0.0004849221468393294, 'samples': 9444352, 'steps': 18445, 'loss/train': 1.5985113382339478} +03/04/2022 11:07:39 - INFO - codeparrot_training - Step 18446: {'lr': 0.00048492033171413425, 'samples': 9444864, 'steps': 18446, 'loss/train': 1.890008807182312} +03/04/2022 11:07:40 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/04/2022 11:07:44 - INFO - codeparrot_training - Step 18447: {'lr': 0.00048491851648308756, 'samples': 9445376, 'steps': 18447, 'loss/train': 1.81052827835083} +03/04/2022 11:07:47 - INFO - codeparrot_training - Step 18448: {'lr': 0.00048491670114619026, 'samples': 9445888, 'steps': 18448, 'loss/train': 0.9550400376319885} +03/04/2022 11:07:49 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/04/2022 11:07:52 - INFO - codeparrot_training - Step 18449: {'lr': 0.000484914885703443, 'samples': 9446400, 'steps': 18449, 'loss/train': 2.0922329425811768} +03/04/2022 11:07:56 - INFO - codeparrot_training - Step 18450: {'lr': 0.00048491307015484684, 'samples': 9446912, 'steps': 18450, 'loss/train': 2.092632532119751} +03/04/2022 11:07:58 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 11:08:01 - INFO - codeparrot_training - Step 18451: {'lr': 0.0004849112545004024, 'samples': 9447424, 'steps': 18451, 'loss/train': 1.9754749536514282} +03/04/2022 11:08:04 - INFO - codeparrot_training - Step 18452: {'lr': 0.00048490943874011054, 'samples': 9447936, 'steps': 18452, 'loss/train': 2.3035242557525635} +03/04/2022 11:08:06 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/04/2022 11:08:09 - INFO - codeparrot_training - Step 18453: {'lr': 0.00048490762287397215, 'samples': 9448448, 'steps': 18453, 'loss/train': 2.4951043128967285} +03/04/2022 11:08:13 - INFO - codeparrot_training - Step 18454: {'lr': 0.00048490580690198804, 'samples': 9448960, 'steps': 18454, 'loss/train': 2.146663188934326} +03/04/2022 11:08:15 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 11:08:18 - INFO - codeparrot_training - Step 18455: {'lr': 0.000484903990824159, 'samples': 9449472, 'steps': 18455, 'loss/train': 2.28841233253479} +03/04/2022 11:08:21 - INFO - codeparrot_training - Step 18456: {'lr': 0.0004849021746404859, 'samples': 9449984, 'steps': 18456, 'loss/train': 2.623843193054199} +03/04/2022 11:08:23 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 11:08:26 - INFO - codeparrot_training - Step 18457: {'lr': 0.00048490035835096936, 'samples': 9450496, 'steps': 18457, 'loss/train': 1.6935759782791138} +03/04/2022 11:08:30 - INFO - codeparrot_training - Step 18458: {'lr': 0.0004848985419556104, 'samples': 9451008, 'steps': 18458, 'loss/train': 1.5955548286437988} +03/04/2022 11:08:32 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 11:08:35 - INFO - codeparrot_training - Step 18459: {'lr': 0.0004848967254544099, 'samples': 9451520, 'steps': 18459, 'loss/train': 1.4711685180664062} +03/04/2022 11:08:38 - INFO - codeparrot_training - Step 18460: {'lr': 0.00048489490884736844, 'samples': 9452032, 'steps': 18460, 'loss/train': 2.0197815895080566} +03/04/2022 11:08:40 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/04/2022 11:08:43 - INFO - codeparrot_training - Step 18461: {'lr': 0.00048489309213448696, 'samples': 9452544, 'steps': 18461, 'loss/train': 2.737483501434326} +03/04/2022 11:08:46 - INFO - codeparrot_training - Step 18462: {'lr': 0.00048489127531576627, 'samples': 9453056, 'steps': 18462, 'loss/train': 2.3246302604675293} +03/04/2022 11:08:49 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/04/2022 11:08:52 - INFO - codeparrot_training - Step 18463: {'lr': 0.0004848894583912072, 'samples': 9453568, 'steps': 18463, 'loss/train': 2.0120387077331543} +03/04/2022 11:08:55 - INFO - codeparrot_training - Step 18464: {'lr': 0.00048488764136081063, 'samples': 9454080, 'steps': 18464, 'loss/train': 2.274989128112793} +03/04/2022 11:08:57 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 11:09:00 - INFO - codeparrot_training - Step 18465: {'lr': 0.00048488582422457726, 'samples': 9454592, 'steps': 18465, 'loss/train': 1.391667366027832} +03/04/2022 11:09:03 - INFO - codeparrot_training - Step 18466: {'lr': 0.000484884006982508, 'samples': 9455104, 'steps': 18466, 'loss/train': 2.4340569972991943} +03/04/2022 11:09:05 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/04/2022 11:09:09 - INFO - codeparrot_training - Step 18467: {'lr': 0.0004848821896346036, 'samples': 9455616, 'steps': 18467, 'loss/train': 1.4563145637512207} +03/04/2022 11:09:12 - INFO - codeparrot_training - Step 18468: {'lr': 0.0004848803721808649, 'samples': 9456128, 'steps': 18468, 'loss/train': 2.0214438438415527} +03/04/2022 11:09:14 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/04/2022 11:09:17 - INFO - codeparrot_training - Step 18469: {'lr': 0.0004848785546212927, 'samples': 9456640, 'steps': 18469, 'loss/train': 1.5536054372787476} +03/04/2022 11:09:20 - INFO - codeparrot_training - Step 18470: {'lr': 0.00048487673695588794, 'samples': 9457152, 'steps': 18470, 'loss/train': 1.899634838104248} +03/04/2022 11:09:22 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/04/2022 11:09:26 - INFO - codeparrot_training - Step 18471: {'lr': 0.00048487491918465135, 'samples': 9457664, 'steps': 18471, 'loss/train': 1.238110899925232} +03/04/2022 11:09:29 - INFO - codeparrot_training - Step 18472: {'lr': 0.00048487310130758366, 'samples': 9458176, 'steps': 18472, 'loss/train': 0.931202232837677} +03/04/2022 11:09:30 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/04/2022 11:09:34 - INFO - codeparrot_training - Step 18473: {'lr': 0.00048487128332468576, 'samples': 9458688, 'steps': 18473, 'loss/train': 2.0761559009552} +03/04/2022 11:09:37 - INFO - codeparrot_training - Step 18474: {'lr': 0.00048486946523595856, 'samples': 9459200, 'steps': 18474, 'loss/train': 1.456026315689087} +03/04/2022 11:09:39 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 11:09:43 - INFO - codeparrot_training - Step 18475: {'lr': 0.00048486764704140276, 'samples': 9459712, 'steps': 18475, 'loss/train': 1.9242653846740723} +03/04/2022 11:09:46 - INFO - codeparrot_training - Step 18476: {'lr': 0.00048486582874101924, 'samples': 9460224, 'steps': 18476, 'loss/train': 2.184377670288086} +03/04/2022 11:09:47 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/04/2022 11:09:51 - INFO - codeparrot_training - Step 18477: {'lr': 0.0004848640103348088, 'samples': 9460736, 'steps': 18477, 'loss/train': 1.8564081192016602} +03/04/2022 11:09:54 - INFO - codeparrot_training - Step 18478: {'lr': 0.00048486219182277226, 'samples': 9461248, 'steps': 18478, 'loss/train': 2.330422878265381} +03/04/2022 11:09:56 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 11:09:59 - INFO - codeparrot_training - Step 18479: {'lr': 0.00048486037320491043, 'samples': 9461760, 'steps': 18479, 'loss/train': 1.584395170211792} +03/04/2022 11:10:03 - INFO - codeparrot_training - Step 18480: {'lr': 0.0004848585544812242, 'samples': 9462272, 'steps': 18480, 'loss/train': 1.6666456460952759} +03/04/2022 11:10:04 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 11:10:08 - INFO - codeparrot_training - Step 18481: {'lr': 0.0004848567356517143, 'samples': 9462784, 'steps': 18481, 'loss/train': 1.4618659019470215} +03/04/2022 11:10:11 - INFO - codeparrot_training - Step 18482: {'lr': 0.00048485491671638146, 'samples': 9463296, 'steps': 18482, 'loss/train': 2.1894304752349854} +03/04/2022 11:10:13 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/04/2022 11:10:16 - INFO - codeparrot_training - Step 18483: {'lr': 0.0004848530976752268, 'samples': 9463808, 'steps': 18483, 'loss/train': 2.0315933227539062} +03/04/2022 11:10:19 - INFO - codeparrot_training - Step 18484: {'lr': 0.0004848512785282508, 'samples': 9464320, 'steps': 18484, 'loss/train': 1.6910381317138672} +03/04/2022 11:10:21 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 11:10:25 - INFO - codeparrot_training - Step 18485: {'lr': 0.00048484945927545456, 'samples': 9464832, 'steps': 18485, 'loss/train': 0.8209490776062012} +03/04/2022 11:10:28 - INFO - codeparrot_training - Step 18486: {'lr': 0.0004848476399168387, 'samples': 9465344, 'steps': 18486, 'loss/train': 1.7438864707946777} +03/04/2022 11:10:30 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/04/2022 11:10:34 - INFO - codeparrot_training - Step 18487: {'lr': 0.0004848458204524042, 'samples': 9465856, 'steps': 18487, 'loss/train': 1.479312539100647} +03/04/2022 11:10:37 - INFO - codeparrot_training - Step 18488: {'lr': 0.00048484400088215173, 'samples': 9466368, 'steps': 18488, 'loss/train': 1.8798394203186035} +03/04/2022 11:10:40 - INFO - codeparrot_training - Step 18489: {'lr': 0.0004848421812060821, 'samples': 9466880, 'steps': 18489, 'loss/train': 1.4872218370437622} +03/04/2022 11:10:42 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/04/2022 11:10:45 - INFO - codeparrot_training - Step 18490: {'lr': 0.0004848403614241964, 'samples': 9467392, 'steps': 18490, 'loss/train': 1.8513764142990112} +03/04/2022 11:10:49 - INFO - codeparrot_training - Step 18491: {'lr': 0.00048483854153649514, 'samples': 9467904, 'steps': 18491, 'loss/train': 0.6446804404258728} +03/04/2022 11:10:51 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/04/2022 11:10:54 - INFO - codeparrot_training - Step 18492: {'lr': 0.0004848367215429793, 'samples': 9468416, 'steps': 18492, 'loss/train': 1.9910953044891357} +03/04/2022 11:10:57 - INFO - codeparrot_training - Step 18493: {'lr': 0.0004848349014436496, 'samples': 9468928, 'steps': 18493, 'loss/train': 1.5275315046310425} +03/04/2022 11:10:59 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/04/2022 11:11:02 - INFO - codeparrot_training - Step 18494: {'lr': 0.00048483308123850697, 'samples': 9469440, 'steps': 18494, 'loss/train': 1.7263447046279907} +03/04/2022 11:11:06 - INFO - codeparrot_training - Step 18495: {'lr': 0.00048483126092755215, 'samples': 9469952, 'steps': 18495, 'loss/train': 2.0994651317596436} +03/04/2022 11:11:08 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/04/2022 11:11:11 - INFO - codeparrot_training - Step 18496: {'lr': 0.000484829440510786, 'samples': 9470464, 'steps': 18496, 'loss/train': 2.1174917221069336} +03/04/2022 11:11:14 - INFO - codeparrot_training - Step 18497: {'lr': 0.0004848276199882093, 'samples': 9470976, 'steps': 18497, 'loss/train': 2.03486704826355} +03/04/2022 11:11:16 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/04/2022 11:11:19 - INFO - codeparrot_training - Step 18498: {'lr': 0.0004848257993598229, 'samples': 9471488, 'steps': 18498, 'loss/train': 2.219670295715332} +03/04/2022 11:11:22 - INFO - codeparrot_training - Step 18499: {'lr': 0.00048482397862562764, 'samples': 9472000, 'steps': 18499, 'loss/train': 2.98835825920105} +03/04/2022 11:11:25 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/04/2022 11:11:28 - INFO - codeparrot_training - Step 18500: {'lr': 0.00048482215778562434, 'samples': 9472512, 'steps': 18500, 'loss/train': 2.662452220916748} +03/04/2022 11:11:31 - INFO - codeparrot_training - Step 18501: {'lr': 0.00048482033683981376, 'samples': 9473024, 'steps': 18501, 'loss/train': 2.0756499767303467} +03/04/2022 11:11:33 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/04/2022 11:11:36 - INFO - codeparrot_training - Step 18502: {'lr': 0.0004848185157881968, 'samples': 9473536, 'steps': 18502, 'loss/train': 2.126345634460449} +03/04/2022 11:11:39 - INFO - codeparrot_training - Step 18503: {'lr': 0.0004848166946307742, 'samples': 9474048, 'steps': 18503, 'loss/train': 1.6209193468093872} +03/04/2022 11:11:41 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 11:11:44 - INFO - codeparrot_training - Step 18504: {'lr': 0.0004848148733675468, 'samples': 9474560, 'steps': 18504, 'loss/train': 2.2926669120788574} +03/04/2022 11:11:48 - INFO - codeparrot_training - Step 18505: {'lr': 0.0004848130519985155, 'samples': 9475072, 'steps': 18505, 'loss/train': 2.561042308807373} +03/04/2022 11:11:49 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/04/2022 11:11:53 - INFO - codeparrot_training - Step 18506: {'lr': 0.000484811230523681, 'samples': 9475584, 'steps': 18506, 'loss/train': 1.04912269115448} +03/04/2022 11:11:56 - INFO - codeparrot_training - Step 18507: {'lr': 0.00048480940894304425, 'samples': 9476096, 'steps': 18507, 'loss/train': 1.7873740196228027} +03/04/2022 11:11:59 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/04/2022 11:12:01 - INFO - codeparrot_training - Step 18508: {'lr': 0.000484807587256606, 'samples': 9476608, 'steps': 18508, 'loss/train': 2.349071741104126} +03/04/2022 11:12:05 - INFO - codeparrot_training - Step 18509: {'lr': 0.00048480576546436707, 'samples': 9477120, 'steps': 18509, 'loss/train': 1.917291522026062} +03/04/2022 11:12:07 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 11:12:10 - INFO - codeparrot_training - Step 18510: {'lr': 0.0004848039435663282, 'samples': 9477632, 'steps': 18510, 'loss/train': 2.1191718578338623} +03/04/2022 11:12:13 - INFO - codeparrot_training - Step 18511: {'lr': 0.0004848021215624904, 'samples': 9478144, 'steps': 18511, 'loss/train': 2.095357656478882} +03/04/2022 11:12:15 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/04/2022 11:12:18 - INFO - codeparrot_training - Step 18512: {'lr': 0.0004848002994528543, 'samples': 9478656, 'steps': 18512, 'loss/train': 2.636151075363159} +03/04/2022 11:12:21 - INFO - codeparrot_training - Step 18513: {'lr': 0.0004847984772374209, 'samples': 9479168, 'steps': 18513, 'loss/train': 1.6719088554382324} +03/04/2022 11:12:24 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 11:12:27 - INFO - codeparrot_training - Step 18514: {'lr': 0.0004847966549161909, 'samples': 9479680, 'steps': 18514, 'loss/train': 2.065788984298706} +03/04/2022 11:12:30 - INFO - codeparrot_training - Step 18515: {'lr': 0.0004847948324891651, 'samples': 9480192, 'steps': 18515, 'loss/train': 2.217582941055298} +03/04/2022 11:12:32 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 11:12:35 - INFO - codeparrot_training - Step 18516: {'lr': 0.00048479300995634447, 'samples': 9480704, 'steps': 18516, 'loss/train': 2.1215009689331055} +03/04/2022 11:12:38 - INFO - codeparrot_training - Step 18517: {'lr': 0.0004847911873177296, 'samples': 9481216, 'steps': 18517, 'loss/train': 1.1368504762649536} +03/04/2022 11:12:40 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/04/2022 11:12:44 - INFO - codeparrot_training - Step 18518: {'lr': 0.0004847893645733216, 'samples': 9481728, 'steps': 18518, 'loss/train': 2.0612235069274902} +03/04/2022 11:12:47 - INFO - codeparrot_training - Step 18519: {'lr': 0.000484787541723121, 'samples': 9482240, 'steps': 18519, 'loss/train': 1.5083931684494019} +03/04/2022 11:12:48 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 11:12:52 - INFO - codeparrot_training - Step 18520: {'lr': 0.0004847857187671288, 'samples': 9482752, 'steps': 18520, 'loss/train': 2.049680471420288} +03/04/2022 11:12:55 - INFO - codeparrot_training - Step 18521: {'lr': 0.00048478389570534575, 'samples': 9483264, 'steps': 18521, 'loss/train': 0.8156064748764038} +03/04/2022 11:12:57 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/04/2022 11:13:01 - INFO - codeparrot_training - Step 18522: {'lr': 0.0004847820725377728, 'samples': 9483776, 'steps': 18522, 'loss/train': 1.9106088876724243} +03/04/2022 11:13:04 - INFO - codeparrot_training - Step 18523: {'lr': 0.0004847802492644106, 'samples': 9484288, 'steps': 18523, 'loss/train': 1.9337154626846313} +03/04/2022 11:13:06 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 11:13:09 - INFO - codeparrot_training - Step 18524: {'lr': 0.00048477842588526, 'samples': 9484800, 'steps': 18524, 'loss/train': 2.2174770832061768} +03/04/2022 11:13:12 - INFO - codeparrot_training - Step 18525: {'lr': 0.000484776602400322, 'samples': 9485312, 'steps': 18525, 'loss/train': 2.1930646896362305} +03/04/2022 11:13:14 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/04/2022 11:13:18 - INFO - codeparrot_training - Step 18526: {'lr': 0.00048477477880959715, 'samples': 9485824, 'steps': 18526, 'loss/train': 1.790900707244873} +03/04/2022 11:13:21 - INFO - codeparrot_training - Step 18527: {'lr': 0.00048477295511308645, 'samples': 9486336, 'steps': 18527, 'loss/train': 1.7812747955322266} +03/04/2022 11:13:23 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 11:13:26 - INFO - codeparrot_training - Step 18528: {'lr': 0.0004847711313107907, 'samples': 9486848, 'steps': 18528, 'loss/train': 1.9633147716522217} +03/04/2022 11:13:29 - INFO - codeparrot_training - Step 18529: {'lr': 0.0004847693074027106, 'samples': 9487360, 'steps': 18529, 'loss/train': 2.104095220565796} +03/04/2022 11:13:31 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 11:13:34 - INFO - codeparrot_training - Step 18530: {'lr': 0.0004847674833888472, 'samples': 9487872, 'steps': 18530, 'loss/train': 1.7089003324508667} +03/04/2022 11:13:38 - INFO - codeparrot_training - Step 18531: {'lr': 0.0004847656592692012, 'samples': 9488384, 'steps': 18531, 'loss/train': 1.050249457359314} +03/04/2022 11:13:39 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/04/2022 11:13:43 - INFO - codeparrot_training - Step 18532: {'lr': 0.00048476383504377337, 'samples': 9488896, 'steps': 18532, 'loss/train': 2.0180017948150635} +03/04/2022 11:13:46 - INFO - codeparrot_training - Step 18533: {'lr': 0.00048476201071256453, 'samples': 9489408, 'steps': 18533, 'loss/train': 1.8865082263946533} +03/04/2022 11:13:48 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/04/2022 11:13:51 - INFO - codeparrot_training - Step 18534: {'lr': 0.0004847601862755756, 'samples': 9489920, 'steps': 18534, 'loss/train': 0.2787712812423706} +03/04/2022 11:13:54 - INFO - codeparrot_training - Step 18535: {'lr': 0.0004847583617328074, 'samples': 9490432, 'steps': 18535, 'loss/train': 2.752586603164673} +03/04/2022 11:13:57 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 11:14:00 - INFO - codeparrot_training - Step 18536: {'lr': 0.00048475653708426067, 'samples': 9490944, 'steps': 18536, 'loss/train': 1.963914394378662} +03/04/2022 11:14:03 - INFO - codeparrot_training - Step 18537: {'lr': 0.00048475471232993625, 'samples': 9491456, 'steps': 18537, 'loss/train': 1.4105111360549927} +03/04/2022 11:14:05 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/04/2022 11:14:08 - INFO - codeparrot_training - Step 18538: {'lr': 0.000484752887469835, 'samples': 9491968, 'steps': 18538, 'loss/train': 1.9283291101455688} +03/04/2022 11:14:11 - INFO - codeparrot_training - Step 18539: {'lr': 0.0004847510625039577, 'samples': 9492480, 'steps': 18539, 'loss/train': 0.3878302276134491} +03/04/2022 11:14:14 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/04/2022 11:14:17 - INFO - codeparrot_training - Step 18540: {'lr': 0.00048474923743230513, 'samples': 9492992, 'steps': 18540, 'loss/train': 2.338212490081787} +03/04/2022 11:14:20 - INFO - codeparrot_training - Step 18541: {'lr': 0.0004847474122548783, 'samples': 9493504, 'steps': 18541, 'loss/train': 2.1132614612579346} +03/04/2022 11:14:22 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 11:14:25 - INFO - codeparrot_training - Step 18542: {'lr': 0.00048474558697167783, 'samples': 9494016, 'steps': 18542, 'loss/train': 1.8636130094528198} +03/04/2022 11:14:28 - INFO - codeparrot_training - Step 18543: {'lr': 0.0004847437615827046, 'samples': 9494528, 'steps': 18543, 'loss/train': 2.4386281967163086} +03/04/2022 11:14:31 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/04/2022 11:14:33 - INFO - codeparrot_training - Step 18544: {'lr': 0.0004847419360879596, 'samples': 9495040, 'steps': 18544, 'loss/train': 1.816033959388733} +03/04/2022 11:14:37 - INFO - codeparrot_training - Step 18545: {'lr': 0.00048474011048744336, 'samples': 9495552, 'steps': 18545, 'loss/train': 2.0651051998138428} +03/04/2022 11:14:39 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 11:14:42 - INFO - codeparrot_training - Step 18546: {'lr': 0.0004847382847811569, 'samples': 9496064, 'steps': 18546, 'loss/train': 1.6472558975219727} +03/04/2022 11:14:45 - INFO - codeparrot_training - Step 18547: {'lr': 0.00048473645896910094, 'samples': 9496576, 'steps': 18547, 'loss/train': 1.049011468887329} +03/04/2022 11:14:47 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 11:14:50 - INFO - codeparrot_training - Step 18548: {'lr': 0.0004847346330512764, 'samples': 9497088, 'steps': 18548, 'loss/train': 2.3326668739318848} +03/04/2022 11:14:53 - INFO - codeparrot_training - Step 18549: {'lr': 0.0004847328070276841, 'samples': 9497600, 'steps': 18549, 'loss/train': 1.5755760669708252} +03/04/2022 11:14:55 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/04/2022 11:14:59 - INFO - codeparrot_training - Step 18550: {'lr': 0.00048473098089832475, 'samples': 9498112, 'steps': 18550, 'loss/train': 1.3139079809188843} +03/04/2022 11:15:02 - INFO - codeparrot_training - Step 18551: {'lr': 0.0004847291546631992, 'samples': 9498624, 'steps': 18551, 'loss/train': 1.1636090278625488} +03/04/2022 11:15:04 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 11:15:07 - INFO - codeparrot_training - Step 18552: {'lr': 0.0004847273283223084, 'samples': 9499136, 'steps': 18552, 'loss/train': 1.8903982639312744} +03/04/2022 11:15:10 - INFO - codeparrot_training - Step 18553: {'lr': 0.0004847255018756531, 'samples': 9499648, 'steps': 18553, 'loss/train': 2.1605091094970703} +03/04/2022 11:15:13 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/04/2022 11:15:16 - INFO - codeparrot_training - Step 18554: {'lr': 0.0004847236753232341, 'samples': 9500160, 'steps': 18554, 'loss/train': 1.2117749452590942} +03/04/2022 11:15:19 - INFO - codeparrot_training - Step 18555: {'lr': 0.0004847218486650522, 'samples': 9500672, 'steps': 18555, 'loss/train': 2.3666164875030518} +03/04/2022 11:15:22 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/04/2022 11:15:24 - INFO - codeparrot_training - Step 18556: {'lr': 0.00048472002190110827, 'samples': 9501184, 'steps': 18556, 'loss/train': 1.8320696353912354} +03/04/2022 11:15:27 - INFO - codeparrot_training - Step 18557: {'lr': 0.0004847181950314031, 'samples': 9501696, 'steps': 18557, 'loss/train': 1.1412675380706787} +03/04/2022 11:15:30 - INFO - codeparrot_training - Step 18558: {'lr': 0.00048471636805593756, 'samples': 9502208, 'steps': 18558, 'loss/train': 2.4363162517547607} +03/04/2022 11:15:31 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/04/2022 11:15:36 - INFO - codeparrot_training - Step 18559: {'lr': 0.0004847145409747125, 'samples': 9502720, 'steps': 18559, 'loss/train': 1.9373393058776855} +03/04/2022 11:15:39 - INFO - codeparrot_training - Step 18560: {'lr': 0.00048471271378772857, 'samples': 9503232, 'steps': 18560, 'loss/train': 1.5204576253890991} +03/04/2022 11:15:39 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/04/2022 11:15:44 - INFO - codeparrot_training - Step 18561: {'lr': 0.00048471088649498675, 'samples': 9503744, 'steps': 18561, 'loss/train': 2.788674831390381} +03/04/2022 11:15:47 - INFO - codeparrot_training - Step 18562: {'lr': 0.0004847090590964879, 'samples': 9504256, 'steps': 18562, 'loss/train': 1.7964012622833252} +03/04/2022 11:15:48 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 11:15:53 - INFO - codeparrot_training - Step 18563: {'lr': 0.00048470723159223266, 'samples': 9504768, 'steps': 18563, 'loss/train': 2.6237716674804688} +03/04/2022 11:15:56 - INFO - codeparrot_training - Step 18564: {'lr': 0.00048470540398222207, 'samples': 9505280, 'steps': 18564, 'loss/train': 1.6793261766433716} +03/04/2022 11:15:56 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 11:16:01 - INFO - codeparrot_training - Step 18565: {'lr': 0.00048470357626645676, 'samples': 9505792, 'steps': 18565, 'loss/train': 1.6865196228027344} +03/04/2022 11:16:04 - INFO - codeparrot_training - Step 18566: {'lr': 0.0004847017484449377, 'samples': 9506304, 'steps': 18566, 'loss/train': 2.3380489349365234} +03/04/2022 11:16:04 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 11:16:10 - INFO - codeparrot_training - Step 18567: {'lr': 0.0004846999205176657, 'samples': 9506816, 'steps': 18567, 'loss/train': 2.257566213607788} +03/04/2022 11:16:13 - INFO - codeparrot_training - Step 18568: {'lr': 0.00048469809248464135, 'samples': 9507328, 'steps': 18568, 'loss/train': 1.1617008447647095} +03/04/2022 11:16:13 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/04/2022 11:16:18 - INFO - codeparrot_training - Step 18569: {'lr': 0.0004846962643458658, 'samples': 9507840, 'steps': 18569, 'loss/train': 1.7205252647399902} +03/04/2022 11:16:21 - INFO - codeparrot_training - Step 18570: {'lr': 0.00048469443610133975, 'samples': 9508352, 'steps': 18570, 'loss/train': 2.593264102935791} +03/04/2022 11:16:21 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/04/2022 11:16:26 - INFO - codeparrot_training - Step 18571: {'lr': 0.00048469260775106394, 'samples': 9508864, 'steps': 18571, 'loss/train': 2.1549558639526367} +03/04/2022 11:16:30 - INFO - codeparrot_training - Step 18572: {'lr': 0.0004846907792950393, 'samples': 9509376, 'steps': 18572, 'loss/train': 1.3617210388183594} +03/04/2022 11:16:30 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 11:16:35 - INFO - codeparrot_training - Step 18573: {'lr': 0.00048468895073326663, 'samples': 9509888, 'steps': 18573, 'loss/train': 1.536342978477478} +03/04/2022 11:16:38 - INFO - codeparrot_training - Step 18574: {'lr': 0.0004846871220657467, 'samples': 9510400, 'steps': 18574, 'loss/train': 1.5257291793823242} +03/04/2022 11:16:38 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 11:16:43 - INFO - codeparrot_training - Step 18575: {'lr': 0.0004846852932924804, 'samples': 9510912, 'steps': 18575, 'loss/train': 2.4912612438201904} +03/04/2022 11:16:46 - INFO - codeparrot_training - Step 18576: {'lr': 0.00048468346441346853, 'samples': 9511424, 'steps': 18576, 'loss/train': 1.8597791194915771} +03/04/2022 11:16:46 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/04/2022 11:16:52 - INFO - codeparrot_training - Step 18577: {'lr': 0.0004846816354287119, 'samples': 9511936, 'steps': 18577, 'loss/train': 1.4713172912597656} +03/04/2022 11:16:55 - INFO - codeparrot_training - Step 18578: {'lr': 0.0004846798063382114, 'samples': 9512448, 'steps': 18578, 'loss/train': 1.8098719120025635} +03/04/2022 11:16:55 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 11:17:00 - INFO - codeparrot_training - Step 18579: {'lr': 0.0004846779771419677, 'samples': 9512960, 'steps': 18579, 'loss/train': 1.3425004482269287} +03/04/2022 11:17:04 - INFO - codeparrot_training - Step 18580: {'lr': 0.0004846761478399818, 'samples': 9513472, 'steps': 18580, 'loss/train': 1.9333806037902832} +03/04/2022 11:17:04 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/04/2022 11:17:09 - INFO - codeparrot_training - Step 18581: {'lr': 0.0004846743184322544, 'samples': 9513984, 'steps': 18581, 'loss/train': 2.009906768798828} +03/04/2022 11:17:12 - INFO - codeparrot_training - Step 18582: {'lr': 0.00048467248891878644, 'samples': 9514496, 'steps': 18582, 'loss/train': 2.2230184078216553} +03/04/2022 11:17:12 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/04/2022 11:17:17 - INFO - codeparrot_training - Step 18583: {'lr': 0.00048467065929957867, 'samples': 9515008, 'steps': 18583, 'loss/train': 2.2730114459991455} +03/04/2022 11:17:21 - INFO - codeparrot_training - Step 18584: {'lr': 0.00048466882957463186, 'samples': 9515520, 'steps': 18584, 'loss/train': 0.6144452691078186} +03/04/2022 11:17:21 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/04/2022 11:17:26 - INFO - codeparrot_training - Step 18585: {'lr': 0.0004846669997439469, 'samples': 9516032, 'steps': 18585, 'loss/train': 1.515714406967163} +03/04/2022 11:17:29 - INFO - codeparrot_training - Step 18586: {'lr': 0.0004846651698075246, 'samples': 9516544, 'steps': 18586, 'loss/train': 1.4900751113891602} +03/04/2022 11:17:29 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/04/2022 11:17:34 - INFO - codeparrot_training - Step 18587: {'lr': 0.00048466333976536594, 'samples': 9517056, 'steps': 18587, 'loss/train': 1.795306921005249} +03/04/2022 11:17:37 - INFO - codeparrot_training - Step 18588: {'lr': 0.0004846615096174715, 'samples': 9517568, 'steps': 18588, 'loss/train': 2.108651638031006} +03/04/2022 11:17:38 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/04/2022 11:17:43 - INFO - codeparrot_training - Step 18589: {'lr': 0.00048465967936384217, 'samples': 9518080, 'steps': 18589, 'loss/train': 1.6753283739089966} +03/04/2022 11:17:46 - INFO - codeparrot_training - Step 18590: {'lr': 0.00048465784900447885, 'samples': 9518592, 'steps': 18590, 'loss/train': 1.7715721130371094} +03/04/2022 11:17:46 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/04/2022 11:17:51 - INFO - codeparrot_training - Step 18591: {'lr': 0.00048465601853938224, 'samples': 9519104, 'steps': 18591, 'loss/train': 2.307238817214966} +03/04/2022 11:17:54 - INFO - codeparrot_training - Step 18592: {'lr': 0.0004846541879685533, 'samples': 9519616, 'steps': 18592, 'loss/train': 2.603482246398926} +03/04/2022 11:17:54 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 11:18:00 - INFO - codeparrot_training - Step 18593: {'lr': 0.0004846523572919929, 'samples': 9520128, 'steps': 18593, 'loss/train': 2.4395039081573486} +03/04/2022 11:18:03 - INFO - codeparrot_training - Step 18594: {'lr': 0.00048465052650970166, 'samples': 9520640, 'steps': 18594, 'loss/train': 2.085561513900757} +03/04/2022 11:18:03 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/04/2022 11:18:08 - INFO - codeparrot_training - Step 18595: {'lr': 0.00048464869562168055, 'samples': 9521152, 'steps': 18595, 'loss/train': 1.9810832738876343} +03/04/2022 11:18:11 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 11:18:13 - INFO - codeparrot_training - Step 18596: {'lr': 0.0004846468646279304, 'samples': 9521664, 'steps': 18596, 'loss/train': 1.617682695388794} +03/04/2022 11:18:17 - INFO - codeparrot_training - Step 18597: {'lr': 0.0004846450335284519, 'samples': 9522176, 'steps': 18597, 'loss/train': 2.1768369674682617} +03/04/2022 11:18:19 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/04/2022 11:18:22 - INFO - codeparrot_training - Step 18598: {'lr': 0.00048464320232324604, 'samples': 9522688, 'steps': 18598, 'loss/train': 2.210847854614258} +03/04/2022 11:18:25 - INFO - codeparrot_training - Step 18599: {'lr': 0.00048464137101231355, 'samples': 9523200, 'steps': 18599, 'loss/train': 1.6373380422592163} +03/04/2022 11:18:28 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/04/2022 11:18:30 - INFO - codeparrot_training - Step 18600: {'lr': 0.0004846395395956553, 'samples': 9523712, 'steps': 18600, 'loss/train': 1.5658397674560547} +03/04/2022 11:18:33 - INFO - codeparrot_training - Step 18601: {'lr': 0.00048463770807327206, 'samples': 9524224, 'steps': 18601, 'loss/train': 1.9267481565475464} +03/04/2022 11:18:36 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 11:18:39 - INFO - codeparrot_training - Step 18602: {'lr': 0.00048463587644516473, 'samples': 9524736, 'steps': 18602, 'loss/train': 1.4138847589492798} +03/04/2022 11:18:42 - INFO - codeparrot_training - Step 18603: {'lr': 0.00048463404471133404, 'samples': 9525248, 'steps': 18603, 'loss/train': 2.1727564334869385} +03/04/2022 11:18:45 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/04/2022 11:18:47 - INFO - codeparrot_training - Step 18604: {'lr': 0.00048463221287178094, 'samples': 9525760, 'steps': 18604, 'loss/train': 2.15557861328125} +03/04/2022 11:18:50 - INFO - codeparrot_training - Step 18605: {'lr': 0.0004846303809265061, 'samples': 9526272, 'steps': 18605, 'loss/train': 2.978668451309204} +03/04/2022 11:18:53 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/04/2022 11:18:56 - INFO - codeparrot_training - Step 18606: {'lr': 0.00048462854887551044, 'samples': 9526784, 'steps': 18606, 'loss/train': 1.835233211517334} +03/04/2022 11:18:59 - INFO - codeparrot_training - Step 18607: {'lr': 0.0004846267167187949, 'samples': 9527296, 'steps': 18607, 'loss/train': 0.6333346962928772} +03/04/2022 11:19:02 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 11:19:04 - INFO - codeparrot_training - Step 18608: {'lr': 0.00048462488445636005, 'samples': 9527808, 'steps': 18608, 'loss/train': 2.327176570892334} +03/04/2022 11:19:07 - INFO - codeparrot_training - Step 18609: {'lr': 0.0004846230520882069, 'samples': 9528320, 'steps': 18609, 'loss/train': 2.1208627223968506} +03/04/2022 11:19:10 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 11:19:12 - INFO - codeparrot_training - Step 18610: {'lr': 0.00048462121961433623, 'samples': 9528832, 'steps': 18610, 'loss/train': 1.677850604057312} +03/04/2022 11:19:16 - INFO - codeparrot_training - Step 18611: {'lr': 0.00048461938703474886, 'samples': 9529344, 'steps': 18611, 'loss/train': 1.2169485092163086} +03/04/2022 11:19:18 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/04/2022 11:19:21 - INFO - codeparrot_training - Step 18612: {'lr': 0.00048461755434944554, 'samples': 9529856, 'steps': 18612, 'loss/train': 1.8525623083114624} +03/04/2022 11:19:24 - INFO - codeparrot_training - Step 18613: {'lr': 0.00048461572155842725, 'samples': 9530368, 'steps': 18613, 'loss/train': 2.153980255126953} +03/04/2022 11:19:27 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/04/2022 11:19:30 - INFO - codeparrot_training - Step 18614: {'lr': 0.00048461388866169474, 'samples': 9530880, 'steps': 18614, 'loss/train': 0.7949098944664001} +03/04/2022 11:19:33 - INFO - codeparrot_training - Step 18615: {'lr': 0.00048461205565924884, 'samples': 9531392, 'steps': 18615, 'loss/train': 2.060805082321167} +03/04/2022 11:19:35 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 11:19:38 - INFO - codeparrot_training - Step 18616: {'lr': 0.0004846102225510903, 'samples': 9531904, 'steps': 18616, 'loss/train': 0.91429203748703} +03/04/2022 11:19:41 - INFO - codeparrot_training - Step 18617: {'lr': 0.00048460838933722005, 'samples': 9532416, 'steps': 18617, 'loss/train': 2.4738800525665283} +03/04/2022 11:19:44 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/04/2022 11:19:47 - INFO - codeparrot_training - Step 18618: {'lr': 0.0004846065560176389, 'samples': 9532928, 'steps': 18618, 'loss/train': 1.794521689414978} +03/04/2022 11:19:50 - INFO - codeparrot_training - Step 18619: {'lr': 0.00048460472259234764, 'samples': 9533440, 'steps': 18619, 'loss/train': 3.534989595413208} +03/04/2022 11:19:52 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 11:19:55 - INFO - codeparrot_training - Step 18620: {'lr': 0.0004846028890613471, 'samples': 9533952, 'steps': 18620, 'loss/train': 1.9484655857086182} +03/04/2022 11:19:58 - INFO - codeparrot_training - Step 18621: {'lr': 0.00048460105542463805, 'samples': 9534464, 'steps': 18621, 'loss/train': 1.5850346088409424} +03/04/2022 11:20:01 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 11:20:04 - INFO - codeparrot_training - Step 18622: {'lr': 0.00048459922168222146, 'samples': 9534976, 'steps': 18622, 'loss/train': 2.3599042892456055} +03/04/2022 11:20:07 - INFO - codeparrot_training - Step 18623: {'lr': 0.00048459738783409814, 'samples': 9535488, 'steps': 18623, 'loss/train': 2.1653757095336914} +03/04/2022 11:20:10 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 11:20:12 - INFO - codeparrot_training - Step 18624: {'lr': 0.0004845955538802688, 'samples': 9536000, 'steps': 18624, 'loss/train': 0.38898828625679016} +03/04/2022 11:20:15 - INFO - codeparrot_training - Step 18625: {'lr': 0.0004845937198207343, 'samples': 9536512, 'steps': 18625, 'loss/train': 1.0213191509246826} +03/04/2022 11:20:18 - INFO - codeparrot_training - Step 18626: {'lr': 0.0004845918856554955, 'samples': 9537024, 'steps': 18626, 'loss/train': 1.5238466262817383} +03/04/2022 11:20:19 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/04/2022 11:20:24 - INFO - codeparrot_training - Step 18627: {'lr': 0.00048459005138455326, 'samples': 9537536, 'steps': 18627, 'loss/train': 1.6991515159606934} +03/04/2022 11:20:27 - INFO - codeparrot_training - Step 18628: {'lr': 0.0004845882170079083, 'samples': 9538048, 'steps': 18628, 'loss/train': 2.7840237617492676} +03/04/2022 11:20:28 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/04/2022 11:20:33 - INFO - codeparrot_training - Step 18629: {'lr': 0.00048458638252556153, 'samples': 9538560, 'steps': 18629, 'loss/train': 1.6910163164138794} +03/04/2022 11:20:36 - INFO - codeparrot_training - Step 18630: {'lr': 0.0004845845479375138, 'samples': 9539072, 'steps': 18630, 'loss/train': 1.8570753335952759} +03/04/2022 11:20:36 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/04/2022 11:20:41 - INFO - codeparrot_training - Step 18631: {'lr': 0.00048458271324376586, 'samples': 9539584, 'steps': 18631, 'loss/train': 1.959281086921692} +03/04/2022 11:20:44 - INFO - codeparrot_training - Step 18632: {'lr': 0.0004845808784443185, 'samples': 9540096, 'steps': 18632, 'loss/train': 1.8797138929367065} +03/04/2022 11:20:46 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/04/2022 11:20:50 - INFO - codeparrot_training - Step 18633: {'lr': 0.00048457904353917277, 'samples': 9540608, 'steps': 18633, 'loss/train': 2.426520347595215} +03/04/2022 11:20:53 - INFO - codeparrot_training - Step 18634: {'lr': 0.0004845772085283292, 'samples': 9541120, 'steps': 18634, 'loss/train': 2.1023919582366943} +03/04/2022 11:20:54 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 11:20:58 - INFO - codeparrot_training - Step 18635: {'lr': 0.00048457537341178885, 'samples': 9541632, 'steps': 18635, 'loss/train': 0.7451614141464233} +03/04/2022 11:21:01 - INFO - codeparrot_training - Step 18636: {'lr': 0.0004845735381895524, 'samples': 9542144, 'steps': 18636, 'loss/train': 1.9899470806121826} +03/04/2022 11:21:03 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/04/2022 11:21:07 - INFO - codeparrot_training - Step 18637: {'lr': 0.0004845717028616208, 'samples': 9542656, 'steps': 18637, 'loss/train': 1.253427267074585} +03/04/2022 11:21:10 - INFO - codeparrot_training - Step 18638: {'lr': 0.00048456986742799474, 'samples': 9543168, 'steps': 18638, 'loss/train': 1.4142787456512451} +03/04/2022 11:21:12 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/04/2022 11:21:15 - INFO - codeparrot_training - Step 18639: {'lr': 0.00048456803188867513, 'samples': 9543680, 'steps': 18639, 'loss/train': 0.3258473575115204} +03/04/2022 11:21:18 - INFO - codeparrot_training - Step 18640: {'lr': 0.00048456619624366284, 'samples': 9544192, 'steps': 18640, 'loss/train': 1.8020405769348145} +03/04/2022 11:21:20 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/04/2022 11:21:23 - INFO - codeparrot_training - Step 18641: {'lr': 0.0004845643604929586, 'samples': 9544704, 'steps': 18641, 'loss/train': 1.9824633598327637} +03/04/2022 11:21:26 - INFO - codeparrot_training - Step 18642: {'lr': 0.00048456252463656326, 'samples': 9545216, 'steps': 18642, 'loss/train': 1.7848169803619385} +03/04/2022 11:21:28 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 11:21:32 - INFO - codeparrot_training - Step 18643: {'lr': 0.00048456068867447767, 'samples': 9545728, 'steps': 18643, 'loss/train': 2.3442366123199463} +03/04/2022 11:21:35 - INFO - codeparrot_training - Step 18644: {'lr': 0.0004845588526067027, 'samples': 9546240, 'steps': 18644, 'loss/train': 1.2293621301651} +03/04/2022 11:21:37 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 11:21:40 - INFO - codeparrot_training - Step 18645: {'lr': 0.00048455701643323914, 'samples': 9546752, 'steps': 18645, 'loss/train': 0.39537808299064636} +03/04/2022 11:21:43 - INFO - codeparrot_training - Step 18646: {'lr': 0.00048455518015408773, 'samples': 9547264, 'steps': 18646, 'loss/train': 1.3319121599197388} +03/04/2022 11:21:45 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 11:21:49 - INFO - codeparrot_training - Step 18647: {'lr': 0.00048455334376924943, 'samples': 9547776, 'steps': 18647, 'loss/train': 2.346625328063965} +03/04/2022 11:21:52 - INFO - codeparrot_training - Step 18648: {'lr': 0.000484551507278725, 'samples': 9548288, 'steps': 18648, 'loss/train': 1.897567629814148} +03/04/2022 11:21:53 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/04/2022 11:21:57 - INFO - codeparrot_training - Step 18649: {'lr': 0.0004845496706825152, 'samples': 9548800, 'steps': 18649, 'loss/train': 0.4966985881328583} +03/04/2022 11:22:00 - INFO - codeparrot_training - Step 18650: {'lr': 0.0004845478339806211, 'samples': 9549312, 'steps': 18650, 'loss/train': 2.2122271060943604} +03/04/2022 11:22:02 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/04/2022 11:22:05 - INFO - codeparrot_training - Step 18651: {'lr': 0.00048454599717304327, 'samples': 9549824, 'steps': 18651, 'loss/train': 1.926338791847229} +03/04/2022 11:22:09 - INFO - codeparrot_training - Step 18652: {'lr': 0.0004845441602597826, 'samples': 9550336, 'steps': 18652, 'loss/train': 1.9956798553466797} +03/04/2022 11:22:11 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 11:22:14 - INFO - codeparrot_training - Step 18653: {'lr': 0.00048454232324084004, 'samples': 9550848, 'steps': 18653, 'loss/train': 2.1422927379608154} +03/04/2022 11:22:17 - INFO - codeparrot_training - Step 18654: {'lr': 0.0004845404861162163, 'samples': 9551360, 'steps': 18654, 'loss/train': 1.9878453016281128} +03/04/2022 11:22:19 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 11:22:22 - INFO - codeparrot_training - Step 18655: {'lr': 0.00048453864888591214, 'samples': 9551872, 'steps': 18655, 'loss/train': 1.5010088682174683} +03/04/2022 11:22:26 - INFO - codeparrot_training - Step 18656: {'lr': 0.0004845368115499286, 'samples': 9552384, 'steps': 18656, 'loss/train': 1.9379220008850098} +03/04/2022 11:22:27 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/04/2022 11:22:31 - INFO - codeparrot_training - Step 18657: {'lr': 0.0004845349741082663, 'samples': 9552896, 'steps': 18657, 'loss/train': 1.701704502105713} +03/04/2022 11:22:34 - INFO - codeparrot_training - Step 18658: {'lr': 0.00048453313656092624, 'samples': 9553408, 'steps': 18658, 'loss/train': 1.5231951475143433} +03/04/2022 11:22:36 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 11:22:39 - INFO - codeparrot_training - Step 18659: {'lr': 0.0004845312989079091, 'samples': 9553920, 'steps': 18659, 'loss/train': 1.9109621047973633} +03/04/2022 11:22:43 - INFO - codeparrot_training - Step 18660: {'lr': 0.0004845294611492158, 'samples': 9554432, 'steps': 18660, 'loss/train': 2.275043487548828} +03/04/2022 11:22:44 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 11:22:48 - INFO - codeparrot_training - Step 18661: {'lr': 0.00048452762328484724, 'samples': 9554944, 'steps': 18661, 'loss/train': 1.6225910186767578} +03/04/2022 11:22:51 - INFO - codeparrot_training - Step 18662: {'lr': 0.000484525785314804, 'samples': 9555456, 'steps': 18662, 'loss/train': 1.8858624696731567} +03/04/2022 11:22:53 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 11:22:56 - INFO - codeparrot_training - Step 18663: {'lr': 0.0004845239472390872, 'samples': 9555968, 'steps': 18663, 'loss/train': 1.585341453552246} +03/04/2022 11:22:59 - INFO - codeparrot_training - Step 18664: {'lr': 0.0004845221090576974, 'samples': 9556480, 'steps': 18664, 'loss/train': 1.6354918479919434} +03/04/2022 11:23:01 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/04/2022 11:23:05 - INFO - codeparrot_training - Step 18665: {'lr': 0.0004845202707706356, 'samples': 9556992, 'steps': 18665, 'loss/train': 2.3232977390289307} +03/04/2022 11:23:08 - INFO - codeparrot_training - Step 18666: {'lr': 0.0004845184323779026, 'samples': 9557504, 'steps': 18666, 'loss/train': 1.7097289562225342} +03/04/2022 11:23:09 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 11:23:13 - INFO - codeparrot_training - Step 18667: {'lr': 0.0004845165938794992, 'samples': 9558016, 'steps': 18667, 'loss/train': 2.210744857788086} +03/04/2022 11:23:16 - INFO - codeparrot_training - Step 18668: {'lr': 0.0004845147552754263, 'samples': 9558528, 'steps': 18668, 'loss/train': 1.9959138631820679} +03/04/2022 11:23:17 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/04/2022 11:23:21 - INFO - codeparrot_training - Step 18669: {'lr': 0.0004845129165656846, 'samples': 9559040, 'steps': 18669, 'loss/train': 2.067938804626465} +03/04/2022 11:23:25 - INFO - codeparrot_training - Step 18670: {'lr': 0.00048451107775027505, 'samples': 9559552, 'steps': 18670, 'loss/train': 1.9366010427474976} +03/04/2022 11:23:26 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/04/2022 11:23:30 - INFO - codeparrot_training - Step 18671: {'lr': 0.0004845092388291984, 'samples': 9560064, 'steps': 18671, 'loss/train': 1.8821918964385986} +03/04/2022 11:23:33 - INFO - codeparrot_training - Step 18672: {'lr': 0.0004845073998024555, 'samples': 9560576, 'steps': 18672, 'loss/train': 1.7919342517852783} +03/04/2022 11:23:34 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 11:23:38 - INFO - codeparrot_training - Step 18673: {'lr': 0.0004845055606700472, 'samples': 9561088, 'steps': 18673, 'loss/train': 1.4468282461166382} +03/04/2022 11:23:41 - INFO - codeparrot_training - Step 18674: {'lr': 0.0004845037214319743, 'samples': 9561600, 'steps': 18674, 'loss/train': 1.2045326232910156} +03/04/2022 11:23:42 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/04/2022 11:23:47 - INFO - codeparrot_training - Step 18675: {'lr': 0.00048450188208823766, 'samples': 9562112, 'steps': 18675, 'loss/train': 0.3356643617153168} +03/04/2022 11:23:50 - INFO - codeparrot_training - Step 18676: {'lr': 0.00048450004263883806, 'samples': 9562624, 'steps': 18676, 'loss/train': 1.7247512340545654} +03/04/2022 11:23:51 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/04/2022 11:23:55 - INFO - codeparrot_training - Step 18677: {'lr': 0.00048449820308377634, 'samples': 9563136, 'steps': 18677, 'loss/train': 1.617692232131958} +03/04/2022 11:23:58 - INFO - codeparrot_training - Step 18678: {'lr': 0.00048449636342305343, 'samples': 9563648, 'steps': 18678, 'loss/train': 1.7291593551635742} +03/04/2022 11:24:00 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 11:24:04 - INFO - codeparrot_training - Step 18679: {'lr': 0.00048449452365667003, 'samples': 9564160, 'steps': 18679, 'loss/train': 2.717499017715454} +03/04/2022 11:24:07 - INFO - codeparrot_training - Step 18680: {'lr': 0.00048449268378462695, 'samples': 9564672, 'steps': 18680, 'loss/train': 1.1360418796539307} +03/04/2022 11:24:08 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/04/2022 11:24:12 - INFO - codeparrot_training - Step 18681: {'lr': 0.00048449084380692523, 'samples': 9565184, 'steps': 18681, 'loss/train': 1.5038435459136963} +03/04/2022 11:24:15 - INFO - codeparrot_training - Step 18682: {'lr': 0.0004844890037235654, 'samples': 9565696, 'steps': 18682, 'loss/train': 1.6827296018600464} +03/04/2022 11:24:16 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/04/2022 11:24:20 - INFO - codeparrot_training - Step 18683: {'lr': 0.00048448716353454856, 'samples': 9566208, 'steps': 18683, 'loss/train': 2.609442949295044} +03/04/2022 11:24:24 - INFO - codeparrot_training - Step 18684: {'lr': 0.0004844853232398754, 'samples': 9566720, 'steps': 18684, 'loss/train': 1.8652749061584473} +03/04/2022 11:24:25 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/04/2022 11:24:29 - INFO - codeparrot_training - Step 18685: {'lr': 0.00048448348283954674, 'samples': 9567232, 'steps': 18685, 'loss/train': 3.186627149581909} +03/04/2022 11:24:32 - INFO - codeparrot_training - Step 18686: {'lr': 0.00048448164233356344, 'samples': 9567744, 'steps': 18686, 'loss/train': 1.9955461025238037} +03/04/2022 11:24:33 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/04/2022 11:24:37 - INFO - codeparrot_training - Step 18687: {'lr': 0.0004844798017219264, 'samples': 9568256, 'steps': 18687, 'loss/train': 1.6639479398727417} +03/04/2022 11:24:40 - INFO - codeparrot_training - Step 18688: {'lr': 0.00048447796100463625, 'samples': 9568768, 'steps': 18688, 'loss/train': 2.1941463947296143} +03/04/2022 11:24:42 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/04/2022 11:24:46 - INFO - codeparrot_training - Step 18689: {'lr': 0.0004844761201816941, 'samples': 9569280, 'steps': 18689, 'loss/train': 1.9166429042816162} +03/04/2022 11:24:49 - INFO - codeparrot_training - Step 18690: {'lr': 0.0004844742792531005, 'samples': 9569792, 'steps': 18690, 'loss/train': 0.9608784317970276} +03/04/2022 11:24:50 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/04/2022 11:24:54 - INFO - codeparrot_training - Step 18691: {'lr': 0.00048447243821885644, 'samples': 9570304, 'steps': 18691, 'loss/train': 2.1382737159729004} +03/04/2022 11:24:57 - INFO - codeparrot_training - Step 18692: {'lr': 0.0004844705970789628, 'samples': 9570816, 'steps': 18692, 'loss/train': 2.12459659576416} +03/04/2022 11:24:59 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 11:25:03 - INFO - codeparrot_training - Step 18693: {'lr': 0.0004844687558334202, 'samples': 9571328, 'steps': 18693, 'loss/train': 1.4535390138626099} +03/04/2022 11:25:06 - INFO - codeparrot_training - Step 18694: {'lr': 0.0004844669144822297, 'samples': 9571840, 'steps': 18694, 'loss/train': 1.6481950283050537} +03/04/2022 11:25:07 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/04/2022 11:25:11 - INFO - codeparrot_training - Step 18695: {'lr': 0.000484465073025392, 'samples': 9572352, 'steps': 18695, 'loss/train': 2.197214365005493} +03/04/2022 11:25:14 - INFO - codeparrot_training - Step 18696: {'lr': 0.00048446323146290795, 'samples': 9572864, 'steps': 18696, 'loss/train': 2.472566843032837} +03/04/2022 11:25:15 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/04/2022 11:25:20 - INFO - codeparrot_training - Step 18697: {'lr': 0.0004844613897947784, 'samples': 9573376, 'steps': 18697, 'loss/train': 2.1920671463012695} +03/04/2022 11:25:23 - INFO - codeparrot_training - Step 18698: {'lr': 0.00048445954802100414, 'samples': 9573888, 'steps': 18698, 'loss/train': 1.8001036643981934} +03/04/2022 11:25:24 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/04/2022 11:25:28 - INFO - codeparrot_training - Step 18699: {'lr': 0.000484457706141586, 'samples': 9574400, 'steps': 18699, 'loss/train': 3.9755606651306152} +03/04/2022 11:25:31 - INFO - codeparrot_training - Step 18700: {'lr': 0.0004844558641565249, 'samples': 9574912, 'steps': 18700, 'loss/train': 2.1277782917022705} +03/04/2022 11:25:33 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/04/2022 11:25:37 - INFO - codeparrot_training - Step 18701: {'lr': 0.00048445402206582155, 'samples': 9575424, 'steps': 18701, 'loss/train': 2.263615608215332} +03/04/2022 11:25:40 - INFO - codeparrot_training - Step 18702: {'lr': 0.0004844521798694768, 'samples': 9575936, 'steps': 18702, 'loss/train': 2.168971538543701} +03/04/2022 11:25:41 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 11:25:45 - INFO - codeparrot_training - Step 18703: {'lr': 0.0004844503375674916, 'samples': 9576448, 'steps': 18703, 'loss/train': 2.249758243560791} +03/04/2022 11:25:48 - INFO - codeparrot_training - Step 18704: {'lr': 0.0004844484951598667, 'samples': 9576960, 'steps': 18704, 'loss/train': 2.043807029724121} +03/04/2022 11:25:49 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/04/2022 11:25:54 - INFO - codeparrot_training - Step 18705: {'lr': 0.00048444665264660286, 'samples': 9577472, 'steps': 18705, 'loss/train': 1.8751020431518555} +03/04/2022 11:25:57 - INFO - codeparrot_training - Step 18706: {'lr': 0.000484444810027701, 'samples': 9577984, 'steps': 18706, 'loss/train': 1.8361985683441162} +03/04/2022 11:25:58 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 11:26:02 - INFO - codeparrot_training - Step 18707: {'lr': 0.00048444296730316196, 'samples': 9578496, 'steps': 18707, 'loss/train': 2.200230836868286} +03/04/2022 11:26:05 - INFO - codeparrot_training - Step 18708: {'lr': 0.0004844411244729865, 'samples': 9579008, 'steps': 18708, 'loss/train': 2.188616991043091} +03/04/2022 11:26:06 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/04/2022 11:26:11 - INFO - codeparrot_training - Step 18709: {'lr': 0.00048443928153717555, 'samples': 9579520, 'steps': 18709, 'loss/train': 1.953428864479065} +03/04/2022 11:26:14 - INFO - codeparrot_training - Step 18710: {'lr': 0.00048443743849572974, 'samples': 9580032, 'steps': 18710, 'loss/train': 2.486377239227295} +03/04/2022 11:26:14 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/04/2022 11:26:19 - INFO - codeparrot_training - Step 18711: {'lr': 0.00048443559534865017, 'samples': 9580544, 'steps': 18711, 'loss/train': 1.770187497138977} +03/04/2022 11:26:22 - INFO - codeparrot_training - Step 18712: {'lr': 0.0004844337520959375, 'samples': 9581056, 'steps': 18712, 'loss/train': 2.476099967956543} +03/04/2022 11:26:23 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 11:26:28 - INFO - codeparrot_training - Step 18713: {'lr': 0.00048443190873759256, 'samples': 9581568, 'steps': 18713, 'loss/train': 0.15470083057880402} +03/04/2022 11:26:31 - INFO - codeparrot_training - Step 18714: {'lr': 0.00048443006527361626, 'samples': 9582080, 'steps': 18714, 'loss/train': 1.3089662790298462} +03/04/2022 11:26:32 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/04/2022 11:26:36 - INFO - codeparrot_training - Step 18715: {'lr': 0.0004844282217040094, 'samples': 9582592, 'steps': 18715, 'loss/train': 1.8125014305114746} +03/04/2022 11:26:39 - INFO - codeparrot_training - Step 18716: {'lr': 0.00048442637802877277, 'samples': 9583104, 'steps': 18716, 'loss/train': 1.7381807565689087} +03/04/2022 11:26:40 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 11:26:44 - INFO - codeparrot_training - Step 18717: {'lr': 0.0004844245342479072, 'samples': 9583616, 'steps': 18717, 'loss/train': 2.0978126525878906} +03/04/2022 11:26:48 - INFO - codeparrot_training - Step 18718: {'lr': 0.00048442269036141363, 'samples': 9584128, 'steps': 18718, 'loss/train': 2.6747753620147705} +03/04/2022 11:26:49 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/04/2022 11:26:53 - INFO - codeparrot_training - Step 18719: {'lr': 0.0004844208463692928, 'samples': 9584640, 'steps': 18719, 'loss/train': 2.0001118183135986} +03/04/2022 11:26:56 - INFO - codeparrot_training - Step 18720: {'lr': 0.00048441900227154557, 'samples': 9585152, 'steps': 18720, 'loss/train': 2.0925276279449463} +03/04/2022 11:26:57 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/04/2022 11:27:01 - INFO - codeparrot_training - Step 18721: {'lr': 0.00048441715806817265, 'samples': 9585664, 'steps': 18721, 'loss/train': 1.874349594116211} +03/04/2022 11:27:04 - INFO - codeparrot_training - Step 18722: {'lr': 0.0004844153137591751, 'samples': 9586176, 'steps': 18722, 'loss/train': 1.665478229522705} +03/04/2022 11:27:06 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 11:27:10 - INFO - codeparrot_training - Step 18723: {'lr': 0.00048441346934455356, 'samples': 9586688, 'steps': 18723, 'loss/train': 2.8001251220703125} +03/04/2022 11:27:13 - INFO - codeparrot_training - Step 18724: {'lr': 0.0004844116248243089, 'samples': 9587200, 'steps': 18724, 'loss/train': 6.581409454345703} +03/04/2022 11:27:15 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/04/2022 11:27:18 - INFO - codeparrot_training - Step 18725: {'lr': 0.0004844097801984421, 'samples': 9587712, 'steps': 18725, 'loss/train': 0.9101142883300781} +03/04/2022 11:27:21 - INFO - codeparrot_training - Step 18726: {'lr': 0.0004844079354669537, 'samples': 9588224, 'steps': 18726, 'loss/train': 2.154237985610962} +03/04/2022 11:27:23 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 11:27:27 - INFO - codeparrot_training - Step 18727: {'lr': 0.0004844060906298448, 'samples': 9588736, 'steps': 18727, 'loss/train': 2.1521406173706055} +03/04/2022 11:27:30 - INFO - codeparrot_training - Step 18728: {'lr': 0.0004844042456871162, 'samples': 9589248, 'steps': 18728, 'loss/train': 2.6673505306243896} +03/04/2022 11:27:32 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/04/2022 11:27:35 - INFO - codeparrot_training - Step 18729: {'lr': 0.0004844024006387685, 'samples': 9589760, 'steps': 18729, 'loss/train': 1.8051283359527588} +03/04/2022 11:27:38 - INFO - codeparrot_training - Step 18730: {'lr': 0.00048440055548480275, 'samples': 9590272, 'steps': 18730, 'loss/train': 1.8653290271759033} +03/04/2022 11:27:40 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/04/2022 11:27:43 - INFO - codeparrot_training - Step 18731: {'lr': 0.0004843987102252198, 'samples': 9590784, 'steps': 18731, 'loss/train': 2.2406249046325684} +03/04/2022 11:27:47 - INFO - codeparrot_training - Step 18732: {'lr': 0.0004843968648600204, 'samples': 9591296, 'steps': 18732, 'loss/train': 2.10929536819458} +03/04/2022 11:27:48 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/04/2022 11:27:52 - INFO - codeparrot_training - Step 18733: {'lr': 0.00048439501938920534, 'samples': 9591808, 'steps': 18733, 'loss/train': 2.762437343597412} +03/04/2022 11:27:55 - INFO - codeparrot_training - Step 18734: {'lr': 0.0004843931738127755, 'samples': 9592320, 'steps': 18734, 'loss/train': 1.3573552370071411} +03/04/2022 11:27:57 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 11:28:00 - INFO - codeparrot_training - Step 18735: {'lr': 0.0004843913281307317, 'samples': 9592832, 'steps': 18735, 'loss/train': 2.4097611904144287} +03/04/2022 11:28:03 - INFO - codeparrot_training - Step 18736: {'lr': 0.0004843894823430749, 'samples': 9593344, 'steps': 18736, 'loss/train': 2.028916358947754} +03/04/2022 11:28:05 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 11:28:09 - INFO - codeparrot_training - Step 18737: {'lr': 0.00048438763644980564, 'samples': 9593856, 'steps': 18737, 'loss/train': 1.843319058418274} +03/04/2022 11:28:12 - INFO - codeparrot_training - Step 18738: {'lr': 0.0004843857904509251, 'samples': 9594368, 'steps': 18738, 'loss/train': 1.6730185747146606} +03/04/2022 11:28:13 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 11:28:17 - INFO - codeparrot_training - Step 18739: {'lr': 0.00048438394434643386, 'samples': 9594880, 'steps': 18739, 'loss/train': 1.9768719673156738} +03/04/2022 11:28:20 - INFO - codeparrot_training - Step 18740: {'lr': 0.0004843820981363328, 'samples': 9595392, 'steps': 18740, 'loss/train': 1.3856767416000366} +03/04/2022 11:28:22 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 11:28:26 - INFO - codeparrot_training - Step 18741: {'lr': 0.00048438025182062286, 'samples': 9595904, 'steps': 18741, 'loss/train': 1.7631081342697144} +03/04/2022 11:28:29 - INFO - codeparrot_training - Step 18742: {'lr': 0.00048437840539930466, 'samples': 9596416, 'steps': 18742, 'loss/train': 1.2301301956176758} +03/04/2022 11:28:30 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/04/2022 11:28:34 - INFO - codeparrot_training - Step 18743: {'lr': 0.0004843765588723793, 'samples': 9596928, 'steps': 18743, 'loss/train': 1.481696605682373} +03/04/2022 11:28:37 - INFO - codeparrot_training - Step 18744: {'lr': 0.00048437471223984743, 'samples': 9597440, 'steps': 18744, 'loss/train': 0.9930187463760376} +03/04/2022 11:28:38 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/04/2022 11:28:42 - INFO - codeparrot_training - Step 18745: {'lr': 0.00048437286550170996, 'samples': 9597952, 'steps': 18745, 'loss/train': 2.121004819869995} +03/04/2022 11:28:45 - INFO - codeparrot_training - Step 18746: {'lr': 0.00048437101865796763, 'samples': 9598464, 'steps': 18746, 'loss/train': 1.6093497276306152} +03/04/2022 11:28:46 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 11:28:51 - INFO - codeparrot_training - Step 18747: {'lr': 0.0004843691717086214, 'samples': 9598976, 'steps': 18747, 'loss/train': 1.3541057109832764} +03/04/2022 11:28:54 - INFO - codeparrot_training - Step 18748: {'lr': 0.000484367324653672, 'samples': 9599488, 'steps': 18748, 'loss/train': 3.338500499725342} +03/04/2022 11:28:55 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 11:28:59 - INFO - codeparrot_training - Step 18749: {'lr': 0.0004843654774931203, 'samples': 9600000, 'steps': 18749, 'loss/train': 2.453988552093506} +03/04/2022 11:29:02 - INFO - codeparrot_training - Step 18750: {'lr': 0.00048436363022696715, 'samples': 9600512, 'steps': 18750, 'loss/train': 1.7140696048736572} +03/04/2022 11:29:03 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 11:29:08 - INFO - codeparrot_training - Step 18751: {'lr': 0.0004843617828552134, 'samples': 9601024, 'steps': 18751, 'loss/train': 2.0701112747192383} +03/04/2022 11:29:11 - INFO - codeparrot_training - Step 18752: {'lr': 0.00048435993537785976, 'samples': 9601536, 'steps': 18752, 'loss/train': 1.9245834350585938} +03/04/2022 11:29:11 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 11:29:16 - INFO - codeparrot_training - Step 18753: {'lr': 0.0004843580877949072, 'samples': 9602048, 'steps': 18753, 'loss/train': 1.781040072441101} +03/04/2022 11:29:19 - INFO - codeparrot_training - Step 18754: {'lr': 0.0004843562401063565, 'samples': 9602560, 'steps': 18754, 'loss/train': 0.4453231990337372} +03/04/2022 11:29:20 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/04/2022 11:29:24 - INFO - codeparrot_training - Step 18755: {'lr': 0.0004843543923122085, 'samples': 9603072, 'steps': 18755, 'loss/train': 1.7451831102371216} +03/04/2022 11:29:27 - INFO - codeparrot_training - Step 18756: {'lr': 0.000484352544412464, 'samples': 9603584, 'steps': 18756, 'loss/train': 2.2969822883605957} +03/04/2022 11:29:28 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 11:29:33 - INFO - codeparrot_training - Step 18757: {'lr': 0.0004843506964071239, 'samples': 9604096, 'steps': 18757, 'loss/train': 1.0719404220581055} +03/04/2022 11:29:36 - INFO - codeparrot_training - Step 18758: {'lr': 0.000484348848296189, 'samples': 9604608, 'steps': 18758, 'loss/train': 2.1931777000427246} +03/04/2022 11:29:36 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 11:29:41 - INFO - codeparrot_training - Step 18759: {'lr': 0.00048434700007966006, 'samples': 9605120, 'steps': 18759, 'loss/train': 1.4994028806686401} +03/04/2022 11:29:44 - INFO - codeparrot_training - Step 18760: {'lr': 0.000484345151757538, 'samples': 9605632, 'steps': 18760, 'loss/train': 1.6078579425811768} +03/04/2022 11:29:44 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/04/2022 11:29:50 - INFO - codeparrot_training - Step 18761: {'lr': 0.0004843433033298237, 'samples': 9606144, 'steps': 18761, 'loss/train': 2.0058186054229736} +03/04/2022 11:29:53 - INFO - codeparrot_training - Step 18762: {'lr': 0.00048434145479651783, 'samples': 9606656, 'steps': 18762, 'loss/train': 0.9449602961540222} +03/04/2022 11:29:53 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/04/2022 11:29:58 - INFO - codeparrot_training - Step 18763: {'lr': 0.00048433960615762136, 'samples': 9607168, 'steps': 18763, 'loss/train': 1.5731689929962158} +03/04/2022 11:30:01 - INFO - codeparrot_training - Step 18764: {'lr': 0.0004843377574131351, 'samples': 9607680, 'steps': 18764, 'loss/train': 2.0683114528656006} +03/04/2022 11:30:01 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 11:30:06 - INFO - codeparrot_training - Step 18765: {'lr': 0.0004843359085630598, 'samples': 9608192, 'steps': 18765, 'loss/train': 1.8814387321472168} +03/04/2022 11:30:10 - INFO - codeparrot_training - Step 18766: {'lr': 0.0004843340596073964, 'samples': 9608704, 'steps': 18766, 'loss/train': 2.196035623550415} +03/04/2022 11:30:10 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 11:30:15 - INFO - codeparrot_training - Step 18767: {'lr': 0.0004843322105461457, 'samples': 9609216, 'steps': 18767, 'loss/train': 1.5553410053253174} +03/04/2022 11:30:18 - INFO - codeparrot_training - Step 18768: {'lr': 0.0004843303613793085, 'samples': 9609728, 'steps': 18768, 'loss/train': 1.979210615158081} +03/04/2022 11:30:18 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/04/2022 11:30:23 - INFO - codeparrot_training - Step 18769: {'lr': 0.00048432851210688567, 'samples': 9610240, 'steps': 18769, 'loss/train': 1.5078314542770386} +03/04/2022 11:30:27 - INFO - codeparrot_training - Step 18770: {'lr': 0.00048432666272887805, 'samples': 9610752, 'steps': 18770, 'loss/train': 0.7725059390068054} +03/04/2022 11:30:27 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 11:30:32 - INFO - codeparrot_training - Step 18771: {'lr': 0.0004843248132452864, 'samples': 9611264, 'steps': 18771, 'loss/train': 2.1700289249420166} +03/04/2022 11:30:35 - INFO - codeparrot_training - Step 18772: {'lr': 0.0004843229636561116, 'samples': 9611776, 'steps': 18772, 'loss/train': 2.0334813594818115} +03/04/2022 11:30:36 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 11:30:40 - INFO - codeparrot_training - Step 18773: {'lr': 0.00048432111396135447, 'samples': 9612288, 'steps': 18773, 'loss/train': 0.9832890629768372} +03/04/2022 11:30:43 - INFO - codeparrot_training - Step 18774: {'lr': 0.0004843192641610159, 'samples': 9612800, 'steps': 18774, 'loss/train': 1.4118719100952148} +03/04/2022 11:30:44 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 11:30:49 - INFO - codeparrot_training - Step 18775: {'lr': 0.00048431741425509676, 'samples': 9613312, 'steps': 18775, 'loss/train': 1.9816854000091553} +03/04/2022 11:30:52 - INFO - codeparrot_training - Step 18776: {'lr': 0.0004843155642435977, 'samples': 9613824, 'steps': 18776, 'loss/train': 2.5705301761627197} +03/04/2022 11:30:53 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 11:30:57 - INFO - codeparrot_training - Step 18777: {'lr': 0.0004843137141265197, 'samples': 9614336, 'steps': 18777, 'loss/train': 2.2404863834381104} +03/04/2022 11:31:00 - INFO - codeparrot_training - Step 18778: {'lr': 0.00048431186390386356, 'samples': 9614848, 'steps': 18778, 'loss/train': 2.130772113800049} +03/04/2022 11:31:02 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 11:31:06 - INFO - codeparrot_training - Step 18779: {'lr': 0.0004843100135756301, 'samples': 9615360, 'steps': 18779, 'loss/train': 1.9024523496627808} +03/04/2022 11:31:09 - INFO - codeparrot_training - Step 18780: {'lr': 0.0004843081631418202, 'samples': 9615872, 'steps': 18780, 'loss/train': 1.7291662693023682} +03/04/2022 11:31:10 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 11:31:14 - INFO - codeparrot_training - Step 18781: {'lr': 0.00048430631260243465, 'samples': 9616384, 'steps': 18781, 'loss/train': 0.40338289737701416} +03/04/2022 11:31:17 - INFO - codeparrot_training - Step 18782: {'lr': 0.00048430446195747424, 'samples': 9616896, 'steps': 18782, 'loss/train': 1.7083451747894287} +03/04/2022 11:31:19 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 11:31:23 - INFO - codeparrot_training - Step 18783: {'lr': 0.00048430261120693986, 'samples': 9617408, 'steps': 18783, 'loss/train': 1.6826075315475464} +03/04/2022 11:31:26 - INFO - codeparrot_training - Step 18784: {'lr': 0.0004843007603508324, 'samples': 9617920, 'steps': 18784, 'loss/train': 1.7044516801834106} +03/04/2022 11:31:27 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/04/2022 11:31:31 - INFO - codeparrot_training - Step 18785: {'lr': 0.00048429890938915255, 'samples': 9618432, 'steps': 18785, 'loss/train': 2.071485996246338} +03/04/2022 11:31:35 - INFO - codeparrot_training - Step 18786: {'lr': 0.0004842970583219013, 'samples': 9618944, 'steps': 18786, 'loss/train': 1.734368085861206} +03/04/2022 11:31:37 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/04/2022 11:31:40 - INFO - codeparrot_training - Step 18787: {'lr': 0.0004842952071490794, 'samples': 9619456, 'steps': 18787, 'loss/train': 1.2761015892028809} +03/04/2022 11:31:43 - INFO - codeparrot_training - Step 18788: {'lr': 0.0004842933558706877, 'samples': 9619968, 'steps': 18788, 'loss/train': 2.4544172286987305} +03/04/2022 11:31:45 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/04/2022 11:31:48 - INFO - codeparrot_training - Step 18789: {'lr': 0.000484291504486727, 'samples': 9620480, 'steps': 18789, 'loss/train': 1.68110990524292} +03/04/2022 11:31:51 - INFO - codeparrot_training - Step 18790: {'lr': 0.0004842896529971982, 'samples': 9620992, 'steps': 18790, 'loss/train': 1.950022578239441} +03/04/2022 11:31:54 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 11:31:57 - INFO - codeparrot_training - Step 18791: {'lr': 0.00048428780140210204, 'samples': 9621504, 'steps': 18791, 'loss/train': 1.8345999717712402} +03/04/2022 11:32:00 - INFO - codeparrot_training - Step 18792: {'lr': 0.0004842859497014394, 'samples': 9622016, 'steps': 18792, 'loss/train': 2.5877773761749268} +03/04/2022 11:32:02 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/04/2022 11:32:05 - INFO - codeparrot_training - Step 18793: {'lr': 0.0004842840978952112, 'samples': 9622528, 'steps': 18793, 'loss/train': 1.6293140649795532} +03/04/2022 11:32:09 - INFO - codeparrot_training - Step 18794: {'lr': 0.00048428224598341815, 'samples': 9623040, 'steps': 18794, 'loss/train': 2.328880548477173} +03/04/2022 11:32:12 - INFO - codeparrot_training - Step 18795: {'lr': 0.0004842803939660612, 'samples': 9623552, 'steps': 18795, 'loss/train': 2.7087950706481934} +03/04/2022 11:32:12 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/04/2022 11:32:17 - INFO - codeparrot_training - Step 18796: {'lr': 0.00048427854184314103, 'samples': 9624064, 'steps': 18796, 'loss/train': 1.5651978254318237} +03/04/2022 11:32:20 - INFO - codeparrot_training - Step 18797: {'lr': 0.0004842766896146586, 'samples': 9624576, 'steps': 18797, 'loss/train': 1.7399743795394897} +03/04/2022 11:32:20 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/04/2022 11:32:25 - INFO - codeparrot_training - Step 18798: {'lr': 0.0004842748372806147, 'samples': 9625088, 'steps': 18798, 'loss/train': 1.9911308288574219} +03/04/2022 11:32:29 - INFO - codeparrot_training - Step 18799: {'lr': 0.00048427298484101023, 'samples': 9625600, 'steps': 18799, 'loss/train': 1.6232185363769531} +03/04/2022 11:32:29 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 11:32:34 - INFO - codeparrot_training - Step 18800: {'lr': 0.0004842711322958459, 'samples': 9626112, 'steps': 18800, 'loss/train': 1.7984949350357056} +03/04/2022 11:32:37 - INFO - codeparrot_training - Step 18801: {'lr': 0.0004842692796451226, 'samples': 9626624, 'steps': 18801, 'loss/train': 2.115788459777832} +03/04/2022 11:32:37 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 11:32:42 - INFO - codeparrot_training - Step 18802: {'lr': 0.0004842674268888413, 'samples': 9627136, 'steps': 18802, 'loss/train': 2.425422191619873} +03/04/2022 11:32:46 - INFO - codeparrot_training - Step 18803: {'lr': 0.0004842655740270026, 'samples': 9627648, 'steps': 18803, 'loss/train': 2.3153176307678223} +03/04/2022 11:32:46 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 11:32:51 - INFO - codeparrot_training - Step 18804: {'lr': 0.0004842637210596075, 'samples': 9628160, 'steps': 18804, 'loss/train': 2.315945863723755} +03/04/2022 11:32:54 - INFO - codeparrot_training - Step 18805: {'lr': 0.0004842618679866567, 'samples': 9628672, 'steps': 18805, 'loss/train': 2.0727834701538086} +03/04/2022 11:32:54 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 11:32:59 - INFO - codeparrot_training - Step 18806: {'lr': 0.0004842600148081512, 'samples': 9629184, 'steps': 18806, 'loss/train': 1.867490291595459} +03/04/2022 11:33:03 - INFO - codeparrot_training - Step 18807: {'lr': 0.00048425816152409173, 'samples': 9629696, 'steps': 18807, 'loss/train': 1.857409954071045} +03/04/2022 11:33:03 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/04/2022 11:33:08 - INFO - codeparrot_training - Step 18808: {'lr': 0.00048425630813447916, 'samples': 9630208, 'steps': 18808, 'loss/train': 1.7350701093673706} +03/04/2022 11:33:11 - INFO - codeparrot_training - Step 18809: {'lr': 0.0004842544546393143, 'samples': 9630720, 'steps': 18809, 'loss/train': 2.1851868629455566} +03/04/2022 11:33:11 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 11:33:16 - INFO - codeparrot_training - Step 18810: {'lr': 0.00048425260103859797, 'samples': 9631232, 'steps': 18810, 'loss/train': 1.3644074201583862} +03/04/2022 11:33:19 - INFO - codeparrot_training - Step 18811: {'lr': 0.0004842507473323311, 'samples': 9631744, 'steps': 18811, 'loss/train': 1.9206910133361816} +03/04/2022 11:33:20 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/04/2022 11:33:25 - INFO - codeparrot_training - Step 18812: {'lr': 0.00048424889352051436, 'samples': 9632256, 'steps': 18812, 'loss/train': 2.1552112102508545} +03/04/2022 11:33:28 - INFO - codeparrot_training - Step 18813: {'lr': 0.00048424703960314876, 'samples': 9632768, 'steps': 18813, 'loss/train': 1.8455636501312256} +03/04/2022 11:33:29 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/04/2022 11:33:33 - INFO - codeparrot_training - Step 18814: {'lr': 0.00048424518558023505, 'samples': 9633280, 'steps': 18814, 'loss/train': 1.9680267572402954} +03/04/2022 11:33:36 - INFO - codeparrot_training - Step 18815: {'lr': 0.00048424333145177405, 'samples': 9633792, 'steps': 18815, 'loss/train': 1.1750985383987427} +03/04/2022 11:33:37 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 11:33:42 - INFO - codeparrot_training - Step 18816: {'lr': 0.00048424147721776666, 'samples': 9634304, 'steps': 18816, 'loss/train': 1.6044591665267944} +03/04/2022 11:33:45 - INFO - codeparrot_training - Step 18817: {'lr': 0.00048423962287821366, 'samples': 9634816, 'steps': 18817, 'loss/train': 1.5276528596878052} +03/04/2022 11:33:46 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/04/2022 11:33:50 - INFO - codeparrot_training - Step 18818: {'lr': 0.00048423776843311585, 'samples': 9635328, 'steps': 18818, 'loss/train': 1.3185639381408691} +03/04/2022 11:33:53 - INFO - codeparrot_training - Step 18819: {'lr': 0.00048423591388247416, 'samples': 9635840, 'steps': 18819, 'loss/train': 2.2984344959259033} +03/04/2022 11:33:54 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 11:33:59 - INFO - codeparrot_training - Step 18820: {'lr': 0.0004842340592262894, 'samples': 9636352, 'steps': 18820, 'loss/train': 1.6472363471984863} +03/04/2022 11:34:02 - INFO - codeparrot_training - Step 18821: {'lr': 0.00048423220446456233, 'samples': 9636864, 'steps': 18821, 'loss/train': 1.7826682329177856} +03/04/2022 11:34:02 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/04/2022 11:34:07 - INFO - codeparrot_training - Step 18822: {'lr': 0.0004842303495972939, 'samples': 9637376, 'steps': 18822, 'loss/train': 1.6552066802978516} +03/04/2022 11:34:10 - INFO - codeparrot_training - Step 18823: {'lr': 0.00048422849462448483, 'samples': 9637888, 'steps': 18823, 'loss/train': 1.492329716682434} +03/04/2022 11:34:11 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/04/2022 11:34:16 - INFO - codeparrot_training - Step 18824: {'lr': 0.0004842266395461361, 'samples': 9638400, 'steps': 18824, 'loss/train': 0.8477040529251099} +03/04/2022 11:34:19 - INFO - codeparrot_training - Step 18825: {'lr': 0.0004842247843622484, 'samples': 9638912, 'steps': 18825, 'loss/train': 2.4148192405700684} +03/04/2022 11:34:19 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/04/2022 11:34:24 - INFO - codeparrot_training - Step 18826: {'lr': 0.0004842229290728226, 'samples': 9639424, 'steps': 18826, 'loss/train': 2.385000228881836} +03/04/2022 11:34:28 - INFO - codeparrot_training - Step 18827: {'lr': 0.0004842210736778596, 'samples': 9639936, 'steps': 18827, 'loss/train': 2.152012825012207} +03/04/2022 11:34:29 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/04/2022 11:34:33 - INFO - codeparrot_training - Step 18828: {'lr': 0.0004842192181773602, 'samples': 9640448, 'steps': 18828, 'loss/train': 1.4903982877731323} +03/04/2022 11:34:36 - INFO - codeparrot_training - Step 18829: {'lr': 0.0004842173625713252, 'samples': 9640960, 'steps': 18829, 'loss/train': 2.1151227951049805} +03/04/2022 11:34:38 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/04/2022 11:34:41 - INFO - codeparrot_training - Step 18830: {'lr': 0.0004842155068597556, 'samples': 9641472, 'steps': 18830, 'loss/train': 2.022162675857544} +03/04/2022 11:34:44 - INFO - codeparrot_training - Step 18831: {'lr': 0.0004842136510426519, 'samples': 9641984, 'steps': 18831, 'loss/train': 1.7516696453094482} +03/04/2022 11:34:46 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 11:34:50 - INFO - codeparrot_training - Step 18832: {'lr': 0.00048421179512001536, 'samples': 9642496, 'steps': 18832, 'loss/train': 2.323667287826538} +03/04/2022 11:34:53 - INFO - codeparrot_training - Step 18833: {'lr': 0.0004842099390918464, 'samples': 9643008, 'steps': 18833, 'loss/train': 1.1472338438034058} +03/04/2022 11:34:55 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/04/2022 11:34:58 - INFO - codeparrot_training - Step 18834: {'lr': 0.00048420808295814624, 'samples': 9643520, 'steps': 18834, 'loss/train': 1.6151005029678345} +03/04/2022 11:35:01 - INFO - codeparrot_training - Step 18835: {'lr': 0.00048420622671891533, 'samples': 9644032, 'steps': 18835, 'loss/train': 1.7424150705337524} +03/04/2022 11:35:03 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/04/2022 11:35:06 - INFO - codeparrot_training - Step 18836: {'lr': 0.00048420437037415486, 'samples': 9644544, 'steps': 18836, 'loss/train': 2.0469555854797363} +03/04/2022 11:35:10 - INFO - codeparrot_training - Step 18837: {'lr': 0.00048420251392386547, 'samples': 9645056, 'steps': 18837, 'loss/train': 2.0514585971832275} +03/04/2022 11:35:12 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/04/2022 11:35:15 - INFO - codeparrot_training - Step 18838: {'lr': 0.0004842006573680481, 'samples': 9645568, 'steps': 18838, 'loss/train': 1.6851743459701538} +03/04/2022 11:35:18 - INFO - codeparrot_training - Step 18839: {'lr': 0.0004841988007067034, 'samples': 9646080, 'steps': 18839, 'loss/train': 2.150240182876587} +03/04/2022 11:35:20 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 11:35:23 - INFO - codeparrot_training - Step 18840: {'lr': 0.00048419694393983244, 'samples': 9646592, 'steps': 18840, 'loss/train': 1.399125099182129} +03/04/2022 11:35:26 - INFO - codeparrot_training - Step 18841: {'lr': 0.00048419508706743587, 'samples': 9647104, 'steps': 18841, 'loss/train': 1.7481093406677246} +03/04/2022 11:35:28 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 11:35:32 - INFO - codeparrot_training - Step 18842: {'lr': 0.00048419323008951467, 'samples': 9647616, 'steps': 18842, 'loss/train': 0.8703935146331787} +03/04/2022 11:35:35 - INFO - codeparrot_training - Step 18843: {'lr': 0.00048419137300606963, 'samples': 9648128, 'steps': 18843, 'loss/train': 2.217064142227173} +03/04/2022 11:35:37 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/04/2022 11:35:40 - INFO - codeparrot_training - Step 18844: {'lr': 0.00048418951581710154, 'samples': 9648640, 'steps': 18844, 'loss/train': 1.8804919719696045} +03/04/2022 11:35:43 - INFO - codeparrot_training - Step 18845: {'lr': 0.00048418765852261124, 'samples': 9649152, 'steps': 18845, 'loss/train': 1.0722016096115112} +03/04/2022 11:35:45 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/04/2022 11:35:49 - INFO - codeparrot_training - Step 18846: {'lr': 0.0004841858011225996, 'samples': 9649664, 'steps': 18846, 'loss/train': 2.064152956008911} +03/04/2022 11:35:52 - INFO - codeparrot_training - Step 18847: {'lr': 0.0004841839436170675, 'samples': 9650176, 'steps': 18847, 'loss/train': 2.1688082218170166} +03/04/2022 11:35:54 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/04/2022 11:35:57 - INFO - codeparrot_training - Step 18848: {'lr': 0.0004841820860060157, 'samples': 9650688, 'steps': 18848, 'loss/train': 2.0846259593963623} +03/04/2022 11:36:00 - INFO - codeparrot_training - Step 18849: {'lr': 0.0004841802282894451, 'samples': 9651200, 'steps': 18849, 'loss/train': 2.3165507316589355} +03/04/2022 11:36:02 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 11:36:06 - INFO - codeparrot_training - Step 18850: {'lr': 0.0004841783704673565, 'samples': 9651712, 'steps': 18850, 'loss/train': 2.692078113555908} +03/04/2022 11:36:09 - INFO - codeparrot_training - Step 18851: {'lr': 0.00048417651253975067, 'samples': 9652224, 'steps': 18851, 'loss/train': 1.9232019186019897} +03/04/2022 11:36:11 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/04/2022 11:36:14 - INFO - codeparrot_training - Step 18852: {'lr': 0.00048417465450662856, 'samples': 9652736, 'steps': 18852, 'loss/train': 1.7609304189682007} +03/04/2022 11:36:17 - INFO - codeparrot_training - Step 18853: {'lr': 0.0004841727963679909, 'samples': 9653248, 'steps': 18853, 'loss/train': 2.05295729637146} +03/04/2022 11:36:19 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/04/2022 11:36:22 - INFO - codeparrot_training - Step 18854: {'lr': 0.0004841709381238387, 'samples': 9653760, 'steps': 18854, 'loss/train': 1.6966197490692139} +03/04/2022 11:36:26 - INFO - codeparrot_training - Step 18855: {'lr': 0.0004841690797741726, 'samples': 9654272, 'steps': 18855, 'loss/train': 1.5814461708068848} +03/04/2022 11:36:28 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 11:36:31 - INFO - codeparrot_training - Step 18856: {'lr': 0.0004841672213189936, 'samples': 9654784, 'steps': 18856, 'loss/train': 2.0249903202056885} +03/04/2022 11:36:34 - INFO - codeparrot_training - Step 18857: {'lr': 0.00048416536275830245, 'samples': 9655296, 'steps': 18857, 'loss/train': 1.6269772052764893} +03/04/2022 11:36:36 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/04/2022 11:36:39 - INFO - codeparrot_training - Step 18858: {'lr': 0.00048416350409209995, 'samples': 9655808, 'steps': 18858, 'loss/train': 1.9774409532546997} +03/04/2022 11:36:42 - INFO - codeparrot_training - Step 18859: {'lr': 0.000484161645320387, 'samples': 9656320, 'steps': 18859, 'loss/train': 0.8609490990638733} +03/04/2022 11:36:45 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/04/2022 11:36:48 - INFO - codeparrot_training - Step 18860: {'lr': 0.0004841597864431645, 'samples': 9656832, 'steps': 18860, 'loss/train': 1.1048015356063843} +03/04/2022 11:36:51 - INFO - codeparrot_training - Step 18861: {'lr': 0.00048415792746043314, 'samples': 9657344, 'steps': 18861, 'loss/train': 1.8426761627197266} +03/04/2022 11:36:53 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 11:36:56 - INFO - codeparrot_training - Step 18862: {'lr': 0.00048415606837219383, 'samples': 9657856, 'steps': 18862, 'loss/train': 1.9556596279144287} +03/04/2022 11:36:59 - INFO - codeparrot_training - Step 18863: {'lr': 0.00048415420917844744, 'samples': 9658368, 'steps': 18863, 'loss/train': 2.4026763439178467} +03/04/2022 11:37:01 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 11:37:04 - INFO - codeparrot_training - Step 18864: {'lr': 0.00048415234987919474, 'samples': 9658880, 'steps': 18864, 'loss/train': 2.398061990737915} +03/04/2022 11:37:08 - INFO - codeparrot_training - Step 18865: {'lr': 0.0004841504904744367, 'samples': 9659392, 'steps': 18865, 'loss/train': 1.456453561782837} +03/04/2022 11:37:10 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/04/2022 11:37:13 - INFO - codeparrot_training - Step 18866: {'lr': 0.0004841486309641739, 'samples': 9659904, 'steps': 18866, 'loss/train': 2.2722597122192383} +03/04/2022 11:37:16 - INFO - codeparrot_training - Step 18867: {'lr': 0.00048414677134840753, 'samples': 9660416, 'steps': 18867, 'loss/train': 1.5971976518630981} +03/04/2022 11:37:18 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 11:37:21 - INFO - codeparrot_training - Step 18868: {'lr': 0.00048414491162713814, 'samples': 9660928, 'steps': 18868, 'loss/train': 1.6686257123947144} +03/04/2022 11:37:24 - INFO - codeparrot_training - Step 18869: {'lr': 0.00048414305180036665, 'samples': 9661440, 'steps': 18869, 'loss/train': 1.4527621269226074} +03/04/2022 11:37:27 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/04/2022 11:37:30 - INFO - codeparrot_training - Step 18870: {'lr': 0.0004841411918680939, 'samples': 9661952, 'steps': 18870, 'loss/train': 2.0201773643493652} +03/04/2022 11:37:33 - INFO - codeparrot_training - Step 18871: {'lr': 0.0004841393318303208, 'samples': 9662464, 'steps': 18871, 'loss/train': 2.09139347076416} +03/04/2022 11:37:35 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/04/2022 11:37:38 - INFO - codeparrot_training - Step 18872: {'lr': 0.0004841374716870481, 'samples': 9662976, 'steps': 18872, 'loss/train': 1.3751871585845947} +03/04/2022 11:37:41 - INFO - codeparrot_training - Step 18873: {'lr': 0.00048413561143827665, 'samples': 9663488, 'steps': 18873, 'loss/train': 1.838456392288208} +03/04/2022 11:37:43 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/04/2022 11:37:47 - INFO - codeparrot_training - Step 18874: {'lr': 0.00048413375108400736, 'samples': 9664000, 'steps': 18874, 'loss/train': 1.8765060901641846} +03/04/2022 11:37:50 - INFO - codeparrot_training - Step 18875: {'lr': 0.000484131890624241, 'samples': 9664512, 'steps': 18875, 'loss/train': 2.359983205795288} +03/04/2022 11:37:52 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 11:37:55 - INFO - codeparrot_training - Step 18876: {'lr': 0.00048413003005897835, 'samples': 9665024, 'steps': 18876, 'loss/train': 0.865959644317627} +03/04/2022 11:37:58 - INFO - codeparrot_training - Step 18877: {'lr': 0.0004841281693882204, 'samples': 9665536, 'steps': 18877, 'loss/train': 2.9329640865325928} +03/04/2022 11:38:00 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/04/2022 11:38:03 - INFO - codeparrot_training - Step 18878: {'lr': 0.0004841263086119679, 'samples': 9666048, 'steps': 18878, 'loss/train': 1.5220533609390259} +03/04/2022 11:38:06 - INFO - codeparrot_training - Step 18879: {'lr': 0.00048412444773022166, 'samples': 9666560, 'steps': 18879, 'loss/train': 2.1814446449279785} +03/04/2022 11:38:08 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 11:38:12 - INFO - codeparrot_training - Step 18880: {'lr': 0.0004841225867429826, 'samples': 9667072, 'steps': 18880, 'loss/train': 1.748016119003296} +03/04/2022 11:38:15 - INFO - codeparrot_training - Step 18881: {'lr': 0.0004841207256502515, 'samples': 9667584, 'steps': 18881, 'loss/train': 1.4086146354675293} +03/04/2022 11:38:17 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/04/2022 11:38:20 - INFO - codeparrot_training - Step 18882: {'lr': 0.0004841188644520292, 'samples': 9668096, 'steps': 18882, 'loss/train': 0.8337785601615906} +03/04/2022 11:38:23 - INFO - codeparrot_training - Step 18883: {'lr': 0.0004841170031483165, 'samples': 9668608, 'steps': 18883, 'loss/train': 1.4421498775482178} +03/04/2022 11:38:25 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 11:38:29 - INFO - codeparrot_training - Step 18884: {'lr': 0.0004841151417391144, 'samples': 9669120, 'steps': 18884, 'loss/train': 2.093618631362915} +03/04/2022 11:38:32 - INFO - codeparrot_training - Step 18885: {'lr': 0.00048411328022442357, 'samples': 9669632, 'steps': 18885, 'loss/train': 0.3024679720401764} +03/04/2022 11:38:33 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/04/2022 11:38:37 - INFO - codeparrot_training - Step 18886: {'lr': 0.000484111418604245, 'samples': 9670144, 'steps': 18886, 'loss/train': 0.9253832697868347} +03/04/2022 11:38:40 - INFO - codeparrot_training - Step 18887: {'lr': 0.00048410955687857926, 'samples': 9670656, 'steps': 18887, 'loss/train': 1.9889194965362549} +03/04/2022 11:38:42 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/04/2022 11:38:45 - INFO - codeparrot_training - Step 18888: {'lr': 0.0004841076950474275, 'samples': 9671168, 'steps': 18888, 'loss/train': 2.1439049243927} +03/04/2022 11:38:49 - INFO - codeparrot_training - Step 18889: {'lr': 0.0004841058331107904, 'samples': 9671680, 'steps': 18889, 'loss/train': 1.5098984241485596} +03/04/2022 11:38:50 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 11:38:54 - INFO - codeparrot_training - Step 18890: {'lr': 0.00048410397106866883, 'samples': 9672192, 'steps': 18890, 'loss/train': 2.2515597343444824} +03/04/2022 11:38:57 - INFO - codeparrot_training - Step 18891: {'lr': 0.0004841021089210636, 'samples': 9672704, 'steps': 18891, 'loss/train': 1.8628891706466675} +03/04/2022 11:38:59 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/04/2022 11:39:03 - INFO - codeparrot_training - Step 18892: {'lr': 0.0004841002466679756, 'samples': 9673216, 'steps': 18892, 'loss/train': 1.9686559438705444} +03/04/2022 11:39:06 - INFO - codeparrot_training - Step 18893: {'lr': 0.00048409838430940556, 'samples': 9673728, 'steps': 18893, 'loss/train': 2.1620476245880127} +03/04/2022 11:39:09 - INFO - codeparrot_training - Step 18894: {'lr': 0.00048409652184535447, 'samples': 9674240, 'steps': 18894, 'loss/train': 2.1118781566619873} +03/04/2022 11:39:10 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 11:39:15 - INFO - codeparrot_training - Step 18895: {'lr': 0.0004840946592758231, 'samples': 9674752, 'steps': 18895, 'loss/train': 1.2853448390960693} +03/04/2022 11:39:18 - INFO - codeparrot_training - Step 18896: {'lr': 0.00048409279660081226, 'samples': 9675264, 'steps': 18896, 'loss/train': 1.9561264514923096} +03/04/2022 11:39:18 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 11:39:23 - INFO - codeparrot_training - Step 18897: {'lr': 0.0004840909338203229, 'samples': 9675776, 'steps': 18897, 'loss/train': 1.5552215576171875} +03/04/2022 11:39:26 - INFO - codeparrot_training - Step 18898: {'lr': 0.0004840890709343557, 'samples': 9676288, 'steps': 18898, 'loss/train': 1.9831048250198364} +03/04/2022 11:39:27 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/04/2022 11:39:32 - INFO - codeparrot_training - Step 18899: {'lr': 0.0004840872079429116, 'samples': 9676800, 'steps': 18899, 'loss/train': 2.1790332794189453} +03/04/2022 11:39:35 - INFO - codeparrot_training - Step 18900: {'lr': 0.00048408534484599143, 'samples': 9677312, 'steps': 18900, 'loss/train': 0.7052431702613831} +03/04/2022 11:39:35 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/04/2022 11:39:40 - INFO - codeparrot_training - Step 18901: {'lr': 0.00048408348164359594, 'samples': 9677824, 'steps': 18901, 'loss/train': 2.4601945877075195} +03/04/2022 11:39:43 - INFO - codeparrot_training - Step 18902: {'lr': 0.00048408161833572613, 'samples': 9678336, 'steps': 18902, 'loss/train': 0.45307451486587524} +03/04/2022 11:39:44 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 11:39:48 - INFO - codeparrot_training - Step 18903: {'lr': 0.0004840797549223827, 'samples': 9678848, 'steps': 18903, 'loss/train': 1.3949933052062988} +03/04/2022 11:39:52 - INFO - codeparrot_training - Step 18904: {'lr': 0.00048407789140356654, 'samples': 9679360, 'steps': 18904, 'loss/train': 2.1120448112487793} +03/04/2022 11:39:52 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 11:39:57 - INFO - codeparrot_training - Step 18905: {'lr': 0.00048407602777927856, 'samples': 9679872, 'steps': 18905, 'loss/train': 1.866988182067871} +03/04/2022 11:40:00 - INFO - codeparrot_training - Step 18906: {'lr': 0.0004840741640495195, 'samples': 9680384, 'steps': 18906, 'loss/train': 1.9366118907928467} +03/04/2022 11:40:01 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 11:40:05 - INFO - codeparrot_training - Step 18907: {'lr': 0.0004840723002142902, 'samples': 9680896, 'steps': 18907, 'loss/train': 1.7483357191085815} +03/04/2022 11:40:09 - INFO - codeparrot_training - Step 18908: {'lr': 0.0004840704362735916, 'samples': 9681408, 'steps': 18908, 'loss/train': 1.833714485168457} +03/04/2022 11:40:09 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/04/2022 11:40:14 - INFO - codeparrot_training - Step 18909: {'lr': 0.0004840685722274244, 'samples': 9681920, 'steps': 18909, 'loss/train': 2.896550178527832} +03/04/2022 11:40:17 - INFO - codeparrot_training - Step 18910: {'lr': 0.0004840667080757896, 'samples': 9682432, 'steps': 18910, 'loss/train': 0.30527424812316895} +03/04/2022 11:40:20 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 11:40:23 - INFO - codeparrot_training - Step 18911: {'lr': 0.00048406484381868786, 'samples': 9682944, 'steps': 18911, 'loss/train': 2.285104274749756} +03/04/2022 11:40:26 - INFO - codeparrot_training - Step 18912: {'lr': 0.0004840629794561202, 'samples': 9683456, 'steps': 18912, 'loss/train': 2.1645188331604004} +03/04/2022 11:40:28 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 11:40:31 - INFO - codeparrot_training - Step 18913: {'lr': 0.0004840611149880873, 'samples': 9683968, 'steps': 18913, 'loss/train': 2.071486473083496} +03/04/2022 11:40:34 - INFO - codeparrot_training - Step 18914: {'lr': 0.0004840592504145901, 'samples': 9684480, 'steps': 18914, 'loss/train': 2.4001708030700684} +03/04/2022 11:40:36 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 11:40:40 - INFO - codeparrot_training - Step 18915: {'lr': 0.0004840573857356294, 'samples': 9684992, 'steps': 18915, 'loss/train': 1.8180575370788574} +03/04/2022 11:40:43 - INFO - codeparrot_training - Step 18916: {'lr': 0.0004840555209512061, 'samples': 9685504, 'steps': 18916, 'loss/train': 1.3874621391296387} +03/04/2022 11:40:45 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/04/2022 11:40:48 - INFO - codeparrot_training - Step 18917: {'lr': 0.00048405365606132096, 'samples': 9686016, 'steps': 18917, 'loss/train': 2.131986379623413} +03/04/2022 11:40:51 - INFO - codeparrot_training - Step 18918: {'lr': 0.00048405179106597487, 'samples': 9686528, 'steps': 18918, 'loss/train': 2.083812713623047} +03/04/2022 11:40:53 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/04/2022 11:40:56 - INFO - codeparrot_training - Step 18919: {'lr': 0.0004840499259651686, 'samples': 9687040, 'steps': 18919, 'loss/train': 1.722593069076538} +03/04/2022 11:41:00 - INFO - codeparrot_training - Step 18920: {'lr': 0.0004840480607589031, 'samples': 9687552, 'steps': 18920, 'loss/train': 1.0751938819885254} +03/04/2022 11:41:01 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/04/2022 11:41:05 - INFO - codeparrot_training - Step 18921: {'lr': 0.0004840461954471792, 'samples': 9688064, 'steps': 18921, 'loss/train': 1.32506263256073} +03/04/2022 11:41:08 - INFO - codeparrot_training - Step 18922: {'lr': 0.00048404433002999757, 'samples': 9688576, 'steps': 18922, 'loss/train': 1.8463013172149658} +03/04/2022 11:41:10 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/04/2022 11:41:13 - INFO - codeparrot_training - Step 18923: {'lr': 0.0004840424645073593, 'samples': 9689088, 'steps': 18923, 'loss/train': 1.4452793598175049} +03/04/2022 11:41:17 - INFO - codeparrot_training - Step 18924: {'lr': 0.000484040598879265, 'samples': 9689600, 'steps': 18924, 'loss/train': 1.8344627618789673} +03/04/2022 11:41:18 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 11:41:22 - INFO - codeparrot_training - Step 18925: {'lr': 0.0004840387331457157, 'samples': 9690112, 'steps': 18925, 'loss/train': 1.174454689025879} +03/04/2022 11:41:25 - INFO - codeparrot_training - Step 18926: {'lr': 0.00048403686730671215, 'samples': 9690624, 'steps': 18926, 'loss/train': 2.839798927307129} +03/04/2022 11:41:27 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/04/2022 11:41:30 - INFO - codeparrot_training - Step 18927: {'lr': 0.0004840350013622552, 'samples': 9691136, 'steps': 18927, 'loss/train': 2.109863042831421} +03/04/2022 11:41:34 - INFO - codeparrot_training - Step 18928: {'lr': 0.0004840331353123456, 'samples': 9691648, 'steps': 18928, 'loss/train': 1.0836116075515747} +03/04/2022 11:41:35 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/04/2022 11:41:39 - INFO - codeparrot_training - Step 18929: {'lr': 0.00048403126915698435, 'samples': 9692160, 'steps': 18929, 'loss/train': 1.998971700668335} +03/04/2022 11:41:42 - INFO - codeparrot_training - Step 18930: {'lr': 0.00048402940289617223, 'samples': 9692672, 'steps': 18930, 'loss/train': 2.094602346420288} +03/04/2022 11:41:44 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/04/2022 11:41:47 - INFO - codeparrot_training - Step 18931: {'lr': 0.00048402753652991007, 'samples': 9693184, 'steps': 18931, 'loss/train': 2.234116315841675} +03/04/2022 11:41:51 - INFO - codeparrot_training - Step 18932: {'lr': 0.0004840256700581988, 'samples': 9693696, 'steps': 18932, 'loss/train': 1.6848856210708618} +03/04/2022 11:41:53 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 11:41:56 - INFO - codeparrot_training - Step 18933: {'lr': 0.000484023803481039, 'samples': 9694208, 'steps': 18933, 'loss/train': 1.5142589807510376} +03/04/2022 11:41:59 - INFO - codeparrot_training - Step 18934: {'lr': 0.00048402193679843175, 'samples': 9694720, 'steps': 18934, 'loss/train': 1.6145626306533813} +03/04/2022 11:42:01 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/04/2022 11:42:04 - INFO - codeparrot_training - Step 18935: {'lr': 0.00048402007001037786, 'samples': 9695232, 'steps': 18935, 'loss/train': 2.3712897300720215} +03/04/2022 11:42:07 - INFO - codeparrot_training - Step 18936: {'lr': 0.0004840182031168781, 'samples': 9695744, 'steps': 18936, 'loss/train': 2.1619105339050293} +03/04/2022 11:42:10 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/04/2022 11:42:13 - INFO - codeparrot_training - Step 18937: {'lr': 0.0004840163361179334, 'samples': 9696256, 'steps': 18937, 'loss/train': 1.4338833093643188} +03/04/2022 11:42:16 - INFO - codeparrot_training - Step 18938: {'lr': 0.00048401446901354453, 'samples': 9696768, 'steps': 18938, 'loss/train': 1.5021555423736572} +03/04/2022 11:42:18 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/04/2022 11:42:21 - INFO - codeparrot_training - Step 18939: {'lr': 0.0004840126018037123, 'samples': 9697280, 'steps': 18939, 'loss/train': 2.1810550689697266} +03/04/2022 11:42:24 - INFO - codeparrot_training - Step 18940: {'lr': 0.0004840107344884377, 'samples': 9697792, 'steps': 18940, 'loss/train': 2.2943902015686035} +03/04/2022 11:42:26 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 11:42:29 - INFO - codeparrot_training - Step 18941: {'lr': 0.0004840088670677214, 'samples': 9698304, 'steps': 18941, 'loss/train': 1.8216155767440796} +03/04/2022 11:42:33 - INFO - codeparrot_training - Step 18942: {'lr': 0.0004840069995415643, 'samples': 9698816, 'steps': 18942, 'loss/train': 1.0184500217437744} +03/04/2022 11:42:35 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/04/2022 11:42:38 - INFO - codeparrot_training - Step 18943: {'lr': 0.0004840051319099673, 'samples': 9699328, 'steps': 18943, 'loss/train': 0.5910075902938843} +03/04/2022 11:42:41 - INFO - codeparrot_training - Step 18944: {'lr': 0.0004840032641729312, 'samples': 9699840, 'steps': 18944, 'loss/train': 2.010183811187744} +03/04/2022 11:42:43 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/04/2022 11:42:46 - INFO - codeparrot_training - Step 18945: {'lr': 0.0004840013963304568, 'samples': 9700352, 'steps': 18945, 'loss/train': 1.8688609600067139} +03/04/2022 11:42:50 - INFO - codeparrot_training - Step 18946: {'lr': 0.000483999528382545, 'samples': 9700864, 'steps': 18946, 'loss/train': 1.3555198907852173} +03/04/2022 11:42:52 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 11:42:55 - INFO - codeparrot_training - Step 18947: {'lr': 0.00048399766032919666, 'samples': 9701376, 'steps': 18947, 'loss/train': 2.201612949371338} +03/04/2022 11:42:58 - INFO - codeparrot_training - Step 18948: {'lr': 0.0004839957921704126, 'samples': 9701888, 'steps': 18948, 'loss/train': 1.6157660484313965} +03/04/2022 11:43:00 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/04/2022 11:43:03 - INFO - codeparrot_training - Step 18949: {'lr': 0.0004839939239061936, 'samples': 9702400, 'steps': 18949, 'loss/train': 1.6845982074737549} +03/04/2022 11:43:06 - INFO - codeparrot_training - Step 18950: {'lr': 0.00048399205553654046, 'samples': 9702912, 'steps': 18950, 'loss/train': 1.034011721611023} +03/04/2022 11:43:09 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/04/2022 11:43:12 - INFO - codeparrot_training - Step 18951: {'lr': 0.0004839901870614543, 'samples': 9703424, 'steps': 18951, 'loss/train': 1.8456207513809204} +03/04/2022 11:43:15 - INFO - codeparrot_training - Step 18952: {'lr': 0.0004839883184809356, 'samples': 9703936, 'steps': 18952, 'loss/train': 2.756098985671997} +03/04/2022 11:43:18 - INFO - codeparrot_training - Step 18953: {'lr': 0.00048398644979498543, 'samples': 9704448, 'steps': 18953, 'loss/train': 2.1422269344329834} +03/04/2022 11:43:19 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/04/2022 11:43:24 - INFO - codeparrot_training - Step 18954: {'lr': 0.0004839845810036047, 'samples': 9704960, 'steps': 18954, 'loss/train': 2.0475411415100098} +03/04/2022 11:43:27 - INFO - codeparrot_training - Step 18955: {'lr': 0.00048398271210679393, 'samples': 9705472, 'steps': 18955, 'loss/train': 1.032584547996521} +03/04/2022 11:43:27 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 11:43:33 - INFO - codeparrot_training - Step 18956: {'lr': 0.0004839808431045543, 'samples': 9705984, 'steps': 18956, 'loss/train': 1.9920839071273804} +03/04/2022 11:43:36 - INFO - codeparrot_training - Step 18957: {'lr': 0.00048397897399688643, 'samples': 9706496, 'steps': 18957, 'loss/train': 2.384577989578247} +03/04/2022 11:43:37 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/04/2022 11:43:41 - INFO - codeparrot_training - Step 18958: {'lr': 0.0004839771047837913, 'samples': 9707008, 'steps': 18958, 'loss/train': 1.454741358757019} +03/04/2022 11:43:44 - INFO - codeparrot_training - Step 18959: {'lr': 0.00048397523546526966, 'samples': 9707520, 'steps': 18959, 'loss/train': 1.9132548570632935} +03/04/2022 11:43:46 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 11:43:50 - INFO - codeparrot_training - Step 18960: {'lr': 0.0004839733660413224, 'samples': 9708032, 'steps': 18960, 'loss/train': 1.326029658317566} +03/04/2022 11:43:53 - INFO - codeparrot_training - Step 18961: {'lr': 0.0004839714965119504, 'samples': 9708544, 'steps': 18961, 'loss/train': 2.167956829071045} +03/04/2022 11:43:55 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/04/2022 11:43:58 - INFO - codeparrot_training - Step 18962: {'lr': 0.0004839696268771544, 'samples': 9709056, 'steps': 18962, 'loss/train': 1.6004564762115479} +03/04/2022 11:44:01 - INFO - codeparrot_training - Step 18963: {'lr': 0.0004839677571369353, 'samples': 9709568, 'steps': 18963, 'loss/train': 1.5438333749771118} +03/04/2022 11:44:03 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 11:44:06 - INFO - codeparrot_training - Step 18964: {'lr': 0.000483965887291294, 'samples': 9710080, 'steps': 18964, 'loss/train': 2.3998265266418457} +03/04/2022 11:44:10 - INFO - codeparrot_training - Step 18965: {'lr': 0.0004839640173402312, 'samples': 9710592, 'steps': 18965, 'loss/train': 1.964352011680603} +03/04/2022 11:44:12 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/04/2022 11:44:15 - INFO - codeparrot_training - Step 18966: {'lr': 0.00048396214728374786, 'samples': 9711104, 'steps': 18966, 'loss/train': 2.3243701457977295} +03/04/2022 11:44:18 - INFO - codeparrot_training - Step 18967: {'lr': 0.00048396027712184475, 'samples': 9711616, 'steps': 18967, 'loss/train': 0.46013572812080383} +03/04/2022 11:44:21 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 11:44:23 - INFO - codeparrot_training - Step 18968: {'lr': 0.0004839584068545228, 'samples': 9712128, 'steps': 18968, 'loss/train': 1.8779137134552002} +03/04/2022 11:44:27 - INFO - codeparrot_training - Step 18969: {'lr': 0.0004839565364817828, 'samples': 9712640, 'steps': 18969, 'loss/train': 1.3133918046951294} +03/04/2022 11:44:29 - INFO - codeparrot_training - Skipping example with length 506 (seq_length=1024) +03/04/2022 11:44:32 - INFO - codeparrot_training - Step 18970: {'lr': 0.0004839546660036256, 'samples': 9713152, 'steps': 18970, 'loss/train': 1.8428951501846313} +03/04/2022 11:44:35 - INFO - codeparrot_training - Step 18971: {'lr': 0.000483952795420052, 'samples': 9713664, 'steps': 18971, 'loss/train': 2.036573886871338} +03/04/2022 11:44:38 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 11:44:40 - INFO - codeparrot_training - Step 18972: {'lr': 0.0004839509247310629, 'samples': 9714176, 'steps': 18972, 'loss/train': 1.877515196800232} +03/04/2022 11:44:43 - INFO - codeparrot_training - Step 18973: {'lr': 0.00048394905393665913, 'samples': 9714688, 'steps': 18973, 'loss/train': 2.503880500793457} +03/04/2022 11:44:46 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/04/2022 11:44:49 - INFO - codeparrot_training - Step 18974: {'lr': 0.00048394718303684147, 'samples': 9715200, 'steps': 18974, 'loss/train': 1.5977927446365356} +03/04/2022 11:44:52 - INFO - codeparrot_training - Step 18975: {'lr': 0.00048394531203161084, 'samples': 9715712, 'steps': 18975, 'loss/train': 1.4963845014572144} +03/04/2022 11:44:55 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/04/2022 11:44:57 - INFO - codeparrot_training - Step 18976: {'lr': 0.00048394344092096816, 'samples': 9716224, 'steps': 18976, 'loss/train': 0.832568883895874} +03/04/2022 11:45:00 - INFO - codeparrot_training - Step 18977: {'lr': 0.0004839415697049141, 'samples': 9716736, 'steps': 18977, 'loss/train': 1.440832495689392} +03/04/2022 11:45:03 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/04/2022 11:45:06 - INFO - codeparrot_training - Step 18978: {'lr': 0.00048393969838344956, 'samples': 9717248, 'steps': 18978, 'loss/train': 1.5311620235443115} +03/04/2022 11:45:09 - INFO - codeparrot_training - Step 18979: {'lr': 0.0004839378269565754, 'samples': 9717760, 'steps': 18979, 'loss/train': 1.526186227798462} +03/04/2022 11:45:12 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/04/2022 11:45:14 - INFO - codeparrot_training - Step 18980: {'lr': 0.00048393595542429253, 'samples': 9718272, 'steps': 18980, 'loss/train': 1.9008655548095703} +03/04/2022 11:45:17 - INFO - codeparrot_training - Step 18981: {'lr': 0.0004839340837866016, 'samples': 9718784, 'steps': 18981, 'loss/train': 1.1398667097091675} +03/04/2022 11:45:20 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/04/2022 11:45:23 - INFO - codeparrot_training - Step 18982: {'lr': 0.00048393221204350376, 'samples': 9719296, 'steps': 18982, 'loss/train': 1.7926596403121948} +03/04/2022 11:45:26 - INFO - codeparrot_training - Step 18983: {'lr': 0.0004839303401949996, 'samples': 9719808, 'steps': 18983, 'loss/train': 1.5848299264907837} +03/04/2022 11:45:28 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/04/2022 11:45:31 - INFO - codeparrot_training - Step 18984: {'lr': 0.00048392846824109, 'samples': 9720320, 'steps': 18984, 'loss/train': 2.1856110095977783} +03/04/2022 11:45:34 - INFO - codeparrot_training - Step 18985: {'lr': 0.00048392659618177585, 'samples': 9720832, 'steps': 18985, 'loss/train': 1.6672126054763794} +03/04/2022 11:45:37 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 11:45:39 - INFO - codeparrot_training - Step 18986: {'lr': 0.000483924724017058, 'samples': 9721344, 'steps': 18986, 'loss/train': 2.425717830657959} +03/04/2022 11:45:43 - INFO - codeparrot_training - Step 18987: {'lr': 0.00048392285174693727, 'samples': 9721856, 'steps': 18987, 'loss/train': 1.7312487363815308} +03/04/2022 11:45:45 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 11:45:48 - INFO - codeparrot_training - Step 18988: {'lr': 0.0004839209793714146, 'samples': 9722368, 'steps': 18988, 'loss/train': 1.3286852836608887} +03/04/2022 11:45:51 - INFO - codeparrot_training - Step 18989: {'lr': 0.00048391910689049057, 'samples': 9722880, 'steps': 18989, 'loss/train': 1.4873157739639282} +03/04/2022 11:45:53 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/04/2022 11:45:56 - INFO - codeparrot_training - Step 18990: {'lr': 0.00048391723430416634, 'samples': 9723392, 'steps': 18990, 'loss/train': 2.465550422668457} +03/04/2022 11:45:59 - INFO - codeparrot_training - Step 18991: {'lr': 0.00048391536161244254, 'samples': 9723904, 'steps': 18991, 'loss/train': 1.602460503578186} +03/04/2022 11:46:02 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 11:46:05 - INFO - codeparrot_training - Step 18992: {'lr': 0.0004839134888153202, 'samples': 9724416, 'steps': 18992, 'loss/train': 2.133727788925171} +03/04/2022 11:46:08 - INFO - codeparrot_training - Step 18993: {'lr': 0.00048391161591279994, 'samples': 9724928, 'steps': 18993, 'loss/train': 1.759141445159912} +03/04/2022 11:46:10 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/04/2022 11:46:13 - INFO - codeparrot_training - Step 18994: {'lr': 0.0004839097429048827, 'samples': 9725440, 'steps': 18994, 'loss/train': 1.1383148431777954} +03/04/2022 11:46:16 - INFO - codeparrot_training - Step 18995: {'lr': 0.00048390786979156944, 'samples': 9725952, 'steps': 18995, 'loss/train': 1.5835357904434204} +03/04/2022 11:46:18 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/04/2022 11:46:21 - INFO - codeparrot_training - Step 18996: {'lr': 0.0004839059965728608, 'samples': 9726464, 'steps': 18996, 'loss/train': 2.011943817138672} +03/04/2022 11:46:25 - INFO - codeparrot_training - Step 18997: {'lr': 0.0004839041232487578, 'samples': 9726976, 'steps': 18997, 'loss/train': 2.0797183513641357} +03/04/2022 11:46:27 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 11:46:30 - INFO - codeparrot_training - Step 18998: {'lr': 0.0004839022498192612, 'samples': 9727488, 'steps': 18998, 'loss/train': 1.960619330406189} +03/04/2022 11:46:33 - INFO - codeparrot_training - Step 18999: {'lr': 0.0004839003762843718, 'samples': 9728000, 'steps': 18999, 'loss/train': 2.089538812637329} +03/04/2022 11:46:35 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/04/2022 11:46:38 - INFO - codeparrot_training - Step 19000: {'lr': 0.00048389850264409054, 'samples': 9728512, 'steps': 19000, 'loss/train': 3.279268980026245} +03/04/2022 11:46:41 - INFO - codeparrot_training - Step 19001: {'lr': 0.00048389662889841825, 'samples': 9729024, 'steps': 19001, 'loss/train': 1.7961187362670898} +03/04/2022 11:46:43 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/04/2022 11:46:47 - INFO - codeparrot_training - Step 19002: {'lr': 0.0004838947550473557, 'samples': 9729536, 'steps': 19002, 'loss/train': 1.7270654439926147} +03/04/2022 11:46:50 - INFO - codeparrot_training - Step 19003: {'lr': 0.00048389288109090383, 'samples': 9730048, 'steps': 19003, 'loss/train': 1.5645725727081299} +03/04/2022 11:46:52 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 11:46:55 - INFO - codeparrot_training - Step 19004: {'lr': 0.0004838910070290634, 'samples': 9730560, 'steps': 19004, 'loss/train': 2.003458261489868} +03/04/2022 11:46:58 - INFO - codeparrot_training - Step 19005: {'lr': 0.00048388913286183535, 'samples': 9731072, 'steps': 19005, 'loss/train': 2.1503946781158447} +03/04/2022 11:47:00 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/04/2022 11:47:04 - INFO - codeparrot_training - Step 19006: {'lr': 0.0004838872585892204, 'samples': 9731584, 'steps': 19006, 'loss/train': 1.5831043720245361} +03/04/2022 11:47:07 - INFO - codeparrot_training - Step 19007: {'lr': 0.00048388538421121946, 'samples': 9732096, 'steps': 19007, 'loss/train': 1.6411235332489014} +03/04/2022 11:47:09 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/04/2022 11:47:12 - INFO - codeparrot_training - Step 19008: {'lr': 0.00048388350972783346, 'samples': 9732608, 'steps': 19008, 'loss/train': 1.3564029932022095} +03/04/2022 11:47:15 - INFO - codeparrot_training - Step 19009: {'lr': 0.000483881635139063, 'samples': 9733120, 'steps': 19009, 'loss/train': 1.3944162130355835} +03/04/2022 11:47:17 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 11:47:21 - INFO - codeparrot_training - Step 19010: {'lr': 0.00048387976044490924, 'samples': 9733632, 'steps': 19010, 'loss/train': 2.2904255390167236} +03/04/2022 11:47:24 - INFO - codeparrot_training - Step 19011: {'lr': 0.0004838778856453728, 'samples': 9734144, 'steps': 19011, 'loss/train': 0.15952759981155396} +03/04/2022 11:47:26 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/04/2022 11:47:29 - INFO - codeparrot_training - Step 19012: {'lr': 0.00048387601074045464, 'samples': 9734656, 'steps': 19012, 'loss/train': 1.0988926887512207} +03/04/2022 11:47:32 - INFO - codeparrot_training - Step 19013: {'lr': 0.0004838741357301555, 'samples': 9735168, 'steps': 19013, 'loss/train': 2.176988124847412} +03/04/2022 11:47:35 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/04/2022 11:47:37 - INFO - codeparrot_training - Step 19014: {'lr': 0.00048387226061447633, 'samples': 9735680, 'steps': 19014, 'loss/train': 1.3985174894332886} +03/04/2022 11:47:41 - INFO - codeparrot_training - Step 19015: {'lr': 0.0004838703853934179, 'samples': 9736192, 'steps': 19015, 'loss/train': 1.3478243350982666} +03/04/2022 11:47:44 - INFO - codeparrot_training - Step 19016: {'lr': 0.0004838685100669811, 'samples': 9736704, 'steps': 19016, 'loss/train': 2.4625613689422607} +03/04/2022 11:47:44 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 11:47:49 - INFO - codeparrot_training - Step 19017: {'lr': 0.0004838666346351667, 'samples': 9737216, 'steps': 19017, 'loss/train': 1.6678452491760254} +03/04/2022 11:47:52 - INFO - codeparrot_training - Step 19018: {'lr': 0.0004838647590979757, 'samples': 9737728, 'steps': 19018, 'loss/train': 1.9159846305847168} +03/04/2022 11:47:52 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/04/2022 11:47:58 - INFO - codeparrot_training - Step 19019: {'lr': 0.00048386288345540876, 'samples': 9738240, 'steps': 19019, 'loss/train': 2.9255483150482178} +03/04/2022 11:48:01 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/04/2022 11:48:03 - INFO - codeparrot_training - Step 19020: {'lr': 0.00048386100770746686, 'samples': 9738752, 'steps': 19020, 'loss/train': 1.499014973640442} +03/04/2022 11:48:06 - INFO - codeparrot_training - Step 19021: {'lr': 0.00048385913185415076, 'samples': 9739264, 'steps': 19021, 'loss/train': 1.595326542854309} +03/04/2022 11:48:10 - INFO - codeparrot_training - Step 19022: {'lr': 0.00048385725589546137, 'samples': 9739776, 'steps': 19022, 'loss/train': 2.221832513809204} +03/04/2022 11:48:10 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 11:48:15 - INFO - codeparrot_training - Step 19023: {'lr': 0.0004838553798313995, 'samples': 9740288, 'steps': 19023, 'loss/train': 1.2831733226776123} +03/04/2022 11:48:18 - INFO - codeparrot_training - Step 19024: {'lr': 0.000483853503661966, 'samples': 9740800, 'steps': 19024, 'loss/train': 1.8742140531539917} +03/04/2022 11:48:19 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 11:48:23 - INFO - codeparrot_training - Step 19025: {'lr': 0.00048385162738716174, 'samples': 9741312, 'steps': 19025, 'loss/train': 2.7154455184936523} +03/04/2022 11:48:26 - INFO - codeparrot_training - Step 19026: {'lr': 0.00048384975100698756, 'samples': 9741824, 'steps': 19026, 'loss/train': 2.0792431831359863} +03/04/2022 11:48:27 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/04/2022 11:48:32 - INFO - codeparrot_training - Step 19027: {'lr': 0.0004838478745214443, 'samples': 9742336, 'steps': 19027, 'loss/train': 1.7366892099380493} +03/04/2022 11:48:35 - INFO - codeparrot_training - Step 19028: {'lr': 0.00048384599793053275, 'samples': 9742848, 'steps': 19028, 'loss/train': 2.5608417987823486} +03/04/2022 11:48:35 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/04/2022 11:48:40 - INFO - codeparrot_training - Step 19029: {'lr': 0.0004838441212342538, 'samples': 9743360, 'steps': 19029, 'loss/train': 1.9441933631896973} +03/04/2022 11:48:43 - INFO - codeparrot_training - Step 19030: {'lr': 0.0004838422444326084, 'samples': 9743872, 'steps': 19030, 'loss/train': 2.444429874420166} +03/04/2022 11:48:43 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 11:48:48 - INFO - codeparrot_training - Step 19031: {'lr': 0.0004838403675255971, 'samples': 9744384, 'steps': 19031, 'loss/train': 1.1212583780288696} +03/04/2022 11:48:52 - INFO - codeparrot_training - Step 19032: {'lr': 0.0004838384905132211, 'samples': 9744896, 'steps': 19032, 'loss/train': 1.5249422788619995} +03/04/2022 11:48:52 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/04/2022 11:48:57 - INFO - codeparrot_training - Step 19033: {'lr': 0.000483836613395481, 'samples': 9745408, 'steps': 19033, 'loss/train': 1.9500643014907837} +03/04/2022 11:49:00 - INFO - codeparrot_training - Step 19034: {'lr': 0.0004838347361723778, 'samples': 9745920, 'steps': 19034, 'loss/train': 3.127875328063965} +03/04/2022 11:49:00 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/04/2022 11:49:05 - INFO - codeparrot_training - Step 19035: {'lr': 0.0004838328588439123, 'samples': 9746432, 'steps': 19035, 'loss/train': 1.4075943231582642} +03/04/2022 11:49:09 - INFO - codeparrot_training - Step 19036: {'lr': 0.0004838309814100852, 'samples': 9746944, 'steps': 19036, 'loss/train': 1.29261314868927} +03/04/2022 11:49:09 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/04/2022 11:49:14 - INFO - codeparrot_training - Step 19037: {'lr': 0.0004838291038708975, 'samples': 9747456, 'steps': 19037, 'loss/train': 2.0055441856384277} +03/04/2022 11:49:17 - INFO - codeparrot_training - Step 19038: {'lr': 0.00048382722622635014, 'samples': 9747968, 'steps': 19038, 'loss/train': 2.0530107021331787} +03/04/2022 11:49:17 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/04/2022 11:49:22 - INFO - codeparrot_training - Step 19039: {'lr': 0.0004838253484764437, 'samples': 9748480, 'steps': 19039, 'loss/train': 2.0924108028411865} +03/04/2022 11:49:25 - INFO - codeparrot_training - Step 19040: {'lr': 0.0004838234706211792, 'samples': 9748992, 'steps': 19040, 'loss/train': 1.9779973030090332} +03/04/2022 11:49:26 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/04/2022 11:49:31 - INFO - codeparrot_training - Step 19041: {'lr': 0.00048382159266055746, 'samples': 9749504, 'steps': 19041, 'loss/train': 2.190882921218872} +03/04/2022 11:49:34 - INFO - codeparrot_training - Step 19042: {'lr': 0.0004838197145945793, 'samples': 9750016, 'steps': 19042, 'loss/train': 1.2907377481460571} +03/04/2022 11:49:34 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 11:49:39 - INFO - codeparrot_training - Step 19043: {'lr': 0.0004838178364232456, 'samples': 9750528, 'steps': 19043, 'loss/train': 1.638651967048645} +03/04/2022 11:49:42 - INFO - codeparrot_training - Step 19044: {'lr': 0.00048381595814655723, 'samples': 9751040, 'steps': 19044, 'loss/train': 1.438443660736084} +03/04/2022 11:49:43 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/04/2022 11:49:48 - INFO - codeparrot_training - Step 19045: {'lr': 0.000483814079764515, 'samples': 9751552, 'steps': 19045, 'loss/train': 2.238020658493042} +03/04/2022 11:49:51 - INFO - codeparrot_training - Step 19046: {'lr': 0.00048381220127711967, 'samples': 9752064, 'steps': 19046, 'loss/train': 2.3671090602874756} +03/04/2022 11:49:52 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 11:49:56 - INFO - codeparrot_training - Step 19047: {'lr': 0.0004838103226843722, 'samples': 9752576, 'steps': 19047, 'loss/train': 2.1221764087677} +03/04/2022 11:49:59 - INFO - codeparrot_training - Step 19048: {'lr': 0.00048380844398627343, 'samples': 9753088, 'steps': 19048, 'loss/train': 2.0431272983551025} +03/04/2022 11:50:00 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/04/2022 11:50:05 - INFO - codeparrot_training - Step 19049: {'lr': 0.0004838065651828242, 'samples': 9753600, 'steps': 19049, 'loss/train': 2.2768983840942383} +03/04/2022 11:50:08 - INFO - codeparrot_training - Step 19050: {'lr': 0.0004838046862740253, 'samples': 9754112, 'steps': 19050, 'loss/train': 1.7647593021392822} +03/04/2022 11:50:09 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 11:50:13 - INFO - codeparrot_training - Step 19051: {'lr': 0.0004838028072598777, 'samples': 9754624, 'steps': 19051, 'loss/train': 1.7352306842803955} +03/04/2022 11:50:16 - INFO - codeparrot_training - Step 19052: {'lr': 0.00048380092814038204, 'samples': 9755136, 'steps': 19052, 'loss/train': 1.452945590019226} +03/04/2022 11:50:17 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 11:50:21 - INFO - codeparrot_training - Step 19053: {'lr': 0.0004837990489155394, 'samples': 9755648, 'steps': 19053, 'loss/train': 2.563706874847412} +03/04/2022 11:50:25 - INFO - codeparrot_training - Step 19054: {'lr': 0.00048379716958535043, 'samples': 9756160, 'steps': 19054, 'loss/train': 2.543973922729492} +03/04/2022 11:50:25 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 11:50:30 - INFO - codeparrot_training - Step 19055: {'lr': 0.00048379529014981604, 'samples': 9756672, 'steps': 19055, 'loss/train': 2.4402763843536377} +03/04/2022 11:50:33 - INFO - codeparrot_training - Step 19056: {'lr': 0.0004837934106089372, 'samples': 9757184, 'steps': 19056, 'loss/train': 1.7875601053237915} +03/04/2022 11:50:34 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/04/2022 11:50:38 - INFO - codeparrot_training - Step 19057: {'lr': 0.0004837915309627146, 'samples': 9757696, 'steps': 19057, 'loss/train': 1.4509854316711426} +03/04/2022 11:50:41 - INFO - codeparrot_training - Step 19058: {'lr': 0.00048378965121114917, 'samples': 9758208, 'steps': 19058, 'loss/train': 1.6792768239974976} +03/04/2022 11:50:42 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/04/2022 11:50:47 - INFO - codeparrot_training - Step 19059: {'lr': 0.00048378777135424166, 'samples': 9758720, 'steps': 19059, 'loss/train': 2.413252353668213} +03/04/2022 11:50:50 - INFO - codeparrot_training - Step 19060: {'lr': 0.0004837858913919931, 'samples': 9759232, 'steps': 19060, 'loss/train': 1.2645363807678223} +03/04/2022 11:50:51 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/04/2022 11:50:55 - INFO - codeparrot_training - Step 19061: {'lr': 0.0004837840113244042, 'samples': 9759744, 'steps': 19061, 'loss/train': 1.630685806274414} +03/04/2022 11:50:58 - INFO - codeparrot_training - Step 19062: {'lr': 0.00048378213115147573, 'samples': 9760256, 'steps': 19062, 'loss/train': 1.057685136795044} +03/04/2022 11:50:59 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 11:51:04 - INFO - codeparrot_training - Step 19063: {'lr': 0.00048378025087320877, 'samples': 9760768, 'steps': 19063, 'loss/train': 1.7737960815429688} +03/04/2022 11:51:07 - INFO - codeparrot_training - Step 19064: {'lr': 0.0004837783704896039, 'samples': 9761280, 'steps': 19064, 'loss/train': 1.5925313234329224} +03/04/2022 11:51:08 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/04/2022 11:51:12 - INFO - codeparrot_training - Step 19065: {'lr': 0.0004837764900006623, 'samples': 9761792, 'steps': 19065, 'loss/train': 2.1146035194396973} +03/04/2022 11:51:15 - INFO - codeparrot_training - Step 19066: {'lr': 0.0004837746094063844, 'samples': 9762304, 'steps': 19066, 'loss/train': 2.251293420791626} +03/04/2022 11:51:16 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/04/2022 11:51:20 - INFO - codeparrot_training - Step 19067: {'lr': 0.00048377272870677135, 'samples': 9762816, 'steps': 19067, 'loss/train': 1.7928745746612549} +03/04/2022 11:51:23 - INFO - codeparrot_training - Step 19068: {'lr': 0.000483770847901824, 'samples': 9763328, 'steps': 19068, 'loss/train': 1.2164032459259033} +03/04/2022 11:51:24 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/04/2022 11:51:29 - INFO - codeparrot_training - Step 19069: {'lr': 0.000483768966991543, 'samples': 9763840, 'steps': 19069, 'loss/train': 1.2205106019973755} +03/04/2022 11:51:32 - INFO - codeparrot_training - Step 19070: {'lr': 0.0004837670859759294, 'samples': 9764352, 'steps': 19070, 'loss/train': 1.5738540887832642} +03/04/2022 11:51:32 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/04/2022 11:51:37 - INFO - codeparrot_training - Step 19071: {'lr': 0.0004837652048549839, 'samples': 9764864, 'steps': 19071, 'loss/train': 1.7990918159484863} +03/04/2022 11:51:40 - INFO - codeparrot_training - Step 19072: {'lr': 0.00048376332362870745, 'samples': 9765376, 'steps': 19072, 'loss/train': 0.4122887849807739} +03/04/2022 11:51:41 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/04/2022 11:51:46 - INFO - codeparrot_training - Step 19073: {'lr': 0.00048376144229710083, 'samples': 9765888, 'steps': 19073, 'loss/train': 1.8524566888809204} +03/04/2022 11:51:49 - INFO - codeparrot_training - Step 19074: {'lr': 0.00048375956086016495, 'samples': 9766400, 'steps': 19074, 'loss/train': 0.6110670566558838} +03/04/2022 11:51:50 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/04/2022 11:51:54 - INFO - codeparrot_training - Step 19075: {'lr': 0.0004837576793179005, 'samples': 9766912, 'steps': 19075, 'loss/train': 1.9604233503341675} +03/04/2022 11:51:57 - INFO - codeparrot_training - Step 19076: {'lr': 0.00048375579767030854, 'samples': 9767424, 'steps': 19076, 'loss/train': 2.643186330795288} +03/04/2022 11:51:58 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 11:52:02 - INFO - codeparrot_training - Step 19077: {'lr': 0.0004837539159173898, 'samples': 9767936, 'steps': 19077, 'loss/train': 1.9121967554092407} +03/04/2022 11:52:06 - INFO - codeparrot_training - Step 19078: {'lr': 0.00048375203405914515, 'samples': 9768448, 'steps': 19078, 'loss/train': 1.8233567476272583} +03/04/2022 11:52:06 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/04/2022 11:52:11 - INFO - codeparrot_training - Step 19079: {'lr': 0.00048375015209557547, 'samples': 9768960, 'steps': 19079, 'loss/train': 1.0104713439941406} +03/04/2022 11:52:14 - INFO - codeparrot_training - Step 19080: {'lr': 0.00048374827002668156, 'samples': 9769472, 'steps': 19080, 'loss/train': 2.27640438079834} +03/04/2022 11:52:15 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/04/2022 11:52:19 - INFO - codeparrot_training - Step 19081: {'lr': 0.0004837463878524643, 'samples': 9769984, 'steps': 19081, 'loss/train': 1.6136780977249146} +03/04/2022 11:52:22 - INFO - codeparrot_training - Step 19082: {'lr': 0.0004837445055729245, 'samples': 9770496, 'steps': 19082, 'loss/train': 1.794377088546753} +03/04/2022 11:52:23 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/04/2022 11:52:28 - INFO - codeparrot_training - Step 19083: {'lr': 0.00048374262318806306, 'samples': 9771008, 'steps': 19083, 'loss/train': 2.699237585067749} +03/04/2022 11:52:31 - INFO - codeparrot_training - Step 19084: {'lr': 0.00048374074069788077, 'samples': 9771520, 'steps': 19084, 'loss/train': 1.7152440547943115} +03/04/2022 11:52:32 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/04/2022 11:52:36 - INFO - codeparrot_training - Step 19085: {'lr': 0.0004837388581023785, 'samples': 9772032, 'steps': 19085, 'loss/train': 2.246276617050171} +03/04/2022 11:52:39 - INFO - codeparrot_training - Step 19086: {'lr': 0.0004837369754015571, 'samples': 9772544, 'steps': 19086, 'loss/train': 2.317816734313965} +03/04/2022 11:52:40 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/04/2022 11:52:44 - INFO - codeparrot_training - Step 19087: {'lr': 0.0004837350925954175, 'samples': 9773056, 'steps': 19087, 'loss/train': 2.17105770111084} +03/04/2022 11:52:47 - INFO - codeparrot_training - Step 19088: {'lr': 0.00048373320968396043, 'samples': 9773568, 'steps': 19088, 'loss/train': 1.25314199924469} +03/04/2022 11:52:48 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 11:52:53 - INFO - codeparrot_training - Step 19089: {'lr': 0.0004837313266671868, 'samples': 9774080, 'steps': 19089, 'loss/train': 2.478390693664551} +03/04/2022 11:52:56 - INFO - codeparrot_training - Step 19090: {'lr': 0.0004837294435450974, 'samples': 9774592, 'steps': 19090, 'loss/train': 2.1400089263916016} +03/04/2022 11:52:57 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/04/2022 11:53:01 - INFO - codeparrot_training - Step 19091: {'lr': 0.00048372756031769316, 'samples': 9775104, 'steps': 19091, 'loss/train': 1.4048184156417847} +03/04/2022 11:53:04 - INFO - codeparrot_training - Step 19092: {'lr': 0.00048372567698497487, 'samples': 9775616, 'steps': 19092, 'loss/train': 1.4838258028030396} +03/04/2022 11:53:05 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 11:53:10 - INFO - codeparrot_training - Step 19093: {'lr': 0.0004837237935469434, 'samples': 9776128, 'steps': 19093, 'loss/train': 2.8436176776885986} +03/04/2022 11:53:13 - INFO - codeparrot_training - Step 19094: {'lr': 0.00048372191000359955, 'samples': 9776640, 'steps': 19094, 'loss/train': 1.7321768999099731} +03/04/2022 11:53:14 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/04/2022 11:53:18 - INFO - codeparrot_training - Step 19095: {'lr': 0.00048372002635494425, 'samples': 9777152, 'steps': 19095, 'loss/train': 1.1744433641433716} +03/04/2022 11:53:21 - INFO - codeparrot_training - Step 19096: {'lr': 0.00048371814260097834, 'samples': 9777664, 'steps': 19096, 'loss/train': 2.2022459506988525} +03/04/2022 11:53:23 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/04/2022 11:53:27 - INFO - codeparrot_training - Step 19097: {'lr': 0.0004837162587417027, 'samples': 9778176, 'steps': 19097, 'loss/train': 2.0238356590270996} +03/04/2022 11:53:30 - INFO - codeparrot_training - Step 19098: {'lr': 0.000483714374777118, 'samples': 9778688, 'steps': 19098, 'loss/train': 1.7563862800598145} +03/04/2022 11:53:31 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 11:53:35 - INFO - codeparrot_training - Step 19099: {'lr': 0.00048371249070722525, 'samples': 9779200, 'steps': 19099, 'loss/train': 2.233898401260376} +03/04/2022 11:53:38 - INFO - codeparrot_training - Step 19100: {'lr': 0.0004837106065320253, 'samples': 9779712, 'steps': 19100, 'loss/train': 1.9447814226150513} +03/04/2022 11:53:39 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 11:53:44 - INFO - codeparrot_training - Step 19101: {'lr': 0.00048370872225151886, 'samples': 9780224, 'steps': 19101, 'loss/train': 1.9835928678512573} +03/04/2022 11:53:47 - INFO - codeparrot_training - Step 19102: {'lr': 0.0004837068378657069, 'samples': 9780736, 'steps': 19102, 'loss/train': 3.504672050476074} +03/04/2022 11:53:48 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 11:53:52 - INFO - codeparrot_training - Step 19103: {'lr': 0.0004837049533745903, 'samples': 9781248, 'steps': 19103, 'loss/train': 1.6461220979690552} +03/04/2022 11:53:55 - INFO - codeparrot_training - Step 19104: {'lr': 0.00048370306877816983, 'samples': 9781760, 'steps': 19104, 'loss/train': 1.7725573778152466} +03/04/2022 11:53:56 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/04/2022 11:54:00 - INFO - codeparrot_training - Step 19105: {'lr': 0.00048370118407644637, 'samples': 9782272, 'steps': 19105, 'loss/train': 2.170769691467285} +03/04/2022 11:54:04 - INFO - codeparrot_training - Step 19106: {'lr': 0.0004836992992694208, 'samples': 9782784, 'steps': 19106, 'loss/train': 1.5784662961959839} +03/04/2022 11:54:04 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/04/2022 11:54:09 - INFO - codeparrot_training - Step 19107: {'lr': 0.00048369741435709383, 'samples': 9783296, 'steps': 19107, 'loss/train': 1.3038992881774902} +03/04/2022 11:54:12 - INFO - codeparrot_training - Step 19108: {'lr': 0.0004836955293394665, 'samples': 9783808, 'steps': 19108, 'loss/train': 0.14452621340751648} +03/04/2022 11:54:13 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 11:54:17 - INFO - codeparrot_training - Step 19109: {'lr': 0.00048369364421653953, 'samples': 9784320, 'steps': 19109, 'loss/train': 1.9905946254730225} +03/04/2022 11:54:20 - INFO - codeparrot_training - Step 19110: {'lr': 0.00048369175898831384, 'samples': 9784832, 'steps': 19110, 'loss/train': 1.8986555337905884} +03/04/2022 11:54:21 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/04/2022 11:54:26 - INFO - codeparrot_training - Step 19111: {'lr': 0.0004836898736547902, 'samples': 9785344, 'steps': 19111, 'loss/train': 1.5801424980163574} +03/04/2022 11:54:29 - INFO - codeparrot_training - Step 19112: {'lr': 0.0004836879882159696, 'samples': 9785856, 'steps': 19112, 'loss/train': 1.1774191856384277} +03/04/2022 11:54:30 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 11:54:34 - INFO - codeparrot_training - Step 19113: {'lr': 0.0004836861026718527, 'samples': 9786368, 'steps': 19113, 'loss/train': 1.3509186506271362} +03/04/2022 11:54:37 - INFO - codeparrot_training - Step 19114: {'lr': 0.00048368421702244045, 'samples': 9786880, 'steps': 19114, 'loss/train': 1.0715093612670898} +03/04/2022 11:54:39 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/04/2022 11:54:43 - INFO - codeparrot_training - Step 19115: {'lr': 0.00048368233126773377, 'samples': 9787392, 'steps': 19115, 'loss/train': 2.2267539501190186} +03/04/2022 11:54:46 - INFO - codeparrot_training - Step 19116: {'lr': 0.0004836804454077334, 'samples': 9787904, 'steps': 19116, 'loss/train': 0.31432002782821655} +03/04/2022 11:54:48 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/04/2022 11:54:51 - INFO - codeparrot_training - Step 19117: {'lr': 0.0004836785594424402, 'samples': 9788416, 'steps': 19117, 'loss/train': 1.4672951698303223} +03/04/2022 11:54:54 - INFO - codeparrot_training - Step 19118: {'lr': 0.0004836766733718551, 'samples': 9788928, 'steps': 19118, 'loss/train': 1.642343282699585} +03/04/2022 11:54:57 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/04/2022 11:55:00 - INFO - codeparrot_training - Step 19119: {'lr': 0.0004836747871959789, 'samples': 9789440, 'steps': 19119, 'loss/train': 2.110652446746826} +03/04/2022 11:55:03 - INFO - codeparrot_training - Step 19120: {'lr': 0.0004836729009148124, 'samples': 9789952, 'steps': 19120, 'loss/train': 1.5951192378997803} +03/04/2022 11:55:05 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/04/2022 11:55:08 - INFO - codeparrot_training - Step 19121: {'lr': 0.0004836710145283565, 'samples': 9790464, 'steps': 19121, 'loss/train': 1.269866704940796} +03/04/2022 11:55:11 - INFO - codeparrot_training - Step 19122: {'lr': 0.0004836691280366121, 'samples': 9790976, 'steps': 19122, 'loss/train': 1.9087607860565186} +03/04/2022 11:55:14 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/04/2022 11:55:16 - INFO - codeparrot_training - Step 19123: {'lr': 0.00048366724143958, 'samples': 9791488, 'steps': 19123, 'loss/train': 1.862273097038269} +03/04/2022 11:55:20 - INFO - codeparrot_training - Step 19124: {'lr': 0.0004836653547372609, 'samples': 9792000, 'steps': 19124, 'loss/train': 2.057279109954834} +03/04/2022 11:55:22 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/04/2022 11:55:25 - INFO - codeparrot_training - Step 19125: {'lr': 0.00048366346792965597, 'samples': 9792512, 'steps': 19125, 'loss/train': 1.9222984313964844} +03/04/2022 11:55:28 - INFO - codeparrot_training - Step 19126: {'lr': 0.0004836615810167658, 'samples': 9793024, 'steps': 19126, 'loss/train': 2.2686641216278076} +03/04/2022 11:55:32 - INFO - codeparrot_training - Step 19127: {'lr': 0.00048365969399859134, 'samples': 9793536, 'steps': 19127, 'loss/train': 0.8202565312385559} +03/04/2022 11:55:32 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 11:55:37 - INFO - codeparrot_training - Step 19128: {'lr': 0.00048365780687513346, 'samples': 9794048, 'steps': 19128, 'loss/train': 2.5410282611846924} +03/04/2022 11:55:40 - INFO - codeparrot_training - Step 19129: {'lr': 0.00048365591964639294, 'samples': 9794560, 'steps': 19129, 'loss/train': 2.3165457248687744} +03/04/2022 11:55:40 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/04/2022 11:55:45 - INFO - codeparrot_training - Step 19130: {'lr': 0.0004836540323123707, 'samples': 9795072, 'steps': 19130, 'loss/train': 1.4240673780441284} +03/04/2022 11:55:49 - INFO - codeparrot_training - Step 19131: {'lr': 0.00048365214487306753, 'samples': 9795584, 'steps': 19131, 'loss/train': 1.878705382347107} +03/04/2022 11:55:49 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 11:55:54 - INFO - codeparrot_training - Step 19132: {'lr': 0.00048365025732848433, 'samples': 9796096, 'steps': 19132, 'loss/train': 1.3484946489334106} +03/04/2022 11:55:57 - INFO - codeparrot_training - Step 19133: {'lr': 0.0004836483696786219, 'samples': 9796608, 'steps': 19133, 'loss/train': 0.6353148818016052} +03/04/2022 11:55:57 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/04/2022 11:56:03 - INFO - codeparrot_training - Step 19134: {'lr': 0.00048364648192348117, 'samples': 9797120, 'steps': 19134, 'loss/train': 1.8673421144485474} +03/04/2022 11:56:06 - INFO - codeparrot_training - Step 19135: {'lr': 0.0004836445940630629, 'samples': 9797632, 'steps': 19135, 'loss/train': 7.163940906524658} +03/04/2022 11:56:06 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 11:56:12 - INFO - codeparrot_training - Step 19136: {'lr': 0.0004836427060973679, 'samples': 9798144, 'steps': 19136, 'loss/train': 1.746547818183899} +03/04/2022 11:56:15 - INFO - codeparrot_training - Step 19137: {'lr': 0.00048364081802639724, 'samples': 9798656, 'steps': 19137, 'loss/train': 1.1063121557235718} +03/04/2022 11:56:15 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/04/2022 11:56:20 - INFO - codeparrot_training - Step 19138: {'lr': 0.00048363892985015157, 'samples': 9799168, 'steps': 19138, 'loss/train': 2.475808620452881} +03/04/2022 11:56:24 - INFO - codeparrot_training - Step 19139: {'lr': 0.00048363704156863187, 'samples': 9799680, 'steps': 19139, 'loss/train': 1.6867518424987793} +03/04/2022 11:56:24 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/04/2022 11:56:29 - INFO - codeparrot_training - Step 19140: {'lr': 0.0004836351531818388, 'samples': 9800192, 'steps': 19140, 'loss/train': 1.982181429862976} +03/04/2022 11:56:32 - INFO - codeparrot_training - Step 19141: {'lr': 0.00048363326468977343, 'samples': 9800704, 'steps': 19141, 'loss/train': 1.9938501119613647} +03/04/2022 11:56:34 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/04/2022 11:56:38 - INFO - codeparrot_training - Step 19142: {'lr': 0.00048363137609243654, 'samples': 9801216, 'steps': 19142, 'loss/train': 2.0216190814971924} +03/04/2022 11:56:41 - INFO - codeparrot_training - Step 19143: {'lr': 0.0004836294873898289, 'samples': 9801728, 'steps': 19143, 'loss/train': 1.3790148496627808} +03/04/2022 11:56:42 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 11:56:46 - INFO - codeparrot_training - Step 19144: {'lr': 0.00048362759858195146, 'samples': 9802240, 'steps': 19144, 'loss/train': 2.297219753265381} +03/04/2022 11:56:49 - INFO - codeparrot_training - Step 19145: {'lr': 0.0004836257096688049, 'samples': 9802752, 'steps': 19145, 'loss/train': 2.281494379043579} +03/04/2022 11:56:51 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 11:56:54 - INFO - codeparrot_training - Step 19146: {'lr': 0.00048362382065039034, 'samples': 9803264, 'steps': 19146, 'loss/train': 1.4901124238967896} +03/04/2022 11:56:58 - INFO - codeparrot_training - Step 19147: {'lr': 0.00048362193152670847, 'samples': 9803776, 'steps': 19147, 'loss/train': 2.378389596939087} +03/04/2022 11:56:59 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 11:57:03 - INFO - codeparrot_training - Step 19148: {'lr': 0.0004836200422977601, 'samples': 9804288, 'steps': 19148, 'loss/train': 2.3423774242401123} +03/04/2022 11:57:06 - INFO - codeparrot_training - Step 19149: {'lr': 0.00048361815296354624, 'samples': 9804800, 'steps': 19149, 'loss/train': 1.0031681060791016} +03/04/2022 11:57:07 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/04/2022 11:57:11 - INFO - codeparrot_training - Step 19150: {'lr': 0.00048361626352406756, 'samples': 9805312, 'steps': 19150, 'loss/train': 1.2146589756011963} +03/04/2022 11:57:14 - INFO - codeparrot_training - Step 19151: {'lr': 0.00048361437397932504, 'samples': 9805824, 'steps': 19151, 'loss/train': 1.7427363395690918} +03/04/2022 11:57:16 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/04/2022 11:57:20 - INFO - codeparrot_training - Step 19152: {'lr': 0.0004836124843293195, 'samples': 9806336, 'steps': 19152, 'loss/train': 1.4175242185592651} +03/04/2022 11:57:23 - INFO - codeparrot_training - Step 19153: {'lr': 0.00048361059457405176, 'samples': 9806848, 'steps': 19153, 'loss/train': 1.8482346534729004} +03/04/2022 11:57:24 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 11:57:28 - INFO - codeparrot_training - Step 19154: {'lr': 0.0004836087047135227, 'samples': 9807360, 'steps': 19154, 'loss/train': 1.7978466749191284} +03/04/2022 11:57:31 - INFO - codeparrot_training - Step 19155: {'lr': 0.0004836068147477331, 'samples': 9807872, 'steps': 19155, 'loss/train': 0.8323595523834229} +03/04/2022 11:57:33 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/04/2022 11:57:37 - INFO - codeparrot_training - Step 19156: {'lr': 0.0004836049246766839, 'samples': 9808384, 'steps': 19156, 'loss/train': 1.709352731704712} +03/04/2022 11:57:40 - INFO - codeparrot_training - Step 19157: {'lr': 0.000483603034500376, 'samples': 9808896, 'steps': 19157, 'loss/train': 2.8450510501861572} +03/04/2022 11:57:41 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 11:57:45 - INFO - codeparrot_training - Step 19158: {'lr': 0.0004836011442188101, 'samples': 9809408, 'steps': 19158, 'loss/train': 2.048278331756592} +03/04/2022 11:57:48 - INFO - codeparrot_training - Step 19159: {'lr': 0.00048359925383198714, 'samples': 9809920, 'steps': 19159, 'loss/train': 2.202749729156494} +03/04/2022 11:57:50 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/04/2022 11:57:54 - INFO - codeparrot_training - Step 19160: {'lr': 0.000483597363339908, 'samples': 9810432, 'steps': 19160, 'loss/train': 3.1279592514038086} +03/04/2022 11:57:57 - INFO - codeparrot_training - Step 19161: {'lr': 0.0004835954727425734, 'samples': 9810944, 'steps': 19161, 'loss/train': 1.1556938886642456} +03/04/2022 11:57:58 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/04/2022 11:58:02 - INFO - codeparrot_training - Step 19162: {'lr': 0.0004835935820399844, 'samples': 9811456, 'steps': 19162, 'loss/train': 1.2859561443328857} +03/04/2022 11:58:05 - INFO - codeparrot_training - Step 19163: {'lr': 0.0004835916912321417, 'samples': 9811968, 'steps': 19163, 'loss/train': 1.7841856479644775} +03/04/2022 11:58:07 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/04/2022 11:58:11 - INFO - codeparrot_training - Step 19164: {'lr': 0.0004835898003190462, 'samples': 9812480, 'steps': 19164, 'loss/train': 1.51145339012146} +03/04/2022 11:58:14 - INFO - codeparrot_training - Step 19165: {'lr': 0.00048358790930069876, 'samples': 9812992, 'steps': 19165, 'loss/train': 2.3694372177124023} +03/04/2022 11:58:16 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 11:58:19 - INFO - codeparrot_training - Step 19166: {'lr': 0.0004835860181771001, 'samples': 9813504, 'steps': 19166, 'loss/train': 1.9863017797470093} +03/04/2022 11:58:22 - INFO - codeparrot_training - Step 19167: {'lr': 0.0004835841269482513, 'samples': 9814016, 'steps': 19167, 'loss/train': 1.4639633893966675} +03/04/2022 11:58:25 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 11:58:27 - INFO - codeparrot_training - Step 19168: {'lr': 0.00048358223561415306, 'samples': 9814528, 'steps': 19168, 'loss/train': 1.8229817152023315} +03/04/2022 11:58:31 - INFO - codeparrot_training - Step 19169: {'lr': 0.0004835803441748062, 'samples': 9815040, 'steps': 19169, 'loss/train': 2.03450083732605} +03/04/2022 11:58:33 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 11:58:36 - INFO - codeparrot_training - Step 19170: {'lr': 0.0004835784526302117, 'samples': 9815552, 'steps': 19170, 'loss/train': 2.0309102535247803} +03/04/2022 11:58:39 - INFO - codeparrot_training - Step 19171: {'lr': 0.0004835765609803704, 'samples': 9816064, 'steps': 19171, 'loss/train': 2.1762948036193848} +03/04/2022 11:58:42 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/04/2022 11:58:44 - INFO - codeparrot_training - Step 19172: {'lr': 0.00048357466922528306, 'samples': 9816576, 'steps': 19172, 'loss/train': 1.680019497871399} +03/04/2022 11:58:48 - INFO - codeparrot_training - Step 19173: {'lr': 0.00048357277736495055, 'samples': 9817088, 'steps': 19173, 'loss/train': 2.8795645236968994} +03/04/2022 11:58:50 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/04/2022 11:58:53 - INFO - codeparrot_training - Step 19174: {'lr': 0.0004835708853993738, 'samples': 9817600, 'steps': 19174, 'loss/train': 2.565791368484497} +03/04/2022 11:58:56 - INFO - codeparrot_training - Step 19175: {'lr': 0.0004835689933285536, 'samples': 9818112, 'steps': 19175, 'loss/train': 1.8138941526412964} +03/04/2022 11:58:58 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 11:59:01 - INFO - codeparrot_training - Step 19176: {'lr': 0.0004835671011524908, 'samples': 9818624, 'steps': 19176, 'loss/train': 2.5753040313720703} +03/04/2022 11:59:04 - INFO - codeparrot_training - Step 19177: {'lr': 0.0004835652088711863, 'samples': 9819136, 'steps': 19177, 'loss/train': 1.8757364749908447} +03/04/2022 11:59:07 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/04/2022 11:59:10 - INFO - codeparrot_training - Step 19178: {'lr': 0.0004835633164846409, 'samples': 9819648, 'steps': 19178, 'loss/train': 1.2767757177352905} +03/04/2022 11:59:13 - INFO - codeparrot_training - Step 19179: {'lr': 0.00048356142399285545, 'samples': 9820160, 'steps': 19179, 'loss/train': 2.2289605140686035} +03/04/2022 11:59:15 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/04/2022 11:59:18 - INFO - codeparrot_training - Step 19180: {'lr': 0.00048355953139583087, 'samples': 9820672, 'steps': 19180, 'loss/train': 2.0466277599334717} +03/04/2022 11:59:21 - INFO - codeparrot_training - Step 19181: {'lr': 0.00048355763869356794, 'samples': 9821184, 'steps': 19181, 'loss/train': 1.4102799892425537} +03/04/2022 11:59:24 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 11:59:27 - INFO - codeparrot_training - Step 19182: {'lr': 0.0004835557458860675, 'samples': 9821696, 'steps': 19182, 'loss/train': 1.8225607872009277} +03/04/2022 11:59:30 - INFO - codeparrot_training - Step 19183: {'lr': 0.00048355385297333054, 'samples': 9822208, 'steps': 19183, 'loss/train': 1.7419897317886353} +03/04/2022 11:59:33 - INFO - codeparrot_training - Step 19184: {'lr': 0.0004835519599553578, 'samples': 9822720, 'steps': 19184, 'loss/train': 0.32982850074768066} +03/04/2022 11:59:33 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/04/2022 11:59:38 - INFO - codeparrot_training - Step 19185: {'lr': 0.0004835500668321501, 'samples': 9823232, 'steps': 19185, 'loss/train': 1.4835785627365112} +03/04/2022 11:59:41 - INFO - codeparrot_training - Step 19186: {'lr': 0.0004835481736037084, 'samples': 9823744, 'steps': 19186, 'loss/train': 1.8163987398147583} +03/04/2022 11:59:42 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/04/2022 11:59:47 - INFO - codeparrot_training - Step 19187: {'lr': 0.0004835462802700334, 'samples': 9824256, 'steps': 19187, 'loss/train': 1.8382768630981445} +03/04/2022 11:59:50 - INFO - codeparrot_training - Step 19188: {'lr': 0.00048354438683112614, 'samples': 9824768, 'steps': 19188, 'loss/train': 2.3431332111358643} +03/04/2022 11:59:50 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/04/2022 11:59:55 - INFO - codeparrot_training - Step 19189: {'lr': 0.00048354249328698743, 'samples': 9825280, 'steps': 19189, 'loss/train': 1.530107855796814} +03/04/2022 11:59:58 - INFO - codeparrot_training - Step 19190: {'lr': 0.000483540599637618, 'samples': 9825792, 'steps': 19190, 'loss/train': 1.6673619747161865} +03/04/2022 11:59:59 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 12:00:04 - INFO - codeparrot_training - Step 19191: {'lr': 0.00048353870588301875, 'samples': 9826304, 'steps': 19191, 'loss/train': 2.306063413619995} +03/04/2022 12:00:07 - INFO - codeparrot_training - Step 19192: {'lr': 0.00048353681202319056, 'samples': 9826816, 'steps': 19192, 'loss/train': 1.1075419187545776} +03/04/2022 12:00:07 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/04/2022 12:00:12 - INFO - codeparrot_training - Step 19193: {'lr': 0.0004835349180581343, 'samples': 9827328, 'steps': 19193, 'loss/train': 2.191236734390259} +03/04/2022 12:00:15 - INFO - codeparrot_training - Step 19194: {'lr': 0.0004835330239878509, 'samples': 9827840, 'steps': 19194, 'loss/train': 1.6895511150360107} +03/04/2022 12:00:16 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/04/2022 12:00:21 - INFO - codeparrot_training - Step 19195: {'lr': 0.00048353112981234104, 'samples': 9828352, 'steps': 19195, 'loss/train': 1.9214640855789185} +03/04/2022 12:00:24 - INFO - codeparrot_training - Step 19196: {'lr': 0.0004835292355316057, 'samples': 9828864, 'steps': 19196, 'loss/train': 2.1213974952697754} +03/04/2022 12:00:24 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 12:00:29 - INFO - codeparrot_training - Step 19197: {'lr': 0.0004835273411456456, 'samples': 9829376, 'steps': 19197, 'loss/train': 1.8801548480987549} +03/04/2022 12:00:32 - INFO - codeparrot_training - Step 19198: {'lr': 0.00048352544665446174, 'samples': 9829888, 'steps': 19198, 'loss/train': 1.9582149982452393} +03/04/2022 12:00:33 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/04/2022 12:00:38 - INFO - codeparrot_training - Step 19199: {'lr': 0.000483523552058055, 'samples': 9830400, 'steps': 19199, 'loss/train': 1.9739776849746704} +03/04/2022 12:00:41 - INFO - codeparrot_training - Step 19200: {'lr': 0.00048352165735642607, 'samples': 9830912, 'steps': 19200, 'loss/train': 2.1278481483459473} +03/04/2022 12:00:41 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/04/2022 12:00:46 - INFO - codeparrot_training - Step 19201: {'lr': 0.00048351976254957585, 'samples': 9831424, 'steps': 19201, 'loss/train': 0.1349494755268097} +03/04/2022 12:00:49 - INFO - codeparrot_training - Step 19202: {'lr': 0.0004835178676375053, 'samples': 9831936, 'steps': 19202, 'loss/train': 2.1137335300445557} +03/04/2022 12:00:50 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/04/2022 12:00:55 - INFO - codeparrot_training - Step 19203: {'lr': 0.0004835159726202151, 'samples': 9832448, 'steps': 19203, 'loss/train': 2.3948311805725098} +03/04/2022 12:00:58 - INFO - codeparrot_training - Step 19204: {'lr': 0.0004835140774977063, 'samples': 9832960, 'steps': 19204, 'loss/train': 1.5204931497573853} +03/04/2022 12:00:58 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/04/2022 12:01:03 - INFO - codeparrot_training - Step 19205: {'lr': 0.0004835121822699796, 'samples': 9833472, 'steps': 19205, 'loss/train': 0.251239150762558} +03/04/2022 12:01:06 - INFO - codeparrot_training - Step 19206: {'lr': 0.000483510286937036, 'samples': 9833984, 'steps': 19206, 'loss/train': 2.0902206897735596} +03/04/2022 12:01:07 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/04/2022 12:01:12 - INFO - codeparrot_training - Step 19207: {'lr': 0.0004835083914988762, 'samples': 9834496, 'steps': 19207, 'loss/train': 2.0913524627685547} +03/04/2022 12:01:15 - INFO - codeparrot_training - Step 19208: {'lr': 0.0004835064959555011, 'samples': 9835008, 'steps': 19208, 'loss/train': 2.2383651733398438} +03/04/2022 12:01:16 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 12:01:20 - INFO - codeparrot_training - Step 19209: {'lr': 0.00048350460030691165, 'samples': 9835520, 'steps': 19209, 'loss/train': 0.13571296632289886} +03/04/2022 12:01:23 - INFO - codeparrot_training - Step 19210: {'lr': 0.00048350270455310864, 'samples': 9836032, 'steps': 19210, 'loss/train': 2.2258427143096924} +03/04/2022 12:01:24 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/04/2022 12:01:29 - INFO - codeparrot_training - Step 19211: {'lr': 0.00048350080869409285, 'samples': 9836544, 'steps': 19211, 'loss/train': 2.1521544456481934} +03/04/2022 12:01:32 - INFO - codeparrot_training - Step 19212: {'lr': 0.0004834989127298652, 'samples': 9837056, 'steps': 19212, 'loss/train': 1.8312052488327026} +03/04/2022 12:01:33 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/04/2022 12:01:37 - INFO - codeparrot_training - Step 19213: {'lr': 0.00048349701666042656, 'samples': 9837568, 'steps': 19213, 'loss/train': 1.7185653448104858} +03/04/2022 12:01:40 - INFO - codeparrot_training - Step 19214: {'lr': 0.00048349512048577784, 'samples': 9838080, 'steps': 19214, 'loss/train': 1.8280302286148071} +03/04/2022 12:01:41 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/04/2022 12:01:46 - INFO - codeparrot_training - Step 19215: {'lr': 0.00048349322420591966, 'samples': 9838592, 'steps': 19215, 'loss/train': 1.0927460193634033} +03/04/2022 12:01:49 - INFO - codeparrot_training - Step 19216: {'lr': 0.00048349132782085316, 'samples': 9839104, 'steps': 19216, 'loss/train': 2.105433940887451} +03/04/2022 12:01:50 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/04/2022 12:01:54 - INFO - codeparrot_training - Step 19217: {'lr': 0.00048348943133057903, 'samples': 9839616, 'steps': 19217, 'loss/train': 2.235795736312866} +03/04/2022 12:01:57 - INFO - codeparrot_training - Step 19218: {'lr': 0.0004834875347350982, 'samples': 9840128, 'steps': 19218, 'loss/train': 1.8776991367340088} +03/04/2022 12:01:58 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/04/2022 12:02:03 - INFO - codeparrot_training - Step 19219: {'lr': 0.00048348563803441146, 'samples': 9840640, 'steps': 19219, 'loss/train': 1.2585382461547852} +03/04/2022 12:02:06 - INFO - codeparrot_training - Step 19220: {'lr': 0.0004834837412285197, 'samples': 9841152, 'steps': 19220, 'loss/train': 2.2572309970855713} +03/04/2022 12:02:07 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/04/2022 12:02:11 - INFO - codeparrot_training - Step 19221: {'lr': 0.00048348184431742377, 'samples': 9841664, 'steps': 19221, 'loss/train': 2.3496923446655273} +03/04/2022 12:02:14 - INFO - codeparrot_training - Step 19222: {'lr': 0.00048347994730112457, 'samples': 9842176, 'steps': 19222, 'loss/train': 1.4268132448196411} +03/04/2022 12:02:15 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/04/2022 12:02:19 - INFO - codeparrot_training - Step 19223: {'lr': 0.00048347805017962274, 'samples': 9842688, 'steps': 19223, 'loss/train': 1.7214634418487549} +03/04/2022 12:02:23 - INFO - codeparrot_training - Step 19224: {'lr': 0.00048347615295291947, 'samples': 9843200, 'steps': 19224, 'loss/train': 2.3575832843780518} +03/04/2022 12:02:24 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/04/2022 12:02:28 - INFO - codeparrot_training - Step 19225: {'lr': 0.0004834742556210154, 'samples': 9843712, 'steps': 19225, 'loss/train': 1.6836076974868774} +03/04/2022 12:02:31 - INFO - codeparrot_training - Step 19226: {'lr': 0.00048347235818391144, 'samples': 9844224, 'steps': 19226, 'loss/train': 1.7872214317321777} +03/04/2022 12:02:32 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/04/2022 12:02:36 - INFO - codeparrot_training - Step 19227: {'lr': 0.0004834704606416084, 'samples': 9844736, 'steps': 19227, 'loss/train': 1.583264946937561} +03/04/2022 12:02:39 - INFO - codeparrot_training - Step 19228: {'lr': 0.00048346856299410725, 'samples': 9845248, 'steps': 19228, 'loss/train': 1.917128324508667} +03/04/2022 12:02:40 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/04/2022 12:02:45 - INFO - codeparrot_training - Step 19229: {'lr': 0.0004834666652414087, 'samples': 9845760, 'steps': 19229, 'loss/train': 1.6872631311416626} +03/04/2022 12:02:48 - INFO - codeparrot_training - Step 19230: {'lr': 0.0004834647673835137, 'samples': 9846272, 'steps': 19230, 'loss/train': 1.928819179534912} +03/04/2022 12:02:49 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 12:02:53 - INFO - codeparrot_training - Step 19231: {'lr': 0.00048346286942042307, 'samples': 9846784, 'steps': 19231, 'loss/train': 1.5109403133392334} +03/04/2022 12:02:57 - INFO - codeparrot_training - Step 19232: {'lr': 0.0004834609713521377, 'samples': 9847296, 'steps': 19232, 'loss/train': 2.046553134918213} +03/04/2022 12:02:58 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/04/2022 12:03:02 - INFO - codeparrot_training - Step 19233: {'lr': 0.0004834590731786584, 'samples': 9847808, 'steps': 19233, 'loss/train': 2.262617349624634} +03/04/2022 12:03:05 - INFO - codeparrot_training - Step 19234: {'lr': 0.000483457174899986, 'samples': 9848320, 'steps': 19234, 'loss/train': 1.626882791519165} +03/04/2022 12:03:06 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/04/2022 12:03:10 - INFO - codeparrot_training - Step 19235: {'lr': 0.00048345527651612145, 'samples': 9848832, 'steps': 19235, 'loss/train': 2.079869031906128} +03/04/2022 12:03:13 - INFO - codeparrot_training - Step 19236: {'lr': 0.00048345337802706555, 'samples': 9849344, 'steps': 19236, 'loss/train': 1.315872311592102} +03/04/2022 12:03:15 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/04/2022 12:03:19 - INFO - codeparrot_training - Step 19237: {'lr': 0.0004834514794328192, 'samples': 9849856, 'steps': 19237, 'loss/train': 2.0964419841766357} +03/04/2022 12:03:22 - INFO - codeparrot_training - Step 19238: {'lr': 0.00048344958073338315, 'samples': 9850368, 'steps': 19238, 'loss/train': 2.6559531688690186} +03/04/2022 12:03:24 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/04/2022 12:03:27 - INFO - codeparrot_training - Step 19239: {'lr': 0.00048344768192875833, 'samples': 9850880, 'steps': 19239, 'loss/train': 1.4975786209106445} +03/04/2022 12:03:30 - INFO - codeparrot_training - Step 19240: {'lr': 0.00048344578301894557, 'samples': 9851392, 'steps': 19240, 'loss/train': 2.0107827186584473} +03/04/2022 12:03:32 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/04/2022 12:03:36 - INFO - codeparrot_training - Step 19241: {'lr': 0.0004834438840039458, 'samples': 9851904, 'steps': 19241, 'loss/train': 1.8459316492080688} +03/04/2022 12:03:39 - INFO - codeparrot_training - Step 19242: {'lr': 0.0004834419848837598, 'samples': 9852416, 'steps': 19242, 'loss/train': 1.7205618619918823} +03/04/2022 12:03:40 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/04/2022 12:03:44 - INFO - codeparrot_training - Step 19243: {'lr': 0.00048344008565838844, 'samples': 9852928, 'steps': 19243, 'loss/train': 1.420960545539856} +03/04/2022 12:03:47 - INFO - codeparrot_training - Step 19244: {'lr': 0.00048343818632783255, 'samples': 9853440, 'steps': 19244, 'loss/train': 1.973958969116211} +03/04/2022 12:03:48 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/04/2022 12:03:52 - INFO - codeparrot_training - Step 19245: {'lr': 0.00048343628689209305, 'samples': 9853952, 'steps': 19245, 'loss/train': 1.7565151453018188} +03/04/2022 12:03:56 - INFO - codeparrot_training - Step 19246: {'lr': 0.00048343438735117076, 'samples': 9854464, 'steps': 19246, 'loss/train': 2.030247449874878} +03/04/2022 12:03:57 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/04/2022 12:04:01 - INFO - codeparrot_training - Step 19247: {'lr': 0.00048343248770506655, 'samples': 9854976, 'steps': 19247, 'loss/train': 1.8498939275741577} +03/04/2022 12:04:04 - INFO - codeparrot_training - Step 19248: {'lr': 0.0004834305879537812, 'samples': 9855488, 'steps': 19248, 'loss/train': 1.9585132598876953} +03/04/2022 12:04:05 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/04/2022 12:04:09 - INFO - codeparrot_training - Step 19249: {'lr': 0.00048342868809731567, 'samples': 9856000, 'steps': 19249, 'loss/train': 1.5519633293151855} +03/04/2022 12:04:12 - INFO - codeparrot_training - Step 19250: {'lr': 0.0004834267881356708, 'samples': 9856512, 'steps': 19250, 'loss/train': 1.732340693473816} +03/04/2022 12:04:13 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/04/2022 12:04:18 - INFO - codeparrot_training - Step 19251: {'lr': 0.0004834248880688474, 'samples': 9857024, 'steps': 19251, 'loss/train': 2.159620523452759} +03/04/2022 12:04:21 - INFO - codeparrot_training - Step 19252: {'lr': 0.00048342298789684637, 'samples': 9857536, 'steps': 19252, 'loss/train': 1.828635573387146} +03/04/2022 12:04:23 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 12:04:26 - INFO - codeparrot_training - Step 19253: {'lr': 0.0004834210876196685, 'samples': 9858048, 'steps': 19253, 'loss/train': 1.9632184505462646} +03/04/2022 12:04:29 - INFO - codeparrot_training - Step 19254: {'lr': 0.0004834191872373147, 'samples': 9858560, 'steps': 19254, 'loss/train': 1.521689534187317} +03/04/2022 12:04:31 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/04/2022 12:04:35 - INFO - codeparrot_training - Step 19255: {'lr': 0.0004834172867497858, 'samples': 9859072, 'steps': 19255, 'loss/train': 1.9479116201400757} +03/04/2022 12:04:38 - INFO - codeparrot_training - Step 19256: {'lr': 0.0004834153861570827, 'samples': 9859584, 'steps': 19256, 'loss/train': 1.847349762916565} +03/04/2022 12:04:40 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/04/2022 12:04:43 - INFO - codeparrot_training - Step 19257: {'lr': 0.00048341348545920623, 'samples': 9860096, 'steps': 19257, 'loss/train': 2.014557361602783} +03/04/2022 12:04:46 - INFO - codeparrot_training - Step 19258: {'lr': 0.0004834115846561572, 'samples': 9860608, 'steps': 19258, 'loss/train': 1.702178955078125} +03/04/2022 12:04:49 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/04/2022 12:04:52 - INFO - codeparrot_training - Step 19259: {'lr': 0.0004834096837479366, 'samples': 9861120, 'steps': 19259, 'loss/train': 1.6481796503067017} +03/04/2022 12:04:55 - INFO - codeparrot_training - Step 19260: {'lr': 0.00048340778273454514, 'samples': 9861632, 'steps': 19260, 'loss/train': 1.8168548345565796} +03/04/2022 12:04:57 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/04/2022 12:05:00 - INFO - codeparrot_training - Step 19261: {'lr': 0.00048340588161598373, 'samples': 9862144, 'steps': 19261, 'loss/train': 2.3058178424835205} +03/04/2022 12:05:03 - INFO - codeparrot_training - Step 19262: {'lr': 0.00048340398039225325, 'samples': 9862656, 'steps': 19262, 'loss/train': 1.9409716129302979} +03/04/2022 12:05:05 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/04/2022 12:05:09 - INFO - codeparrot_training - Step 19263: {'lr': 0.0004834020790633545, 'samples': 9863168, 'steps': 19263, 'loss/train': 0.9839010238647461} +03/04/2022 12:05:12 - INFO - codeparrot_training - Step 19264: {'lr': 0.00048340017762928843, 'samples': 9863680, 'steps': 19264, 'loss/train': 2.3588013648986816} +03/04/2022 12:05:14 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/04/2022 12:05:17 - INFO - codeparrot_training - Step 19265: {'lr': 0.00048339827609005583, 'samples': 9864192, 'steps': 19265, 'loss/train': 1.7827677726745605} +03/04/2022 12:05:20 - INFO - codeparrot_training - Step 19266: {'lr': 0.00048339637444565756, 'samples': 9864704, 'steps': 19266, 'loss/train': 1.8599581718444824} +03/04/2022 12:05:23 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/04/2022 12:05:26 - INFO - codeparrot_training - Step 19267: {'lr': 0.0004833944726960945, 'samples': 9865216, 'steps': 19267, 'loss/train': 2.3508286476135254} +03/04/2022 12:05:29 - INFO - codeparrot_training - Step 19268: {'lr': 0.00048339257084136747, 'samples': 9865728, 'steps': 19268, 'loss/train': 2.3346028327941895} +03/04/2022 12:05:31 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/04/2022 12:05:34 - INFO - codeparrot_training - Step 19269: {'lr': 0.0004833906688814774, 'samples': 9866240, 'steps': 19269, 'loss/train': 2.1749258041381836} +03/04/2022 12:05:37 - INFO - codeparrot_training - Step 19270: {'lr': 0.00048338876681642504, 'samples': 9866752, 'steps': 19270, 'loss/train': 1.7811172008514404} +03/04/2022 12:05:39 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/04/2022 12:05:42 - INFO - codeparrot_training - Step 19271: {'lr': 0.0004833868646462113, 'samples': 9867264, 'steps': 19271, 'loss/train': 1.9683254957199097} +03/04/2022 12:05:46 - INFO - codeparrot_training - Step 19272: {'lr': 0.00048338496237083705, 'samples': 9867776, 'steps': 19272, 'loss/train': 2.141453981399536} +03/04/2022 12:05:48 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 12:05:51 - INFO - codeparrot_training - Step 19273: {'lr': 0.00048338305999030313, 'samples': 9868288, 'steps': 19273, 'loss/train': 2.4394173622131348} +03/04/2022 12:05:54 - INFO - codeparrot_training - Step 19274: {'lr': 0.00048338115750461044, 'samples': 9868800, 'steps': 19274, 'loss/train': 1.9317326545715332} +03/04/2022 12:05:56 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/04/2022 12:06:00 - INFO - codeparrot_training - Step 19275: {'lr': 0.0004833792549137598, 'samples': 9869312, 'steps': 19275, 'loss/train': 1.9801526069641113} +03/04/2022 12:06:03 - INFO - codeparrot_training - Step 19276: {'lr': 0.00048337735221775204, 'samples': 9869824, 'steps': 19276, 'loss/train': 1.58380126953125} +03/04/2022 12:06:05 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 12:06:08 - INFO - codeparrot_training - Step 19277: {'lr': 0.000483375449416588, 'samples': 9870336, 'steps': 19277, 'loss/train': 1.0538156032562256} +03/04/2022 12:06:11 - INFO - codeparrot_training - Step 19278: {'lr': 0.0004833735465102687, 'samples': 9870848, 'steps': 19278, 'loss/train': 2.0572452545166016} +03/04/2022 12:06:14 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/04/2022 12:06:16 - INFO - codeparrot_training - Step 19279: {'lr': 0.0004833716434987948, 'samples': 9871360, 'steps': 19279, 'loss/train': 2.492720127105713} +03/04/2022 12:06:20 - INFO - codeparrot_training - Step 19280: {'lr': 0.0004833697403821672, 'samples': 9871872, 'steps': 19280, 'loss/train': 1.0325214862823486} +03/04/2022 12:06:22 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/04/2022 12:06:25 - INFO - codeparrot_training - Step 19281: {'lr': 0.0004833678371603869, 'samples': 9872384, 'steps': 19281, 'loss/train': 2.03554630279541} +03/04/2022 12:06:28 - INFO - codeparrot_training - Step 19282: {'lr': 0.0004833659338334546, 'samples': 9872896, 'steps': 19282, 'loss/train': 1.0189350843429565} +03/04/2022 12:06:30 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/04/2022 12:06:33 - INFO - codeparrot_training - Step 19283: {'lr': 0.0004833640304013712, 'samples': 9873408, 'steps': 19283, 'loss/train': 1.4337482452392578} +03/04/2022 12:06:36 - INFO - codeparrot_training - Step 19284: {'lr': 0.0004833621268641376, 'samples': 9873920, 'steps': 19284, 'loss/train': 1.8620758056640625} +03/04/2022 12:06:38 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/04/2022 12:06:42 - INFO - codeparrot_training - Step 19285: {'lr': 0.0004833602232217546, 'samples': 9874432, 'steps': 19285, 'loss/train': 2.062908887863159} +03/04/2022 12:06:45 - INFO - codeparrot_training - Step 19286: {'lr': 0.0004833583194742231, 'samples': 9874944, 'steps': 19286, 'loss/train': 2.3928706645965576} +03/04/2022 12:06:47 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 12:06:50 - INFO - codeparrot_training - Step 19287: {'lr': 0.00048335641562154396, 'samples': 9875456, 'steps': 19287, 'loss/train': 2.010359764099121} +03/04/2022 12:06:53 - INFO - codeparrot_training - Step 19288: {'lr': 0.00048335451166371803, 'samples': 9875968, 'steps': 19288, 'loss/train': 1.8696197271347046} +03/04/2022 12:06:55 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 12:06:59 - INFO - codeparrot_training - Step 19289: {'lr': 0.0004833526076007461, 'samples': 9876480, 'steps': 19289, 'loss/train': 2.1333889961242676} +03/04/2022 12:07:02 - INFO - codeparrot_training - Step 19290: {'lr': 0.0004833507034326291, 'samples': 9876992, 'steps': 19290, 'loss/train': 1.6926838159561157} +03/04/2022 12:07:04 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/04/2022 12:07:07 - INFO - codeparrot_training - Step 19291: {'lr': 0.0004833487991593679, 'samples': 9877504, 'steps': 19291, 'loss/train': 1.4763946533203125} +03/04/2022 12:07:10 - INFO - codeparrot_training - Step 19292: {'lr': 0.0004833468947809633, 'samples': 9878016, 'steps': 19292, 'loss/train': 1.8973913192749023} +03/04/2022 12:07:12 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/04/2022 12:07:15 - INFO - codeparrot_training - Step 19293: {'lr': 0.0004833449902974162, 'samples': 9878528, 'steps': 19293, 'loss/train': 2.0443220138549805} +03/04/2022 12:07:19 - INFO - codeparrot_training - Step 19294: {'lr': 0.00048334308570872745, 'samples': 9879040, 'steps': 19294, 'loss/train': 1.4786533117294312} +03/04/2022 12:07:21 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/04/2022 12:07:24 - INFO - codeparrot_training - Step 19295: {'lr': 0.00048334118101489793, 'samples': 9879552, 'steps': 19295, 'loss/train': 1.7237526178359985} +03/04/2022 12:07:27 - INFO - codeparrot_training - Step 19296: {'lr': 0.00048333927621592844, 'samples': 9880064, 'steps': 19296, 'loss/train': 2.043454170227051} +03/04/2022 12:07:29 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/04/2022 12:07:32 - INFO - codeparrot_training - Step 19297: {'lr': 0.00048333737131181986, 'samples': 9880576, 'steps': 19297, 'loss/train': 2.1414129734039307} +03/04/2022 12:07:36 - INFO - codeparrot_training - Step 19298: {'lr': 0.00048333546630257315, 'samples': 9881088, 'steps': 19298, 'loss/train': 1.6831251382827759} +03/04/2022 12:07:38 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/04/2022 12:07:41 - INFO - codeparrot_training - Step 19299: {'lr': 0.000483333561188189, 'samples': 9881600, 'steps': 19299, 'loss/train': 1.9294929504394531} +03/04/2022 12:07:44 - INFO - codeparrot_training - Step 19300: {'lr': 0.00048333165596866837, 'samples': 9882112, 'steps': 19300, 'loss/train': 1.6280734539031982} +03/04/2022 12:07:46 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/04/2022 12:07:49 - INFO - codeparrot_training - Step 19301: {'lr': 0.00048332975064401207, 'samples': 9882624, 'steps': 19301, 'loss/train': 1.9647443294525146} +03/04/2022 12:07:52 - INFO - codeparrot_training - Step 19302: {'lr': 0.000483327845214221, 'samples': 9883136, 'steps': 19302, 'loss/train': 2.116105318069458} +03/04/2022 12:07:54 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 12:07:58 - INFO - codeparrot_training - Step 19303: {'lr': 0.00048332593967929607, 'samples': 9883648, 'steps': 19303, 'loss/train': 1.831972599029541} +03/04/2022 12:08:01 - INFO - codeparrot_training - Step 19304: {'lr': 0.000483324034039238, 'samples': 9884160, 'steps': 19304, 'loss/train': 2.275942802429199} +03/04/2022 12:08:03 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/04/2022 12:08:06 - INFO - codeparrot_training - Step 19305: {'lr': 0.00048332212829404775, 'samples': 9884672, 'steps': 19305, 'loss/train': 1.900220274925232} +03/04/2022 12:08:09 - INFO - codeparrot_training - Step 19306: {'lr': 0.0004833202224437261, 'samples': 9885184, 'steps': 19306, 'loss/train': 1.8362157344818115} +03/04/2022 12:08:11 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/04/2022 12:08:14 - INFO - codeparrot_training - Step 19307: {'lr': 0.000483318316488274, 'samples': 9885696, 'steps': 19307, 'loss/train': 1.8705945014953613} +03/04/2022 12:08:18 - INFO - codeparrot_training - Step 19308: {'lr': 0.00048331641042769223, 'samples': 9886208, 'steps': 19308, 'loss/train': 2.4039907455444336} +03/04/2022 12:08:20 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/04/2022 12:08:23 - INFO - codeparrot_training - Step 19309: {'lr': 0.00048331450426198177, 'samples': 9886720, 'steps': 19309, 'loss/train': 1.7002663612365723} +03/04/2022 12:08:26 - INFO - codeparrot_training - Step 19310: {'lr': 0.0004833125979911434, 'samples': 9887232, 'steps': 19310, 'loss/train': 1.6479078531265259} +03/04/2022 12:08:28 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/04/2022 12:08:31 - INFO - codeparrot_training - Step 19311: {'lr': 0.0004833106916151778, 'samples': 9887744, 'steps': 19311, 'loss/train': 2.1744163036346436} +03/04/2022 12:08:34 - INFO - codeparrot_training - Step 19312: {'lr': 0.00048330878513408616, 'samples': 9888256, 'steps': 19312, 'loss/train': 1.5220141410827637} +03/04/2022 12:08:36 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/04/2022 12:08:40 - INFO - codeparrot_training - Step 19313: {'lr': 0.00048330687854786914, 'samples': 9888768, 'steps': 19313, 'loss/train': 1.9622236490249634} +03/04/2022 12:08:43 - INFO - codeparrot_training - Step 19314: {'lr': 0.00048330497185652765, 'samples': 9889280, 'steps': 19314, 'loss/train': 1.5901180505752563} +03/04/2022 12:08:45 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 12:08:48 - INFO - codeparrot_training - Step 19315: {'lr': 0.00048330306506006257, 'samples': 9889792, 'steps': 19315, 'loss/train': 0.7435954213142395} +03/04/2022 12:08:51 - INFO - codeparrot_training - Step 19316: {'lr': 0.00048330115815847465, 'samples': 9890304, 'steps': 19316, 'loss/train': 1.8161375522613525} +03/04/2022 12:08:53 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/04/2022 12:08:56 - INFO - codeparrot_training - Step 19317: {'lr': 0.0004832992511517649, 'samples': 9890816, 'steps': 19317, 'loss/train': 2.188796281814575} +03/04/2022 12:09:00 - INFO - codeparrot_training - Step 19318: {'lr': 0.00048329734403993406, 'samples': 9891328, 'steps': 19318, 'loss/train': 2.335855484008789} +03/04/2022 12:09:01 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/04/2022 12:09:05 - INFO - codeparrot_training - Step 19319: {'lr': 0.00048329543682298307, 'samples': 9891840, 'steps': 19319, 'loss/train': 1.6678653955459595} +03/04/2022 12:09:08 - INFO - codeparrot_training - Step 19320: {'lr': 0.0004832935295009127, 'samples': 9892352, 'steps': 19320, 'loss/train': 2.1617918014526367} +03/04/2022 12:09:10 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/04/2022 12:09:13 - INFO - codeparrot_training - Step 19321: {'lr': 0.0004832916220737239, 'samples': 9892864, 'steps': 19321, 'loss/train': 2.206672430038452} +03/04/2022 12:09:16 - INFO - codeparrot_training - Step 19322: {'lr': 0.0004832897145414175, 'samples': 9893376, 'steps': 19322, 'loss/train': 1.6992571353912354} +03/04/2022 12:09:18 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/04/2022 12:09:22 - INFO - codeparrot_training - Step 19323: {'lr': 0.0004832878069039943, 'samples': 9893888, 'steps': 19323, 'loss/train': 1.1923803091049194} +03/04/2022 12:09:25 - INFO - codeparrot_training - Step 19324: {'lr': 0.0004832858991614553, 'samples': 9894400, 'steps': 19324, 'loss/train': 1.9934492111206055} +03/04/2022 12:09:27 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/04/2022 12:09:30 - INFO - codeparrot_training - Step 19325: {'lr': 0.00048328399131380127, 'samples': 9894912, 'steps': 19325, 'loss/train': 1.8674728870391846} +03/04/2022 12:09:34 - INFO - codeparrot_training - Step 19326: {'lr': 0.00048328208336103305, 'samples': 9895424, 'steps': 19326, 'loss/train': 1.5498058795928955} +03/04/2022 12:09:36 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 12:09:39 - INFO - codeparrot_training - Step 19327: {'lr': 0.0004832801753031515, 'samples': 9895936, 'steps': 19327, 'loss/train': 2.139103889465332} +03/04/2022 12:09:42 - INFO - codeparrot_training - Step 19328: {'lr': 0.00048327826714015756, 'samples': 9896448, 'steps': 19328, 'loss/train': 2.107386350631714} +03/04/2022 12:09:45 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/04/2022 12:09:47 - INFO - codeparrot_training - Step 19329: {'lr': 0.00048327635887205196, 'samples': 9896960, 'steps': 19329, 'loss/train': 2.255783796310425} +03/04/2022 12:09:51 - INFO - codeparrot_training - Step 19330: {'lr': 0.00048327445049883567, 'samples': 9897472, 'steps': 19330, 'loss/train': 2.0729496479034424} +03/04/2022 12:09:53 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/04/2022 12:09:56 - INFO - codeparrot_training - Step 19331: {'lr': 0.0004832725420205095, 'samples': 9897984, 'steps': 19331, 'loss/train': 2.3872690200805664} +03/04/2022 12:09:59 - INFO - codeparrot_training - Step 19332: {'lr': 0.00048327063343707433, 'samples': 9898496, 'steps': 19332, 'loss/train': 0.9744631052017212} +03/04/2022 12:10:02 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/04/2022 12:10:04 - INFO - codeparrot_training - Step 19333: {'lr': 0.000483268724748531, 'samples': 9899008, 'steps': 19333, 'loss/train': 1.9922106266021729} +03/04/2022 12:10:07 - INFO - codeparrot_training - Step 19334: {'lr': 0.0004832668159548804, 'samples': 9899520, 'steps': 19334, 'loss/train': 1.9680490493774414} +03/04/2022 12:10:10 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/04/2022 12:10:13 - INFO - codeparrot_training - Step 19335: {'lr': 0.00048326490705612337, 'samples': 9900032, 'steps': 19335, 'loss/train': 1.9399536848068237} +03/04/2022 12:10:16 - INFO - codeparrot_training - Step 19336: {'lr': 0.0004832629980522608, 'samples': 9900544, 'steps': 19336, 'loss/train': 2.3833587169647217} +03/04/2022 12:10:18 - INFO - codeparrot_training - Skipping example with length 301 (seq_length=1024) +03/04/2022 12:10:21 - INFO - codeparrot_training - Step 19337: {'lr': 0.00048326108894329345, 'samples': 9901056, 'steps': 19337, 'loss/train': 2.703951358795166} +03/04/2022 12:10:24 - INFO - codeparrot_training - Step 19338: {'lr': 0.00048325917972922227, 'samples': 9901568, 'steps': 19338, 'loss/train': 1.470207929611206} +03/04/2022 12:10:27 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/04/2022 12:10:30 - INFO - codeparrot_training - Step 19339: {'lr': 0.00048325727041004815, 'samples': 9902080, 'steps': 19339, 'loss/train': 3.110485076904297} +03/04/2022 12:10:33 - INFO - codeparrot_training - Step 19340: {'lr': 0.0004832553609857719, 'samples': 9902592, 'steps': 19340, 'loss/train': 2.1446051597595215} +03/04/2022 12:10:35 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/04/2022 12:10:38 - INFO - codeparrot_training - Step 19341: {'lr': 0.0004832534514563943, 'samples': 9903104, 'steps': 19341, 'loss/train': 2.3085782527923584} +03/04/2022 12:10:41 - INFO - codeparrot_training - Step 19342: {'lr': 0.0004832515418219164, 'samples': 9903616, 'steps': 19342, 'loss/train': 1.6689728498458862} +03/04/2022 12:10:44 - INFO - codeparrot_training - Step 19343: {'lr': 0.0004832496320823389, 'samples': 9904128, 'steps': 19343, 'loss/train': 2.0844027996063232} +03/04/2022 12:10:44 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/04/2022 12:10:50 - INFO - codeparrot_training - Step 19344: {'lr': 0.0004832477222376627, 'samples': 9904640, 'steps': 19344, 'loss/train': 2.620640993118286} +03/04/2022 12:10:53 - INFO - codeparrot_training - Step 19345: {'lr': 0.0004832458122878888, 'samples': 9905152, 'steps': 19345, 'loss/train': 2.244377851486206} +03/04/2022 12:10:53 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/04/2022 12:10:58 - INFO - codeparrot_training - Step 19346: {'lr': 0.0004832439022330178, 'samples': 9905664, 'steps': 19346, 'loss/train': 1.867787480354309} +03/04/2022 12:11:01 - INFO - codeparrot_training - Step 19347: {'lr': 0.00048324199207305075, 'samples': 9906176, 'steps': 19347, 'loss/train': 2.215949535369873} +03/04/2022 12:11:01 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/04/2022 12:11:06 - INFO - codeparrot_training - Step 19348: {'lr': 0.0004832400818079884, 'samples': 9906688, 'steps': 19348, 'loss/train': 0.7336384654045105} +03/04/2022 12:11:09 - INFO - codeparrot_training - Step 19349: {'lr': 0.00048323817143783174, 'samples': 9907200, 'steps': 19349, 'loss/train': 1.5405983924865723} +03/04/2022 12:11:10 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 12:11:15 - INFO - codeparrot_training - Step 19350: {'lr': 0.0004832362609625815, 'samples': 9907712, 'steps': 19350, 'loss/train': 2.5343809127807617} +03/04/2022 12:11:18 - INFO - codeparrot_training - Step 19351: {'lr': 0.0004832343503822386, 'samples': 9908224, 'steps': 19351, 'loss/train': 1.4504948854446411} +03/04/2022 12:11:18 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/04/2022 12:11:23 - INFO - codeparrot_training - Step 19352: {'lr': 0.000483232439696804, 'samples': 9908736, 'steps': 19352, 'loss/train': 0.884429395198822} +03/04/2022 12:11:26 - INFO - codeparrot_training - Step 19353: {'lr': 0.0004832305289062784, 'samples': 9909248, 'steps': 19353, 'loss/train': 1.5595014095306396} +03/04/2022 12:11:27 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/04/2022 12:11:32 - INFO - codeparrot_training - Step 19354: {'lr': 0.00048322861801066265, 'samples': 9909760, 'steps': 19354, 'loss/train': 2.072275161743164} +03/04/2022 12:11:35 - INFO - codeparrot_training - Step 19355: {'lr': 0.00048322670700995775, 'samples': 9910272, 'steps': 19355, 'loss/train': 1.8293801546096802} +03/04/2022 12:11:35 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/04/2022 12:11:40 - INFO - codeparrot_training - Step 19356: {'lr': 0.0004832247959041645, 'samples': 9910784, 'steps': 19356, 'loss/train': 2.4597456455230713} +03/04/2022 12:11:43 - INFO - codeparrot_training - Step 19357: {'lr': 0.0004832228846932838, 'samples': 9911296, 'steps': 19357, 'loss/train': 1.856176495552063} +03/04/2022 12:11:43 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/04/2022 12:11:49 - INFO - codeparrot_training - Step 19358: {'lr': 0.0004832209733773164, 'samples': 9911808, 'steps': 19358, 'loss/train': 2.220309019088745} +03/04/2022 12:11:52 - INFO - codeparrot_training - Step 19359: {'lr': 0.0004832190619562632, 'samples': 9912320, 'steps': 19359, 'loss/train': 2.279001474380493} +03/04/2022 12:11:53 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/04/2022 12:11:57 - INFO - codeparrot_training - Step 19360: {'lr': 0.00048321715043012515, 'samples': 9912832, 'steps': 19360, 'loss/train': 1.7336853742599487} +03/04/2022 12:12:00 - INFO - codeparrot_training - Step 19361: {'lr': 0.00048321523879890307, 'samples': 9913344, 'steps': 19361, 'loss/train': 1.6324422359466553} +03/04/2022 12:12:01 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/04/2022 12:12:06 - INFO - codeparrot_training - Step 19362: {'lr': 0.00048321332706259773, 'samples': 9913856, 'steps': 19362, 'loss/train': 2.036705255508423} +03/04/2022 12:12:09 - INFO - codeparrot_training - Step 19363: {'lr': 0.0004832114152212101, 'samples': 9914368, 'steps': 19363, 'loss/train': 2.0809576511383057} +03/04/2022 12:12:09 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/04/2022 12:12:14 - INFO - codeparrot_training - Step 19364: {'lr': 0.000483209503274741, 'samples': 9914880, 'steps': 19364, 'loss/train': 1.6379246711730957} +03/04/2022 12:12:17 - INFO - codeparrot_training - Step 19365: {'lr': 0.0004832075912231913, 'samples': 9915392, 'steps': 19365, 'loss/train': 1.777904987335205} +03/04/2022 12:12:18 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/04/2022 12:12:23 - INFO - codeparrot_training - Step 19366: {'lr': 0.0004832056790665619, 'samples': 9915904, 'steps': 19366, 'loss/train': 0.5791195631027222} +03/04/2022 12:12:26 - INFO - codeparrot_training - Step 19367: {'lr': 0.0004832037668048536, 'samples': 9916416, 'steps': 19367, 'loss/train': 1.7071946859359741} +03/04/2022 12:12:26 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 12:12:31 - INFO - codeparrot_training - Step 19368: {'lr': 0.00048320185443806717, 'samples': 9916928, 'steps': 19368, 'loss/train': 2.0559747219085693} +03/04/2022 12:12:34 - INFO - codeparrot_training - Step 19369: {'lr': 0.0004831999419662037, 'samples': 9917440, 'steps': 19369, 'loss/train': 1.5099694728851318} +03/04/2022 12:12:35 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/04/2022 12:12:40 - INFO - codeparrot_training - Step 19370: {'lr': 0.0004831980293892639, 'samples': 9917952, 'steps': 19370, 'loss/train': 1.4393583536148071} +03/04/2022 12:12:43 - INFO - codeparrot_training - Step 19371: {'lr': 0.0004831961167072487, 'samples': 9918464, 'steps': 19371, 'loss/train': 2.0649352073669434} +03/04/2022 12:12:44 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 12:12:48 - INFO - codeparrot_training - Step 19372: {'lr': 0.0004831942039201589, 'samples': 9918976, 'steps': 19372, 'loss/train': 1.838057279586792} +03/04/2022 12:12:51 - INFO - codeparrot_training - Step 19373: {'lr': 0.0004831922910279954, 'samples': 9919488, 'steps': 19373, 'loss/train': 2.1100895404815674} +03/04/2022 12:12:53 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/04/2022 12:12:57 - INFO - codeparrot_training - Step 19374: {'lr': 0.000483190378030759, 'samples': 9920000, 'steps': 19374, 'loss/train': 2.1403770446777344} +03/04/2022 12:13:00 - INFO - codeparrot_training - Step 19375: {'lr': 0.0004831884649284507, 'samples': 9920512, 'steps': 19375, 'loss/train': 1.5291773080825806} +03/04/2022 12:13:01 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/04/2022 12:13:05 - INFO - codeparrot_training - Step 19376: {'lr': 0.00048318655172107126, 'samples': 9921024, 'steps': 19376, 'loss/train': 1.7640737295150757} +03/04/2022 12:13:08 - INFO - codeparrot_training - Step 19377: {'lr': 0.0004831846384086215, 'samples': 9921536, 'steps': 19377, 'loss/train': 1.6455087661743164} +03/04/2022 12:13:10 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/04/2022 12:13:14 - INFO - codeparrot_training - Step 19378: {'lr': 0.0004831827249911024, 'samples': 9922048, 'steps': 19378, 'loss/train': 1.585808277130127} +03/04/2022 12:13:17 - INFO - codeparrot_training - Step 19379: {'lr': 0.0004831808114685147, 'samples': 9922560, 'steps': 19379, 'loss/train': 2.1369926929473877} +03/04/2022 12:13:18 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 12:13:22 - INFO - codeparrot_training - Step 19380: {'lr': 0.00048317889784085935, 'samples': 9923072, 'steps': 19380, 'loss/train': 2.4225330352783203} +03/04/2022 12:13:25 - INFO - codeparrot_training - Step 19381: {'lr': 0.0004831769841081372, 'samples': 9923584, 'steps': 19381, 'loss/train': 4.066267490386963} +03/04/2022 12:13:27 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/04/2022 12:13:30 - INFO - codeparrot_training - Step 19382: {'lr': 0.00048317507027034913, 'samples': 9924096, 'steps': 19382, 'loss/train': 1.6341750621795654} +03/04/2022 12:13:34 - INFO - codeparrot_training - Step 19383: {'lr': 0.0004831731563274959, 'samples': 9924608, 'steps': 19383, 'loss/train': 1.903091311454773} +03/04/2022 12:13:35 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/04/2022 12:13:39 - INFO - codeparrot_training - Step 19384: {'lr': 0.0004831712422795785, 'samples': 9925120, 'steps': 19384, 'loss/train': 2.115166187286377} +03/04/2022 12:13:42 - INFO - codeparrot_training - Step 19385: {'lr': 0.00048316932812659776, 'samples': 9925632, 'steps': 19385, 'loss/train': 1.5342516899108887} +03/04/2022 12:13:44 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/04/2022 12:13:47 - INFO - codeparrot_training - Step 19386: {'lr': 0.00048316741386855445, 'samples': 9926144, 'steps': 19386, 'loss/train': 1.8595731258392334} +03/04/2022 12:13:51 - INFO - codeparrot_training - Step 19387: {'lr': 0.0004831654995054495, 'samples': 9926656, 'steps': 19387, 'loss/train': 1.5135680437088013} +03/04/2022 12:13:53 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/04/2022 12:13:56 - INFO - codeparrot_training - Step 19388: {'lr': 0.0004831635850372838, 'samples': 9927168, 'steps': 19388, 'loss/train': 2.286105155944824} +03/04/2022 12:13:59 - INFO - codeparrot_training - Step 19389: {'lr': 0.00048316167046405826, 'samples': 9927680, 'steps': 19389, 'loss/train': 1.7907754182815552} +03/04/2022 12:14:01 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 12:14:04 - INFO - codeparrot_training - Step 19390: {'lr': 0.0004831597557857735, 'samples': 9928192, 'steps': 19390, 'loss/train': 2.052823543548584} +03/04/2022 12:14:08 - INFO - codeparrot_training - Step 19391: {'lr': 0.00048315784100243063, 'samples': 9928704, 'steps': 19391, 'loss/train': 2.0606441497802734} +03/04/2022 12:14:10 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/04/2022 12:14:13 - INFO - codeparrot_training - Step 19392: {'lr': 0.0004831559261140305, 'samples': 9929216, 'steps': 19392, 'loss/train': 2.084334373474121} +03/04/2022 12:14:16 - INFO - codeparrot_training - Step 19393: {'lr': 0.0004831540111205739, 'samples': 9929728, 'steps': 19393, 'loss/train': 2.010958194732666} +03/04/2022 12:14:18 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/04/2022 12:14:21 - INFO - codeparrot_training - Step 19394: {'lr': 0.00048315209602206165, 'samples': 9930240, 'steps': 19394, 'loss/train': 2.6461446285247803} +03/04/2022 12:14:25 - INFO - codeparrot_training - Step 19395: {'lr': 0.0004831501808184947, 'samples': 9930752, 'steps': 19395, 'loss/train': 1.9386167526245117} +03/04/2022 12:14:27 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 12:14:30 - INFO - codeparrot_training - Step 19396: {'lr': 0.0004831482655098738, 'samples': 9931264, 'steps': 19396, 'loss/train': 1.460323691368103} +03/04/2022 12:14:33 - INFO - codeparrot_training - Step 19397: {'lr': 0.00048314635009619997, 'samples': 9931776, 'steps': 19397, 'loss/train': 1.9102013111114502} +03/04/2022 12:14:35 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/04/2022 12:14:38 - INFO - codeparrot_training - Step 19398: {'lr': 0.0004831444345774739, 'samples': 9932288, 'steps': 19398, 'loss/train': 2.3662991523742676} +03/04/2022 12:14:41 - INFO - codeparrot_training - Step 19399: {'lr': 0.00048314251895369663, 'samples': 9932800, 'steps': 19399, 'loss/train': 1.2344551086425781} +03/04/2022 12:14:44 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/04/2022 12:14:47 - INFO - codeparrot_training - Step 19400: {'lr': 0.000483140603224869, 'samples': 9933312, 'steps': 19400, 'loss/train': 2.3091928958892822} +03/04/2022 12:14:50 - INFO - codeparrot_training - Step 19401: {'lr': 0.00048313868739099166, 'samples': 9933824, 'steps': 19401, 'loss/train': 1.1534194946289062} +03/04/2022 12:14:52 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/04/2022 12:14:55 - INFO - codeparrot_training - Step 19402: {'lr': 0.0004831367714520657, 'samples': 9934336, 'steps': 19402, 'loss/train': 1.9784660339355469} +03/04/2022 12:14:58 - INFO - codeparrot_training - Step 19403: {'lr': 0.0004831348554080919, 'samples': 9934848, 'steps': 19403, 'loss/train': 1.7035642862319946} +03/04/2022 12:15:01 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 12:15:04 - INFO - codeparrot_training - Step 19404: {'lr': 0.0004831329392590711, 'samples': 9935360, 'steps': 19404, 'loss/train': 2.002788782119751} +03/04/2022 12:15:07 - INFO - codeparrot_training - Step 19405: {'lr': 0.00048313102300500424, 'samples': 9935872, 'steps': 19405, 'loss/train': 2.222447395324707} +03/04/2022 12:15:10 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/04/2022 12:15:12 - INFO - codeparrot_training - Step 19406: {'lr': 0.00048312910664589215, 'samples': 9936384, 'steps': 19406, 'loss/train': 1.5017746686935425} +03/04/2022 12:15:15 - INFO - codeparrot_training - Step 19407: {'lr': 0.0004831271901817357, 'samples': 9936896, 'steps': 19407, 'loss/train': 1.5154513120651245} +03/04/2022 12:15:18 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 12:15:21 - INFO - codeparrot_training - Step 19408: {'lr': 0.00048312527361253567, 'samples': 9937408, 'steps': 19408, 'loss/train': 1.360522985458374} +03/04/2022 12:15:24 - INFO - codeparrot_training - Step 19409: {'lr': 0.000483123356938293, 'samples': 9937920, 'steps': 19409, 'loss/train': 1.5016167163848877} +03/04/2022 12:15:26 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/04/2022 12:15:29 - INFO - codeparrot_training - Step 19410: {'lr': 0.00048312144015900856, 'samples': 9938432, 'steps': 19410, 'loss/train': 2.0484724044799805} +03/04/2022 12:15:32 - INFO - codeparrot_training - Step 19411: {'lr': 0.00048311952327468325, 'samples': 9938944, 'steps': 19411, 'loss/train': 1.936078667640686} +03/04/2022 12:15:35 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/04/2022 12:15:37 - INFO - codeparrot_training - Step 19412: {'lr': 0.00048311760628531777, 'samples': 9939456, 'steps': 19412, 'loss/train': 1.872390627861023} +03/04/2022 12:15:41 - INFO - codeparrot_training - Step 19413: {'lr': 0.00048311568919091316, 'samples': 9939968, 'steps': 19413, 'loss/train': 1.6997140645980835} +03/04/2022 12:15:43 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/04/2022 12:15:46 - INFO - codeparrot_training - Step 19414: {'lr': 0.00048311377199147023, 'samples': 9940480, 'steps': 19414, 'loss/train': 2.1552505493164062} +03/04/2022 12:15:49 - INFO - codeparrot_training - Step 19415: {'lr': 0.00048311185468698974, 'samples': 9940992, 'steps': 19415, 'loss/train': 1.7392278909683228} +03/04/2022 12:15:52 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 12:15:55 - INFO - codeparrot_training - Step 19416: {'lr': 0.00048310993727747277, 'samples': 9941504, 'steps': 19416, 'loss/train': 1.571963906288147} +03/04/2022 12:15:58 - INFO - codeparrot_training - Step 19417: {'lr': 0.00048310801976292, 'samples': 9942016, 'steps': 19417, 'loss/train': 2.1123247146606445} +03/04/2022 12:16:01 - INFO - codeparrot_training - Step 19418: {'lr': 0.0004831061021433323, 'samples': 9942528, 'steps': 19418, 'loss/train': 0.5974637866020203} +03/04/2022 12:16:01 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/04/2022 12:16:06 - INFO - codeparrot_training - Step 19419: {'lr': 0.00048310418441871065, 'samples': 9943040, 'steps': 19419, 'loss/train': 2.9887120723724365} +03/04/2022 12:16:09 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/04/2022 12:16:11 - INFO - codeparrot_training - Step 19420: {'lr': 0.00048310226658905585, 'samples': 9943552, 'steps': 19420, 'loss/train': 1.8934903144836426} +03/04/2022 12:16:15 - INFO - codeparrot_training - Step 19421: {'lr': 0.00048310034865436876, 'samples': 9944064, 'steps': 19421, 'loss/train': 2.0137338638305664} +03/04/2022 12:16:17 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/04/2022 12:16:20 - INFO - codeparrot_training - Step 19422: {'lr': 0.0004830984306146503, 'samples': 9944576, 'steps': 19422, 'loss/train': 1.6406397819519043} +03/04/2022 12:16:23 - INFO - codeparrot_training - Step 19423: {'lr': 0.0004830965124699012, 'samples': 9945088, 'steps': 19423, 'loss/train': 1.6938649415969849} +03/04/2022 12:16:26 - INFO - codeparrot_training - Step 19424: {'lr': 0.00048309459422012243, 'samples': 9945600, 'steps': 19424, 'loss/train': 0.2350090742111206} +03/04/2022 12:16:27 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 12:16:32 - INFO - codeparrot_training - Step 19425: {'lr': 0.0004830926758653148, 'samples': 9946112, 'steps': 19425, 'loss/train': 1.2396119832992554} +03/04/2022 12:16:35 - INFO - codeparrot_training - Step 19426: {'lr': 0.00048309075740547925, 'samples': 9946624, 'steps': 19426, 'loss/train': 2.0918478965759277} +03/04/2022 12:16:35 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/04/2022 12:16:40 - INFO - codeparrot_training - Step 19427: {'lr': 0.0004830888388406166, 'samples': 9947136, 'steps': 19427, 'loss/train': 2.319352149963379} +03/04/2022 12:16:43 - INFO - codeparrot_training - Step 19428: {'lr': 0.00048308692017072773, 'samples': 9947648, 'steps': 19428, 'loss/train': 1.8921654224395752} +03/04/2022 12:16:44 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/04/2022 12:16:48 - INFO - codeparrot_training - Step 19429: {'lr': 0.00048308500139581344, 'samples': 9948160, 'steps': 19429, 'loss/train': 2.432421922683716} +03/04/2022 12:16:52 - INFO - codeparrot_training - Step 19430: {'lr': 0.00048308308251587476, 'samples': 9948672, 'steps': 19430, 'loss/train': 2.0783071517944336} +03/04/2022 12:16:52 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/04/2022 12:16:57 - INFO - codeparrot_training - Step 19431: {'lr': 0.00048308116353091234, 'samples': 9949184, 'steps': 19431, 'loss/train': 0.9391849637031555} +03/04/2022 12:17:00 - INFO - codeparrot_training - Step 19432: {'lr': 0.00048307924444092716, 'samples': 9949696, 'steps': 19432, 'loss/train': 2.500607967376709} +03/04/2022 12:17:01 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/04/2022 12:17:05 - INFO - codeparrot_training - Step 19433: {'lr': 0.0004830773252459201, 'samples': 9950208, 'steps': 19433, 'loss/train': 2.1477081775665283} +03/04/2022 12:17:09 - INFO - codeparrot_training - Step 19434: {'lr': 0.00048307540594589194, 'samples': 9950720, 'steps': 19434, 'loss/train': 1.7711230516433716} +03/04/2022 12:17:09 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/04/2022 12:17:14 - INFO - codeparrot_training - Step 19435: {'lr': 0.0004830734865408437, 'samples': 9951232, 'steps': 19435, 'loss/train': 1.4419500827789307} +03/04/2022 12:17:17 - INFO - codeparrot_training - Step 19436: {'lr': 0.000483071567030776, 'samples': 9951744, 'steps': 19436, 'loss/train': 1.2735028266906738} +03/04/2022 12:17:18 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/04/2022 12:17:23 - INFO - codeparrot_training - Step 19437: {'lr': 0.00048306964741568994, 'samples': 9952256, 'steps': 19437, 'loss/train': 1.419663667678833} +03/04/2022 12:17:26 - INFO - codeparrot_training - Step 19438: {'lr': 0.00048306772769558624, 'samples': 9952768, 'steps': 19438, 'loss/train': 2.163883686065674} +03/04/2022 12:17:27 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 12:17:31 - INFO - codeparrot_training - Step 19439: {'lr': 0.0004830658078704659, 'samples': 9953280, 'steps': 19439, 'loss/train': 1.9183329343795776} +03/04/2022 12:17:34 - INFO - codeparrot_training - Step 19440: {'lr': 0.0004830638879403296, 'samples': 9953792, 'steps': 19440, 'loss/train': 1.5530331134796143} +03/04/2022 12:17:36 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 12:17:40 - INFO - codeparrot_training - Step 19441: {'lr': 0.00048306196790517844, 'samples': 9954304, 'steps': 19441, 'loss/train': 0.17361800372600555} +03/04/2022 12:17:43 - INFO - codeparrot_training - Step 19442: {'lr': 0.0004830600477650131, 'samples': 9954816, 'steps': 19442, 'loss/train': 1.9050164222717285} +03/04/2022 12:17:44 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/04/2022 12:17:48 - INFO - codeparrot_training - Step 19443: {'lr': 0.0004830581275198344, 'samples': 9955328, 'steps': 19443, 'loss/train': 2.2285735607147217} +03/04/2022 12:17:51 - INFO - codeparrot_training - Step 19444: {'lr': 0.00048305620716964336, 'samples': 9955840, 'steps': 19444, 'loss/train': 1.9254708290100098} +03/04/2022 12:17:53 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/04/2022 12:17:57 - INFO - codeparrot_training - Step 19445: {'lr': 0.00048305428671444083, 'samples': 9956352, 'steps': 19445, 'loss/train': 2.113628625869751} +03/04/2022 12:18:00 - INFO - codeparrot_training - Step 19446: {'lr': 0.00048305236615422763, 'samples': 9956864, 'steps': 19446, 'loss/train': 6.591501712799072} +03/04/2022 12:18:02 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/04/2022 12:18:05 - INFO - codeparrot_training - Step 19447: {'lr': 0.00048305044548900463, 'samples': 9957376, 'steps': 19447, 'loss/train': 1.8314212560653687} +03/04/2022 12:18:08 - INFO - codeparrot_training - Step 19448: {'lr': 0.0004830485247187727, 'samples': 9957888, 'steps': 19448, 'loss/train': 1.7388160228729248} +03/04/2022 12:18:11 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/04/2022 12:18:14 - INFO - codeparrot_training - Step 19449: {'lr': 0.0004830466038435327, 'samples': 9958400, 'steps': 19449, 'loss/train': 2.6245033740997314} +03/04/2022 12:18:17 - INFO - codeparrot_training - Step 19450: {'lr': 0.0004830446828632854, 'samples': 9958912, 'steps': 19450, 'loss/train': 1.7751208543777466} +03/04/2022 12:18:20 - INFO - codeparrot_training - Step 19451: {'lr': 0.00048304276177803186, 'samples': 9959424, 'steps': 19451, 'loss/train': 1.7307716608047485} +03/04/2022 12:18:20 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/04/2022 12:18:25 - INFO - codeparrot_training - Step 19452: {'lr': 0.00048304084058777285, 'samples': 9959936, 'steps': 19452, 'loss/train': 1.3346421718597412} +03/04/2022 12:18:29 - INFO - codeparrot_training - Step 19453: {'lr': 0.00048303891929250923, 'samples': 9960448, 'steps': 19453, 'loss/train': 1.4874038696289062} +03/04/2022 12:18:29 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/04/2022 12:18:34 - INFO - codeparrot_training - Step 19454: {'lr': 0.0004830369978922418, 'samples': 9960960, 'steps': 19454, 'loss/train': 2.417426586151123} +03/04/2022 12:18:37 - INFO - codeparrot_training - Step 19455: {'lr': 0.00048303507638697155, 'samples': 9961472, 'steps': 19455, 'loss/train': 1.5272533893585205} +03/04/2022 12:18:37 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/04/2022 12:18:42 - INFO - codeparrot_training - Step 19456: {'lr': 0.0004830331547766993, 'samples': 9961984, 'steps': 19456, 'loss/train': 2.231531858444214} +03/04/2022 12:18:45 - INFO - codeparrot_training - Step 19457: {'lr': 0.0004830312330614259, 'samples': 9962496, 'steps': 19457, 'loss/train': 1.2478259801864624} +03/04/2022 12:18:45 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 12:18:51 - INFO - codeparrot_training - Step 19458: {'lr': 0.00048302931124115226, 'samples': 9963008, 'steps': 19458, 'loss/train': 2.0705831050872803} +03/04/2022 12:18:54 - INFO - codeparrot_training - Step 19459: {'lr': 0.0004830273893158791, 'samples': 9963520, 'steps': 19459, 'loss/train': 1.7067620754241943} +03/04/2022 12:18:54 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/04/2022 12:18:59 - INFO - codeparrot_training - Step 19460: {'lr': 0.0004830254672856075, 'samples': 9964032, 'steps': 19460, 'loss/train': 1.600019097328186} +03/04/2022 12:19:02 - INFO - codeparrot_training - Step 19461: {'lr': 0.00048302354515033813, 'samples': 9964544, 'steps': 19461, 'loss/train': 0.7247123122215271} +03/04/2022 12:19:02 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/04/2022 12:19:08 - INFO - codeparrot_training - Step 19462: {'lr': 0.00048302162291007203, 'samples': 9965056, 'steps': 19462, 'loss/train': 0.20816490054130554} +03/04/2022 12:19:11 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/04/2022 12:19:13 - INFO - codeparrot_training - Step 19463: {'lr': 0.00048301970056480994, 'samples': 9965568, 'steps': 19463, 'loss/train': 2.3204257488250732} +03/04/2022 12:19:16 - INFO - codeparrot_training - Step 19464: {'lr': 0.00048301777811455274, 'samples': 9966080, 'steps': 19464, 'loss/train': 2.091796398162842} +03/04/2022 12:19:19 - INFO - codeparrot_training - Step 19465: {'lr': 0.0004830158555593014, 'samples': 9966592, 'steps': 19465, 'loss/train': 1.9790256023406982} +03/04/2022 12:19:19 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/04/2022 12:19:25 - INFO - codeparrot_training - Step 19466: {'lr': 0.00048301393289905663, 'samples': 9967104, 'steps': 19466, 'loss/train': 1.699397087097168} +03/04/2022 12:19:28 - INFO - codeparrot_training - Step 19467: {'lr': 0.00048301201013381946, 'samples': 9967616, 'steps': 19467, 'loss/train': 2.4085586071014404} +03/04/2022 12:19:28 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/04/2022 12:19:33 - INFO - codeparrot_training - Step 19468: {'lr': 0.00048301008726359064, 'samples': 9968128, 'steps': 19468, 'loss/train': 1.531436800956726} +03/04/2022 12:19:37 - INFO - codeparrot_training - Step 19469: {'lr': 0.00048300816428837104, 'samples': 9968640, 'steps': 19469, 'loss/train': 2.052704095840454} +03/04/2022 12:19:37 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/04/2022 12:19:42 - INFO - codeparrot_training - Step 19470: {'lr': 0.00048300624120816153, 'samples': 9969152, 'steps': 19470, 'loss/train': 1.5572563409805298} +03/04/2022 12:19:45 - INFO - codeparrot_training - Step 19471: {'lr': 0.0004830043180229631, 'samples': 9969664, 'steps': 19471, 'loss/train': 1.6688894033432007} +03/04/2022 12:19:45 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/04/2022 12:19:50 - INFO - codeparrot_training - Step 19472: {'lr': 0.0004830023947327764, 'samples': 9970176, 'steps': 19472, 'loss/train': 1.6920973062515259} +03/04/2022 12:19:53 - INFO - codeparrot_training - Step 19473: {'lr': 0.0004830004713376025, 'samples': 9970688, 'steps': 19473, 'loss/train': 2.8595988750457764} +03/04/2022 12:19:53 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/04/2022 12:19:58 - INFO - codeparrot_training - Step 19474: {'lr': 0.00048299854783744224, 'samples': 9971200, 'steps': 19474, 'loss/train': 1.881035566329956} +03/04/2022 12:20:01 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 12:20:04 - INFO - codeparrot_training - Step 19475: {'lr': 0.0004829966242322963, 'samples': 9971712, 'steps': 19475, 'loss/train': 2.28920316696167} +03/04/2022 12:20:07 - INFO - codeparrot_training - Step 19476: {'lr': 0.00048299470052216576, 'samples': 9972224, 'steps': 19476, 'loss/train': 1.7556978464126587} +03/04/2022 12:20:10 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/04/2022 12:20:12 - INFO - codeparrot_training - Step 19477: {'lr': 0.0004829927767070514, 'samples': 9972736, 'steps': 19477, 'loss/train': 2.22821307182312} +03/04/2022 12:20:15 - INFO - codeparrot_training - Step 19478: {'lr': 0.0004829908527869541, 'samples': 9973248, 'steps': 19478, 'loss/train': 1.9674879312515259} +03/04/2022 12:20:18 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 12:20:21 - INFO - codeparrot_training - Step 19479: {'lr': 0.0004829889287618746, 'samples': 9973760, 'steps': 19479, 'loss/train': 1.6904776096343994} +03/04/2022 12:20:24 - INFO - codeparrot_training - Step 19480: {'lr': 0.000482987004631814, 'samples': 9974272, 'steps': 19480, 'loss/train': 1.566239356994629} +03/04/2022 12:20:27 - INFO - codeparrot_training - Step 19481: {'lr': 0.000482985080396773, 'samples': 9974784, 'steps': 19481, 'loss/train': 4.24210786819458} +03/04/2022 12:20:27 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/04/2022 12:20:32 - INFO - codeparrot_training - Step 19482: {'lr': 0.00048298315605675257, 'samples': 9975296, 'steps': 19482, 'loss/train': 2.1614882946014404} +03/04/2022 12:20:35 - INFO - codeparrot_training - Step 19483: {'lr': 0.0004829812316117535, 'samples': 9975808, 'steps': 19483, 'loss/train': 1.0340620279312134} +03/04/2022 12:20:35 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/04/2022 12:20:41 - INFO - codeparrot_training - Step 19484: {'lr': 0.0004829793070617767, 'samples': 9976320, 'steps': 19484, 'loss/train': 1.7095084190368652} +03/04/2022 12:20:43 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/04/2022 12:20:46 - INFO - codeparrot_training - Step 19485: {'lr': 0.000482977382406823, 'samples': 9976832, 'steps': 19485, 'loss/train': 1.198760747909546} +03/04/2022 12:20:50 - INFO - codeparrot_training - Step 19486: {'lr': 0.00048297545764689327, 'samples': 9977344, 'steps': 19486, 'loss/train': 1.2047996520996094} +03/04/2022 12:20:53 - INFO - codeparrot_training - Step 19487: {'lr': 0.00048297353278198843, 'samples': 9977856, 'steps': 19487, 'loss/train': 0.14014442265033722} +03/04/2022 12:20:55 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/04/2022 12:20:58 - INFO - codeparrot_training - Step 19488: {'lr': 0.00048297160781210925, 'samples': 9978368, 'steps': 19488, 'loss/train': 1.6574344635009766} +03/04/2022 12:21:01 - INFO - codeparrot_training - Step 19489: {'lr': 0.00048296968273725673, 'samples': 9978880, 'steps': 19489, 'loss/train': 1.616039514541626} +03/04/2022 12:21:03 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/04/2022 12:21:07 - INFO - codeparrot_training - Step 19490: {'lr': 0.0004829677575574316, 'samples': 9979392, 'steps': 19490, 'loss/train': 2.202183961868286} +03/04/2022 12:21:10 - INFO - codeparrot_training - Step 19491: {'lr': 0.0004829658322726348, 'samples': 9979904, 'steps': 19491, 'loss/train': 1.9947285652160645} +03/04/2022 12:21:12 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/04/2022 12:21:15 - INFO - codeparrot_training - Step 19492: {'lr': 0.00048296390688286724, 'samples': 9980416, 'steps': 19492, 'loss/train': 2.507314682006836} +03/04/2022 12:21:18 - INFO - codeparrot_training - Step 19493: {'lr': 0.00048296198138812974, 'samples': 9980928, 'steps': 19493, 'loss/train': 2.001742124557495} +03/04/2022 12:21:21 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/04/2022 12:21:23 - INFO - codeparrot_training - Step 19494: {'lr': 0.00048296005578842314, 'samples': 9981440, 'steps': 19494, 'loss/train': 1.9592903852462769} +03/04/2022 12:21:27 - INFO - codeparrot_training - Step 19495: {'lr': 0.0004829581300837483, 'samples': 9981952, 'steps': 19495, 'loss/train': 2.031825304031372} +03/04/2022 12:21:29 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 12:21:32 - INFO - codeparrot_training - Step 19496: {'lr': 0.00048295620427410614, 'samples': 9982464, 'steps': 19496, 'loss/train': 1.0448518991470337} +03/04/2022 12:21:35 - INFO - codeparrot_training - Step 19497: {'lr': 0.00048295427835949757, 'samples': 9982976, 'steps': 19497, 'loss/train': 2.4872782230377197} +03/04/2022 12:21:37 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/04/2022 12:21:40 - INFO - codeparrot_training - Step 19498: {'lr': 0.0004829523523399233, 'samples': 9983488, 'steps': 19498, 'loss/train': 1.2650761604309082} +03/04/2022 12:21:43 - INFO - codeparrot_training - Step 19499: {'lr': 0.0004829504262153844, 'samples': 9984000, 'steps': 19499, 'loss/train': 2.1082210540771484} +03/04/2022 12:21:46 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/04/2022 12:21:49 - INFO - codeparrot_training - Step 19500: {'lr': 0.00048294849998588155, 'samples': 9984512, 'steps': 19500, 'loss/train': 1.3178647756576538} +03/04/2022 12:21:52 - INFO - codeparrot_training - Step 19501: {'lr': 0.0004829465736514157, 'samples': 9985024, 'steps': 19501, 'loss/train': 1.2574142217636108} +03/04/2022 12:21:54 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 12:21:57 - INFO - codeparrot_training - Step 19502: {'lr': 0.0004829446472119878, 'samples': 9985536, 'steps': 19502, 'loss/train': 1.8499481678009033} +03/04/2022 12:22:00 - INFO - codeparrot_training - Step 19503: {'lr': 0.0004829427206675986, 'samples': 9986048, 'steps': 19503, 'loss/train': 1.6837809085845947} +03/04/2022 12:22:02 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/04/2022 12:22:06 - INFO - codeparrot_training - Step 19504: {'lr': 0.000482940794018249, 'samples': 9986560, 'steps': 19504, 'loss/train': 1.8355480432510376} +03/04/2022 12:22:09 - INFO - codeparrot_training - Step 19505: {'lr': 0.00048293886726393984, 'samples': 9987072, 'steps': 19505, 'loss/train': 2.2327864170074463} +03/04/2022 12:22:11 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 12:22:14 - INFO - codeparrot_training - Step 19506: {'lr': 0.00048293694040467205, 'samples': 9987584, 'steps': 19506, 'loss/train': 1.8651883602142334} +03/04/2022 12:22:17 - INFO - codeparrot_training - Step 19507: {'lr': 0.00048293501344044644, 'samples': 9988096, 'steps': 19507, 'loss/train': 2.1500892639160156} +03/04/2022 12:22:20 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/04/2022 12:22:23 - INFO - codeparrot_training - Step 19508: {'lr': 0.00048293308637126393, 'samples': 9988608, 'steps': 19508, 'loss/train': 1.6436740159988403} +03/04/2022 12:22:26 - INFO - codeparrot_training - Step 19509: {'lr': 0.0004829311591971254, 'samples': 9989120, 'steps': 19509, 'loss/train': 1.94767427444458} +03/04/2022 12:22:28 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/04/2022 12:22:31 - INFO - codeparrot_training - Step 19510: {'lr': 0.0004829292319180316, 'samples': 9989632, 'steps': 19510, 'loss/train': 2.696727752685547} +03/04/2022 12:22:34 - INFO - codeparrot_training - Step 19511: {'lr': 0.00048292730453398355, 'samples': 9990144, 'steps': 19511, 'loss/train': 1.6681278944015503} +03/04/2022 12:22:37 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/04/2022 12:22:39 - INFO - codeparrot_training - Step 19512: {'lr': 0.00048292537704498203, 'samples': 9990656, 'steps': 19512, 'loss/train': 1.2130415439605713} +03/04/2022 12:22:43 - INFO - codeparrot_training - Step 19513: {'lr': 0.00048292344945102795, 'samples': 9991168, 'steps': 19513, 'loss/train': 1.0961002111434937} +03/04/2022 12:22:45 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/04/2022 12:22:48 - INFO - codeparrot_training - Step 19514: {'lr': 0.0004829215217521221, 'samples': 9991680, 'steps': 19514, 'loss/train': 2.175157308578491} +03/04/2022 12:22:51 - INFO - codeparrot_training - Step 19515: {'lr': 0.00048291959394826546, 'samples': 9992192, 'steps': 19515, 'loss/train': 1.7235057353973389} +03/04/2022 12:22:54 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/04/2022 12:22:56 - INFO - codeparrot_training - Step 19516: {'lr': 0.00048291766603945885, 'samples': 9992704, 'steps': 19516, 'loss/train': 2.1452224254608154} +03/04/2022 12:22:59 - INFO - codeparrot_training - Step 19517: {'lr': 0.0004829157380257031, 'samples': 9993216, 'steps': 19517, 'loss/train': 1.732651948928833} +03/04/2022 12:23:02 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/04/2022 12:23:05 - INFO - codeparrot_training - Step 19518: {'lr': 0.0004829138099069991, 'samples': 9993728, 'steps': 19518, 'loss/train': 2.2896955013275146} +03/04/2022 12:23:08 - INFO - codeparrot_training - Step 19519: {'lr': 0.0004829118816833478, 'samples': 9994240, 'steps': 19519, 'loss/train': 1.7804704904556274} +03/04/2022 12:23:10 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/04/2022 12:23:13 - INFO - codeparrot_training - Step 19520: {'lr': 0.00048290995335474997, 'samples': 9994752, 'steps': 19520, 'loss/train': 1.975152611732483} +03/04/2022 12:23:16 - INFO - codeparrot_training - Step 19521: {'lr': 0.0004829080249212064, 'samples': 9995264, 'steps': 19521, 'loss/train': 1.7336583137512207} +03/04/2022 12:23:19 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/04/2022 12:23:21 - INFO - codeparrot_training - Step 19522: {'lr': 0.00048290609638271823, 'samples': 9995776, 'steps': 19522, 'loss/train': 1.6004469394683838} +03/04/2022 12:23:25 - INFO - codeparrot_training - Step 19523: {'lr': 0.00048290416773928615, 'samples': 9996288, 'steps': 19523, 'loss/train': 1.9194672107696533} +03/04/2022 12:23:27 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/04/2022 12:23:30 - INFO - codeparrot_training - Step 19524: {'lr': 0.00048290223899091094, 'samples': 9996800, 'steps': 19524, 'loss/train': 1.548783302307129} +03/04/2022 12:23:33 - INFO - codeparrot_training - Step 19525: {'lr': 0.0004829003101375937, 'samples': 9997312, 'steps': 19525, 'loss/train': 1.8592116832733154} +03/04/2022 12:23:35 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/04/2022 12:23:38 - INFO - codeparrot_training - Step 19526: {'lr': 0.00048289838117933505, 'samples': 9997824, 'steps': 19526, 'loss/train': 2.3796582221984863} +03/04/2022 12:23:42 - INFO - codeparrot_training - Step 19527: {'lr': 0.0004828964521161361, 'samples': 9998336, 'steps': 19527, 'loss/train': 1.2461003065109253} +03/04/2022 12:23:44 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/04/2022 12:23:47 - INFO - codeparrot_training - Step 19528: {'lr': 0.0004828945229479975, 'samples': 9998848, 'steps': 19528, 'loss/train': 2.2604713439941406} +03/04/2022 12:23:50 - INFO - codeparrot_training - Step 19529: {'lr': 0.0004828925936749202, 'samples': 9999360, 'steps': 19529, 'loss/train': 1.5753413438796997} +03/04/2022 12:23:53 - INFO - codeparrot_training - Step 19530: {'lr': 0.0004828906642969052, 'samples': 9999872, 'steps': 19530, 'loss/train': 1.0638411045074463} +03/04/2022 12:23:53 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/04/2022 12:23:59 - INFO - codeparrot_training - Step 19531: {'lr': 0.00048288873481395323, 'samples': 10000384, 'steps': 19531, 'loss/train': 2.356968402862549} +03/04/2022 12:24:02 - INFO - codeparrot_training - Step 19532: {'lr': 0.0004828868052260652, 'samples': 10000896, 'steps': 19532, 'loss/train': 0.748901903629303} +03/04/2022 12:24:02 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 12:24:07 - INFO - codeparrot_training - Step 19533: {'lr': 0.0004828848755332419, 'samples': 10001408, 'steps': 19533, 'loss/train': 1.8329068422317505} +03/04/2022 12:24:10 - INFO - codeparrot_training - Step 19534: {'lr': 0.0004828829457354843, 'samples': 10001920, 'steps': 19534, 'loss/train': 1.0727920532226562} +03/04/2022 12:24:11 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/04/2022 12:24:16 - INFO - codeparrot_training - Step 19535: {'lr': 0.0004828810158327933, 'samples': 10002432, 'steps': 19535, 'loss/train': 1.2543997764587402} +03/04/2022 12:24:19 - INFO - codeparrot_training - Step 19536: {'lr': 0.00048287908582516964, 'samples': 10002944, 'steps': 19536, 'loss/train': 0.34468138217926025} +03/04/2022 12:24:19 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/04/2022 12:24:24 - INFO - codeparrot_training - Step 19537: {'lr': 0.00048287715571261424, 'samples': 10003456, 'steps': 19537, 'loss/train': 1.2039214372634888} +03/04/2022 12:24:28 - INFO - codeparrot_training - Step 19538: {'lr': 0.00048287522549512806, 'samples': 10003968, 'steps': 19538, 'loss/train': 0.6806887984275818} +03/04/2022 12:24:28 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/04/2022 12:24:33 - INFO - codeparrot_training - Step 19539: {'lr': 0.0004828732951727119, 'samples': 10004480, 'steps': 19539, 'loss/train': 1.8837666511535645} +03/04/2022 12:24:36 - INFO - codeparrot_training - Step 19540: {'lr': 0.00048287136474536657, 'samples': 10004992, 'steps': 19540, 'loss/train': 1.9773361682891846} +03/04/2022 12:24:37 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/04/2022 12:24:41 - INFO - codeparrot_training - Step 19541: {'lr': 0.000482869434213093, 'samples': 10005504, 'steps': 19541, 'loss/train': 2.079502582550049} +03/04/2022 12:24:44 - INFO - codeparrot_training - Step 19542: {'lr': 0.0004828675035758921, 'samples': 10006016, 'steps': 19542, 'loss/train': 1.6656849384307861} +03/04/2022 12:24:45 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/04/2022 12:24:50 - INFO - codeparrot_training - Step 19543: {'lr': 0.00048286557283376465, 'samples': 10006528, 'steps': 19543, 'loss/train': 1.924633502960205} +03/04/2022 12:24:53 - INFO - codeparrot_training - Step 19544: {'lr': 0.0004828636419867116, 'samples': 10007040, 'steps': 19544, 'loss/train': 1.9084010124206543} +03/04/2022 12:24:53 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/04/2022 12:24:58 - INFO - codeparrot_training - Step 19545: {'lr': 0.00048286171103473376, 'samples': 10007552, 'steps': 19545, 'loss/train': 2.430124521255493} +03/04/2022 12:25:01 - INFO - codeparrot_training - Step 19546: {'lr': 0.00048285977997783203, 'samples': 10008064, 'steps': 19546, 'loss/train': 2.267749547958374} +03/04/2022 12:25:01 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/04/2022 12:25:06 - INFO - codeparrot_training - Step 19547: {'lr': 0.0004828578488160073, 'samples': 10008576, 'steps': 19547, 'loss/train': 1.7599269151687622} +03/04/2022 12:25:10 - INFO - codeparrot_training - Step 19548: {'lr': 0.0004828559175492604, 'samples': 10009088, 'steps': 19548, 'loss/train': 2.2090535163879395} +03/04/2022 12:25:10 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/04/2022 12:25:15 - INFO - codeparrot_training - Step 19549: {'lr': 0.0004828539861775922, 'samples': 10009600, 'steps': 19549, 'loss/train': 2.474966049194336} +03/04/2022 12:25:18 - INFO - codeparrot_training - Step 19550: {'lr': 0.0004828520547010036, 'samples': 10010112, 'steps': 19550, 'loss/train': 2.012188196182251} +03/04/2022 12:25:18 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/04/2022 12:25:23 - INFO - codeparrot_training - Step 19551: {'lr': 0.0004828501231194955, 'samples': 10010624, 'steps': 19551, 'loss/train': 2.024797201156616} +03/04/2022 12:25:26 - INFO - codeparrot_training - Step 19552: {'lr': 0.0004828481914330687, 'samples': 10011136, 'steps': 19552, 'loss/train': 2.010770797729492} +03/04/2022 12:25:26 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/04/2022 12:25:32 - INFO - codeparrot_training - Step 19553: {'lr': 0.000482846259641724, 'samples': 10011648, 'steps': 19553, 'loss/train': 1.7437877655029297} +03/04/2022 12:25:35 - INFO - codeparrot_training - Step 19554: {'lr': 0.0004828443277454625, 'samples': 10012160, 'steps': 19554, 'loss/train': 1.518804669380188} +03/04/2022 12:25:35 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/04/2022 12:25:40 - INFO - codeparrot_training - Step 19555: {'lr': 0.0004828423957442849, 'samples': 10012672, 'steps': 19555, 'loss/train': 2.184455633163452} +03/04/2022 12:25:43 - INFO - codeparrot_training - Step 19556: {'lr': 0.00048284046363819213, 'samples': 10013184, 'steps': 19556, 'loss/train': 1.7571711540222168} +03/04/2022 12:25:43 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/04/2022 12:25:49 - INFO - codeparrot_training - Step 19557: {'lr': 0.000482838531427185, 'samples': 10013696, 'steps': 19557, 'loss/train': 1.6274282932281494} +03/04/2022 12:25:52 - INFO - codeparrot_training - Step 19558: {'lr': 0.00048283659911126445, 'samples': 10014208, 'steps': 19558, 'loss/train': 1.438607931137085} +03/04/2022 12:25:52 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/04/2022 12:25:57 - INFO - codeparrot_training - Step 19559: {'lr': 0.0004828346666904313, 'samples': 10014720, 'steps': 19559, 'loss/train': 1.5621559619903564} +03/04/2022 12:26:02 - INFO - codeparrot_training - Step 19560: {'lr': 0.00048283273416468644, 'samples': 10015232, 'steps': 19560, 'loss/train': 0.47866740822792053} +03/04/2022 12:26:06 - INFO - codeparrot_training - Step 19561: {'lr': 0.0004828308015340307, 'samples': 10015744, 'steps': 19561, 'loss/train': 2.464336633682251} +03/04/2022 12:26:08 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/04/2022 12:26:11 - INFO - codeparrot_training - Step 19562: {'lr': 0.0004828288687984651, 'samples': 10016256, 'steps': 19562, 'loss/train': 2.2050108909606934} +03/04/2022 12:26:14 - INFO - codeparrot_training - Step 19563: {'lr': 0.0004828269359579903, 'samples': 10016768, 'steps': 19563, 'loss/train': 2.899597644805908} +03/04/2022 12:26:17 - INFO - codeparrot_training - Step 19564: {'lr': 0.00048282500301260735, 'samples': 10017280, 'steps': 19564, 'loss/train': 1.4282641410827637} +03/04/2022 12:26:17 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/04/2022 12:26:23 - INFO - codeparrot_training - Step 19565: {'lr': 0.000482823069962317, 'samples': 10017792, 'steps': 19565, 'loss/train': 1.9808768033981323} +03/04/2022 12:26:26 - INFO - codeparrot_training - Step 19566: {'lr': 0.0004828211368071202, 'samples': 10018304, 'steps': 19566, 'loss/train': 1.8418406248092651} +03/04/2022 12:26:26 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/04/2022 12:26:31 - INFO - codeparrot_training - Step 19567: {'lr': 0.0004828192035470178, 'samples': 10018816, 'steps': 19567, 'loss/train': 2.2723824977874756} +03/04/2022 12:26:34 - INFO - codeparrot_training - Step 19568: {'lr': 0.00048281727018201063, 'samples': 10019328, 'steps': 19568, 'loss/train': 1.1990255117416382} +03/04/2022 12:26:34 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/04/2022 12:26:40 - INFO - codeparrot_training - Step 19569: {'lr': 0.00048281533671209955, 'samples': 10019840, 'steps': 19569, 'loss/train': 0.9779643416404724} +03/04/2022 12:26:43 - INFO - codeparrot_training - Step 19570: {'lr': 0.0004828134031372855, 'samples': 10020352, 'steps': 19570, 'loss/train': 2.1884806156158447} +03/04/2022 12:26:44 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 12:26:48 - INFO - codeparrot_training - Step 19571: {'lr': 0.00048281146945756937, 'samples': 10020864, 'steps': 19571, 'loss/train': 1.5977798700332642} +03/04/2022 12:26:51 - INFO - codeparrot_training - Step 19572: {'lr': 0.00048280953567295196, 'samples': 10021376, 'steps': 19572, 'loss/train': 1.5649935007095337} +03/04/2022 12:26:52 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 12:26:57 - INFO - codeparrot_training - Step 19573: {'lr': 0.0004828076017834342, 'samples': 10021888, 'steps': 19573, 'loss/train': 1.7698712348937988} +03/04/2022 12:27:00 - INFO - codeparrot_training - Step 19574: {'lr': 0.00048280566778901684, 'samples': 10022400, 'steps': 19574, 'loss/train': 1.3151737451553345} +03/04/2022 12:27:01 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/04/2022 12:27:05 - INFO - codeparrot_training - Step 19575: {'lr': 0.00048280373368970086, 'samples': 10022912, 'steps': 19575, 'loss/train': 2.602470874786377} +03/04/2022 12:27:08 - INFO - codeparrot_training - Step 19576: {'lr': 0.0004828017994854872, 'samples': 10023424, 'steps': 19576, 'loss/train': 1.8715189695358276} +03/04/2022 12:27:09 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/04/2022 12:27:14 - INFO - codeparrot_training - Step 19577: {'lr': 0.0004827998651763765, 'samples': 10023936, 'steps': 19577, 'loss/train': 1.833382248878479} +03/04/2022 12:27:17 - INFO - codeparrot_training - Step 19578: {'lr': 0.0004827979307623699, 'samples': 10024448, 'steps': 19578, 'loss/train': 2.0861172676086426} +03/04/2022 12:27:18 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/04/2022 12:27:22 - INFO - codeparrot_training - Step 19579: {'lr': 0.0004827959962434681, 'samples': 10024960, 'steps': 19579, 'loss/train': 1.2611104249954224} +03/04/2022 12:27:25 - INFO - codeparrot_training - Step 19580: {'lr': 0.00048279406161967197, 'samples': 10025472, 'steps': 19580, 'loss/train': 2.140249490737915} +03/04/2022 12:27:26 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/04/2022 12:27:30 - INFO - codeparrot_training - Step 19581: {'lr': 0.0004827921268909825, 'samples': 10025984, 'steps': 19581, 'loss/train': 2.5501272678375244} +03/04/2022 12:27:34 - INFO - codeparrot_training - Step 19582: {'lr': 0.0004827901920574005, 'samples': 10026496, 'steps': 19582, 'loss/train': 2.8026888370513916} +03/04/2022 12:27:35 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/04/2022 12:27:39 - INFO - codeparrot_training - Step 19583: {'lr': 0.0004827882571189268, 'samples': 10027008, 'steps': 19583, 'loss/train': 1.7835613489151} +03/04/2022 12:27:42 - INFO - codeparrot_training - Step 19584: {'lr': 0.00048278632207556226, 'samples': 10027520, 'steps': 19584, 'loss/train': 1.8873343467712402} +03/04/2022 12:27:43 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 12:27:47 - INFO - codeparrot_training - Step 19585: {'lr': 0.00048278438692730784, 'samples': 10028032, 'steps': 19585, 'loss/train': 1.5582023859024048} +03/04/2022 12:27:50 - INFO - codeparrot_training - Step 19586: {'lr': 0.00048278245167416434, 'samples': 10028544, 'steps': 19586, 'loss/train': 2.1747469902038574} +03/04/2022 12:27:56 - INFO - codeparrot_training - Step 19587: {'lr': 0.0004827805163161327, 'samples': 10029056, 'steps': 19587, 'loss/train': 1.4605070352554321} +03/04/2022 12:27:59 - INFO - codeparrot_training - Step 19588: {'lr': 0.0004827785808532137, 'samples': 10029568, 'steps': 19588, 'loss/train': 2.5666894912719727} +03/04/2022 12:28:00 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/04/2022 12:28:04 - INFO - codeparrot_training - Step 19589: {'lr': 0.0004827766452854083, 'samples': 10030080, 'steps': 19589, 'loss/train': 2.7876060009002686} +03/04/2022 12:28:07 - INFO - codeparrot_training - Step 19590: {'lr': 0.0004827747096127173, 'samples': 10030592, 'steps': 19590, 'loss/train': 1.920558214187622} +03/04/2022 12:28:08 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/04/2022 12:28:13 - INFO - codeparrot_training - Step 19591: {'lr': 0.00048277277383514165, 'samples': 10031104, 'steps': 19591, 'loss/train': 2.2619717121124268} +03/04/2022 12:28:16 - INFO - codeparrot_training - Step 19592: {'lr': 0.00048277083795268216, 'samples': 10031616, 'steps': 19592, 'loss/train': 1.9289435148239136} +03/04/2022 12:28:17 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/04/2022 12:28:21 - INFO - codeparrot_training - Step 19593: {'lr': 0.0004827689019653397, 'samples': 10032128, 'steps': 19593, 'loss/train': 1.250562310218811} +03/04/2022 12:28:24 - INFO - codeparrot_training - Step 19594: {'lr': 0.00048276696587311525, 'samples': 10032640, 'steps': 19594, 'loss/train': 1.7592219114303589} +03/04/2022 12:28:25 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/04/2022 12:28:29 - INFO - codeparrot_training - Step 19595: {'lr': 0.00048276502967600955, 'samples': 10033152, 'steps': 19595, 'loss/train': 3.874727249145508} +03/04/2022 12:28:32 - INFO - codeparrot_training - Step 19596: {'lr': 0.00048276309337402345, 'samples': 10033664, 'steps': 19596, 'loss/train': 1.7101479768753052} +03/04/2022 12:28:34 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 12:28:38 - INFO - codeparrot_training - Step 19597: {'lr': 0.000482761156967158, 'samples': 10034176, 'steps': 19597, 'loss/train': 1.588287591934204} +03/04/2022 12:28:41 - INFO - codeparrot_training - Step 19598: {'lr': 0.0004827592204554139, 'samples': 10034688, 'steps': 19598, 'loss/train': 2.2950551509857178} +03/04/2022 12:28:42 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/04/2022 12:28:46 - INFO - codeparrot_training - Step 19599: {'lr': 0.00048275728383879215, 'samples': 10035200, 'steps': 19599, 'loss/train': 1.7765518426895142} +03/04/2022 12:28:49 - INFO - codeparrot_training - Step 19600: {'lr': 0.0004827553471172935, 'samples': 10035712, 'steps': 19600, 'loss/train': 2.505875587463379} +03/04/2022 12:28:50 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/04/2022 12:28:55 - INFO - codeparrot_training - Step 19601: {'lr': 0.00048275341029091885, 'samples': 10036224, 'steps': 19601, 'loss/train': 2.125891923904419} +03/04/2022 12:28:58 - INFO - codeparrot_training - Step 19602: {'lr': 0.0004827514733596692, 'samples': 10036736, 'steps': 19602, 'loss/train': 1.9785504341125488} +03/04/2022 12:28:58 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/04/2022 12:29:03 - INFO - codeparrot_training - Step 19603: {'lr': 0.00048274953632354524, 'samples': 10037248, 'steps': 19603, 'loss/train': 2.523259401321411} +03/04/2022 12:29:06 - INFO - codeparrot_training - Step 19604: {'lr': 0.000482747599182548, 'samples': 10037760, 'steps': 19604, 'loss/train': 1.796750783920288} +03/04/2022 12:29:07 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 12:29:11 - INFO - codeparrot_training - Step 19605: {'lr': 0.00048274566193667824, 'samples': 10038272, 'steps': 19605, 'loss/train': 2.0548095703125} +03/04/2022 12:29:15 - INFO - codeparrot_training - Step 19606: {'lr': 0.0004827437245859369, 'samples': 10038784, 'steps': 19606, 'loss/train': 2.111943244934082} +03/04/2022 12:29:15 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 12:29:20 - INFO - codeparrot_training - Step 19607: {'lr': 0.0004827417871303248, 'samples': 10039296, 'steps': 19607, 'loss/train': 2.286667585372925} +03/04/2022 12:29:23 - INFO - codeparrot_training - Step 19608: {'lr': 0.00048273984956984285, 'samples': 10039808, 'steps': 19608, 'loss/train': 1.4727572202682495} +03/04/2022 12:29:24 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/04/2022 12:29:28 - INFO - codeparrot_training - Step 19609: {'lr': 0.0004827379119044919, 'samples': 10040320, 'steps': 19609, 'loss/train': 1.7012600898742676} +03/04/2022 12:29:32 - INFO - codeparrot_training - Step 19610: {'lr': 0.00048273597413427284, 'samples': 10040832, 'steps': 19610, 'loss/train': 1.28578519821167} +03/04/2022 12:29:33 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/04/2022 12:29:37 - INFO - codeparrot_training - Step 19611: {'lr': 0.00048273403625918653, 'samples': 10041344, 'steps': 19611, 'loss/train': 1.6905452013015747} +03/04/2022 12:29:40 - INFO - codeparrot_training - Step 19612: {'lr': 0.0004827320982792339, 'samples': 10041856, 'steps': 19612, 'loss/train': 2.045376777648926} +03/04/2022 12:29:41 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/04/2022 12:29:45 - INFO - codeparrot_training - Step 19613: {'lr': 0.00048273016019441585, 'samples': 10042368, 'steps': 19613, 'loss/train': 1.9756158590316772} +03/04/2022 12:29:48 - INFO - codeparrot_training - Step 19614: {'lr': 0.00048272822200473304, 'samples': 10042880, 'steps': 19614, 'loss/train': 0.9551622867584229} +03/04/2022 12:29:50 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/04/2022 12:29:54 - INFO - codeparrot_training - Step 19615: {'lr': 0.0004827262837101866, 'samples': 10043392, 'steps': 19615, 'loss/train': 1.9031625986099243} +03/04/2022 12:29:57 - INFO - codeparrot_training - Step 19616: {'lr': 0.0004827243453107772, 'samples': 10043904, 'steps': 19616, 'loss/train': 2.0338525772094727} +03/04/2022 12:29:58 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/04/2022 12:30:02 - INFO - codeparrot_training - Step 19617: {'lr': 0.0004827224068065058, 'samples': 10044416, 'steps': 19617, 'loss/train': 2.066007375717163} +03/04/2022 12:30:05 - INFO - codeparrot_training - Step 19618: {'lr': 0.0004827204681973733, 'samples': 10044928, 'steps': 19618, 'loss/train': 2.1195881366729736} +03/04/2022 12:30:06 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/04/2022 12:30:11 - INFO - codeparrot_training - Step 19619: {'lr': 0.00048271852948338057, 'samples': 10045440, 'steps': 19619, 'loss/train': 1.7249844074249268} +03/04/2022 12:30:14 - INFO - codeparrot_training - Step 19620: {'lr': 0.00048271659066452847, 'samples': 10045952, 'steps': 19620, 'loss/train': 1.456711769104004} +03/04/2022 12:30:15 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/04/2022 12:30:19 - INFO - codeparrot_training - Step 19621: {'lr': 0.0004827146517408178, 'samples': 10046464, 'steps': 19621, 'loss/train': 1.2809100151062012} +03/04/2022 12:30:22 - INFO - codeparrot_training - Step 19622: {'lr': 0.0004827127127122495, 'samples': 10046976, 'steps': 19622, 'loss/train': 2.207751750946045} +03/04/2022 12:30:23 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/04/2022 12:30:28 - INFO - codeparrot_training - Step 19623: {'lr': 0.00048271077357882455, 'samples': 10047488, 'steps': 19623, 'loss/train': 2.057893753051758} +03/04/2022 12:30:31 - INFO - codeparrot_training - Step 19624: {'lr': 0.00048270883434054364, 'samples': 10048000, 'steps': 19624, 'loss/train': 2.098374605178833} +03/04/2022 12:30:32 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 12:30:36 - INFO - codeparrot_training - Step 19625: {'lr': 0.00048270689499740774, 'samples': 10048512, 'steps': 19625, 'loss/train': 2.298408269882202} +03/04/2022 12:30:39 - INFO - codeparrot_training - Step 19626: {'lr': 0.0004827049555494176, 'samples': 10049024, 'steps': 19626, 'loss/train': 2.519592761993408} +03/04/2022 12:30:40 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/04/2022 12:30:45 - INFO - codeparrot_training - Step 19627: {'lr': 0.00048270301599657436, 'samples': 10049536, 'steps': 19627, 'loss/train': 1.2468628883361816} +03/04/2022 12:30:48 - INFO - codeparrot_training - Step 19628: {'lr': 0.0004827010763388786, 'samples': 10050048, 'steps': 19628, 'loss/train': 1.2400918006896973} +03/04/2022 12:30:49 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 12:30:53 - INFO - codeparrot_training - Step 19629: {'lr': 0.00048269913657633147, 'samples': 10050560, 'steps': 19629, 'loss/train': 0.7863187789916992} +03/04/2022 12:30:56 - INFO - codeparrot_training - Step 19630: {'lr': 0.00048269719670893357, 'samples': 10051072, 'steps': 19630, 'loss/train': 2.37595272064209} +03/04/2022 12:30:58 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/04/2022 12:31:01 - INFO - codeparrot_training - Step 19631: {'lr': 0.00048269525673668595, 'samples': 10051584, 'steps': 19631, 'loss/train': 1.8215774297714233} +03/04/2022 12:31:05 - INFO - codeparrot_training - Step 19632: {'lr': 0.00048269331665958947, 'samples': 10052096, 'steps': 19632, 'loss/train': 1.652806043624878} +03/04/2022 12:31:06 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/04/2022 12:31:10 - INFO - codeparrot_training - Step 19633: {'lr': 0.00048269137647764495, 'samples': 10052608, 'steps': 19633, 'loss/train': 1.957367181777954} +03/04/2022 12:31:13 - INFO - codeparrot_training - Step 19634: {'lr': 0.00048268943619085325, 'samples': 10053120, 'steps': 19634, 'loss/train': 2.0553481578826904} +03/04/2022 12:31:15 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/04/2022 12:31:19 - INFO - codeparrot_training - Step 19635: {'lr': 0.00048268749579921536, 'samples': 10053632, 'steps': 19635, 'loss/train': 1.7191535234451294} +03/04/2022 12:31:22 - INFO - codeparrot_training - Step 19636: {'lr': 0.00048268555530273197, 'samples': 10054144, 'steps': 19636, 'loss/train': 2.286865234375} +03/04/2022 12:31:23 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 12:31:27 - INFO - codeparrot_training - Step 19637: {'lr': 0.0004826836147014041, 'samples': 10054656, 'steps': 19637, 'loss/train': 3.7865476608276367} +03/04/2022 12:31:30 - INFO - codeparrot_training - Step 19638: {'lr': 0.0004826816739952326, 'samples': 10055168, 'steps': 19638, 'loss/train': 0.860769510269165} +03/04/2022 12:31:32 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/04/2022 12:31:35 - INFO - codeparrot_training - Step 19639: {'lr': 0.0004826797331842183, 'samples': 10055680, 'steps': 19639, 'loss/train': 1.4444221258163452} +03/04/2022 12:31:39 - INFO - codeparrot_training - Step 19640: {'lr': 0.0004826777922683622, 'samples': 10056192, 'steps': 19640, 'loss/train': 1.126543641090393} +03/04/2022 12:31:41 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/04/2022 12:31:44 - INFO - codeparrot_training - Step 19641: {'lr': 0.0004826758512476649, 'samples': 10056704, 'steps': 19641, 'loss/train': 1.6133081912994385} +03/04/2022 12:31:47 - INFO - codeparrot_training - Step 19642: {'lr': 0.0004826739101221276, 'samples': 10057216, 'steps': 19642, 'loss/train': 1.738745927810669} +03/04/2022 12:31:49 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/04/2022 12:31:53 - INFO - codeparrot_training - Step 19643: {'lr': 0.000482671968891751, 'samples': 10057728, 'steps': 19643, 'loss/train': 2.2655255794525146} +03/04/2022 12:31:56 - INFO - codeparrot_training - Step 19644: {'lr': 0.000482670027556536, 'samples': 10058240, 'steps': 19644, 'loss/train': 1.685506820678711} +03/04/2022 12:31:58 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/04/2022 12:32:01 - INFO - codeparrot_training - Step 19645: {'lr': 0.0004826680861164834, 'samples': 10058752, 'steps': 19645, 'loss/train': 1.8673630952835083} +03/04/2022 12:32:04 - INFO - codeparrot_training - Step 19646: {'lr': 0.00048266614457159426, 'samples': 10059264, 'steps': 19646, 'loss/train': 2.01196026802063} +03/04/2022 12:32:06 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/04/2022 12:32:09 - INFO - codeparrot_training - Step 19647: {'lr': 0.0004826642029218693, 'samples': 10059776, 'steps': 19647, 'loss/train': 2.2941343784332275} +03/04/2022 12:32:13 - INFO - codeparrot_training - Step 19648: {'lr': 0.00048266226116730937, 'samples': 10060288, 'steps': 19648, 'loss/train': 1.9964947700500488} +03/04/2022 12:32:15 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/04/2022 12:32:18 - INFO - codeparrot_training - Step 19649: {'lr': 0.00048266031930791555, 'samples': 10060800, 'steps': 19649, 'loss/train': 1.5013586282730103} +03/04/2022 12:32:21 - INFO - codeparrot_training - Step 19650: {'lr': 0.0004826583773436884, 'samples': 10061312, 'steps': 19650, 'loss/train': 1.6277484893798828} +03/04/2022 12:32:23 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/04/2022 12:32:26 - INFO - codeparrot_training - Step 19651: {'lr': 0.00048265643527462915, 'samples': 10061824, 'steps': 19651, 'loss/train': 1.5069152116775513} +03/04/2022 12:32:30 - INFO - codeparrot_training - Step 19652: {'lr': 0.00048265449310073847, 'samples': 10062336, 'steps': 19652, 'loss/train': 1.9873210191726685} +03/04/2022 12:32:32 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/04/2022 12:32:35 - INFO - codeparrot_training - Step 19653: {'lr': 0.0004826525508220172, 'samples': 10062848, 'steps': 19653, 'loss/train': 1.6652189493179321} +03/04/2022 12:32:38 - INFO - codeparrot_training - Step 19654: {'lr': 0.0004826506084384663, 'samples': 10063360, 'steps': 19654, 'loss/train': 1.3454701900482178} +03/04/2022 12:32:40 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 12:32:43 - INFO - codeparrot_training - Step 19655: {'lr': 0.00048264866595008665, 'samples': 10063872, 'steps': 19655, 'loss/train': 2.0290963649749756} +03/04/2022 12:32:46 - INFO - codeparrot_training - Step 19656: {'lr': 0.0004826467233568791, 'samples': 10064384, 'steps': 19656, 'loss/train': 2.2111222743988037} +03/04/2022 12:32:48 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/04/2022 12:32:52 - INFO - codeparrot_training - Step 19657: {'lr': 0.00048264478065884454, 'samples': 10064896, 'steps': 19657, 'loss/train': 1.249247431755066} +03/04/2022 12:32:55 - INFO - codeparrot_training - Step 19658: {'lr': 0.0004826428378559838, 'samples': 10065408, 'steps': 19658, 'loss/train': 1.8117727041244507} +03/04/2022 12:32:57 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/04/2022 12:33:00 - INFO - codeparrot_training - Step 19659: {'lr': 0.00048264089494829776, 'samples': 10065920, 'steps': 19659, 'loss/train': 2.988041877746582} +03/04/2022 12:33:03 - INFO - codeparrot_training - Step 19660: {'lr': 0.0004826389519357874, 'samples': 10066432, 'steps': 19660, 'loss/train': 2.1863536834716797} +03/04/2022 12:33:06 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/04/2022 12:33:09 - INFO - codeparrot_training - Step 19661: {'lr': 0.00048263700881845346, 'samples': 10066944, 'steps': 19661, 'loss/train': 1.9041731357574463} +03/04/2022 12:33:12 - INFO - codeparrot_training - Step 19662: {'lr': 0.00048263506559629687, 'samples': 10067456, 'steps': 19662, 'loss/train': 1.8874706029891968} +03/04/2022 12:33:14 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/04/2022 12:33:17 - INFO - codeparrot_training - Step 19663: {'lr': 0.00048263312226931853, 'samples': 10067968, 'steps': 19663, 'loss/train': 1.748679757118225} +03/04/2022 12:33:20 - INFO - codeparrot_training - Step 19664: {'lr': 0.0004826311788375193, 'samples': 10068480, 'steps': 19664, 'loss/train': 1.4524743556976318} +03/04/2022 12:33:22 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/04/2022 12:33:26 - INFO - codeparrot_training - Step 19665: {'lr': 0.00048262923530090007, 'samples': 10068992, 'steps': 19665, 'loss/train': 1.778059482574463} +03/04/2022 12:33:29 - INFO - codeparrot_training - Step 19666: {'lr': 0.0004826272916594616, 'samples': 10069504, 'steps': 19666, 'loss/train': 1.4261882305145264} +03/04/2022 12:33:31 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/04/2022 12:33:34 - INFO - codeparrot_training - Step 19667: {'lr': 0.000482625347913205, 'samples': 10070016, 'steps': 19667, 'loss/train': 2.420652151107788} +03/04/2022 12:33:37 - INFO - codeparrot_training - Step 19668: {'lr': 0.0004826234040621309, 'samples': 10070528, 'steps': 19668, 'loss/train': 1.6412914991378784} +03/04/2022 12:33:39 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/04/2022 12:33:42 - INFO - codeparrot_training - Step 19669: {'lr': 0.00048262146010624035, 'samples': 10071040, 'steps': 19669, 'loss/train': 1.15757155418396} +03/04/2022 12:33:46 - INFO - codeparrot_training - Step 19670: {'lr': 0.0004826195160455341, 'samples': 10071552, 'steps': 19670, 'loss/train': 0.6988943815231323} +03/04/2022 12:33:48 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/04/2022 12:33:51 - INFO - codeparrot_training - Step 19671: {'lr': 0.00048261757188001314, 'samples': 10072064, 'steps': 19671, 'loss/train': 2.032111883163452} +03/04/2022 12:33:54 - INFO - codeparrot_training - Step 19672: {'lr': 0.00048261562760967824, 'samples': 10072576, 'steps': 19672, 'loss/train': 1.2864502668380737} +03/04/2022 12:33:56 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/04/2022 12:33:59 - INFO - codeparrot_training - Step 19673: {'lr': 0.0004826136832345304, 'samples': 10073088, 'steps': 19673, 'loss/train': 1.955210566520691} +03/04/2022 12:34:02 - INFO - codeparrot_training - Step 19674: {'lr': 0.00048261173875457035, 'samples': 10073600, 'steps': 19674, 'loss/train': 2.087470769882202} +03/04/2022 12:34:04 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/04/2022 12:34:08 - INFO - codeparrot_training - Step 19675: {'lr': 0.0004826097941697991, 'samples': 10074112, 'steps': 19675, 'loss/train': 2.261869192123413} +03/04/2022 12:34:11 - INFO - codeparrot_training - Step 19676: {'lr': 0.0004826078494802174, 'samples': 10074624, 'steps': 19676, 'loss/train': 2.022291421890259} +03/04/2022 12:34:13 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/04/2022 12:34:16 - INFO - codeparrot_training - Step 19677: {'lr': 0.00048260590468582624, 'samples': 10075136, 'steps': 19677, 'loss/train': 2.183664083480835} +03/04/2022 12:34:19 - INFO - codeparrot_training - Step 19678: {'lr': 0.0004826039597866265, 'samples': 10075648, 'steps': 19678, 'loss/train': 0.8393725156784058} +03/04/2022 12:34:21 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/04/2022 12:34:24 - INFO - codeparrot_training - Step 19679: {'lr': 0.00048260201478261887, 'samples': 10076160, 'steps': 19679, 'loss/train': 2.456508159637451} +03/04/2022 12:34:28 - INFO - codeparrot_training - Step 19680: {'lr': 0.0004826000696738045, 'samples': 10076672, 'steps': 19680, 'loss/train': 1.8994140625} +03/04/2022 12:34:29 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/04/2022 12:34:33 - INFO - codeparrot_training - Step 19681: {'lr': 0.000482598124460184, 'samples': 10077184, 'steps': 19681, 'loss/train': 2.5178744792938232} +03/04/2022 12:34:36 - INFO - codeparrot_training - Step 19682: {'lr': 0.00048259617914175846, 'samples': 10077696, 'steps': 19682, 'loss/train': 1.3134336471557617} +03/04/2022 12:34:38 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/04/2022 12:34:41 - INFO - codeparrot_training - Step 19683: {'lr': 0.00048259423371852867, 'samples': 10078208, 'steps': 19683, 'loss/train': 1.6205726861953735} +03/04/2022 12:34:45 - INFO - codeparrot_training - Step 19684: {'lr': 0.0004825922881904955, 'samples': 10078720, 'steps': 19684, 'loss/train': 1.6164289712905884} +03/04/2022 12:34:46 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/04/2022 12:34:50 - INFO - codeparrot_training - Step 19685: {'lr': 0.00048259034255765984, 'samples': 10079232, 'steps': 19685, 'loss/train': 2.225207805633545} +03/04/2022 12:34:53 - INFO - codeparrot_training - Step 19686: {'lr': 0.00048258839682002253, 'samples': 10079744, 'steps': 19686, 'loss/train': 0.9545813798904419} +03/04/2022 12:34:55 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/04/2022 12:34:58 - INFO - codeparrot_training - Step 19687: {'lr': 0.00048258645097758445, 'samples': 10080256, 'steps': 19687, 'loss/train': 1.949307918548584} +03/04/2022 12:35:01 - INFO - codeparrot_training - Step 19688: {'lr': 0.0004825845050303466, 'samples': 10080768, 'steps': 19688, 'loss/train': 1.3289258480072021} +03/04/2022 12:35:03 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/04/2022 12:35:07 - INFO - codeparrot_training - Step 19689: {'lr': 0.00048258255897830967, 'samples': 10081280, 'steps': 19689, 'loss/train': 1.8775722980499268} +03/04/2022 12:35:10 - INFO - codeparrot_training - Step 19690: {'lr': 0.0004825806128214747, 'samples': 10081792, 'steps': 19690, 'loss/train': 1.3639203310012817} +03/04/2022 12:35:12 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/04/2022 12:35:15 - INFO - codeparrot_training - Step 19691: {'lr': 0.00048257866655984237, 'samples': 10082304, 'steps': 19691, 'loss/train': 1.2828947305679321} +03/04/2022 12:35:18 - INFO - codeparrot_training - Step 19692: {'lr': 0.0004825767201934138, 'samples': 10082816, 'steps': 19692, 'loss/train': 1.3774327039718628} +03/04/2022 12:35:20 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/04/2022 12:35:24 - INFO - codeparrot_training - Step 19693: {'lr': 0.0004825747737221897, 'samples': 10083328, 'steps': 19693, 'loss/train': 2.1454508304595947} +03/04/2022 12:35:27 - INFO - codeparrot_training - Step 19694: {'lr': 0.000482572827146171, 'samples': 10083840, 'steps': 19694, 'loss/train': 1.3843377828598022} +03/04/2022 12:35:29 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/04/2022 12:35:32 - INFO - codeparrot_training - Step 19695: {'lr': 0.00048257088046535864, 'samples': 10084352, 'steps': 19695, 'loss/train': 1.3183873891830444} +03/04/2022 12:35:36 - INFO - codeparrot_training - Step 19696: {'lr': 0.0004825689336797534, 'samples': 10084864, 'steps': 19696, 'loss/train': 5.601188659667969} +03/04/2022 12:35:38 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/04/2022 12:35:41 - INFO - codeparrot_training - Step 19697: {'lr': 0.00048256698678935615, 'samples': 10085376, 'steps': 19697, 'loss/train': 1.476180076599121} +03/04/2022 12:35:44 - INFO - codeparrot_training - Step 19698: {'lr': 0.00048256503979416776, 'samples': 10085888, 'steps': 19698, 'loss/train': 2.46618914604187} +03/04/2022 12:35:47 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/04/2022 12:35:49 - INFO - codeparrot_training - Step 19699: {'lr': 0.0004825630926941892, 'samples': 10086400, 'steps': 19699, 'loss/train': 2.1515254974365234} +03/04/2022 12:35:52 - INFO - codeparrot_training - Step 19700: {'lr': 0.0004825611454894213, 'samples': 10086912, 'steps': 19700, 'loss/train': 2.8739025592803955} +03/04/2022 12:35:56 - INFO - codeparrot_training - Step 19701: {'lr': 0.000482559198179865, 'samples': 10087424, 'steps': 19701, 'loss/train': 1.6649550199508667} +03/04/2022 12:35:56 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/04/2022 12:36:01 - INFO - codeparrot_training - Step 19702: {'lr': 0.00048255725076552103, 'samples': 10087936, 'steps': 19702, 'loss/train': 1.1353758573532104} +03/04/2022 12:36:04 - INFO - codeparrot_training - Step 19703: {'lr': 0.0004825553032463904, 'samples': 10088448, 'steps': 19703, 'loss/train': 1.9337323904037476} +03/04/2022 12:36:04 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/04/2022 12:36:10 - INFO - codeparrot_training - Step 19704: {'lr': 0.00048255335562247395, 'samples': 10088960, 'steps': 19704, 'loss/train': 1.4765249490737915} +03/04/2022 12:36:13 - INFO - codeparrot_training - Step 19705: {'lr': 0.0004825514078937725, 'samples': 10089472, 'steps': 19705, 'loss/train': 2.290787935256958} +03/04/2022 12:36:13 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/04/2022 12:36:18 - INFO - codeparrot_training - Step 19706: {'lr': 0.000482549460060287, 'samples': 10089984, 'steps': 19706, 'loss/train': 1.4927024841308594} +03/04/2022 12:36:21 - INFO - codeparrot_training - Step 19707: {'lr': 0.0004825475121220183, 'samples': 10090496, 'steps': 19707, 'loss/train': 0.7977847456932068} +03/04/2022 12:36:21 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/04/2022 12:36:27 - INFO - codeparrot_training - Step 19708: {'lr': 0.0004825455640789672, 'samples': 10091008, 'steps': 19708, 'loss/train': 2.2151689529418945} +03/04/2022 12:36:30 - INFO - codeparrot_training - Step 19709: {'lr': 0.00048254361593113475, 'samples': 10091520, 'steps': 19709, 'loss/train': 1.4912447929382324} +03/04/2022 12:36:30 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/04/2022 12:36:35 - INFO - codeparrot_training - Step 19710: {'lr': 0.0004825416676785217, 'samples': 10092032, 'steps': 19710, 'loss/train': 1.931646466255188} +03/04/2022 12:36:38 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/04/2022 12:36:40 - INFO - codeparrot_training - Step 19711: {'lr': 0.000482539719321129, 'samples': 10092544, 'steps': 19711, 'loss/train': 1.190773367881775} +03/04/2022 12:36:43 - INFO - codeparrot_training - Step 19712: {'lr': 0.00048253777085895745, 'samples': 10093056, 'steps': 19712, 'loss/train': 1.0326586961746216} +03/04/2022 12:36:46 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/04/2022 12:36:49 - INFO - codeparrot_training - Step 19713: {'lr': 0.000482535822292008, 'samples': 10093568, 'steps': 19713, 'loss/train': 1.6159954071044922} +03/04/2022 12:36:52 - INFO - codeparrot_training - Step 19714: {'lr': 0.0004825338736202815, 'samples': 10094080, 'steps': 19714, 'loss/train': 2.495863676071167} +03/04/2022 12:36:55 - INFO - codeparrot_training - Step 19715: {'lr': 0.00048253192484377884, 'samples': 10094592, 'steps': 19715, 'loss/train': 1.8896089792251587} +03/04/2022 12:36:55 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/04/2022 12:37:00 - INFO - codeparrot_training - Step 19716: {'lr': 0.0004825299759625008, 'samples': 10095104, 'steps': 19716, 'loss/train': 1.5112648010253906} +03/04/2022 12:37:04 - INFO - codeparrot_training - Step 19717: {'lr': 0.0004825280269764484, 'samples': 10095616, 'steps': 19717, 'loss/train': 2.1475510597229004} +03/04/2022 12:37:04 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/04/2022 12:37:09 - INFO - codeparrot_training - Step 19718: {'lr': 0.0004825260778856224, 'samples': 10096128, 'steps': 19718, 'loss/train': 2.2658729553222656} +03/04/2022 12:37:12 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/04/2022 12:37:14 - INFO - codeparrot_training - Step 19719: {'lr': 0.0004825241286900238, 'samples': 10096640, 'steps': 19719, 'loss/train': 2.0432329177856445} +03/04/2022 12:37:17 - INFO - codeparrot_training - Step 19720: {'lr': 0.0004825221793896535, 'samples': 10097152, 'steps': 19720, 'loss/train': 1.6331830024719238} +03/04/2022 12:37:21 - INFO - codeparrot_training - Step 19721: {'lr': 0.0004825202299845122, 'samples': 10097664, 'steps': 19721, 'loss/train': 2.598694324493408} +03/04/2022 12:37:21 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/04/2022 12:37:26 - INFO - codeparrot_training - Step 19722: {'lr': 0.00048251828047460077, 'samples': 10098176, 'steps': 19722, 'loss/train': 1.9113349914550781} +03/04/2022 12:37:29 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/04/2022 12:37:31 - INFO - codeparrot_training - Step 19723: {'lr': 0.0004825163308599203, 'samples': 10098688, 'steps': 19723, 'loss/train': 1.3988053798675537} +03/04/2022 12:37:34 - INFO - codeparrot_training - Step 19724: {'lr': 0.0004825143811404716, 'samples': 10099200, 'steps': 19724, 'loss/train': 1.6237815618515015} +03/04/2022 12:37:37 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/04/2022 12:37:40 - INFO - codeparrot_training - Step 19725: {'lr': 0.00048251243131625543, 'samples': 10099712, 'steps': 19725, 'loss/train': 2.0333735942840576} +03/04/2022 12:37:43 - INFO - codeparrot_training - Step 19726: {'lr': 0.0004825104813872728, 'samples': 10100224, 'steps': 19726, 'loss/train': 2.4387876987457275} +03/04/2022 12:37:46 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/04/2022 12:37:48 - INFO - codeparrot_training - Step 19727: {'lr': 0.0004825085313535245, 'samples': 10100736, 'steps': 19727, 'loss/train': 2.0156004428863525} +03/04/2022 12:37:51 - INFO - codeparrot_training - Step 19728: {'lr': 0.00048250658121501145, 'samples': 10101248, 'steps': 19728, 'loss/train': 1.7837638854980469} +03/04/2022 12:37:55 - INFO - codeparrot_training - Step 19729: {'lr': 0.00048250463097173447, 'samples': 10101760, 'steps': 19729, 'loss/train': 1.5581458806991577} +03/04/2022 12:37:55 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/04/2022 12:38:00 - INFO - codeparrot_training - Step 19730: {'lr': 0.0004825026806236946, 'samples': 10102272, 'steps': 19730, 'loss/train': 2.0141704082489014} +03/04/2022 12:38:03 - INFO - codeparrot_training - Step 19731: {'lr': 0.00048250073017089257, 'samples': 10102784, 'steps': 19731, 'loss/train': 0.9369689226150513} +03/04/2022 12:38:04 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/04/2022 12:38:08 - INFO - codeparrot_training - Step 19732: {'lr': 0.00048249877961332923, 'samples': 10103296, 'steps': 19732, 'loss/train': 2.0229740142822266} +03/04/2022 12:38:12 - INFO - codeparrot_training - Step 19733: {'lr': 0.0004824968289510056, 'samples': 10103808, 'steps': 19733, 'loss/train': 2.5996181964874268} +03/04/2022 12:38:13 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/04/2022 12:38:17 - INFO - codeparrot_training - Step 19734: {'lr': 0.0004824948781839225, 'samples': 10104320, 'steps': 19734, 'loss/train': 1.764369010925293} +03/04/2022 12:38:20 - INFO - codeparrot_training - Step 19735: {'lr': 0.0004824929273120807, 'samples': 10104832, 'steps': 19735, 'loss/train': 2.1641292572021484} +03/04/2022 12:38:21 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/04/2022 12:38:25 - INFO - codeparrot_training - Step 19736: {'lr': 0.0004824909763354813, 'samples': 10105344, 'steps': 19736, 'loss/train': 1.8608489036560059} +03/04/2022 12:38:28 - INFO - codeparrot_training - Step 19737: {'lr': 0.00048248902525412497, 'samples': 10105856, 'steps': 19737, 'loss/train': 2.047818183898926} +03/04/2022 12:38:30 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/04/2022 12:38:34 - INFO - codeparrot_training - Step 19738: {'lr': 0.0004824870740680127, 'samples': 10106368, 'steps': 19738, 'loss/train': 2.260714054107666} +03/04/2022 12:38:37 - INFO - codeparrot_training - Step 19739: {'lr': 0.0004824851227771453, 'samples': 10106880, 'steps': 19739, 'loss/train': 2.3353888988494873} +03/04/2022 12:38:42 - INFO - codeparrot_training - Step 19740: {'lr': 0.00048248317138152374, 'samples': 10107392, 'steps': 19740, 'loss/train': 1.7353488206863403} +03/04/2022 12:38:45 - INFO - codeparrot_training - Step 19741: {'lr': 0.00048248121988114887, 'samples': 10107904, 'steps': 19741, 'loss/train': 2.275988817214966} +03/04/2022 12:38:46 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/04/2022 12:38:50 - INFO - codeparrot_training - Step 19742: {'lr': 0.00048247926827602153, 'samples': 10108416, 'steps': 19742, 'loss/train': 1.7174770832061768} +03/04/2022 12:38:54 - INFO - codeparrot_training - Step 19743: {'lr': 0.0004824773165661426, 'samples': 10108928, 'steps': 19743, 'loss/train': 2.736206531524658} +03/04/2022 12:38:54 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/04/2022 12:38:59 - INFO - codeparrot_training - Step 19744: {'lr': 0.000482475364751513, 'samples': 10109440, 'steps': 19744, 'loss/train': 2.2439889907836914} +03/04/2022 12:39:02 - INFO - codeparrot_training - Step 19745: {'lr': 0.0004824734128321335, 'samples': 10109952, 'steps': 19745, 'loss/train': 1.8169364929199219} +03/04/2022 12:39:03 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/04/2022 12:39:07 - INFO - codeparrot_training - Step 19746: {'lr': 0.0004824714608080052, 'samples': 10110464, 'steps': 19746, 'loss/train': 1.9187331199645996} +03/04/2022 12:39:10 - INFO - codeparrot_training - Step 19747: {'lr': 0.00048246950867912873, 'samples': 10110976, 'steps': 19747, 'loss/train': 2.1378655433654785} +03/04/2022 12:39:11 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/04/2022 12:39:16 - INFO - codeparrot_training - Step 19748: {'lr': 0.0004824675564455052, 'samples': 10111488, 'steps': 19748, 'loss/train': 2.3286235332489014} +03/04/2022 12:39:19 - INFO - codeparrot_training - Step 19749: {'lr': 0.0004824656041071353, 'samples': 10112000, 'steps': 19749, 'loss/train': 1.7786535024642944} +03/04/2022 12:39:19 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/04/2022 12:39:24 - INFO - codeparrot_training - Step 19750: {'lr': 0.00048246365166402003, 'samples': 10112512, 'steps': 19750, 'loss/train': 2.05889892578125} +03/04/2022 12:39:27 - INFO - codeparrot_training - Step 19751: {'lr': 0.00048246169911616015, 'samples': 10113024, 'steps': 19751, 'loss/train': 2.0969083309173584} +03/04/2022 12:39:28 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/04/2022 12:39:33 - INFO - codeparrot_training - Step 19752: {'lr': 0.00048245974646355673, 'samples': 10113536, 'steps': 19752, 'loss/train': 2.4036197662353516} +03/04/2022 12:39:36 - INFO - codeparrot_training - Step 19753: {'lr': 0.00048245779370621045, 'samples': 10114048, 'steps': 19753, 'loss/train': 1.8346132040023804} +03/04/2022 12:39:36 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/04/2022 12:39:41 - INFO - codeparrot_training - Step 19754: {'lr': 0.0004824558408441223, 'samples': 10114560, 'steps': 19754, 'loss/train': 1.2586987018585205} +03/04/2022 12:39:44 - INFO - codeparrot_training - Step 19755: {'lr': 0.00048245388787729316, 'samples': 10115072, 'steps': 19755, 'loss/train': 2.221259117126465} +03/04/2022 12:39:44 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/04/2022 12:39:50 - INFO - codeparrot_training - Step 19756: {'lr': 0.00048245193480572383, 'samples': 10115584, 'steps': 19756, 'loss/train': 1.2530372142791748} +03/04/2022 12:39:53 - INFO - codeparrot_training - Step 19757: {'lr': 0.0004824499816294152, 'samples': 10116096, 'steps': 19757, 'loss/train': 1.8443191051483154} +03/04/2022 12:39:53 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/04/2022 12:39:59 - INFO - codeparrot_training - Step 19758: {'lr': 0.0004824480283483683, 'samples': 10116608, 'steps': 19758, 'loss/train': 0.9319930076599121} +03/04/2022 12:40:02 - INFO - codeparrot_training - Step 19759: {'lr': 0.0004824460749625839, 'samples': 10117120, 'steps': 19759, 'loss/train': 3.4166860580444336} +03/04/2022 12:40:05 - INFO - codeparrot_training - Step 19760: {'lr': 0.00048244412147206283, 'samples': 10117632, 'steps': 19760, 'loss/train': 3.465986728668213} +03/04/2022 12:40:05 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/04/2022 12:40:11 - INFO - codeparrot_training - Step 19761: {'lr': 0.00048244216787680607, 'samples': 10118144, 'steps': 19761, 'loss/train': 2.099868059158325} +03/04/2022 12:40:14 - INFO - codeparrot_training - Step 19762: {'lr': 0.0004824402141768145, 'samples': 10118656, 'steps': 19762, 'loss/train': 1.2014586925506592} +03/04/2022 12:40:14 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/04/2022 12:40:19 - INFO - codeparrot_training - Step 19763: {'lr': 0.0004824382603720888, 'samples': 10119168, 'steps': 19763, 'loss/train': 2.4093170166015625} +03/04/2022 12:40:22 - INFO - codeparrot_training - Step 19764: {'lr': 0.00048243630646263016, 'samples': 10119680, 'steps': 19764, 'loss/train': 1.0819671154022217} +03/04/2022 12:40:22 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/04/2022 12:40:28 - INFO - codeparrot_training - Step 19765: {'lr': 0.00048243435244843926, 'samples': 10120192, 'steps': 19765, 'loss/train': 1.0348646640777588} +03/04/2022 12:40:30 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/04/2022 12:40:33 - INFO - codeparrot_training - Step 19766: {'lr': 0.000482432398329517, 'samples': 10120704, 'steps': 19766, 'loss/train': 1.5714190006256104} +03/04/2022 12:40:36 - INFO - codeparrot_training - Step 19767: {'lr': 0.00048243044410586433, 'samples': 10121216, 'steps': 19767, 'loss/train': 1.9391505718231201} +03/04/2022 12:40:38 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/04/2022 12:40:41 - INFO - codeparrot_training - Step 19768: {'lr': 0.00048242848977748205, 'samples': 10121728, 'steps': 19768, 'loss/train': 2.2395808696746826} +03/04/2022 12:40:44 - INFO - codeparrot_training - Step 19769: {'lr': 0.0004824265353443711, 'samples': 10122240, 'steps': 19769, 'loss/train': 1.5851178169250488} +03/04/2022 12:40:47 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/04/2022 12:40:50 - INFO - codeparrot_training - Step 19770: {'lr': 0.00048242458080653233, 'samples': 10122752, 'steps': 19770, 'loss/train': 1.7473492622375488} +03/04/2022 12:40:53 - INFO - codeparrot_training - Step 19771: {'lr': 0.0004824226261639666, 'samples': 10123264, 'steps': 19771, 'loss/train': 2.1804256439208984} +03/04/2022 12:40:55 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/04/2022 12:40:58 - INFO - codeparrot_training - Step 19772: {'lr': 0.00048242067141667487, 'samples': 10123776, 'steps': 19772, 'loss/train': 1.622178554534912} +03/04/2022 12:41:01 - INFO - codeparrot_training - Step 19773: {'lr': 0.00048241871656465795, 'samples': 10124288, 'steps': 19773, 'loss/train': 2.159731864929199} +03/04/2022 12:41:03 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 12:41:07 - INFO - codeparrot_training - Step 19774: {'lr': 0.0004824167616079168, 'samples': 10124800, 'steps': 19774, 'loss/train': 1.8339184522628784} +03/04/2022 12:41:10 - INFO - codeparrot_training - Step 19775: {'lr': 0.0004824148065464522, 'samples': 10125312, 'steps': 19775, 'loss/train': 1.6273595094680786} +03/04/2022 12:41:11 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/04/2022 12:41:15 - INFO - codeparrot_training - Step 19776: {'lr': 0.00048241285138026505, 'samples': 10125824, 'steps': 19776, 'loss/train': 1.8645442724227905} +03/04/2022 12:41:18 - INFO - codeparrot_training - Step 19777: {'lr': 0.00048241089610935627, 'samples': 10126336, 'steps': 19777, 'loss/train': 2.2174816131591797} +03/04/2022 12:41:20 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/04/2022 12:41:23 - INFO - codeparrot_training - Step 19778: {'lr': 0.0004824089407337267, 'samples': 10126848, 'steps': 19778, 'loss/train': 2.3656773567199707} +03/04/2022 12:41:27 - INFO - codeparrot_training - Step 19779: {'lr': 0.00048240698525337726, 'samples': 10127360, 'steps': 19779, 'loss/train': 2.0957984924316406} +03/04/2022 12:41:28 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/04/2022 12:41:32 - INFO - codeparrot_training - Step 19780: {'lr': 0.0004824050296683089, 'samples': 10127872, 'steps': 19780, 'loss/train': 1.813086748123169} +03/04/2022 12:41:35 - INFO - codeparrot_training - Step 19781: {'lr': 0.0004824030739785223, 'samples': 10128384, 'steps': 19781, 'loss/train': 1.1297773122787476} +03/04/2022 12:41:37 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/04/2022 12:41:40 - INFO - codeparrot_training - Step 19782: {'lr': 0.00048240111818401854, 'samples': 10128896, 'steps': 19782, 'loss/train': 1.286503791809082} +03/04/2022 12:41:43 - INFO - codeparrot_training - Step 19783: {'lr': 0.0004823991622847984, 'samples': 10129408, 'steps': 19783, 'loss/train': 2.514251232147217} +03/04/2022 12:41:45 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/04/2022 12:41:49 - INFO - codeparrot_training - Step 19784: {'lr': 0.0004823972062808628, 'samples': 10129920, 'steps': 19784, 'loss/train': 0.543460488319397} +03/04/2022 12:41:52 - INFO - codeparrot_training - Step 19785: {'lr': 0.0004823952501722126, 'samples': 10130432, 'steps': 19785, 'loss/train': 2.372936248779297} +03/04/2022 12:41:53 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/04/2022 12:41:57 - INFO - codeparrot_training - Step 19786: {'lr': 0.00048239329395884865, 'samples': 10130944, 'steps': 19786, 'loss/train': 2.0636587142944336} +03/04/2022 12:42:00 - INFO - codeparrot_training - Step 19787: {'lr': 0.00048239133764077193, 'samples': 10131456, 'steps': 19787, 'loss/train': 1.55360746383667} +03/04/2022 12:42:01 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 12:42:05 - INFO - codeparrot_training - Step 19788: {'lr': 0.00048238938121798313, 'samples': 10131968, 'steps': 19788, 'loss/train': 1.9706697463989258} +03/04/2022 12:42:09 - INFO - codeparrot_training - Step 19789: {'lr': 0.00048238742469048344, 'samples': 10132480, 'steps': 19789, 'loss/train': 1.5519390106201172} +03/04/2022 12:42:10 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/04/2022 12:42:14 - INFO - codeparrot_training - Step 19790: {'lr': 0.00048238546805827345, 'samples': 10132992, 'steps': 19790, 'loss/train': 1.9163192510604858} +03/04/2022 12:42:17 - INFO - codeparrot_training - Step 19791: {'lr': 0.00048238351132135415, 'samples': 10133504, 'steps': 19791, 'loss/train': 0.7453199028968811} +03/04/2022 12:42:18 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/04/2022 12:42:23 - INFO - codeparrot_training - Step 19792: {'lr': 0.0004823815544797265, 'samples': 10134016, 'steps': 19792, 'loss/train': 2.4835805892944336} +03/04/2022 12:42:26 - INFO - codeparrot_training - Step 19793: {'lr': 0.0004823795975333912, 'samples': 10134528, 'steps': 19793, 'loss/train': 1.1235730648040771} +03/04/2022 12:42:27 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 12:42:31 - INFO - codeparrot_training - Step 19794: {'lr': 0.0004823776404823493, 'samples': 10135040, 'steps': 19794, 'loss/train': 1.400590419769287} +03/04/2022 12:42:34 - INFO - codeparrot_training - Step 19795: {'lr': 0.00048237568332660163, 'samples': 10135552, 'steps': 19795, 'loss/train': 1.948028802871704} +03/04/2022 12:42:36 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/04/2022 12:42:40 - INFO - codeparrot_training - Step 19796: {'lr': 0.0004823737260661491, 'samples': 10136064, 'steps': 19796, 'loss/train': 1.8016453981399536} +03/04/2022 12:42:43 - INFO - codeparrot_training - Step 19797: {'lr': 0.00048237176870099256, 'samples': 10136576, 'steps': 19797, 'loss/train': 2.1367990970611572} +03/04/2022 12:42:46 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/04/2022 12:42:48 - INFO - codeparrot_training - Step 19798: {'lr': 0.0004823698112311328, 'samples': 10137088, 'steps': 19798, 'loss/train': 1.521304965019226} +03/04/2022 12:42:51 - INFO - codeparrot_training - Step 19799: {'lr': 0.00048236785365657076, 'samples': 10137600, 'steps': 19799, 'loss/train': 1.541629672050476} +03/04/2022 12:42:54 - INFO - codeparrot_training - Skipping example with length 39 (seq_length=1024) +03/04/2022 12:42:56 - INFO - codeparrot_training - Step 19800: {'lr': 0.00048236589597730744, 'samples': 10138112, 'steps': 19800, 'loss/train': 1.769490122795105} +03/04/2022 12:43:00 - INFO - codeparrot_training - Step 19801: {'lr': 0.00048236393819334363, 'samples': 10138624, 'steps': 19801, 'loss/train': 2.436988115310669} +03/04/2022 12:43:02 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/04/2022 12:43:05 - INFO - codeparrot_training - Step 19802: {'lr': 0.0004823619803046802, 'samples': 10139136, 'steps': 19802, 'loss/train': 0.8740523457527161} +03/04/2022 12:43:08 - INFO - codeparrot_training - Step 19803: {'lr': 0.00048236002231131803, 'samples': 10139648, 'steps': 19803, 'loss/train': 1.0888659954071045} +03/04/2022 12:43:10 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/04/2022 12:43:13 - INFO - codeparrot_training - Step 19804: {'lr': 0.00048235806421325803, 'samples': 10140160, 'steps': 19804, 'loss/train': 3.4079155921936035} +03/04/2022 12:43:17 - INFO - codeparrot_training - Step 19805: {'lr': 0.0004823561060105011, 'samples': 10140672, 'steps': 19805, 'loss/train': 2.1065566539764404} +03/04/2022 12:43:19 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/04/2022 12:43:22 - INFO - codeparrot_training - Step 19806: {'lr': 0.00048235414770304803, 'samples': 10141184, 'steps': 19806, 'loss/train': 1.8995722532272339} +03/04/2022 12:43:25 - INFO - codeparrot_training - Step 19807: {'lr': 0.00048235218929089987, 'samples': 10141696, 'steps': 19807, 'loss/train': 2.222193956375122} +03/04/2022 12:43:27 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/04/2022 12:43:30 - INFO - codeparrot_training - Step 19808: {'lr': 0.00048235023077405724, 'samples': 10142208, 'steps': 19808, 'loss/train': 0.7014352083206177} +03/04/2022 12:43:33 - INFO - codeparrot_training - Step 19809: {'lr': 0.0004823482721525213, 'samples': 10142720, 'steps': 19809, 'loss/train': 2.1093897819519043} +03/04/2022 12:43:36 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/04/2022 12:43:39 - INFO - codeparrot_training - Step 19810: {'lr': 0.0004823463134262928, 'samples': 10143232, 'steps': 19810, 'loss/train': 0.7520968317985535} +03/04/2022 12:43:42 - INFO - codeparrot_training - Step 19811: {'lr': 0.00048234435459537265, 'samples': 10143744, 'steps': 19811, 'loss/train': 1.8982757329940796} +03/04/2022 12:43:45 - INFO - codeparrot_training - Step 19812: {'lr': 0.0004823423956597617, 'samples': 10144256, 'steps': 19812, 'loss/train': 1.940994381904602} +03/04/2022 12:43:45 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/04/2022 12:43:50 - INFO - codeparrot_training - Step 19813: {'lr': 0.0004823404366194608, 'samples': 10144768, 'steps': 19813, 'loss/train': 0.9113917946815491} +03/04/2022 12:43:54 - INFO - codeparrot_training - Step 19814: {'lr': 0.0004823384774744709, 'samples': 10145280, 'steps': 19814, 'loss/train': 0.8768632411956787} +03/04/2022 12:43:54 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/04/2022 12:43:59 - INFO - codeparrot_training - Step 19815: {'lr': 0.000482336518224793, 'samples': 10145792, 'steps': 19815, 'loss/train': 1.825161337852478} +03/04/2022 12:44:02 - INFO - codeparrot_training - Step 19816: {'lr': 0.00048233455887042764, 'samples': 10146304, 'steps': 19816, 'loss/train': 2.094974994659424} +03/04/2022 12:44:02 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 12:44:07 - INFO - codeparrot_training - Step 19817: {'lr': 0.0004823325994113761, 'samples': 10146816, 'steps': 19817, 'loss/train': 1.8246568441390991} +03/04/2022 12:44:10 - INFO - codeparrot_training - Step 19818: {'lr': 0.00048233063984763895, 'samples': 10147328, 'steps': 19818, 'loss/train': 2.088975191116333} +03/04/2022 12:44:11 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 12:44:16 - INFO - codeparrot_training - Step 19819: {'lr': 0.0004823286801792173, 'samples': 10147840, 'steps': 19819, 'loss/train': 1.2220865488052368} +03/04/2022 12:44:19 - INFO - codeparrot_training - Step 19820: {'lr': 0.0004823267204061118, 'samples': 10148352, 'steps': 19820, 'loss/train': 1.873769760131836} +03/04/2022 12:44:19 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/04/2022 12:44:24 - INFO - codeparrot_training - Step 19821: {'lr': 0.0004823247605283236, 'samples': 10148864, 'steps': 19821, 'loss/train': 1.4555636644363403} +03/04/2022 12:44:27 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/04/2022 12:44:29 - INFO - codeparrot_training - Step 19822: {'lr': 0.0004823228005458534, 'samples': 10149376, 'steps': 19822, 'loss/train': 1.1933542490005493} +03/04/2022 12:44:32 - INFO - codeparrot_training - Step 19823: {'lr': 0.00048232084045870204, 'samples': 10149888, 'steps': 19823, 'loss/train': 1.7981982231140137} +03/04/2022 12:44:35 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/04/2022 12:44:38 - INFO - codeparrot_training - Step 19824: {'lr': 0.00048231888026687065, 'samples': 10150400, 'steps': 19824, 'loss/train': 1.1931565999984741} +03/04/2022 12:44:41 - INFO - codeparrot_training - Step 19825: {'lr': 0.00048231691997035987, 'samples': 10150912, 'steps': 19825, 'loss/train': 2.326483726501465} +03/04/2022 12:44:43 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/04/2022 12:44:46 - INFO - codeparrot_training - Step 19826: {'lr': 0.00048231495956917067, 'samples': 10151424, 'steps': 19826, 'loss/train': 2.116652727127075} +03/04/2022 12:44:49 - INFO - codeparrot_training - Step 19827: {'lr': 0.00048231299906330397, 'samples': 10151936, 'steps': 19827, 'loss/train': 2.197244644165039} +03/04/2022 12:44:52 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/04/2022 12:44:55 - INFO - codeparrot_training - Step 19828: {'lr': 0.0004823110384527606, 'samples': 10152448, 'steps': 19828, 'loss/train': 1.7668880224227905} +03/04/2022 12:44:58 - INFO - codeparrot_training - Step 19829: {'lr': 0.0004823090777375414, 'samples': 10152960, 'steps': 19829, 'loss/train': 0.8269534111022949} +03/04/2022 12:45:00 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/04/2022 12:45:03 - INFO - codeparrot_training - Step 19830: {'lr': 0.0004823071169176474, 'samples': 10153472, 'steps': 19830, 'loss/train': 2.3575305938720703} +03/04/2022 12:45:06 - INFO - codeparrot_training - Step 19831: {'lr': 0.00048230515599307933, 'samples': 10153984, 'steps': 19831, 'loss/train': 1.777549386024475} +03/04/2022 12:45:08 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/04/2022 12:45:11 - INFO - codeparrot_training - Step 19832: {'lr': 0.0004823031949638382, 'samples': 10154496, 'steps': 19832, 'loss/train': 2.2322680950164795} +03/04/2022 12:45:15 - INFO - codeparrot_training - Step 19833: {'lr': 0.0004823012338299248, 'samples': 10155008, 'steps': 19833, 'loss/train': 1.8456605672836304} +03/04/2022 12:45:17 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/04/2022 12:45:20 - INFO - codeparrot_training - Step 19834: {'lr': 0.0004822992725913401, 'samples': 10155520, 'steps': 19834, 'loss/train': 2.3469038009643555} +03/04/2022 12:45:23 - INFO - codeparrot_training - Step 19835: {'lr': 0.00048229731124808484, 'samples': 10156032, 'steps': 19835, 'loss/train': 2.3397114276885986} +03/04/2022 12:45:25 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/04/2022 12:45:28 - INFO - codeparrot_training - Step 19836: {'lr': 0.00048229534980016007, 'samples': 10156544, 'steps': 19836, 'loss/train': 1.7590001821517944} +03/04/2022 12:45:31 - INFO - codeparrot_training - Step 19837: {'lr': 0.0004822933882475666, 'samples': 10157056, 'steps': 19837, 'loss/train': 1.294845461845398} +03/04/2022 12:45:33 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/04/2022 12:45:37 - INFO - codeparrot_training - Step 19838: {'lr': 0.00048229142659030527, 'samples': 10157568, 'steps': 19838, 'loss/train': 2.8006882667541504} +03/04/2022 12:45:40 - INFO - codeparrot_training - Step 19839: {'lr': 0.000482289464828377, 'samples': 10158080, 'steps': 19839, 'loss/train': 2.0161168575286865} +03/04/2022 12:45:42 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/04/2022 12:45:45 - INFO - codeparrot_training - Step 19840: {'lr': 0.00048228750296178276, 'samples': 10158592, 'steps': 19840, 'loss/train': 2.1369569301605225} +03/04/2022 12:45:48 - INFO - codeparrot_training - Step 19841: {'lr': 0.0004822855409905233, 'samples': 10159104, 'steps': 19841, 'loss/train': 1.606272578239441} +03/04/2022 12:45:50 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/04/2022 12:45:53 - INFO - codeparrot_training - Step 19842: {'lr': 0.00048228357891459954, 'samples': 10159616, 'steps': 19842, 'loss/train': 2.259392499923706} +03/04/2022 12:45:57 - INFO - codeparrot_training - Step 19843: {'lr': 0.0004822816167340124, 'samples': 10160128, 'steps': 19843, 'loss/train': 1.9413256645202637} +03/04/2022 12:45:58 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/04/2022 12:46:02 - INFO - codeparrot_training - Step 19844: {'lr': 0.00048227965444876277, 'samples': 10160640, 'steps': 19844, 'loss/train': 2.229551076889038} +03/04/2022 12:46:05 - INFO - codeparrot_training - Step 19845: {'lr': 0.0004822776920588515, 'samples': 10161152, 'steps': 19845, 'loss/train': 1.5100980997085571} +03/04/2022 12:46:07 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 12:46:10 - INFO - codeparrot_training - Step 19846: {'lr': 0.0004822757295642795, 'samples': 10161664, 'steps': 19846, 'loss/train': 1.8339672088623047} +03/04/2022 12:46:13 - INFO - codeparrot_training - Step 19847: {'lr': 0.00048227376696504765, 'samples': 10162176, 'steps': 19847, 'loss/train': 1.4135137796401978} +03/04/2022 12:46:15 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/04/2022 12:46:19 - INFO - codeparrot_training - Step 19848: {'lr': 0.0004822718042611568, 'samples': 10162688, 'steps': 19848, 'loss/train': 1.491626262664795} +03/04/2022 12:46:22 - INFO - codeparrot_training - Step 19849: {'lr': 0.0004822698414526079, 'samples': 10163200, 'steps': 19849, 'loss/train': 1.3416436910629272} +03/04/2022 12:46:23 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/04/2022 12:46:27 - INFO - codeparrot_training - Step 19850: {'lr': 0.0004822678785394017, 'samples': 10163712, 'steps': 19850, 'loss/train': 2.096189022064209} +03/04/2022 12:46:30 - INFO - codeparrot_training - Step 19851: {'lr': 0.0004822659155215393, 'samples': 10164224, 'steps': 19851, 'loss/train': 1.9066327810287476} +03/04/2022 12:46:32 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/04/2022 12:46:36 - INFO - codeparrot_training - Step 19852: {'lr': 0.00048226395239902133, 'samples': 10164736, 'steps': 19852, 'loss/train': 1.7607125043869019} +03/04/2022 12:46:39 - INFO - codeparrot_training - Step 19853: {'lr': 0.00048226198917184886, 'samples': 10165248, 'steps': 19853, 'loss/train': 2.250265121459961} +03/04/2022 12:46:41 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/04/2022 12:46:44 - INFO - codeparrot_training - Step 19854: {'lr': 0.00048226002584002276, 'samples': 10165760, 'steps': 19854, 'loss/train': 1.1960186958312988} +03/04/2022 12:46:47 - INFO - codeparrot_training - Step 19855: {'lr': 0.00048225806240354387, 'samples': 10166272, 'steps': 19855, 'loss/train': 2.1483571529388428} +03/04/2022 12:46:49 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/04/2022 12:46:53 - INFO - codeparrot_training - Step 19856: {'lr': 0.0004822560988624131, 'samples': 10166784, 'steps': 19856, 'loss/train': 1.7365542650222778} +03/04/2022 12:46:56 - INFO - codeparrot_training - Step 19857: {'lr': 0.0004822541352166312, 'samples': 10167296, 'steps': 19857, 'loss/train': 2.011840343475342} +03/04/2022 12:46:58 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/04/2022 12:47:01 - INFO - codeparrot_training - Step 19858: {'lr': 0.0004822521714661993, 'samples': 10167808, 'steps': 19858, 'loss/train': 1.406099796295166} +03/04/2022 12:47:04 - INFO - codeparrot_training - Step 19859: {'lr': 0.0004822502076111181, 'samples': 10168320, 'steps': 19859, 'loss/train': 1.5457159280776978} +03/04/2022 12:47:06 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/04/2022 12:47:10 - INFO - codeparrot_training - Step 19860: {'lr': 0.0004822482436513885, 'samples': 10168832, 'steps': 19860, 'loss/train': 2.1389973163604736} +03/04/2022 12:47:13 - INFO - codeparrot_training - Step 19861: {'lr': 0.0004822462795870115, 'samples': 10169344, 'steps': 19861, 'loss/train': 2.313721179962158} +03/04/2022 12:47:14 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 12:47:18 - INFO - codeparrot_training - Step 19862: {'lr': 0.00048224431541798784, 'samples': 10169856, 'steps': 19862, 'loss/train': 1.9697843790054321} +03/04/2022 12:47:21 - INFO - codeparrot_training - Step 19863: {'lr': 0.00048224235114431856, 'samples': 10170368, 'steps': 19863, 'loss/train': 1.612579345703125} +03/04/2022 12:47:23 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/04/2022 12:47:26 - INFO - codeparrot_training - Step 19864: {'lr': 0.0004822403867660044, 'samples': 10170880, 'steps': 19864, 'loss/train': 1.8613646030426025} +03/04/2022 12:47:30 - INFO - codeparrot_training - Step 19865: {'lr': 0.0004822384222830463, 'samples': 10171392, 'steps': 19865, 'loss/train': 1.5513694286346436} +03/04/2022 12:47:32 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/04/2022 12:47:35 - INFO - codeparrot_training - Step 19866: {'lr': 0.0004822364576954452, 'samples': 10171904, 'steps': 19866, 'loss/train': 2.2861578464508057} +03/04/2022 12:47:38 - INFO - codeparrot_training - Step 19867: {'lr': 0.0004822344930032019, 'samples': 10172416, 'steps': 19867, 'loss/train': 2.0221612453460693} +03/04/2022 12:47:40 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/04/2022 12:47:43 - INFO - codeparrot_training - Step 19868: {'lr': 0.00048223252820631736, 'samples': 10172928, 'steps': 19868, 'loss/train': 1.6351757049560547} +03/04/2022 12:47:47 - INFO - codeparrot_training - Step 19869: {'lr': 0.00048223056330479235, 'samples': 10173440, 'steps': 19869, 'loss/train': 1.9015299081802368} +03/04/2022 12:47:49 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/04/2022 12:47:52 - INFO - codeparrot_training - Step 19870: {'lr': 0.00048222859829862784, 'samples': 10173952, 'steps': 19870, 'loss/train': 2.0677883625030518} +03/04/2022 12:47:55 - INFO - codeparrot_training - Step 19871: {'lr': 0.0004822266331878248, 'samples': 10174464, 'steps': 19871, 'loss/train': 5.846251964569092} +03/04/2022 12:47:59 - INFO - codeparrot_training - Step 19872: {'lr': 0.00048222466797238396, 'samples': 10174976, 'steps': 19872, 'loss/train': 2.1159582138061523} +03/04/2022 12:48:00 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/04/2022 12:48:04 - INFO - codeparrot_training - Step 19873: {'lr': 0.00048222270265230627, 'samples': 10175488, 'steps': 19873, 'loss/train': 2.1772701740264893} +03/04/2022 12:48:07 - INFO - codeparrot_training - Step 19874: {'lr': 0.0004822207372275926, 'samples': 10176000, 'steps': 19874, 'loss/train': 2.099034070968628} +03/04/2022 12:48:09 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/04/2022 12:48:12 - INFO - codeparrot_training - Step 19875: {'lr': 0.0004822187716982439, 'samples': 10176512, 'steps': 19875, 'loss/train': 1.6294924020767212} +03/04/2022 12:48:16 - INFO - codeparrot_training - Step 19876: {'lr': 0.000482216806064261, 'samples': 10177024, 'steps': 19876, 'loss/train': 0.6671847701072693} +03/04/2022 12:48:18 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/04/2022 12:48:22 - INFO - codeparrot_training - Step 19877: {'lr': 0.0004822148403256447, 'samples': 10177536, 'steps': 19877, 'loss/train': 1.3587850332260132} +03/04/2022 12:48:25 - INFO - codeparrot_training - Step 19878: {'lr': 0.00048221287448239604, 'samples': 10178048, 'steps': 19878, 'loss/train': 1.6287637948989868} +03/04/2022 12:48:28 - INFO - codeparrot_training - Step 19879: {'lr': 0.00048221090853451586, 'samples': 10178560, 'steps': 19879, 'loss/train': 4.206445217132568} +03/04/2022 12:48:29 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/04/2022 12:48:33 - INFO - codeparrot_training - Step 19880: {'lr': 0.000482208942482005, 'samples': 10179072, 'steps': 19880, 'loss/train': 2.585996627807617} +03/04/2022 12:48:37 - INFO - codeparrot_training - Step 19881: {'lr': 0.00048220697632486443, 'samples': 10179584, 'steps': 19881, 'loss/train': 1.856677770614624} +03/04/2022 12:48:38 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/04/2022 12:48:42 - INFO - codeparrot_training - Step 19882: {'lr': 0.0004822050100630949, 'samples': 10180096, 'steps': 19882, 'loss/train': 2.6133460998535156} +03/04/2022 12:48:45 - INFO - codeparrot_training - Step 19883: {'lr': 0.0004822030436966974, 'samples': 10180608, 'steps': 19883, 'loss/train': 1.9130083322525024} +03/04/2022 12:48:47 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/04/2022 12:48:50 - INFO - codeparrot_training - Step 19884: {'lr': 0.0004822010772256728, 'samples': 10181120, 'steps': 19884, 'loss/train': 1.904091238975525} +03/04/2022 12:48:54 - INFO - codeparrot_training - Step 19885: {'lr': 0.00048219911065002196, 'samples': 10181632, 'steps': 19885, 'loss/train': 0.36119067668914795} +03/04/2022 12:48:56 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/04/2022 12:48:59 - INFO - codeparrot_training - Step 19886: {'lr': 0.00048219714396974587, 'samples': 10182144, 'steps': 19886, 'loss/train': 2.3573157787323} +03/04/2022 12:49:02 - INFO - codeparrot_training - Step 19887: {'lr': 0.0004821951771848452, 'samples': 10182656, 'steps': 19887, 'loss/train': 1.3326640129089355} +03/04/2022 12:49:04 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/04/2022 12:49:07 - INFO - codeparrot_training - Step 19888: {'lr': 0.00048219321029532104, 'samples': 10183168, 'steps': 19888, 'loss/train': 2.4931702613830566} +03/04/2022 12:49:10 - INFO - codeparrot_training - Step 19889: {'lr': 0.0004821912433011742, 'samples': 10183680, 'steps': 19889, 'loss/train': 2.361409902572632} +03/04/2022 12:49:12 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/04/2022 12:49:16 - INFO - codeparrot_training - Step 19890: {'lr': 0.00048218927620240557, 'samples': 10184192, 'steps': 19890, 'loss/train': 2.5654208660125732} +03/04/2022 12:49:19 - INFO - codeparrot_training - Step 19891: {'lr': 0.00048218730899901596, 'samples': 10184704, 'steps': 19891, 'loss/train': 1.270777940750122} +03/04/2022 12:49:21 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 12:49:24 - INFO - codeparrot_training - Step 19892: {'lr': 0.0004821853416910065, 'samples': 10185216, 'steps': 19892, 'loss/train': 1.8993537425994873} +03/04/2022 12:49:27 - INFO - codeparrot_training - Step 19893: {'lr': 0.0004821833742783778, 'samples': 10185728, 'steps': 19893, 'loss/train': 2.244206666946411} +03/04/2022 12:49:29 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/04/2022 12:49:32 - INFO - codeparrot_training - Step 19894: {'lr': 0.0004821814067611308, 'samples': 10186240, 'steps': 19894, 'loss/train': 2.8118410110473633} +03/04/2022 12:49:36 - INFO - codeparrot_training - Step 19895: {'lr': 0.00048217943913926646, 'samples': 10186752, 'steps': 19895, 'loss/train': 2.512181043624878} +03/04/2022 12:49:37 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/04/2022 12:49:41 - INFO - codeparrot_training - Step 19896: {'lr': 0.00048217747141278574, 'samples': 10187264, 'steps': 19896, 'loss/train': 1.631456971168518} +03/04/2022 12:49:44 - INFO - codeparrot_training - Step 19897: {'lr': 0.00048217550358168937, 'samples': 10187776, 'steps': 19897, 'loss/train': 2.365751266479492} +03/04/2022 12:49:46 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/04/2022 12:49:49 - INFO - codeparrot_training - Step 19898: {'lr': 0.00048217353564597833, 'samples': 10188288, 'steps': 19898, 'loss/train': 1.9473241567611694} +03/04/2022 12:49:52 - INFO - codeparrot_training - Step 19899: {'lr': 0.0004821715676056534, 'samples': 10188800, 'steps': 19899, 'loss/train': 1.387442708015442} +03/04/2022 12:49:54 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/04/2022 12:49:58 - INFO - codeparrot_training - Step 19900: {'lr': 0.0004821695994607156, 'samples': 10189312, 'steps': 19900, 'loss/train': 2.386030673980713} +03/04/2022 12:50:01 - INFO - codeparrot_training - Step 19901: {'lr': 0.0004821676312111658, 'samples': 10189824, 'steps': 19901, 'loss/train': 2.32594633102417} +03/04/2022 12:50:03 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/04/2022 12:50:06 - INFO - codeparrot_training - Step 19902: {'lr': 0.0004821656628570048, 'samples': 10190336, 'steps': 19902, 'loss/train': 1.980303406715393} +03/04/2022 12:50:09 - INFO - codeparrot_training - Step 19903: {'lr': 0.00048216369439823355, 'samples': 10190848, 'steps': 19903, 'loss/train': 0.3905542492866516} +03/04/2022 12:50:11 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/04/2022 12:50:14 - INFO - codeparrot_training - Step 19904: {'lr': 0.0004821617258348529, 'samples': 10191360, 'steps': 19904, 'loss/train': 2.0039732456207275} +03/04/2022 12:50:18 - INFO - codeparrot_training - Step 19905: {'lr': 0.0004821597571668638, 'samples': 10191872, 'steps': 19905, 'loss/train': 2.573153018951416} +03/04/2022 12:50:19 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 12:50:23 - INFO - codeparrot_training - Step 19906: {'lr': 0.00048215778839426706, 'samples': 10192384, 'steps': 19906, 'loss/train': 1.1939237117767334} +03/04/2022 12:50:26 - INFO - codeparrot_training - Step 19907: {'lr': 0.0004821558195170636, 'samples': 10192896, 'steps': 19907, 'loss/train': 4.5415873527526855} +03/04/2022 12:50:28 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/04/2022 12:50:31 - INFO - codeparrot_training - Step 19908: {'lr': 0.00048215385053525434, 'samples': 10193408, 'steps': 19908, 'loss/train': 1.3344182968139648} +03/04/2022 12:50:35 - INFO - codeparrot_training - Step 19909: {'lr': 0.00048215188144884013, 'samples': 10193920, 'steps': 19909, 'loss/train': 2.006150722503662} +03/04/2022 12:50:36 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/04/2022 12:50:40 - INFO - codeparrot_training - Step 19910: {'lr': 0.0004821499122578218, 'samples': 10194432, 'steps': 19910, 'loss/train': 0.7829998135566711} +03/04/2022 12:50:43 - INFO - codeparrot_training - Step 19911: {'lr': 0.00048214794296220045, 'samples': 10194944, 'steps': 19911, 'loss/train': 1.2131595611572266} +03/04/2022 12:50:44 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/04/2022 12:50:48 - INFO - codeparrot_training - Step 19912: {'lr': 0.00048214597356197665, 'samples': 10195456, 'steps': 19912, 'loss/train': 1.990240454673767} +03/04/2022 12:50:51 - INFO - codeparrot_training - Step 19913: {'lr': 0.00048214400405715153, 'samples': 10195968, 'steps': 19913, 'loss/train': 2.4458274841308594} +03/04/2022 12:50:53 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/04/2022 12:50:57 - INFO - codeparrot_training - Step 19914: {'lr': 0.000482142034447726, 'samples': 10196480, 'steps': 19914, 'loss/train': 0.8386738300323486} +03/04/2022 12:51:00 - INFO - codeparrot_training - Step 19915: {'lr': 0.0004821400647337007, 'samples': 10196992, 'steps': 19915, 'loss/train': 2.039153814315796} +03/04/2022 12:51:01 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/04/2022 12:51:05 - INFO - codeparrot_training - Step 19916: {'lr': 0.0004821380949150768, 'samples': 10197504, 'steps': 19916, 'loss/train': 2.3632099628448486} +03/04/2022 12:51:08 - INFO - codeparrot_training - Step 19917: {'lr': 0.0004821361249918549, 'samples': 10198016, 'steps': 19917, 'loss/train': 0.9795604348182678} +03/04/2022 12:51:09 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/04/2022 12:51:13 - INFO - codeparrot_training - Step 19918: {'lr': 0.0004821341549640361, 'samples': 10198528, 'steps': 19918, 'loss/train': 0.4375520348548889} +03/04/2022 12:51:17 - INFO - codeparrot_training - Step 19919: {'lr': 0.00048213218483162133, 'samples': 10199040, 'steps': 19919, 'loss/train': 2.4106063842773438} +03/04/2022 12:51:18 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/04/2022 12:51:22 - INFO - codeparrot_training - Step 19920: {'lr': 0.0004821302145946113, 'samples': 10199552, 'steps': 19920, 'loss/train': 2.4637293815612793} +03/04/2022 12:51:25 - INFO - codeparrot_training - Step 19921: {'lr': 0.00048212824425300694, 'samples': 10200064, 'steps': 19921, 'loss/train': 1.8230139017105103} +03/04/2022 12:51:26 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/04/2022 12:51:30 - INFO - codeparrot_training - Step 19922: {'lr': 0.0004821262738068093, 'samples': 10200576, 'steps': 19922, 'loss/train': 0.28044453263282776} +03/04/2022 12:51:33 - INFO - codeparrot_training - Step 19923: {'lr': 0.00048212430325601905, 'samples': 10201088, 'steps': 19923, 'loss/train': 2.140770673751831} +03/04/2022 12:51:35 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/04/2022 12:51:39 - INFO - codeparrot_training - Step 19924: {'lr': 0.0004821223326006372, 'samples': 10201600, 'steps': 19924, 'loss/train': 2.0337555408477783} +03/04/2022 12:51:42 - INFO - codeparrot_training - Step 19925: {'lr': 0.0004821203618406645, 'samples': 10202112, 'steps': 19925, 'loss/train': 2.077075481414795} +03/04/2022 12:51:43 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/04/2022 12:51:47 - INFO - codeparrot_training - Step 19926: {'lr': 0.0004821183909761021, 'samples': 10202624, 'steps': 19926, 'loss/train': 1.4259023666381836} +03/04/2022 12:51:50 - INFO - codeparrot_training - Step 19927: {'lr': 0.00048211642000695065, 'samples': 10203136, 'steps': 19927, 'loss/train': 2.1770501136779785} +03/04/2022 12:51:52 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/04/2022 12:51:56 - INFO - codeparrot_training - Step 19928: {'lr': 0.0004821144489332112, 'samples': 10203648, 'steps': 19928, 'loss/train': 1.353190302848816} +03/04/2022 12:51:59 - INFO - codeparrot_training - Step 19929: {'lr': 0.0004821124777548845, 'samples': 10204160, 'steps': 19929, 'loss/train': 2.052112102508545} +03/04/2022 12:52:00 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/04/2022 12:52:04 - INFO - codeparrot_training - Step 19930: {'lr': 0.0004821105064719715, 'samples': 10204672, 'steps': 19930, 'loss/train': 1.6590656042099} +03/04/2022 12:52:07 - INFO - codeparrot_training - Step 19931: {'lr': 0.0004821085350844731, 'samples': 10205184, 'steps': 19931, 'loss/train': 1.5206894874572754} +03/04/2022 12:52:08 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/04/2022 12:52:12 - INFO - codeparrot_training - Step 19932: {'lr': 0.0004821065635923902, 'samples': 10205696, 'steps': 19932, 'loss/train': 1.9014419317245483} +03/04/2022 12:52:16 - INFO - codeparrot_training - Step 19933: {'lr': 0.0004821045919957237, 'samples': 10206208, 'steps': 19933, 'loss/train': 2.737725019454956} +03/04/2022 12:52:17 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/04/2022 12:52:21 - INFO - codeparrot_training - Step 19934: {'lr': 0.00048210262029447425, 'samples': 10206720, 'steps': 19934, 'loss/train': 1.7995728254318237} +03/04/2022 12:52:24 - INFO - codeparrot_training - Step 19935: {'lr': 0.0004821006484886431, 'samples': 10207232, 'steps': 19935, 'loss/train': 2.1414871215820312} +03/04/2022 12:52:25 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/04/2022 12:52:29 - INFO - codeparrot_training - Step 19936: {'lr': 0.000482098676578231, 'samples': 10207744, 'steps': 19936, 'loss/train': 1.7058907747268677} +03/04/2022 12:52:32 - INFO - codeparrot_training - Step 19937: {'lr': 0.0004820967045632388, 'samples': 10208256, 'steps': 19937, 'loss/train': 1.9326146841049194} +03/04/2022 12:52:33 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/04/2022 12:52:38 - INFO - codeparrot_training - Step 19938: {'lr': 0.00048209473244366737, 'samples': 10208768, 'steps': 19938, 'loss/train': 2.094433546066284} +03/04/2022 12:52:41 - INFO - codeparrot_training - Step 19939: {'lr': 0.00048209276021951765, 'samples': 10209280, 'steps': 19939, 'loss/train': 1.6312905550003052} +03/04/2022 12:52:42 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 12:52:46 - INFO - codeparrot_training - Step 19940: {'lr': 0.00048209078789079055, 'samples': 10209792, 'steps': 19940, 'loss/train': 1.726961612701416} +03/04/2022 12:52:49 - INFO - codeparrot_training - Step 19941: {'lr': 0.00048208881545748684, 'samples': 10210304, 'steps': 19941, 'loss/train': 2.3568150997161865} +03/04/2022 12:52:50 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/04/2022 12:52:55 - INFO - codeparrot_training - Step 19942: {'lr': 0.00048208684291960755, 'samples': 10210816, 'steps': 19942, 'loss/train': 1.687651515007019} +03/04/2022 12:52:58 - INFO - codeparrot_training - Step 19943: {'lr': 0.0004820848702771535, 'samples': 10211328, 'steps': 19943, 'loss/train': 1.6996532678604126} +03/04/2022 12:52:58 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/04/2022 12:53:03 - INFO - codeparrot_training - Step 19944: {'lr': 0.0004820828975301256, 'samples': 10211840, 'steps': 19944, 'loss/train': 2.243648052215576} +03/04/2022 12:53:06 - INFO - codeparrot_training - Step 19945: {'lr': 0.0004820809246785247, 'samples': 10212352, 'steps': 19945, 'loss/train': 1.5898510217666626} +03/04/2022 12:53:07 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/04/2022 12:53:12 - INFO - codeparrot_training - Step 19946: {'lr': 0.00048207895172235174, 'samples': 10212864, 'steps': 19946, 'loss/train': 1.6597234010696411} +03/04/2022 12:53:15 - INFO - codeparrot_training - Step 19947: {'lr': 0.00048207697866160755, 'samples': 10213376, 'steps': 19947, 'loss/train': 2.011300563812256} +03/04/2022 12:53:17 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/04/2022 12:53:21 - INFO - codeparrot_training - Step 19948: {'lr': 0.0004820750054962931, 'samples': 10213888, 'steps': 19948, 'loss/train': 1.3744240999221802} +03/04/2022 12:53:24 - INFO - codeparrot_training - Step 19949: {'lr': 0.00048207303222640917, 'samples': 10214400, 'steps': 19949, 'loss/train': 1.8370249271392822} +03/04/2022 12:53:26 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/04/2022 12:53:29 - INFO - codeparrot_training - Step 19950: {'lr': 0.00048207105885195677, 'samples': 10214912, 'steps': 19950, 'loss/train': 2.258617401123047} +03/04/2022 12:53:32 - INFO - codeparrot_training - Step 19951: {'lr': 0.0004820690853729367, 'samples': 10215424, 'steps': 19951, 'loss/train': 1.6942468881607056} +03/04/2022 12:53:34 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/04/2022 12:53:37 - INFO - codeparrot_training - Step 19952: {'lr': 0.00048206711178934994, 'samples': 10215936, 'steps': 19952, 'loss/train': 1.6091153621673584} +03/04/2022 12:53:40 - INFO - codeparrot_training - Step 19953: {'lr': 0.00048206513810119725, 'samples': 10216448, 'steps': 19953, 'loss/train': 2.026717185974121} +03/04/2022 12:53:42 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/04/2022 12:53:46 - INFO - codeparrot_training - Step 19954: {'lr': 0.0004820631643084796, 'samples': 10216960, 'steps': 19954, 'loss/train': 1.5112526416778564} +03/04/2022 12:53:49 - INFO - codeparrot_training - Step 19955: {'lr': 0.00048206119041119787, 'samples': 10217472, 'steps': 19955, 'loss/train': 1.1783504486083984} +03/04/2022 12:53:51 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/04/2022 12:53:54 - INFO - codeparrot_training - Step 19956: {'lr': 0.000482059216409353, 'samples': 10217984, 'steps': 19956, 'loss/train': 2.051764726638794} +03/04/2022 12:53:57 - INFO - codeparrot_training - Step 19957: {'lr': 0.0004820572423029458, 'samples': 10218496, 'steps': 19957, 'loss/train': 2.219106435775757} +03/04/2022 12:53:59 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/04/2022 12:54:03 - INFO - codeparrot_training - Step 19958: {'lr': 0.00048205526809197717, 'samples': 10219008, 'steps': 19958, 'loss/train': 1.5578713417053223} +03/04/2022 12:54:06 - INFO - codeparrot_training - Step 19959: {'lr': 0.000482053293776448, 'samples': 10219520, 'steps': 19959, 'loss/train': 2.391291856765747} +03/04/2022 12:54:07 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/04/2022 12:54:11 - INFO - codeparrot_training - Step 19960: {'lr': 0.0004820513193563593, 'samples': 10220032, 'steps': 19960, 'loss/train': 1.622519612312317} +03/04/2022 12:54:14 - INFO - codeparrot_training - Step 19961: {'lr': 0.00048204934483171176, 'samples': 10220544, 'steps': 19961, 'loss/train': 1.7340925931930542} +03/04/2022 12:54:16 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/04/2022 12:54:19 - INFO - codeparrot_training - Step 19962: {'lr': 0.0004820473702025064, 'samples': 10221056, 'steps': 19962, 'loss/train': 2.8198986053466797} +03/04/2022 12:54:23 - INFO - codeparrot_training - Step 19963: {'lr': 0.000482045395468744, 'samples': 10221568, 'steps': 19963, 'loss/train': 0.433694988489151} +03/04/2022 12:54:24 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/04/2022 12:54:28 - INFO - codeparrot_training - Step 19964: {'lr': 0.0004820434206304256, 'samples': 10222080, 'steps': 19964, 'loss/train': 0.6892015337944031} +03/04/2022 12:54:31 - INFO - codeparrot_training - Step 19965: {'lr': 0.000482041445687552, 'samples': 10222592, 'steps': 19965, 'loss/train': 0.16242405772209167} +03/04/2022 12:54:33 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/04/2022 12:54:37 - INFO - codeparrot_training - Step 19966: {'lr': 0.0004820394706401242, 'samples': 10223104, 'steps': 19966, 'loss/train': 1.3020055294036865} +03/04/2022 12:54:40 - INFO - codeparrot_training - Step 19967: {'lr': 0.0004820374954881429, 'samples': 10223616, 'steps': 19967, 'loss/train': 2.1176605224609375} +03/04/2022 12:54:43 - INFO - codeparrot_training - Step 19968: {'lr': 0.000482035520231609, 'samples': 10224128, 'steps': 19968, 'loss/train': 2.1836509704589844} +03/04/2022 12:54:44 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/04/2022 12:54:48 - INFO - codeparrot_training - Step 19969: {'lr': 0.00048203354487052363, 'samples': 10224640, 'steps': 19969, 'loss/train': 1.9374854564666748} +03/04/2022 12:54:52 - INFO - codeparrot_training - Step 19970: {'lr': 0.00048203156940488745, 'samples': 10225152, 'steps': 19970, 'loss/train': 1.6065590381622314} +03/04/2022 12:54:52 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/04/2022 12:54:57 - INFO - codeparrot_training - Step 19971: {'lr': 0.00048202959383470144, 'samples': 10225664, 'steps': 19971, 'loss/train': 2.0499842166900635} +03/04/2022 12:55:00 - INFO - codeparrot_training - Step 19972: {'lr': 0.00048202761815996646, 'samples': 10226176, 'steps': 19972, 'loss/train': 1.7869300842285156} +03/04/2022 12:55:00 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/04/2022 12:55:05 - INFO - codeparrot_training - Step 19973: {'lr': 0.0004820256423806835, 'samples': 10226688, 'steps': 19973, 'loss/train': 2.4714343547821045} +03/04/2022 12:55:08 - INFO - codeparrot_training - Step 19974: {'lr': 0.00048202366649685325, 'samples': 10227200, 'steps': 19974, 'loss/train': 1.6435937881469727} +03/04/2022 12:55:08 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/04/2022 12:55:14 - INFO - codeparrot_training - Step 19975: {'lr': 0.0004820216905084768, 'samples': 10227712, 'steps': 19975, 'loss/train': 1.4493975639343262} +03/04/2022 12:55:17 - INFO - codeparrot_training - Step 19976: {'lr': 0.00048201971441555485, 'samples': 10228224, 'steps': 19976, 'loss/train': 1.781044363975525} +03/04/2022 12:55:17 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/04/2022 12:55:22 - INFO - codeparrot_training - Step 19977: {'lr': 0.0004820177382180885, 'samples': 10228736, 'steps': 19977, 'loss/train': 1.7119731903076172} +03/04/2022 12:55:25 - INFO - codeparrot_training - Step 19978: {'lr': 0.00048201576191607843, 'samples': 10229248, 'steps': 19978, 'loss/train': 1.9400750398635864} +03/04/2022 12:55:25 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/04/2022 12:55:31 - INFO - codeparrot_training - Step 19979: {'lr': 0.00048201378550952575, 'samples': 10229760, 'steps': 19979, 'loss/train': 2.0348732471466064} +03/04/2022 12:55:33 - INFO - codeparrot_training - Skipping example with length 109 (seq_length=1024) +03/04/2022 12:55:36 - INFO - codeparrot_training - Step 19980: {'lr': 0.0004820118089984312, 'samples': 10230272, 'steps': 19980, 'loss/train': 1.7032426595687866} +03/04/2022 12:55:39 - INFO - codeparrot_training - Step 19981: {'lr': 0.0004820098323827957, 'samples': 10230784, 'steps': 19981, 'loss/train': 1.5216456651687622} +03/04/2022 12:55:42 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/04/2022 12:55:44 - INFO - codeparrot_training - Step 19982: {'lr': 0.0004820078556626202, 'samples': 10231296, 'steps': 19982, 'loss/train': 1.7924262285232544} +03/04/2022 12:55:47 - INFO - codeparrot_training - Step 19983: {'lr': 0.0004820058788379055, 'samples': 10231808, 'steps': 19983, 'loss/train': 2.1892576217651367} +03/04/2022 12:55:50 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/04/2022 12:55:53 - INFO - codeparrot_training - Step 19984: {'lr': 0.0004820039019086525, 'samples': 10232320, 'steps': 19984, 'loss/train': 1.3113913536071777} +03/04/2022 12:55:56 - INFO - codeparrot_training - Step 19985: {'lr': 0.00048200192487486216, 'samples': 10232832, 'steps': 19985, 'loss/train': 2.098022699356079} +03/04/2022 12:55:59 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/04/2022 12:56:01 - INFO - codeparrot_training - Step 19986: {'lr': 0.00048199994773653535, 'samples': 10233344, 'steps': 19986, 'loss/train': 2.157574415206909} +03/04/2022 12:56:04 - INFO - codeparrot_training - Step 19987: {'lr': 0.0004819979704936729, 'samples': 10233856, 'steps': 19987, 'loss/train': 1.7193831205368042} +03/04/2022 12:56:07 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/04/2022 12:56:09 - INFO - codeparrot_training - Step 19988: {'lr': 0.00048199599314627576, 'samples': 10234368, 'steps': 19988, 'loss/train': 1.6702628135681152} +03/04/2022 12:56:13 - INFO - codeparrot_training - Step 19989: {'lr': 0.00048199401569434477, 'samples': 10234880, 'steps': 19989, 'loss/train': 1.913305640220642} +03/04/2022 12:56:15 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/04/2022 12:56:18 - INFO - codeparrot_training - Step 19990: {'lr': 0.00048199203813788086, 'samples': 10235392, 'steps': 19990, 'loss/train': 2.1595559120178223} +03/04/2022 12:56:21 - INFO - codeparrot_training - Step 19991: {'lr': 0.00048199006047688496, 'samples': 10235904, 'steps': 19991, 'loss/train': 1.6218160390853882} +03/04/2022 12:56:24 - INFO - codeparrot_training - Step 19992: {'lr': 0.0004819880827113579, 'samples': 10236416, 'steps': 19992, 'loss/train': 1.8791284561157227} +03/04/2022 12:56:24 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/04/2022 12:56:30 - INFO - codeparrot_training - Step 19993: {'lr': 0.0004819861048413006, 'samples': 10236928, 'steps': 19993, 'loss/train': 2.498661518096924} +03/04/2022 12:56:33 - INFO - codeparrot_training - Step 19994: {'lr': 0.00048198412686671394, 'samples': 10237440, 'steps': 19994, 'loss/train': 0.9555248618125916} +03/04/2022 12:56:33 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/04/2022 12:56:38 - INFO - codeparrot_training - Step 19995: {'lr': 0.0004819821487875988, 'samples': 10237952, 'steps': 19995, 'loss/train': 2.326756238937378} +03/04/2022 12:56:41 - INFO - codeparrot_training - Step 19996: {'lr': 0.0004819801706039561, 'samples': 10238464, 'steps': 19996, 'loss/train': 1.367641806602478} +03/04/2022 12:56:41 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/04/2022 12:56:46 - INFO - codeparrot_training - Step 19997: {'lr': 0.0004819781923157867, 'samples': 10238976, 'steps': 19997, 'loss/train': 2.3526673316955566} +03/04/2022 12:56:50 - INFO - codeparrot_training - Step 19998: {'lr': 0.00048197621392309154, 'samples': 10239488, 'steps': 19998, 'loss/train': 1.8712676763534546} +03/04/2022 12:56:50 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/04/2022 12:56:55 - INFO - codeparrot_training - Step 19999: {'lr': 0.00048197423542587143, 'samples': 10240000, 'steps': 19999, 'loss/train': 1.2787634134292603} +03/04/2022 12:56:55 - INFO - codeparrot_training - Evaluating and saving model checkpoint